Skip to content

Commit

Permalink
wip: add cuml solver
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharras committed Sep 13, 2023
1 parent 0698c7f commit a23f683
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 6 deletions.
100 changes: 100 additions & 0 deletions benchmarks/kmeans/solvers/cuml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from contextlib import nullcontext

from benchopt import BaseSolver, safe_import_context
from benchopt.stopping_criterion import SingleRunCriterion

with safe_import_context() as import_ctx:
import cuml
import cupy
from cuml.common.device_selection import using_device_type


class Solver(BaseSolver):
    """Benchopt solver that fits ``cuml.KMeans`` on the requested device.

    The ``device`` parameter selects between running cuml under its
    ``using_device_type("cpu")`` context and running it with no device
    context at all (the ``"gpu"`` setting).
    """

    name = "cuml"
    requirements = ["cuml"]

    # `device` is read back as `self.device` by the methods below.
    parameters = dict(device=["cpu", "gpu"])

    stopping_criterion = SingleRunCriterion(1)

    def skip(self, **objective_dict):
        """Tell whether this solver must be skipped for the given objective.

        Parameters
        ----------
        **objective_dict
            Objective parameters; only ``objective_dict["init"]`` is read.

        Returns
        -------
        tuple
            ``(True, reason)`` when ``init`` is the string ``"k-means++"``,
            ``(False, None)`` otherwise.
        """
        init = objective_dict["init"]
        # Objects exposing `copy` (presumably explicit arrays of centers) are
        # never compared against the string: only a named init can be skipped.
        if not hasattr(init, "copy") and (init == "k-means++"):
            return True, (
                "Support for k-means++ is not implemented in cuml. cuml only "
                "implements k-means|| whose walltime can't be compared with "
                "k-means++. "
            )

        # Always hand back a (skip, reason) pair so that the caller can unpack
        # the result, instead of implicitly returning None.
        return False, None

    def set_objective(
        self,
        X,
        sample_weight,
        init,
        n_clusters,
        n_init,
        max_iter,
        tol,
        verbose,
        algorithm,
        random_state,
    ):
        """Store the objective parameters on the solver.

        ``X``, ``sample_weight`` and ``init`` are copied when they expose a
        ``copy`` attribute (``X`` is always copied): with ``.copy()`` for the
        ``"cpu"`` device, and with ``cupy.asarray(..., copy=True)`` otherwise.
        All other parameters are stored unchanged.
        """
        if self.device == "cpu":
            # Copy the data before running the benchmark to ensure that no
            # unfortunate side effects can happen
            self.X = X.copy()
            if hasattr(sample_weight, "copy"):
                sample_weight = sample_weight.copy()
            if hasattr(init, "copy"):
                init = init.copy()
        else:
            # NOTE(review): confirm that the installed cupy version accepts the
            # `copy` keyword in `cupy.asarray`.
            self.X = cupy.asarray(X, copy=True)
            if hasattr(sample_weight, "copy"):
                sample_weight = cupy.asarray(sample_weight, copy=True)
            if hasattr(init, "copy"):
                init = cupy.asarray(init, copy=True)

        # Set for every device: `warm_up` and `run` both read this attribute,
        # so leaving it unset on the non-cpu branch raises AttributeError.
        self.sample_weight = sample_weight
        self.init = init
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.algorithm = algorithm
        self.random_state = random_state

    def _fit_kmeans(self, max_iter):
        """Fit ``cuml.KMeans`` on the stored data and return the estimator.

        Parameters
        ----------
        max_iter
            Value passed as ``max_iter`` to the estimator; every other
            hyper-parameter comes from the attributes set by
            ``set_objective``.
        """
        device_context = (
            using_device_type("cpu") if self.device == "cpu" else nullcontext()
        )
        with device_context:
            return cuml.KMeans(
                n_clusters=self.n_clusters,
                init=self.init,
                n_init=self.n_init,
                max_iter=max_iter,
                tol=self.tol,
                verbose=self.verbose,
                random_state=self.random_state,
                copy_x=False,
                algorithm=self.algorithm,
            ).fit(self.X, sample_weight=self.sample_weight)

    def warm_up(self):
        """Run a single-iteration fit and discard the result."""
        self._fit_kmeans(max_iter=1)

    def run(self, _):
        """Run the full fit and record ``inertia_`` and ``n_iter_``.

        The positional argument is ignored.
        """
        estimator = self._fit_kmeans(max_iter=self.max_iter)
        self.inertia_ = estimator.inertia_
        self.n_iter_ = estimator.n_iter_

    def get_result(self):
        """Return the values recorded by the last call to ``run``."""
        return {"inertia": self.inertia_, "n_iter": self.n_iter_}
16 changes: 10 additions & 6 deletions benchmarks/kmeans/solvers/scikit_learn_intelex.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,16 @@ def skip(self, **objective_dict):
f"This {self.device} device has no support for float64 compute"
)

init = objective_dict["init"]
if not hasattr(init, "copy") and (init == "k-means++"):
return True, (
"support for k-means++ is not implemented in scikit-learn-intelex "
"for devices other than cpu."
)
init = objective_dict["init"]
if (
(not hasattr(init, "copy"))
and (init == "k-means++")
and (self.device != "cpu")
):
return True, (
"support for k-means++ is not implemented in scikit-learn-intelex "
"for devices other than cpu."
)

sample_weight = objective_dict["sample_weight"]
if sample_weight is not None:
Expand Down

0 comments on commit a23f683

Please sign in to comment.