wip: add cuml solver

soda-inria · Sep 13, 2023 · a23f683 · a23f683
1 parent 0698c7f
commit a23f683
Show file tree

Hide file tree

Showing 2 changed files with 110 additions and 6 deletions.
diff --git a/benchmarks/kmeans/solvers/cuml.py b/benchmarks/kmeans/solvers/cuml.py
@@ -0,0 +1,100 @@
+from contextlib import nullcontext
+
+from benchopt import BaseSolver, safe_import_context
+from benchopt.stopping_criterion import SingleRunCriterion
+
+with safe_import_context() as import_ctx:
+    import cuml
+    import cupy
+    from cuml.common.device_selection import using_device_type
+
+
+class Solver(BaseSolver):
+    """Benchopt solver that fits k-means with ``cuml.KMeans``.
+
+    The ``device`` parameter selects how the fit is run: with ``"cpu"`` the
+    inputs are kept as given (copied) and the fit is wrapped in
+    ``using_device_type("cpu")``; otherwise the inputs are copied into cupy
+    arrays and the fit runs without any device context.
+    """
+
+    name = "cuml"
+    requirements = ["cuml"]
+
+    parameters = dict(device=["cpu", "gpu"])
+
+    stopping_criterion = SingleRunCriterion(1)
+
+    def skip(self, **objective_dict):
+        """Tell benchopt whether this solver must be skipped.
+
+        Parameters
+        ----------
+        **objective_dict
+            Objective parameters; only ``"init"`` is inspected here.
+
+        Returns
+        -------
+        tuple
+            ``(True, reason)`` when ``init`` is the string ``"k-means++"``,
+            ``(False, None)`` otherwise.
+        """
+        init = objective_dict["init"]
+        # Array-like inits expose ``copy``; checking it first avoids comparing
+        # an array against a string.
+        if not hasattr(init, "copy") and (init == "k-means++"):
+            return True, (
+                "Support for k-means++ is not implemented in cuml. cuml only "
+                "implements k-means|| whose walltime can't be compared with "
+                "k-means++. "
+            )
+
+        # Always return a 2-tuple: the implicit ``None`` of the original can't
+        # be unpacked as ``(skip, reason)`` by the caller.
+        return False, None
+
+    def set_objective(
+        self,
+        X,
+        sample_weight,
+        init,
+        n_clusters,
+        n_init,
+        max_iter,
+        tol,
+        verbose,
+        algorithm,
+        random_state,
+    ):
+        """Store private copies of the objective data and the k-means settings.
+
+        ``X`` is always copied. ``sample_weight`` and ``init`` are copied only
+        when they expose a ``copy`` attribute (i.e. are array-like); other
+        values such as ``None`` or a string are stored as given.
+        """
+        if self.device == "cpu":
+            # Copy the data before running the benchmark to ensure that no
+            # unfortunate side effects can happen
+            self.X = X.copy()
+            if hasattr(sample_weight, "copy"):
+                sample_weight = sample_weight.copy()
+            if hasattr(init, "copy"):
+                init = init.copy()
+
+        else:
+            # ``cupy.array`` is the copying constructor; ``cupy.asarray`` is
+            # not guaranteed to accept a ``copy`` keyword.
+            self.X = cupy.array(X, copy=True)
+            if hasattr(sample_weight, "copy"):
+                sample_weight = cupy.array(sample_weight, copy=True)
+            if hasattr(init, "copy"):
+                init = cupy.array(init, copy=True)
+
+        # Assigned for every device: ``warm_up`` and ``run`` both read it.
+        self.sample_weight = sample_weight
+        self.init = init
+        self.n_clusters = n_clusters
+        self.n_init = n_init
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.algorithm = algorithm
+        self.random_state = random_state
+
+    def _fit(self, max_iter):
+        """Fit a ``cuml.KMeans`` on the stored data and return the fit result.
+
+        ``max_iter`` is the only setting that differs between ``warm_up`` and
+        ``run``; every other argument comes from ``set_objective``.
+        """
+        if self.device == "cpu":
+            device_context = using_device_type("cpu")
+        else:
+            device_context = nullcontext()
+
+        with device_context:
+            # NOTE(review): confirm that ``cuml.KMeans`` accepts ``copy_x`` and
+            # ``algorithm``; they are forwarded unchanged from the original.
+            return cuml.KMeans(
+                n_clusters=self.n_clusters,
+                init=self.init,
+                n_init=self.n_init,
+                max_iter=max_iter,
+                tol=self.tol,
+                verbose=self.verbose,
+                random_state=self.random_state,
+                copy_x=False,
+                algorithm=self.algorithm,
+            ).fit(self.X, sample_weight=self.sample_weight)
+
+    def warm_up(self):
+        """Run a one-iteration fit and discard the result."""
+        self._fit(max_iter=1)
+
+    def run(self, _):
+        """Run the full fit and record ``inertia_`` and ``n_iter_``."""
+        estimator = self._fit(max_iter=self.max_iter)
+        self.inertia_ = estimator.inertia_
+        self.n_iter_ = estimator.n_iter_
+
+    def get_result(self):
+        """Return the inertia and iteration count recorded by ``run``."""
+        return {"inertia": self.inertia_, "n_iter": self.n_iter_}
diff --git a/benchmarks/kmeans/solvers/scikit_learn_intelex.py b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
@@ -53,12 +53,16 @@ def skip(self, **objective_dict):
                     f"This {self.device} device has no support for float64 compute"
                 )
 
-            init = objective_dict["init"]
-            if not hasattr(init, "copy") and (init == "k-means++"):
-                return True, (
-                    "support for k-means++ is not implemented in scikit-learn-intelex "
-                    "for devices other than cpu."
-                )
+        init = objective_dict["init"]
+        if (
+            (not hasattr(init, "copy"))
+            and (init == "k-means++")
+            and (self.device != "cpu")
+        ):
+            return True, (
+                "support for k-means++ is not implemented in scikit-learn-intelex "
+                "for devices other than cpu."
+            )
 
         sample_weight = objective_dict["sample_weight"]
         if sample_weight is not None: