From 974d307cf1524d7f327a79594cebcbb5de1e6e8f Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 18 Oct 2023 17:21:25 +0200
Subject: [PATCH] Always pass numpy inputs to scikit-learn-intelex, rather
 than usm_ndarray inputs, which are currently not well supported

---
 .../kmeans/solvers/scikit_learn_intelex.py | 76 ++++++++++++-------
 1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/benchmarks/kmeans/solvers/scikit_learn_intelex.py b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
index 086a8f4..b195d49 100644
--- a/benchmarks/kmeans/solvers/scikit_learn_intelex.py
+++ b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
@@ -1,3 +1,4 @@
+from contextlib import nullcontext
 from importlib.metadata import version
 
 from benchopt import BaseSolver, safe_import_context
@@ -6,9 +7,9 @@ with safe_import_context() as import_ctx:
     # isort: off
     import dpctl
-    import dpctl.tensor as dpt
     import numpy as np
     from sklearnex.cluster import KMeans
+    from sklearnex import config_context
 
     # isort: on
 
 
@@ -78,23 +79,40 @@ def set_objective(
         algorithm,
         random_state,
     ):
+        # TODO: the overhead of copying the data from host to device could be
+        # eliminated if scikit-learn-intelex could just take usm_ndarray objects as
+        # input and run the compute directly on the underlying memory buffer. The
+        # documentation at
+        # https://intel.github.io/scikit-learn-intelex/latest/oneapi-gpu.html#device-offloading # noqa
+        # suggests that this is the intended behavior; in practice, however,
+        # scikit-learn-intelex currently always performs copies under the hood
+        # no matter what, and sometimes fails at doing so. See e.g. the
+        # issue at
+        # https://github.com/intel/scikit-learn-intelex/issues/1534#issuecomment-1766266299 # noqa
+
+        # if self.runtime != "numpy":
+        #     device = dpctl.SyclDevice(f"{self.runtime}:{self.device}")
+        #     self.X = dpt.asarray(X, copy=True, device=device)
+
+        #     if hasattr(sample_weight, "copy"):
+        #         sample_weight = dpt.asarray(sample_weight, copy=True, device=device)
+
+        #     if hasattr(init, "copy"):
+        #         init = dpt.asarray(init, copy=True, device=device)
+        # else:
+        #     self.X = X.copy()
+        #     if hasattr(sample_weight, "copy"):
+        #         sample_weight = sample_weight.copy()
+        #     if hasattr(init, "copy"):
+        #         init = init.copy()
+
         # Copy the data before running the benchmark to ensure that no unfortunate
         # side effects can happen
-        if self.runtime != "numpy":
-            device = device = dpctl.SyclDevice(f"{self.runtime}:{self.device}")
-            self.X = dpt.asarray(X, copy=True, device=device)
-
-            if hasattr(sample_weight, "copy"):
-                sample_weight = dpt.asarray(sample_weight, copy=True, device=device)
-
-            if hasattr(init, "copy"):
-                init = dpt.asarray(init, copy=True, device=device)
-        else:
-            self.X = X.copy()
-            if hasattr(sample_weight, "copy"):
-                sample_weight = sample_weight.copy()
-            if hasattr(init, "copy"):
-                init = init.copy()
+        self.X = X.copy()
+        if hasattr(sample_weight, "copy"):
+            sample_weight = sample_weight.copy()
+        if hasattr(init, "copy"):
+            init = init.copy()
 
         self.sample_weight = sample_weight
         self.init = init
@@ -120,17 +138,21 @@ def warm_up(self):
         ).fit(self.X, y=None, sample_weight=self.sample_weight)
 
     def run(self, _):
-        estimator = KMeans(
-            n_clusters=self.n_clusters,
-            init=self.init,
-            n_init=self.n_init,
-            max_iter=self.max_iter,
-            tol=self.tol,
-            verbose=self.verbose,
-            random_state=self.random_state,
-            copy_x=False,
-            algorithm=self.algorithm,
-        ).fit(self.X, y=None, sample_weight=self.sample_weight)
+        with nullcontext() if (self.runtime == "numpy") else config_context(
+            target_offload=f"{self.runtime}:{self.device}"
+        ):
+            estimator = KMeans(
+                n_clusters=self.n_clusters,
+                init=self.init,
+                n_init=self.n_init,
+                max_iter=self.max_iter,
+                tol=self.tol,
+                verbose=self.verbose,
+                random_state=self.random_state,
+                copy_x=False,
+                algorithm=self.algorithm,
+            ).fit(self.X, y=None, sample_weight=self.sample_weight)
+
         self.inertia_ = estimator.inertia_
         self.n_iter_ = estimator.n_iter_