From 974d307cf1524d7f327a79594cebcbb5de1e6e8f Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Wed, 18 Oct 2023 17:21:25 +0200
Subject: [PATCH] Always pass numpy inputs to scikit-learn-intelex, rather
 than usm_ndarray inputs, which are currently not well supported

---
 .../kmeans/solvers/scikit_learn_intelex.py | 76 ++++++++++++-------
 1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/benchmarks/kmeans/solvers/scikit_learn_intelex.py b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
index 086a8f4..b195d49 100644
--- a/benchmarks/kmeans/solvers/scikit_learn_intelex.py
+++ b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
@@ -1,3 +1,4 @@
+from contextlib import nullcontext
 from importlib.metadata import version
 
 from benchopt import BaseSolver, safe_import_context
@@ -6,9 +7,9 @@ with safe_import_context() as import_ctx:
     # isort: off
     import dpctl
-    import dpctl.tensor as dpt
     import numpy as np
     from sklearnex.cluster import KMeans
+    from sklearnex import config_context
 
     # isort: on
 
 
@@ -78,23 +79,40 @@ def set_objective(
         algorithm,
         random_state,
     ):
+        # TODO: the overhead of copying the data from host to device could be
+        # eliminated if scikit-learn-intelex could just take usm_ndarray objects as
+        # input and run the compute directly on the underlying memory buffer. The
+        # documentation at
+        # https://intel.github.io/scikit-learn-intelex/latest/oneapi-gpu.html#device-offloading # noqa
+        # suggests that this is the intended behavior; in practice, however,
+        # scikit-learn-intelex currently always performs copies under the hood
+        # no matter what, and sometimes fails at doing so. See e.g. the
+        # issue at
+        # https://github.com/intel/scikit-learn-intelex/issues/1534#issuecomment-1766266299 # noqa
+
+        # if self.runtime != "numpy":
+        #     device = dpctl.SyclDevice(f"{self.runtime}:{self.device}")
+        #     self.X = dpt.asarray(X, copy=True, device=device)
+
+        #     if hasattr(sample_weight, "copy"):
+        #         sample_weight = dpt.asarray(sample_weight, copy=True, device=device)
+
+        #     if hasattr(init, "copy"):
+        #         init = dpt.asarray(init, copy=True, device=device)
+        # else:
+        #     self.X = X.copy()
+        #     if hasattr(sample_weight, "copy"):
+        #         sample_weight = sample_weight.copy()
+        #     if hasattr(init, "copy"):
+        #         init = init.copy()
+
         # Copy the data before running the benchmark to ensure that no unfortunate
         # side effects can happen
-        if self.runtime != "numpy":
-            device = device = dpctl.SyclDevice(f"{self.runtime}:{self.device}")
-            self.X = dpt.asarray(X, copy=True, device=device)
-
-            if hasattr(sample_weight, "copy"):
-                sample_weight = dpt.asarray(sample_weight, copy=True, device=device)
-
-            if hasattr(init, "copy"):
-                init = dpt.asarray(init, copy=True, device=device)
-        else:
-            self.X = X.copy()
-            if hasattr(sample_weight, "copy"):
-                sample_weight = sample_weight.copy()
-            if hasattr(init, "copy"):
-                init = init.copy()
+        self.X = X.copy()
+        if hasattr(sample_weight, "copy"):
+            sample_weight = sample_weight.copy()
+        if hasattr(init, "copy"):
+            init = init.copy()
 
         self.sample_weight = sample_weight
         self.init = init
@@ -120,17 +138,21 @@ def warm_up(self):
         ).fit(self.X, y=None, sample_weight=self.sample_weight)
 
     def run(self, _):
-        estimator = KMeans(
-            n_clusters=self.n_clusters,
-            init=self.init,
-            n_init=self.n_init,
-            max_iter=self.max_iter,
-            tol=self.tol,
-            verbose=self.verbose,
-            random_state=self.random_state,
-            copy_x=False,
-            algorithm=self.algorithm,
-        ).fit(self.X, y=None, sample_weight=self.sample_weight)
+        with nullcontext() if (self.runtime == "numpy") else config_context(
+            target_offload=f"{self.runtime}:{self.device}"
+        ):
+            estimator = KMeans(
+                n_clusters=self.n_clusters,
+                init=self.init,
+                n_init=self.n_init,
+                max_iter=self.max_iter,
+                tol=self.tol,
+                verbose=self.verbose,
+                random_state=self.random_state,
+                copy_x=False,
+                algorithm=self.algorithm,
+            ).fit(self.X, y=None, sample_weight=self.sample_weight)
+
         self.inertia_ = estimator.inertia_
         self.n_iter_ = estimator.n_iter_