From 3ed52b001cc87b562405910340d738cb9a343341 Mon Sep 17 00:00:00 2001 From: Di-Is Date: Sun, 19 Jan 2025 00:49:41 -0800 Subject: [PATCH 1/2] Add `ngpu` default argument to `knn_ground_truth` (#4123) Summary: This pull request introduces two new arguments with default values, `ngpu=-1` and `shard=False`, to the `knn_ground_truth` function in `faiss.contrib`. With `ngpu=-1` (the default) the function uses every GPU reported by `faiss.get_num_gpus()`, `ngpu=0` forces a CPU-only computation, and `shard` is forwarded to `GpuMultipleClonerOptions.shard`. ## Purpose of Change ### Bug Fix In the current implementation, running tests under the tests directory (CPU tests) in an environment with faiss-gpu installed would inadvertently use the GPU and cause unintended behavior. This pull request prevents the GPU from being used during CPU-only tests by explicitly controlling GPU allocation via the `ngpu` parameter. ### API Consistency Other functions that call `faiss.get_num_gpus` in `faiss.contrib`, such as `range_search_max_results` and `range_ground_truth`, already include the `ngpu` argument. Adding this parameter to `knn_ground_truth` will ensure consistency across the API, reduce potential confusion, and improve ease of use. Differential Revision: D68199506 Pulled By: junjieqi --- contrib/exhaustive_search.py | 13 +++++++++---- tests/test_contrib.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/contrib/exhaustive_search.py b/contrib/exhaustive_search.py index 808453e6c1..257427777d 100644 --- a/contrib/exhaustive_search.py +++ b/contrib/exhaustive_search.py @@ -11,7 +11,7 @@ LOG = logging.getLogger(__name__) -def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2): +def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2, shard=False, ngpu=-1): """Computes the exact KNN search results for a dataset that possibly does not fit in RAM but for which we have an iterator that returns it block by block. 
@@ -23,9 +23,14 @@ def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2): rh = faiss.ResultHeap(nq, k, keep_max=keep_max) index = faiss.IndexFlat(d, metric_type) - if faiss.get_num_gpus(): - LOG.info('running on %d GPUs' % faiss.get_num_gpus()) - index = faiss.index_cpu_to_all_gpus(index) + if ngpu == -1: + ngpu = faiss.get_num_gpus() + + if ngpu: + LOG.info('running on %d GPUs' % ngpu) + co = faiss.GpuMultipleClonerOptions() + co.shard = shard + index = faiss.index_cpu_to_all_gpus(index, co=co, ngpu=ngpu) # compute ground-truth by blocks, and add to heaps i0 = 0 diff --git a/tests/test_contrib.py b/tests/test_contrib.py index ba185f92b2..fb778afb81 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -50,7 +50,7 @@ def matrix_iterator(xb, bs): yield xb[i0:i0 + bs] Dnew, Inew = knn_ground_truth( - xq, matrix_iterator(xb, 1000), 10, metric) + xq, matrix_iterator(xb, 1000), 10, metric, ngpu=0) np.testing.assert_array_equal(Iref, Inew) # decimal = 4 required when run on GPU From 7e7f94d738077f419662ad01393ee994390e8f92 Mon Sep 17 00:00:00 2001 From: Junjie Qi Date: Sun, 19 Jan 2025 00:49:41 -0800 Subject: [PATCH 2/2] add test to cover GPU (#4130) Summary: same as title Differential Revision: D68388863 --- tests/test_contrib.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_contrib.py b/tests/test_contrib.py index fb778afb81..ca5d2bcca7 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -35,7 +35,7 @@ class TestComputeGT(unittest.TestCase): - def do_test_compute_GT(self, metric=faiss.METRIC_L2): + def do_test_compute_GT(self, metric=faiss.METRIC_L2, ngpu=0): d = 64 xt, xb, xq = get_dataset_2(d, 0, 10000, 100) @@ -50,7 +50,7 @@ def matrix_iterator(xb, bs): yield xb[i0:i0 + bs] Dnew, Inew = knn_ground_truth( - xq, matrix_iterator(xb, 1000), 10, metric, ngpu=0) + xq, matrix_iterator(xb, 1000), 10, metric, ngpu=ngpu) np.testing.assert_array_equal(Iref, Inew) # decimal = 4 required when run 
on GPU @@ -62,6 +62,12 @@ def test_compute_GT(self): def test_compute_GT_ip(self): self.do_test_compute_GT(faiss.METRIC_INNER_PRODUCT) + def test_compute_GT_gpu(self): + self.do_test_compute_GT(ngpu=-1) + + def test_compute_GT_ip_gpu(self): + self.do_test_compute_GT(faiss.METRIC_INNER_PRODUCT, ngpu=-1) + class TestDatasets(unittest.TestCase): """here we test only the synthetic dataset. Datasets that require