diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
index d6870ae1c..73baae6be 100644
--- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -66,13 +66,13 @@ class hnsw_lib : public algo<T> {
   struct build_param {
     int m;
     int ef_construction;
-    int num_threads = omp_get_num_procs();
+    int num_threads = omp_get_max_threads();
   };
 
   using search_param_base = typename algo<T>::search_param;
   struct search_param : public search_param_base {
     int ef;
-    int num_threads = 1;
+    int num_threads = omp_get_max_threads();
   };
 
   hnsw_lib(Metric metric, int dim, const build_param& param);
@@ -175,12 +175,7 @@ void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void*
   auto param     = dynamic_cast<const search_param&>(param_);
   appr_alg_->ef_ = param.ef;
   num_threads_   = param.num_threads;
-  // bench_mode_ = param.metric_objective;
   bench_mode_ = Mode::kLatency;  // TODO(achirkin): pass the benchmark mode in the algo parameters
-
-  // Create a pool if multiple query threads have been set and the pool hasn't been created already
-  bool create_pool = (bench_mode_ == Mode::kLatency && num_threads_ > 1 && !thread_pool_);
-  if (create_pool) { thread_pool_ = std::make_shared<fixed_thread_pool>(num_threads_); }
 }
 
 template <typename T>
@@ -192,7 +187,10 @@ void hnsw_lib<T>::search(
     get_search_knn_results(query + i * dim_, k, indices + i * k, distances + i * k);
   };
   if (bench_mode_ == Mode::kLatency && num_threads_ > 1) {
-    thread_pool_->submit(f, batch_size);
+#pragma omp parallel for num_threads(num_threads_)
+    for (int i = 0; i < batch_size; i++) {
+      f(i);
+    }
   } else {
     for (int i = 0; i < batch_size; i++) {
       f(i);
diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp
index 6ab8631d4..787646ee6 100644
--- a/cpp/src/neighbors/detail/hnsw.hpp
+++ b/cpp/src/neighbors/detail/hnsw.hpp
@@ -489,26 +489,15 @@ void search(raft::resources const& res,
   auto const* hnswlib_index =
     reinterpret_cast<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type> const*>(
       idx.get_index());
+  auto num_threads = params.num_threads == 0 ? omp_get_max_threads() : params.num_threads;
 
-  // when num_threads == 0, automatically maximize parallelism
-  if (params.num_threads) {
-#pragma omp parallel for num_threads(params.num_threads)
-    for (int64_t i = 0; i < queries.extent(0); ++i) {
-      get_search_knn_results(hnswlib_index,
-                             queries.data_handle() + i * queries.extent(1),
-                             neighbors.extent(1),
-                             neighbors.data_handle() + i * neighbors.extent(1),
-                             distances.data_handle() + i * distances.extent(1));
-    }
-  } else {
-#pragma omp parallel for
-    for (int64_t i = 0; i < queries.extent(0); ++i) {
-      get_search_knn_results(hnswlib_index,
-                             queries.data_handle() + i * queries.extent(1),
-                             neighbors.extent(1),
-                             neighbors.data_handle() + i * neighbors.extent(1),
-                             distances.data_handle() + i * distances.extent(1));
-    }
+#pragma omp parallel for num_threads(num_threads)
+  for (int64_t i = 0; i < queries.extent(0); ++i) {
+    get_search_knn_results(hnswlib_index,
+                           queries.data_handle() + i * queries.extent(1),
+                           neighbors.extent(1),
+                           neighbors.data_handle() + i * neighbors.extent(1),
+                           distances.data_handle() + i * distances.extent(1));
   }
 }
 
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
index 063502290..bf5cd35d3 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml
@@ -8,7 +8,7 @@ groups:
       graph_degree: [32, 64, 96, 128]
       intermediate_graph_degree: [32, 64, 96, 128]
       graph_build_algo: ["NN_DESCENT"]
-      hierarchy: ["none", "cpu"]
+      hierarchy: ["none", "cpu", "gpu"]
       ef_construction: [64, 128, 256, 512]
     search:
       ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800]
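
Not part of the diff above: a minimal standalone sketch of the OpenMP dispatch pattern both C++ files switch to, where a requested thread count of 0 means "use every available core" and the whole query batch runs in a single parallel for instead of going through a hand-rolled thread pool. The names run_batch and process_query are hypothetical stand-ins for the wrapped search call; omp_get_max_threads and omp_get_thread_num are the only real OpenMP calls used.

// Sketch only; compile with e.g. `g++ -fopenmp sketch.cpp`.
#include <omp.h>

#include <cstdint>
#include <cstdio>

// Stand-in for the per-query work (get_search_knn_results in the diff).
static void process_query(int64_t i)
{
  std::printf("query %lld handled by thread %d\n",
              static_cast<long long>(i), omp_get_thread_num());
}

// requested_threads == 0 resolves to all available threads, mirroring
// `params.num_threads == 0 ? omp_get_max_threads() : params.num_threads`.
void run_batch(int64_t batch_size, int requested_threads)
{
  int num_threads = requested_threads == 0 ? omp_get_max_threads() : requested_threads;
#pragma omp parallel for num_threads(num_threads)
  for (int64_t i = 0; i < batch_size; ++i) {
    process_query(i);  // queries are independent, so iterations may run concurrently
  }
}

int main()
{
  run_batch(8, 0);
  return 0;
}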