diff --git a/CMakeLists.txt b/CMakeLists.txt index 510e74ae6..56e876a5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ knowhere_option(WITH_DISKANN "Build with diskann index" OFF) knowhere_option(WITH_BENCHMARK "Build with benchmark" OFF) knowhere_option(WITH_COVERAGE "Build with coverage" OFF) knowhere_option(WITH_CCACHE "Build with ccache" ON) +knowhere_option(WITH_PROFILER "Build with profiler" OFF) if(KNOWHERE_VERSION) message(STATUS "Building KNOWHERE version: ${KNOWHERE_VERSION}") @@ -46,6 +47,7 @@ if(WITH_CCACHE) endif() if(USE_CUDA) + add_definitions(-DUSE_CUDA) set(CMAKE_CUDA_ARCHITECTURES 75;70;61;60) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) @@ -90,8 +92,8 @@ else() endif() if(NOT USE_CUDA) - knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/ivf_gpu/*.cc - src/index/flat_gpu/*.cc) + knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/flat_gpu/*.cc + src/index/ivf_gpu/*.cc) list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_GPU_SRCS}) endif() diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 3896126a7..ebdc7c228 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -9,6 +9,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. 
See the License for the specific language governing permissions and limitations under the License +include_directories(${CMAKE_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR}/include) include_directories(/usr/local/hdf5/include) @@ -24,12 +25,12 @@ set(depend_libs ${LAPACK_LIBRARIES} ) -if ( LINUX AND ENABLE_PROFILING ) - set( depend_libs +if(WITH_PROFILER) + set(depend_libs ${depend_libs} - gperftools - ) -endif () + tcmalloc_and_profiler + ) +endif() #============================================================================== macro(benchmark_test target file) diff --git a/benchmark/hdf5/benchmark_knowhere_float.cpp b/benchmark/hdf5/benchmark_knowhere_float.cpp index 48fd5a138..055ce3135 100644 --- a/benchmark/hdf5/benchmark_knowhere_float.cpp +++ b/benchmark/hdf5/benchmark_knowhere_float.cpp @@ -18,6 +18,8 @@ #include "knowhere/comp/knowhere_config.h" #include "knowhere/dataset.h" +const int32_t GPU_DEVICE_ID = 0; + class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Test { public: void @@ -108,11 +110,18 @@ class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Te cfg_[knowhere::meta::METRIC_TYPE] = metric_type_; knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2); printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold()); +#ifdef USE_CUDA + knowhere::KnowhereConfig::InitGPUResource(GPU_DEVICE_ID); + cfg_[knowhere::meta::DEVICE_ID] = GPU_DEVICE_ID; +#endif } void TearDown() override { free_all(); +#ifdef USE_CUDA + knowhere::KnowhereConfig::FreeGPUResource(); +#endif } protected: @@ -138,7 +147,11 @@ class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Te }; TEST_F(Benchmark_knowhere_float, TEST_IDMAP) { +#ifdef USE_CUDA + index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP; +#else index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP; +#endif knowhere::Json conf = cfg_; std::string index_file_name = 
get_index_name({}); @@ -149,7 +162,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IDMAP) { } TEST_F(Benchmark_knowhere_float, TEST_IVF_FLAT_NM) { +#ifdef USE_CUDA + index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT; +#else index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; +#endif knowhere::Json conf = cfg_; for (auto nlist : NLISTs_) { @@ -171,7 +188,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IVF_FLAT_NM) { } TEST_F(Benchmark_knowhere_float, TEST_IVF_SQ8) { +#ifdef USE_CUDA + index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8; +#else index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8; +#endif knowhere::Json conf = cfg_; for (auto nlist : NLISTs_) { @@ -186,7 +207,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IVF_SQ8) { } TEST_F(Benchmark_knowhere_float, TEST_IVF_PQ) { +#ifdef USE_CUDA + index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ; +#else index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFPQ; +#endif knowhere::Json conf = cfg_; conf[knowhere::indexparam::NBITS] = NBITS_; diff --git a/include/knowhere/comp/blocking_queue.h b/include/knowhere/comp/blocking_queue.h new file mode 100644 index 000000000..d7de27147 --- /dev/null +++ b/include/knowhere/comp/blocking_queue.h @@ -0,0 +1,96 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. 
#pragma once

#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <queue>
#include <utility>

namespace knowhere {

/**
 * Bounded FIFO queue whose operations are serialized by a single mutex.
 *
 * Put() blocks while the queue holds `capacity_` elements; Take(), Front() and Back()
 * block while the queue is empty. The queue is neither copyable nor assignable.
 *
 * @tparam T element type. Front() and Back() return copies and therefore need a copyable T;
 *           Put(T&&) and Take() only need T to be movable.
 */
template <typename T>
class BlockingQueue {
 public:
    BlockingQueue() = default;

    virtual ~BlockingQueue() = default;

    BlockingQueue(const BlockingQueue& rhs) = delete;

    BlockingQueue&
    operator=(const BlockingQueue& rhs) = delete;

    /**
     * Append a copy of `task`, waiting until the queue has a free slot.
     */
    void
    Put(const T& task) {
        std::unique_lock<std::mutex> lock(mtx);
        full_.wait(lock, [this] { return queue_.size() < capacity_; });
        queue_.push(task);
        // notify_all on purpose: Front()/Back() wait on the same condition without consuming,
        // so waking a single waiter could leave a Take() caller asleep.
        empty_.notify_all();
    }

    /**
     * Append `task` by moving it, waiting until the queue has a free slot.
     */
    void
    Put(T&& task) {
        std::unique_lock<std::mutex> lock(mtx);
        full_.wait(lock, [this] { return queue_.size() < capacity_; });
        queue_.push(std::move(task));
        empty_.notify_all();
    }

    /**
     * Remove and return the oldest element, waiting until one is available.
     */
    T
    Take() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        // The slot is popped right after, so its content can be moved out instead of copied.
        T front(std::move(queue_.front()));
        queue_.pop();
        full_.notify_all();
        return front;
    }

    /**
     * Return a copy of the oldest element without removing it, waiting until one is available.
     */
    T
    Front() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T front(queue_.front());
        return front;
    }

    /**
     * Return a copy of the newest element without removing it, waiting until one is available.
     */
    T
    Back() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T back(queue_.back());
        return back;
    }

    /**
     * Number of queued elements at the time of the call.
     */
    size_t
    Size() const {
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.size();
    }

    /**
     * Whether the queue held no element at the time of the call.
     */
    bool
    Empty() const {
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.empty();
    }

    /**
     * Change the maximum number of queued elements. A capacity of 0 is ignored.
     *
     * The update happens under the queue mutex because Put() reads `capacity_` inside its
     * wait predicate, and blocked producers are woken so they can observe a larger bound.
     */
    void
    SetCapacity(const size_t capacity) {
        if (capacity == 0) {
            return;
        }
        std::lock_guard<std::mutex> lock(mtx);
        capacity_ = capacity;
        full_.notify_all();
    }

 protected:
    mutable std::mutex mtx;
    std::condition_variable full_;   // signalled when a slot is freed or the capacity changes
    std::condition_variable empty_;  // signalled when an element is added
    std::queue<T> queue_;
    size_t capacity_ = 32;
};

}  // namespace knowhere
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once + +#include + +#include +#include +#include + +#include "knowhere/comp/blocking_queue.h" +#include "knowhere/log.h" + +namespace knowhere { + +constexpr int64_t MB = 1LL << 20; + +struct Resource { + Resource(int64_t gpu_id, faiss::gpu::StandardGpuResources* r) : faiss_res_(r), gpu_id_(gpu_id) { + static int64_t global_id = 0; + id_ = global_id++; + } + + std::unique_ptr faiss_res_; + int64_t id_; + int64_t gpu_id_; + std::mutex mutex_; +}; +using ResPtr = std::shared_ptr; +using ResWPtr = std::weak_ptr; + +struct GPUParams { + int64_t tmp_mem_sz_ = 256 * MB; + int64_t pin_mem_sz_ = 256 * MB; + int64_t res_num_ = 2; + + GPUParams() { + } + + GPUParams(int64_t res_num) : res_num_(res_num) { + } +}; + +class GPUResMgr { + public: + friend class ResScope; + using ResBQ = BlockingQueue; + + public: + static GPUResMgr& + GetInstance() { + static GPUResMgr instance; + return instance; + } + + void + InitDevice(const int64_t gpu_id, const GPUParams& gpu_params) { + gpu_id_ = gpu_id; + gpu_params_.res_num_ = gpu_params.res_num_; + gpu_params_.tmp_mem_sz_ = gpu_params.tmp_mem_sz_; + gpu_params_.pin_mem_sz_ = gpu_params.pin_mem_sz_; + + LOG_KNOWHERE_DEBUG_ << "InitDevice gpu_id " << gpu_id_ << "resource count " << gpu_params_.res_num_ + << ", tmp_mem_sz " << gpu_params_.tmp_mem_sz_ / MB << "MB, pin_mem_sz " + << gpu_params_.pin_mem_sz_ / MB << "MB"; + } + + void + Init() { + if (!init_) { + for (int64_t i = 0; i < gpu_params_.res_num_; ++i) { + auto gpu_res = new faiss::gpu::StandardGpuResources(); + gpu_res->setTempMemory(gpu_params_.tmp_mem_sz_); + // 
need not set pinned memory by now + + auto res = std::make_shared(gpu_id_, gpu_res); + res_bq_.Put(res); + } + LOG_KNOWHERE_DEBUG_ << "Init gpu_id " << gpu_id_ << ", resource count " << res_bq_.Size() << ", tmp_mem_sz " + << gpu_params_.tmp_mem_sz_ / MB << "MB"; + init_ = true; + } + } + + // Free GPU resource, avoid cudaGetDevice error when deallocate. + // This func should be invoked before main return + void + Free() { + while (!res_bq_.Empty()) { + res_bq_.Take(); + } + init_ = false; + } + + ResPtr + GetRes() { + if (init_) { + auto res = res_bq_.Take(); + return res; + } else { + KNOWHERE_THROW_MSG("GPUResMgr not initialized"); + } + } + + void + PutRes(const ResPtr& res) { + if (init_) { + res_bq_.Put(res); + } else { + KNOWHERE_THROW_MSG("GPUResMgr not initialized"); + } + } + + protected: + bool init_ = false; + int64_t gpu_id_ = 0; + GPUParams gpu_params_; + ResBQ res_bq_; +}; + +class ResScope { + public: + ResScope(ResPtr& res, const bool renew) : res_(res), renew_(renew) { + res_->mutex_.lock(); + } + + ResScope(ResWPtr& res, const bool renew) : res_(res.lock()), renew_(renew) { + res_->mutex_.lock(); + } + + ~ResScope() { + if (renew_) { + GPUResMgr::GetInstance().PutRes(res_); + } + res_->mutex_.unlock(); + } + + private: + ResPtr res_; // hold resource until deconstruct + bool renew_; +}; + +} // namespace knowhere diff --git a/src/common/comp/knowhere_config.cc b/src/common/comp/knowhere_config.cc index 79b6fac40..29817b816 100644 --- a/src/common/comp/knowhere_config.cc +++ b/src/common/comp/knowhere_config.cc @@ -17,6 +17,9 @@ #include "faiss/utils/distances.h" #include "faiss/utils/utils.h" #include "knowhere/log.h" +#ifdef USE_CUDA +#include "knowhere/gpu/gpu_res_mgr.h" +#endif #include "simd/hook.h" namespace knowhere { @@ -123,4 +126,22 @@ KnowhereConfig::SetAioContextPool(size_t num_ctx, size_t max_events) { #endif } +void +KnowhereConfig::InitGPUResource(int64_t gpu_id, int64_t res_num) { +#ifdef USE_CUDA + LOG_KNOWHERE_INFO_ << "init GPU 
resource for gpu id " << gpu_id << ", resource num " << res_num; + knowhere::GPUParams gpu_params(res_num); + knowhere::GPUResMgr::GetInstance().InitDevice(gpu_id, gpu_params); + knowhere::GPUResMgr::GetInstance().Init(); +#endif +} + +void +KnowhereConfig::FreeGPUResource() { +#ifdef USE_CUDA + LOG_KNOWHERE_INFO_ << "free GPU resource"; + knowhere::GPUResMgr::GetInstance().Free(); +#endif +} + } // namespace knowhere diff --git a/src/index/flat_gpu/flat_gpu.cc b/src/index/flat_gpu/flat_gpu.cc index 94434bcfa..a7c846f59 100644 --- a/src/index/flat_gpu/flat_gpu.cc +++ b/src/index/flat_gpu/flat_gpu.cc @@ -9,28 +9,17 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License. -#include -#include - #include "common/metric.h" #include "faiss/IndexFlat.h" #include "faiss/gpu/GpuCloner.h" -#include "faiss/gpu/GpuIndexFlat.h" -#include "faiss/gpu/StandardGpuResources.h" #include "faiss/index_io.h" #include "index/flat_gpu/flat_gpu_config.h" #include "io/FaissIO.h" #include "knowhere/factory.h" -#include "knowhere/index_node_thread_pool_wrapper.h" +#include "knowhere/gpu/gpu_res_mgr.h" namespace knowhere { -static faiss::gpu::StandardGpuResources* -GetGpuRes() { - static faiss::gpu::StandardGpuResources res; - return &res; -} - class GpuFlatIndexNode : public IndexNode { public: GpuFlatIndexNode(const Object& object) : gpu_index_(nullptr) { @@ -59,28 +48,25 @@ class GpuFlatIndexNode : public IndexNode { return metric.error(); } - for (auto dev : f_cfg.gpu_ids) { - this->devs_.push_back(dev); - this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources); - } - const void* x = dataset.GetTensor(); const int64_t n = dataset.GetRows(); const int64_t dim = dataset.GetDim(); - faiss::Index* gpu_index = nullptr; + faiss::Index* host_index = nullptr; try { - auto host_index = std::make_unique(dim, metric.value()); 
- gpu_index = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, host_index.get()); - gpu_index->add(n, (const float*)x); + host_index = new faiss::IndexFlat(dim, metric.value()); + host_index->add(n, (const float*)x); + // need not copy index from CPU to GPU for IDMAP } catch (const std::exception& e) { - if (gpu_index) - delete gpu_index; + if (host_index) { + delete host_index; + } LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); return Status::faiss_inner_error; } - if (this->gpu_index_) + if (this->gpu_index_) { delete this->gpu_index_; - this->gpu_index_ = gpu_index; + } + this->gpu_index_ = host_index; return Status::success; } @@ -100,6 +86,8 @@ class GpuFlatIndexNode : public IndexNode { try { ids = new (std::nothrow) int64_t[len]; dis = new (std::nothrow) float[len]; + + ResScope rs(res_, false); gpu_index_->search(nq, (const float*)x, f_cfg.k, dis, ids, bitset); } catch (const std::exception& e) { std::unique_ptr auto_delete_ids(ids); @@ -148,20 +136,13 @@ class GpuFlatIndexNode : public IndexNode { } try { MemoryIOWriter writer; - std::unique_ptr host_index(faiss::gpu::index_gpu_to_cpu(gpu_index_)); + // Serialize() is called after Add(), at this time gpu_index_ is CPU index actually + faiss::Index* host_index = gpu_index_; - faiss::write_index(host_index.get(), &writer); + faiss::write_index(host_index, &writer); std::shared_ptr data(writer.data_); binset.Append("FLAT", data, writer.rp); - - size_t dev_s = this->devs_.size(); - uint8_t* buf = new uint8_t[sizeof(dev_s) + sizeof(int) * dev_s]; - auto device_id_ = std::shared_ptr(buf); - memcpy(buf, &dev_s, sizeof(dev_s)); - memcpy(buf + sizeof(dev_s), this->devs_.data(), sizeof(devs_[0]) * dev_s); - binset.Append("device_ids", device_id_, sizeof(size_t) + sizeof(int) * dev_s); - } catch (const std::exception& e) { LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); return Status::faiss_inner_error; @@ -178,14 +159,10 @@ class GpuFlatIndexNode : public IndexNode { 
reader.data_ = binary->data.get(); std::unique_ptr index(faiss::read_index(&reader)); - size_t dev_s = 1; - auto device_ids = binset.GetByName("device_ids"); - memcpy(&dev_s, device_ids->data.get(), sizeof(dev_s)); - this->devs_.resize(dev_s); - memcpy(this->devs_.data(), device_ids->data.get() + sizeof(size_t), sizeof(int) * dev_s); - for (size_t i = 0; i < dev_s; ++i) - this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources); - gpu_index_ = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, index.get()); + auto gpu_res = GPUResMgr::GetInstance().GetRes(); + ResScope rs(gpu_res, true); + gpu_index_ = faiss::gpu::index_cpu_to_gpu(gpu_res->faiss_res_.get(), gpu_res->gpu_id_, index.get()); + res_ = gpu_res; } catch (const std::exception& e) { LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); return Status::faiss_inner_error; @@ -216,22 +193,20 @@ class GpuFlatIndexNode : public IndexNode { virtual std::string Type() const override { - return "GPUFLAT"; + return knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP; } virtual ~GpuFlatIndexNode() { - if (gpu_index_) + if (gpu_index_) { delete gpu_index_; + } } private: - std::vector devs_; - std::vector res_; + mutable ResWPtr res_; faiss::Index* gpu_index_; }; -KNOWHERE_REGISTER_GLOBAL(GPUFLAT, [](const Object& object) { - return Index::Create(std::make_unique(object)); -}); +KNOWHERE_REGISTER_GLOBAL(GPU_FLAT, [](const Object& object) { return Index::Create(object); }); } // namespace knowhere diff --git a/src/index/flat_gpu/flat_gpu_config.h b/src/index/flat_gpu/flat_gpu_config.h index 614ece7da..ba73691e4 100644 --- a/src/index/flat_gpu/flat_gpu_config.h +++ b/src/index/flat_gpu/flat_gpu_config.h @@ -18,14 +18,9 @@ namespace knowhere { class GpuFlatConfig : public FlatConfig { public: - CFG_LIST gpu_ids; + CFG_INT gpu_id; KNOHWERE_DECLARE_CONFIG(GpuFlatConfig) { - KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids) - .description("gpu device ids") - .set_default({ - 0, - }) - .for_train(); + 
KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train(); } }; diff --git a/src/index/ivf_gpu/ivf_gpu.cc b/src/index/ivf_gpu/ivf_gpu.cc index a410b9a71..d47d97dfd 100644 --- a/src/index/ivf_gpu/ivf_gpu.cc +++ b/src/index/ivf_gpu/ivf_gpu.cc @@ -17,12 +17,15 @@ #include "faiss/IndexScalarQuantizer.h" #include "faiss/gpu/GpuCloner.h" #include "faiss/gpu/GpuIndexIVF.h" -#include "faiss/gpu/StandardGpuResources.h" +#include "faiss/gpu/GpuIndexIVFFlat.h" +#include "faiss/gpu/GpuIndexIVFPQ.h" +#include "faiss/gpu/GpuIndexIVFScalarQuantizer.h" #include "faiss/index_io.h" #include "index/ivf_gpu/ivf_gpu_config.h" #include "io/FaissIO.h" +#include "knowhere/comp/index_param.h" #include "knowhere/factory.h" -#include "knowhere/index_node_thread_pool_wrapper.h" +#include "knowhere/gpu/gpu_res_mgr.h" namespace knowhere { @@ -45,7 +48,7 @@ struct KnowhereConfigType { template class GpuIvfIndexNode : public IndexNode { public: - GpuIvfIndexNode(const Object& object) : devs_({}), res_{}, gpu_index_(nullptr) { + GpuIvfIndexNode(const Object& object) : gpu_index_(nullptr) { static_assert(std::is_same::value || std::is_same::value || std::is_same::value); } @@ -70,52 +73,39 @@ class GpuIvfIndexNode : public IndexNode { auto dim = dataset.GetDim(); auto ivf_gpu_cfg = static_cast::Type&>(cfg); - for (size_t i = 0; i < ivf_gpu_cfg.gpu_ids.size(); ++i) { - this->devs_.push_back(i); - this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources); - } - auto metric = Str2FaissMetricType(ivf_gpu_cfg.metric_type); if (!metric.has_value()) { LOG_KNOWHERE_WARNING_ << "please check metric value: " << ivf_gpu_cfg.metric_type; return metric.error(); } + faiss::Index* gpu_index = nullptr; try { - auto qzr = new (std::nothrow) faiss::IndexFlat(dim, metric.value()); - if (qzr == nullptr) { - LOG_KNOWHERE_WARNING_ << "memory malloc error"; - return Status::malloc_error; - } - std::unique_ptr auto_delele_qzr(qzr); - T* host_index = nullptr; + auto 
gpu_res = GPUResMgr::GetInstance().GetRes(); + ResScope rs(gpu_res, true); + if constexpr (std::is_same::value) { - host_index = new (std::nothrow) faiss::IndexIVFFlat(qzr, dim, ivf_gpu_cfg.nlist, metric.value()); - if (host_index == nullptr) { - LOG_KNOWHERE_WARNING_ << "memory malloc error"; - return Status::malloc_error; - } + faiss::gpu::GpuIndexIVFFlatConfig f_cfg; + f_cfg.device = static_cast(gpu_res->gpu_id_); + gpu_index = new faiss::gpu::GpuIndexIVFFlat(gpu_res->faiss_res_.get(), dim, ivf_gpu_cfg.nlist, + metric.value(), f_cfg); } if constexpr (std::is_same::value) { - host_index = new (std::nothrow) - faiss::IndexIVFPQ(qzr, dim, ivf_gpu_cfg.nlist, ivf_gpu_cfg.m, ivf_gpu_cfg.nbits, metric.value()); - if (host_index == nullptr) { - LOG_KNOWHERE_WARNING_ << "memory malloc error"; - return Status::malloc_error; - } + faiss::gpu::GpuIndexIVFPQConfig f_cfg; + f_cfg.device = static_cast(gpu_res->gpu_id_); + gpu_index = new faiss::gpu::GpuIndexIVFPQ(gpu_res->faiss_res_.get(), dim, ivf_gpu_cfg.nlist, + ivf_gpu_cfg.m, ivf_gpu_cfg.nbits, metric.value(), f_cfg); } if constexpr (std::is_same::value) { - host_index = new (std::nothrow) faiss::IndexIVFScalarQuantizer( - qzr, dim, ivf_gpu_cfg.nlist, faiss::QuantizerType::QT_8bit, metric.value()); - if (host_index == nullptr) { - LOG_KNOWHERE_WARNING_ << "memory malloc error"; - return Status::malloc_error; - } + faiss::gpu::GpuIndexIVFScalarQuantizerConfig f_cfg; + f_cfg.device = static_cast(gpu_res->gpu_id_); + gpu_index = new faiss::gpu::GpuIndexIVFScalarQuantizer(gpu_res->faiss_res_.get(), dim, + ivf_gpu_cfg.nlist, faiss::QuantizerType::QT_8bit, + metric.value(), true, f_cfg); } - std::unique_ptr auto_delete_host_index(host_index); - gpu_index = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, host_index); - gpu_index->train(rows, reinterpret_cast(tensor)); + gpu_index->train(rows, reinterpret_cast(tensor)); + res_ = gpu_res; } catch (std::exception& e) { if (gpu_index) { delete gpu_index; @@ -136,6 
+126,7 @@ class GpuIvfIndexNode : public IndexNode { auto rows = dataset.GetRows(); auto tensor = dataset.GetTensor(); try { + ResScope rs(res_, false); gpu_index_->add(rows, (const float*)tensor); } catch (std::exception& e) { LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); @@ -147,16 +138,11 @@ class GpuIvfIndexNode : public IndexNode { virtual expected Search(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override { auto ivf_gpu_cfg = static_cast::Type&>(cfg); - if (auto ix = dynamic_cast(gpu_index_)) { - for (int i = 0; i < ix->count(); ++i) { - auto idx = dynamic_cast(ix->at(i)); - assert(idx != nullptr); - idx->setNumProbes(ivf_gpu_cfg.nprobe); - } - } if (auto ix = dynamic_cast(gpu_index_)) { ix->setNumProbes(ivf_gpu_cfg.nprobe); } + ResScope rs(res_, false); + constexpr int64_t block_size = 2048; auto rows = dataset.GetRows(); auto k = ivf_gpu_cfg.k; @@ -205,19 +191,12 @@ class GpuIvfIndexNode : public IndexNode { try { MemoryIOWriter writer; { - faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(this->gpu_index_); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(gpu_index_); faiss::write_index(host_index, &writer); delete host_index; } std::shared_ptr data(writer.data_); - binset.Append("IVF", data, writer.rp); - size_t dev_s = this->devs_.size(); - uint8_t* buf = new uint8_t[sizeof(dev_s) + sizeof(int) * dev_s]; - auto device_id_ = std::shared_ptr(buf); - memcpy(buf, &dev_s, sizeof(dev_s)); - memcpy(buf + sizeof(dev_s), this->devs_.data(), sizeof(devs_[0]) * dev_s); - binset.Append("device_ids", device_id_, sizeof(size_t) + sizeof(int) * dev_s); } catch (std::exception& e) { LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); return Status::faiss_inner_error; @@ -235,14 +214,10 @@ class GpuIvfIndexNode : public IndexNode { reader.data_ = binary->data.get(); std::unique_ptr index(faiss::read_index(&reader)); - size_t dev_s = 1; - auto device_ids = binset.GetByName("device_ids"); - memcpy(&dev_s, 
device_ids->data.get(), sizeof(dev_s)); - this->devs_.resize(dev_s); - memcpy(this->devs_.data(), device_ids->data.get() + sizeof(size_t), sizeof(int) * dev_s); - for (size_t i = 0; i < dev_s; ++i) - this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources); - gpu_index_ = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, index.get()); + auto gpu_res = GPUResMgr::GetInstance().GetRes(); + ResScope rs(gpu_res, true); + gpu_index_ = faiss::gpu::index_cpu_to_gpu(gpu_res->faiss_res_.get(), gpu_res->gpu_id_, index.get()); + res_ = gpu_res; } catch (std::exception& e) { LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what(); return Status::faiss_inner_error; @@ -277,13 +252,13 @@ class GpuIvfIndexNode : public IndexNode { virtual std::string Type() const override { if constexpr (std::is_same::value) { - return "GPUIVFFLAT"; + return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT; } if constexpr (std::is_same::value) { - return "GPUIVFPQ"; + return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ; } if constexpr (std::is_same::value) { - return "GPUIVFSQ"; + return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8; } } @@ -291,25 +266,21 @@ class GpuIvfIndexNode : public IndexNode { if (gpu_index_) { delete gpu_index_; } - for (auto&& p : res_) { - delete p; - } } private: - std::vector devs_; - std::vector res_; + mutable ResWPtr res_; faiss::Index* gpu_index_; }; -KNOWHERE_REGISTER_GLOBAL(GPUIVFFLAT, [](const Object& object) { - return Index::Create(std::make_unique>(object)); +KNOWHERE_REGISTER_GLOBAL(GPU_IVF_FLAT, [](const Object& object) { + return Index>::Create(object); }); -KNOWHERE_REGISTER_GLOBAL(GPUIVFPQ, [](const Object& object) { - return Index::Create(std::make_unique>(object)); +KNOWHERE_REGISTER_GLOBAL(GPU_IVF_PQ, [](const Object& object) { + return Index>::Create(object); }); -KNOWHERE_REGISTER_GLOBAL(GPUIVFSQ, [](const Object& object) { - return Index::Create( - std::make_unique>(object)); +KNOWHERE_REGISTER_GLOBAL(GPU_IVF_SQ8, 
[](const Object& object) { + return Index>::Create(object); }); + } // namespace knowhere diff --git a/src/index/ivf_gpu/ivf_gpu_config.h b/src/index/ivf_gpu/ivf_gpu_config.h index 6a3d4b7cb..63b31de46 100644 --- a/src/index/ivf_gpu/ivf_gpu_config.h +++ b/src/index/ivf_gpu/ivf_gpu_config.h @@ -15,40 +15,25 @@ namespace knowhere { class GpuIvfFlatConfig : public IvfFlatConfig { public: - CFG_LIST gpu_ids; + CFG_INT gpu_id; KNOHWERE_DECLARE_CONFIG(GpuIvfFlatConfig) { - KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids) - .description("gpu device ids") - .set_default({ - 0, - }) - .for_train(); + KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train(); } }; class GpuIvfPqConfig : public IvfPqConfig { public: - CFG_LIST gpu_ids; + CFG_INT gpu_id; KNOHWERE_DECLARE_CONFIG(GpuIvfPqConfig) { - KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids) - .description("gpu device ids") - .set_default({ - 0, - }) - .for_train(); + KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train(); } }; class GpuIvfSqConfig : public IvfSqConfig { public: - CFG_LIST gpu_ids; + CFG_INT gpu_id; KNOHWERE_DECLARE_CONFIG(GpuIvfSqConfig) { - KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids) - .description("gpu device ids") - .set_default({ - 0, - }) - .for_train(); + KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train(); } }; diff --git a/tests/ut/CMakeLists.txt b/tests/ut/CMakeLists.txt index 43613c0d9..e884cc1ab 100644 --- a/tests/ut/CMakeLists.txt +++ b/tests/ut/CMakeLists.txt @@ -26,9 +26,6 @@ set_target_properties(knowhere_tests PROPERTIES CXX_STANDARD 17 ) -if(USE_CUDA) - add_definitions(-DUSE_CUDA) -endif() if(WITH_DISKANN) target_link_libraries(knowhere_tests PRIVATE Catch2::Catch2WithMain knowhere stdc++fs) else() diff --git a/tests/ut/test_gpu_search.cc b/tests/ut/test_gpu_search.cc new file mode 100644 index 000000000..dd070a178 --- /dev/null +++ b/tests/ut/test_gpu_search.cc @@ -0,0 +1,121 @@ +// Copyright (C) 
2019-2023 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#include "catch2/catch_approx.hpp" +#include "catch2/catch_test_macros.hpp" +#include "catch2/generators/catch_generators.hpp" +#include "knowhere/comp/index_param.h" +#include "knowhere/comp/knowhere_config.h" +#include "knowhere/factory.h" +#include "utils.h" + +#ifdef USE_CUDA +TEST_CASE("Test All GPU Index", "[search]") { + using Catch::Approx; + + knowhere::KnowhereConfig::InitGPUResource(0); + + int64_t nb = 10000, nq = 1000; + int64_t dim = 128; + int64_t seed = 42; + + auto base_gen = [&]() { + knowhere::Json json; + json[knowhere::meta::DIM] = dim; + json[knowhere::meta::METRIC_TYPE] = knowhere::metric::L2; + json[knowhere::meta::TOPK] = 1; + json[knowhere::meta::RADIUS] = 10.0; + json[knowhere::meta::RANGE_FILTER] = 0.0; + return json; + }; + + auto ivfflat_gen = [&base_gen]() { + knowhere::Json json = base_gen(); + json[knowhere::indexparam::NLIST] = 16; + json[knowhere::indexparam::NPROBE] = 4; + return json; + }; + + auto ivfsq_gen = ivfflat_gen; + + auto ivfpq_gen = [&ivfflat_gen]() { + knowhere::Json json = ivfflat_gen(); + json[knowhere::indexparam::M] = 4; + json[knowhere::indexparam::NBITS] = 8; + return json; + }; + + auto gpu_flat_gen = [&base_gen]() { + auto json = base_gen(); + return json; + }; + + SECTION("Test Gpu Index Search") { + using std::make_tuple; + auto [name, gen] = GENERATE_REF(table>({ + // GPU_FLAT cannot run this test is because its Train() 
and Add() actually run in CPU, + // "res_" in gpu_index_ is not set correctly + // make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP, gpu_flat_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT, ivfflat_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen), + })); + auto idx = knowhere::IndexFactory::Instance().Create(name); + auto cfg_json = gen().dump(); + CAPTURE(name, cfg_json); + knowhere::Json json = knowhere::Json::parse(cfg_json); + auto train_ds = GenDataSet(nb, dim, seed); + auto query_ds = GenDataSet(nq, dim, seed); + REQUIRE(idx.Type() == name); + auto res = idx.Build(*train_ds, json); + REQUIRE(res == knowhere::Status::success); + auto results = idx.Search(*query_ds, json, nullptr); + REQUIRE(results.has_value()); + auto ids = results.value()->GetIds(); + for (int i = 0; i < nq; ++i) { + CHECK(ids[i] == i); + } + } + + SECTION("Test Gpu Index Serialize/Deserialize") { + using std::make_tuple; + auto [name, gen] = GENERATE_REF(table>({ + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP, gpu_flat_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT, ivfflat_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen), + })); + + auto idx = knowhere::IndexFactory::Instance().Create(name); + auto cfg_json = gen().dump(); + CAPTURE(name, cfg_json); + knowhere::Json json = knowhere::Json::parse(cfg_json); + auto train_ds = GenDataSet(nb, dim, seed); + auto query_ds = GenDataSet(nq, dim, seed); + REQUIRE(idx.Type() == name); + auto res = idx.Build(*train_ds, json); + REQUIRE(res == knowhere::Status::success); + knowhere::BinarySet bs; + idx.Serialize(bs); + + auto idx_ = knowhere::IndexFactory::Instance().Create(name); + idx_.Deserialize(bs); + auto results = idx_.Search(*query_ds, json, nullptr); + REQUIRE(results.has_value()); + auto ids = 
results.value()->GetIds(); + for (int i = 0; i < nq; ++i) { + CHECK(ids[i] == i); + } + } + + knowhere::KnowhereConfig::FreeGPUResource(); +} +#endif diff --git a/tests/ut/test_search.cc b/tests/ut/test_search.cc index 94e681520..52cee24c9 100644 --- a/tests/ut/test_search.cc +++ b/tests/ut/test_search.cc @@ -13,6 +13,7 @@ #include "catch2/catch_test_macros.hpp" #include "catch2/generators/catch_generators.hpp" #include "knowhere/comp/index_param.h" +#include "knowhere/comp/knowhere_config.h" #include "knowhere/factory.h" #include "utils.h" @@ -82,12 +83,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") { REQUIRE(res == knowhere::Status::success); }; -#ifdef USE_CUDA - auto gpu_flat_gen = [&base_gen]() { - auto json = base_gen(); - return json; - }; -#endif SECTION("Test Cpu Index Search") { using std::make_tuple; auto [name, gen] = GENERATE_REF(table>({ @@ -97,12 +92,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") { make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen), make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen), -#ifdef USE_CUDA - make_tuple("GPUFLAT", gpu_flat_gen), - make_tuple("GPUIVFFLAT", ivfflat_gen), - make_tuple("GPUIVFPQ", ivfpq_gen), - make_tuple("GPUIVFSQ", ivfsq_gen), -#endif })); auto idx = knowhere::IndexFactory::Instance().Create(name); auto cfg_json = gen().dump(); @@ -163,12 +152,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") { make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen), make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen), -#ifdef USE_CUDA - make_tuple("GPUFLAT", gpu_flat_gen), - make_tuple("GPUIVFFLAT", ivfflat_gen), - make_tuple("GPUIVFPQ", ivfpq_gen), - make_tuple("GPUIVFSQ", ivfsq_gen), -#endif })); auto idx = knowhere::IndexFactory::Instance().Create(name);