diff --git a/CMakeLists.txt b/CMakeLists.txt
index 510e74ae6..56e876a5f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ knowhere_option(WITH_DISKANN "Build with diskann index" OFF)
 knowhere_option(WITH_BENCHMARK "Build with benchmark" OFF)
 knowhere_option(WITH_COVERAGE "Build with coverage" OFF)
 knowhere_option(WITH_CCACHE "Build with ccache" ON)
+knowhere_option(WITH_PROFILER "Build with profiler" OFF)
 
 if(KNOWHERE_VERSION)
   message(STATUS "Building KNOWHERE version: ${KNOWHERE_VERSION}")
@@ -46,6 +47,7 @@ if(WITH_CCACHE)
 endif()
 
 if(USE_CUDA)
+  add_definitions(-DUSE_CUDA)
   set(CMAKE_CUDA_ARCHITECTURES 75;70;61;60)
   enable_language(CUDA)
   find_package(CUDAToolkit REQUIRED)
@@ -90,8 +92,8 @@ else()
 endif()
 
 if(NOT USE_CUDA)
-  knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/ivf_gpu/*.cc
-                     src/index/flat_gpu/*.cc)
+  knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/flat_gpu/*.cc
+                     src/index/ivf_gpu/*.cc)
   list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_GPU_SRCS})
 endif()
 
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 3896126a7..ebdc7c228 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -9,6 +9,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 # or implied. See the License for the specific language governing permissions and limitations under the License
 
+include_directories(${CMAKE_SOURCE_DIR})
 include_directories(${CMAKE_SOURCE_DIR}/include)
 
 include_directories(/usr/local/hdf5/include)
@@ -24,12 +25,12 @@ set(depend_libs
         ${LAPACK_LIBRARIES}
         )
 
-if ( LINUX AND ENABLE_PROFILING )
-    set( depend_libs
+if(WITH_PROFILER)
+    set(depend_libs
         ${depend_libs}
-        gperftools
-        )
-endif ()
+        tcmalloc_and_profiler
+    )
+endif()
 
 #==============================================================================
 macro(benchmark_test target file)
diff --git a/benchmark/hdf5/benchmark_knowhere_float.cpp b/benchmark/hdf5/benchmark_knowhere_float.cpp
index 48fd5a138..055ce3135 100644
--- a/benchmark/hdf5/benchmark_knowhere_float.cpp
+++ b/benchmark/hdf5/benchmark_knowhere_float.cpp
@@ -18,6 +18,8 @@
 #include "knowhere/comp/knowhere_config.h"
 #include "knowhere/dataset.h"
 
+const int32_t GPU_DEVICE_ID = 0;
+
 class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Test {
  public:
     void
@@ -108,11 +110,18 @@ class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Te
         cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
         knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
         printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold());
+#ifdef USE_CUDA
+        knowhere::KnowhereConfig::InitGPUResource(GPU_DEVICE_ID);
+        cfg_[knowhere::meta::DEVICE_ID] = GPU_DEVICE_ID;
+#endif
     }
 
     void
     TearDown() override {
         free_all();
+#ifdef USE_CUDA
+        knowhere::KnowhereConfig::FreeGPUResource();
+#endif
     }
 
  protected:
@@ -138,7 +147,11 @@ class Benchmark_knowhere_float : public Benchmark_knowhere, public ::testing::Te
 };
 
 TEST_F(Benchmark_knowhere_float, TEST_IDMAP) {
+#ifdef USE_CUDA
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP;
+#else
     index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP;
+#endif
 
     knowhere::Json conf = cfg_;
     std::string index_file_name = get_index_name({});
@@ -149,7 +162,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IDMAP) {
 }
 
 TEST_F(Benchmark_knowhere_float, TEST_IVF_FLAT_NM) {
+#ifdef USE_CUDA
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT;
+#else
     index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
+#endif
 
     knowhere::Json conf = cfg_;
     for (auto nlist : NLISTs_) {
@@ -171,7 +188,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IVF_FLAT_NM) {
 }
 
 TEST_F(Benchmark_knowhere_float, TEST_IVF_SQ8) {
+#ifdef USE_CUDA
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8;
+#else
     index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
+#endif
 
     knowhere::Json conf = cfg_;
     for (auto nlist : NLISTs_) {
@@ -186,7 +207,11 @@ TEST_F(Benchmark_knowhere_float, TEST_IVF_SQ8) {
 }
 
 TEST_F(Benchmark_knowhere_float, TEST_IVF_PQ) {
+#ifdef USE_CUDA
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ;
+#else
     index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFPQ;
+#endif
 
     knowhere::Json conf = cfg_;
     conf[knowhere::indexparam::NBITS] = NBITS_;
diff --git a/include/knowhere/comp/blocking_queue.h b/include/knowhere/comp/blocking_queue.h
new file mode 100644
index 000000000..d7de27147
--- /dev/null
+++ b/include/knowhere/comp/blocking_queue.h
@@ -0,0 +1,120 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#pragma once
+
+#include <assert.h>
+
+#include <condition_variable>
+#include <cstddef>
+#include <iostream>
+#include <mutex>
+#include <queue>
+#include <utility>
+#include <vector>
+
+namespace knowhere {
+
+/**
+ * Bounded FIFO queue guarded by a single mutex.
+ *
+ * Put() blocks while the queue holds `capacity_` elements (32 unless changed through SetCapacity()); Take(),
+ * Front() and Back() block while the queue is empty. The class is neither copyable nor assignable.
+ */
+template <typename T>
+class BlockingQueue {
+ public:
+    BlockingQueue() = default;
+
+    virtual ~BlockingQueue() = default;
+
+    BlockingQueue(const BlockingQueue& rhs) = delete;
+
+    BlockingQueue&
+    operator=(const BlockingQueue& rhs) = delete;
+
+    /** Append a copy of `task`, waiting until there is room for it. */
+    void
+    Put(const T& task) {
+        std::unique_lock<std::mutex> lock(mtx);
+        full_.wait(lock, [this] { return queue_.size() < capacity_; });
+        queue_.push(task);
+        empty_.notify_all();
+    }
+
+    /** Remove and return the oldest element, waiting until one is available. */
+    T
+    Take() {
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        // The slot is popped right away, so its content can be moved out instead of copied.
+        T front(std::move(queue_.front()));
+        queue_.pop();
+        full_.notify_all();
+        return front;
+    }
+
+    /** Return a copy of the oldest element without removing it, waiting until one is available. */
+    T
+    Front() {
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        return queue_.front();
+    }
+
+    /** Return a copy of the newest element without removing it, waiting until one is available. */
+    T
+    Back() {
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        return queue_.back();
+    }
+
+    /** Number of elements currently queued. */
+    size_t
+    Size() const {
+        std::lock_guard<std::mutex> lock(mtx);
+        return queue_.size();
+    }
+
+    /** True when no element is queued. */
+    bool
+    Empty() const {
+        std::lock_guard<std::mutex> lock(mtx);
+        return queue_.empty();
+    }
+
+    /**
+     * Change the maximum number of queued elements. A `capacity` of 0 is ignored and keeps the current limit.
+     */
+    void
+    SetCapacity(const size_t capacity) {
+        if (capacity == 0) {
+            return;
+        }
+        {
+            // capacity_ is read by the Put() predicate under mtx, so it has to be written under mtx as well.
+            std::lock_guard<std::mutex> lock(mtx);
+            capacity_ = capacity;
+        }
+        // A larger limit may unblock producers that are waiting in Put().
+        full_.notify_all();
+    }
+
+ protected:
+    mutable std::mutex mtx;
+    std::condition_variable full_;   // signalled when room becomes available (waited on by Put)
+    std::condition_variable empty_;  // signalled when an element arrives (waited on by Take/Front/Back)
+    std::queue<T> queue_;
+    size_t capacity_ = 32;
+};
+
+}  // namespace knowhere
diff --git a/include/knowhere/comp/index_param.h b/include/knowhere/comp/index_param.h
index 48815cd55..d52bcb796 100644
--- a/include/knowhere/comp/index_param.h
+++ b/include/knowhere/comp/index_param.h
@@ -29,6 +29,11 @@ constexpr const char* INDEX_FAISS_IVFFLAT = "IVF_FLAT";
 constexpr const char* INDEX_FAISS_IVFPQ = "IVF_PQ";
 constexpr const char* INDEX_FAISS_IVFSQ8 = "IVF_SQ8";
 
+constexpr const char* INDEX_FAISS_GPU_IDMAP = "GPU_FLAT";
+constexpr const char* INDEX_FAISS_GPU_IVFFLAT = "GPU_IVF_FLAT";
+constexpr const char* INDEX_FAISS_GPU_IVFPQ = "GPU_IVF_PQ";
+constexpr const char* INDEX_FAISS_GPU_IVFSQ8 = "GPU_IVF_SQ8";
+
 constexpr const char* INDEX_ANNOY = "ANNOY";
 constexpr const char* INDEX_HNSW = "HNSW";
 
diff --git a/include/knowhere/comp/knowhere_config.h b/include/knowhere/comp/knowhere_config.h
index f8037f455..adc2c627a 100644
--- a/include/knowhere/comp/knowhere_config.h
+++ b/include/knowhere/comp/knowhere_config.h
@@ -93,6 +93,18 @@ class KnowhereConfig {
      */
     static void
     SetAioContextPool(size_t num_ctx, size_t max_events);
+
+    /**
+     * Create a pool of res_num GPU resources on device gpu_id; does nothing in builds without USE_CUDA.
+     */
+    static void
+    InitGPUResource(int64_t gpu_id, int64_t res_num = 2);
+
+    /**
+     * Release the pooled GPU resources (call before the process exits); does nothing in builds without USE_CUDA.
+     */
+    static void
+    FreeGPUResource();
 };
 
 }  // namespace knowhere
diff --git a/include/knowhere/gpu/gpu_res_mgr.h b/include/knowhere/gpu/gpu_res_mgr.h
new file mode 100644
index 000000000..8ad030b29
--- /dev/null
+++ b/include/knowhere/gpu/gpu_res_mgr.h
@@ -0,0 +1,195 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#pragma once
+
+#include <faiss/gpu/StandardGpuResources.h>
+
+#include <exception>
+#include <memory>
+#include <mutex>
+#include <utility>
+
+#include "knowhere/comp/blocking_queue.h"
+#include "knowhere/log.h"
+
+namespace knowhere {
+
+constexpr int64_t MB = 1LL << 20;
+
+/** One pooled FAISS GPU resource together with the id of the device it is used on. */
+struct Resource {
+    /** Takes ownership of `r` and assigns the next value of a counter shared by every Resource to `id_`. */
+    Resource(int64_t gpu_id, faiss::gpu::StandardGpuResources* r) : faiss_res_(r), gpu_id_(gpu_id) {
+        static int64_t global_id = 0;
+        id_ = global_id++;
+    }
+
+    std::unique_ptr<faiss::gpu::StandardGpuResources> faiss_res_;
+    int64_t id_;
+    int64_t gpu_id_;
+    std::mutex mutex_;  // held by ResScope for as long as the resource is in use
+};
+using ResPtr = std::shared_ptr<Resource>;
+using ResWPtr = std::weak_ptr<Resource>;
+
+/** Sizing parameters of the GPU resource pool. */
+struct GPUParams {
+    int64_t tmp_mem_sz_ = 256 * MB;  // passed to setTempMemory() of every resource
+    int64_t pin_mem_sz_ = 256 * MB;  // only stored and logged; Init() does not apply it
+    int64_t res_num_ = 2;            // number of resources created by Init()
+
+    GPUParams() = default;
+
+    GPUParams(int64_t res_num) : res_num_(res_num) {
+    }
+};
+
+/**
+ * Pool of GPU resources for one device, normally reached through GetInstance().
+ *
+ * Resources are created by Init(), borrowed with GetRes() and handed back with PutRes(). `init_` and the
+ * parameters are not synchronized; NOTE(review): InitDevice()/Init()/Free() are presumably meant to run while
+ * no other thread uses the manager - confirm against callers.
+ */
+class GPUResMgr {
+ public:
+    friend class ResScope;
+    using ResBQ = BlockingQueue<ResPtr>;
+
+ public:
+    static GPUResMgr&
+    GetInstance() {
+        static GPUResMgr instance;
+        return instance;
+    }
+
+    /** Record the device id and the pool parameters used by the next Init(). */
+    void
+    InitDevice(const int64_t gpu_id, const GPUParams& gpu_params) {
+        gpu_id_ = gpu_id;
+        gpu_params_ = gpu_params;
+
+        LOG_KNOWHERE_DEBUG_ << "InitDevice gpu_id " << gpu_id_ << ", resource count " << gpu_params_.res_num_
+                            << ", tmp_mem_sz " << gpu_params_.tmp_mem_sz_ / MB << "MB, pin_mem_sz "
+                            << gpu_params_.pin_mem_sz_ / MB << "MB";
+    }
+
+    /**
+     * Create `res_num_` resources and queue them; does nothing when the manager is already initialized.
+     * NOTE: Put() blocks once the queue is full, so `res_num_` must stay within the BlockingQueue capacity
+     * (32 by default).
+     */
+    void
+    Init() {
+        if (init_) {
+            return;
+        }
+        for (int64_t i = 0; i < gpu_params_.res_num_; ++i) {
+            // Give the FAISS object an owner before configuring it, so that it is released if a step throws.
+            auto res = std::make_shared<Resource>(gpu_id_, nullptr);
+            res->faiss_res_ = std::make_unique<faiss::gpu::StandardGpuResources>();
+            res->faiss_res_->setTempMemory(gpu_params_.tmp_mem_sz_);
+            // need not set pinned memory by now
+            res_bq_.Put(res);
+        }
+        LOG_KNOWHERE_DEBUG_ << "Init gpu_id " << gpu_id_ << ", resource count " << res_bq_.Size() << ", tmp_mem_sz "
+                            << gpu_params_.tmp_mem_sz_ / MB << "MB";
+        init_ = true;
+    }
+
+    /**
+     * Drop every pooled resource and mark the manager as uninitialized.
+     * Should be invoked before main() returns, to avoid cudaGetDevice errors during deallocation.
+     */
+    void
+    Free() {
+        while (!res_bq_.Empty()) {
+            res_bq_.Take();
+        }
+        init_ = false;
+    }
+
+    /** Borrow a resource, waiting until one is in the pool. Fails via KNOWHERE_THROW_MSG when not initialized. */
+    ResPtr
+    GetRes() {
+        if (!init_) {
+            KNOWHERE_THROW_MSG("GPUResMgr not initialized");
+        }
+        return res_bq_.Take();
+    }
+
+    /** Hand a resource back to the pool. Fails via KNOWHERE_THROW_MSG when not initialized. */
+    void
+    PutRes(const ResPtr& res) {
+        if (!init_) {
+            KNOWHERE_THROW_MSG("GPUResMgr not initialized");
+        }
+        res_bq_.Put(res);
+    }
+
+ protected:
+    bool init_ = false;
+    int64_t gpu_id_ = 0;
+    GPUParams gpu_params_;
+    ResBQ res_bq_;
+};
+
+/**
+ * Scoped lock on a Resource: locks `mutex_` on construction and unlocks it on destruction. When `renew` is
+ * true the resource is also handed back to GPUResMgr at the end of the scope.
+ */
+class ResScope {
+ public:
+    ResScope(ResPtr& res, const bool renew) : res_(res), renew_(renew) {
+        Lock();
+    }
+
+    ResScope(ResWPtr& res, const bool renew) : res_(res.lock()), renew_(renew) {
+        Lock();
+    }
+
+    ResScope(const ResScope&) = delete;
+
+    ResScope&
+    operator=(const ResScope&) = delete;
+
+    ~ResScope() {
+        // Release the lock first, so that the next borrower never has to wait on it.
+        res_->mutex_.unlock();
+        if (!renew_) {
+            return;
+        }
+        // PutRes() reports an error once the manager has been freed; a destructor must not let that escape.
+        try {
+            GPUResMgr::GetInstance().PutRes(res_);
+        } catch (const std::exception& e) {
+            LOG_KNOWHERE_WARNING_ << "failed to return GPU resource, " << e.what();
+        } catch (...) {
+            LOG_KNOWHERE_WARNING_ << "failed to return GPU resource";
+        }
+    }
+
+ private:
+    // An empty or expired handle is reported as an error instead of dereferencing a null pointer.
+    void
+    Lock() {
+        if (res_ == nullptr) {
+            KNOWHERE_THROW_MSG("GPU resource is not available");
+        }
+        res_->mutex_.lock();
+    }
+
+    ResPtr res_;  // hold resource until destruction
+    bool renew_;
+};
+
+}  // namespace knowhere
diff --git a/src/common/comp/knowhere_config.cc b/src/common/comp/knowhere_config.cc
index 79b6fac40..29817b816 100644
--- a/src/common/comp/knowhere_config.cc
+++ b/src/common/comp/knowhere_config.cc
@@ -17,6 +17,9 @@
 #include "faiss/utils/distances.h"
 #include "faiss/utils/utils.h"
 #include "knowhere/log.h"
+#ifdef USE_CUDA
+#include "knowhere/gpu/gpu_res_mgr.h"
+#endif
 #include "simd/hook.h"
 
 namespace knowhere {
@@ -123,4 +126,22 @@ KnowhereConfig::SetAioContextPool(size_t num_ctx, size_t max_events) {
 #endif
 }
 
+void
+KnowhereConfig::InitGPUResource(int64_t gpu_id, int64_t res_num) {
+#ifdef USE_CUDA  // without USE_CUDA the whole body is compiled out and the call does nothing
+    LOG_KNOWHERE_INFO_ << "init GPU resource for gpu id " << gpu_id << ", resource num " << res_num;
+    knowhere::GPUParams gpu_params(res_num);  // only the resource count is overridden, memory sizes keep defaults
+    knowhere::GPUResMgr::GetInstance().InitDevice(gpu_id, gpu_params);  // records the device id and parameters
+    knowhere::GPUResMgr::GetInstance().Init();  // creates the resources unless the manager is already initialized
+#endif
+}
+
+void
+KnowhereConfig::FreeGPUResource() {
+#ifdef USE_CUDA  // without USE_CUDA the whole body is compiled out and the call does nothing
+    LOG_KNOWHERE_INFO_ << "free GPU resource";
+    knowhere::GPUResMgr::GetInstance().Free();  // empties the resource pool and marks the manager uninitialized
+#endif
+}
+
 }  // namespace knowhere
diff --git a/src/index/flat_gpu/flat_gpu.cc b/src/index/flat_gpu/flat_gpu.cc
index 94434bcfa..a7c846f59 100644
--- a/src/index/flat_gpu/flat_gpu.cc
+++ b/src/index/flat_gpu/flat_gpu.cc
@@ -9,28 +9,17 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License.
 
-#include <functional>
-#include <map>
-
 #include "common/metric.h"
 #include "faiss/IndexFlat.h"
 #include "faiss/gpu/GpuCloner.h"
-#include "faiss/gpu/GpuIndexFlat.h"
-#include "faiss/gpu/StandardGpuResources.h"
 #include "faiss/index_io.h"
 #include "index/flat_gpu/flat_gpu_config.h"
 #include "io/FaissIO.h"
 #include "knowhere/factory.h"
-#include "knowhere/index_node_thread_pool_wrapper.h"
+#include "knowhere/gpu/gpu_res_mgr.h"
 
 namespace knowhere {
 
-static faiss::gpu::StandardGpuResources*
-GetGpuRes() {
-    static faiss::gpu::StandardGpuResources res;
-    return &res;
-}
-
 class GpuFlatIndexNode : public IndexNode {
  public:
     GpuFlatIndexNode(const Object& object) : gpu_index_(nullptr) {
@@ -59,28 +48,25 @@ class GpuFlatIndexNode : public IndexNode {
             return metric.error();
         }
 
-        for (auto dev : f_cfg.gpu_ids) {
-            this->devs_.push_back(dev);
-            this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources);
-        }
-
         const void* x = dataset.GetTensor();
         const int64_t n = dataset.GetRows();
         const int64_t dim = dataset.GetDim();
-        faiss::Index* gpu_index = nullptr;
+        faiss::Index* host_index = nullptr;
         try {
-            auto host_index = std::make_unique<faiss::IndexFlat>(dim, metric.value());
-            gpu_index = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, host_index.get());
-            gpu_index->add(n, (const float*)x);
+            host_index = new faiss::IndexFlat(dim, metric.value());
+            host_index->add(n, (const float*)x);
+            // need not copy index from CPU to GPU for IDMAP
         } catch (const std::exception& e) {
-            if (gpu_index)
-                delete gpu_index;
+            if (host_index) {
+                delete host_index;
+            }
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
             return Status::faiss_inner_error;
         }
-        if (this->gpu_index_)
+        if (this->gpu_index_) {
             delete this->gpu_index_;
-        this->gpu_index_ = gpu_index;
+        }
+        this->gpu_index_ = host_index;
         return Status::success;
     }
 
@@ -100,6 +86,8 @@ class GpuFlatIndexNode : public IndexNode {
         try {
             ids = new (std::nothrow) int64_t[len];
             dis = new (std::nothrow) float[len];
+
+            ResScope rs(res_, false);
             gpu_index_->search(nq, (const float*)x, f_cfg.k, dis, ids, bitset);
         } catch (const std::exception& e) {
             std::unique_ptr<int64_t[]> auto_delete_ids(ids);
@@ -148,20 +136,13 @@ class GpuFlatIndexNode : public IndexNode {
         }
         try {
             MemoryIOWriter writer;
-            std::unique_ptr<faiss::Index> host_index(faiss::gpu::index_gpu_to_cpu(gpu_index_));
+            // Serialize() is called after Add(), at this time gpu_index_ is CPU index actually
+            faiss::Index* host_index = gpu_index_;
 
-            faiss::write_index(host_index.get(), &writer);
+            faiss::write_index(host_index, &writer);
             std::shared_ptr<uint8_t[]> data(writer.data_);
 
             binset.Append("FLAT", data, writer.rp);
-
-            size_t dev_s = this->devs_.size();
-            uint8_t* buf = new uint8_t[sizeof(dev_s) + sizeof(int) * dev_s];
-            auto device_id_ = std::shared_ptr<uint8_t[]>(buf);
-            memcpy(buf, &dev_s, sizeof(dev_s));
-            memcpy(buf + sizeof(dev_s), this->devs_.data(), sizeof(devs_[0]) * dev_s);
-            binset.Append("device_ids", device_id_, sizeof(size_t) + sizeof(int) * dev_s);
-
         } catch (const std::exception& e) {
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
             return Status::faiss_inner_error;
@@ -178,14 +159,10 @@ class GpuFlatIndexNode : public IndexNode {
             reader.data_ = binary->data.get();
             std::unique_ptr<faiss::Index> index(faiss::read_index(&reader));
 
-            size_t dev_s = 1;
-            auto device_ids = binset.GetByName("device_ids");
-            memcpy(&dev_s, device_ids->data.get(), sizeof(dev_s));
-            this->devs_.resize(dev_s);
-            memcpy(this->devs_.data(), device_ids->data.get() + sizeof(size_t), sizeof(int) * dev_s);
-            for (size_t i = 0; i < dev_s; ++i)
-                this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources);
-            gpu_index_ = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, index.get());
+            auto gpu_res = GPUResMgr::GetInstance().GetRes();
+            ResScope rs(gpu_res, true);
+            gpu_index_ = faiss::gpu::index_cpu_to_gpu(gpu_res->faiss_res_.get(), gpu_res->gpu_id_, index.get());
+            res_ = gpu_res;
         } catch (const std::exception& e) {
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
             return Status::faiss_inner_error;
@@ -216,22 +193,20 @@ class GpuFlatIndexNode : public IndexNode {
 
     virtual std::string
     Type() const override {
-        return "GPUFLAT";
+        return knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP;
     }
 
     virtual ~GpuFlatIndexNode() {
-        if (gpu_index_)
+        if (gpu_index_) {
             delete gpu_index_;
+        }
     }
 
  private:
-    std::vector<int> devs_;
-    std::vector<faiss::gpu::GpuResourcesProvider*> res_;
+    mutable ResWPtr res_;
     faiss::Index* gpu_index_;
 };
 
-KNOWHERE_REGISTER_GLOBAL(GPUFLAT, [](const Object& object) {
-    return Index<IndexNodeThreadPoolWrapper>::Create(std::make_unique<GpuFlatIndexNode>(object));
-});
+KNOWHERE_REGISTER_GLOBAL(GPU_FLAT, [](const Object& object) { return Index<GpuFlatIndexNode>::Create(object); });
 
 }  // namespace knowhere
diff --git a/src/index/flat_gpu/flat_gpu_config.h b/src/index/flat_gpu/flat_gpu_config.h
index 614ece7da..ba73691e4 100644
--- a/src/index/flat_gpu/flat_gpu_config.h
+++ b/src/index/flat_gpu/flat_gpu_config.h
@@ -18,14 +18,9 @@ namespace knowhere {
 
 class GpuFlatConfig : public FlatConfig {
  public:
-    CFG_LIST gpu_ids;
+    CFG_INT gpu_id;
     KNOHWERE_DECLARE_CONFIG(GpuFlatConfig) {
-        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids)
-            .description("gpu device ids")
-            .set_default({
-                0,
-            })
-            .for_train();
+        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train();
     }
 };
 
diff --git a/src/index/ivf_gpu/ivf_gpu.cc b/src/index/ivf_gpu/ivf_gpu.cc
index a410b9a71..d47d97dfd 100644
--- a/src/index/ivf_gpu/ivf_gpu.cc
+++ b/src/index/ivf_gpu/ivf_gpu.cc
@@ -17,12 +17,15 @@
 #include "faiss/IndexScalarQuantizer.h"
 #include "faiss/gpu/GpuCloner.h"
 #include "faiss/gpu/GpuIndexIVF.h"
-#include "faiss/gpu/StandardGpuResources.h"
+#include "faiss/gpu/GpuIndexIVFFlat.h"
+#include "faiss/gpu/GpuIndexIVFPQ.h"
+#include "faiss/gpu/GpuIndexIVFScalarQuantizer.h"
 #include "faiss/index_io.h"
 #include "index/ivf_gpu/ivf_gpu_config.h"
 #include "io/FaissIO.h"
+#include "knowhere/comp/index_param.h"
 #include "knowhere/factory.h"
-#include "knowhere/index_node_thread_pool_wrapper.h"
+#include "knowhere/gpu/gpu_res_mgr.h"
 
 namespace knowhere {
 
@@ -45,7 +48,7 @@ struct KnowhereConfigType<faiss::IndexIVFScalarQuantizer> {
 template <typename T>
 class GpuIvfIndexNode : public IndexNode {
  public:
-    GpuIvfIndexNode(const Object& object) : devs_({}), res_{}, gpu_index_(nullptr) {
+    GpuIvfIndexNode(const Object& object) : gpu_index_(nullptr) {
         static_assert(std::is_same<T, faiss::IndexIVFFlat>::value || std::is_same<T, faiss::IndexIVFPQ>::value ||
                       std::is_same<T, faiss::IndexIVFScalarQuantizer>::value);
     }
@@ -70,52 +73,39 @@ class GpuIvfIndexNode : public IndexNode {
         auto dim = dataset.GetDim();
         auto ivf_gpu_cfg = static_cast<const typename KnowhereConfigType<T>::Type&>(cfg);
 
-        for (size_t i = 0; i < ivf_gpu_cfg.gpu_ids.size(); ++i) {
-            this->devs_.push_back(i);
-            this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources);
-        }
-
         auto metric = Str2FaissMetricType(ivf_gpu_cfg.metric_type);
         if (!metric.has_value()) {
             LOG_KNOWHERE_WARNING_ << "please check metric value: " << ivf_gpu_cfg.metric_type;
             return metric.error();
         }
+
         faiss::Index* gpu_index = nullptr;
         try {
-            auto qzr = new (std::nothrow) faiss::IndexFlat(dim, metric.value());
-            if (qzr == nullptr) {
-                LOG_KNOWHERE_WARNING_ << "memory malloc error";
-                return Status::malloc_error;
-            }
-            std::unique_ptr<faiss::IndexFlat> auto_delele_qzr(qzr);
-            T* host_index = nullptr;
+            auto gpu_res = GPUResMgr::GetInstance().GetRes();
+            ResScope rs(gpu_res, true);
+
             if constexpr (std::is_same<T, faiss::IndexIVFFlat>::value) {
-                host_index = new (std::nothrow) faiss::IndexIVFFlat(qzr, dim, ivf_gpu_cfg.nlist, metric.value());
-                if (host_index == nullptr) {
-                    LOG_KNOWHERE_WARNING_ << "memory malloc error";
-                    return Status::malloc_error;
-                }
+                faiss::gpu::GpuIndexIVFFlatConfig f_cfg;
+                f_cfg.device = static_cast<int32_t>(gpu_res->gpu_id_);
+                gpu_index = new faiss::gpu::GpuIndexIVFFlat(gpu_res->faiss_res_.get(), dim, ivf_gpu_cfg.nlist,
+                                                            metric.value(), f_cfg);
             }
             if constexpr (std::is_same<T, faiss::IndexIVFPQ>::value) {
-                host_index = new (std::nothrow)
-                    faiss::IndexIVFPQ(qzr, dim, ivf_gpu_cfg.nlist, ivf_gpu_cfg.m, ivf_gpu_cfg.nbits, metric.value());
-                if (host_index == nullptr) {
-                    LOG_KNOWHERE_WARNING_ << "memory malloc error";
-                    return Status::malloc_error;
-                }
+                faiss::gpu::GpuIndexIVFPQConfig f_cfg;
+                f_cfg.device = static_cast<int32_t>(gpu_res->gpu_id_);
+                gpu_index = new faiss::gpu::GpuIndexIVFPQ(gpu_res->faiss_res_.get(), dim, ivf_gpu_cfg.nlist,
+                                                          ivf_gpu_cfg.m, ivf_gpu_cfg.nbits, metric.value(), f_cfg);
             }
             if constexpr (std::is_same<T, faiss::IndexIVFScalarQuantizer>::value) {
-                host_index = new (std::nothrow) faiss::IndexIVFScalarQuantizer(
-                    qzr, dim, ivf_gpu_cfg.nlist, faiss::QuantizerType::QT_8bit, metric.value());
-                if (host_index == nullptr) {
-                    LOG_KNOWHERE_WARNING_ << "memory malloc error";
-                    return Status::malloc_error;
-                }
+                faiss::gpu::GpuIndexIVFScalarQuantizerConfig f_cfg;
+                f_cfg.device = static_cast<int32_t>(gpu_res->gpu_id_);
+                gpu_index = new faiss::gpu::GpuIndexIVFScalarQuantizer(gpu_res->faiss_res_.get(), dim,
+                                                                       ivf_gpu_cfg.nlist, faiss::QuantizerType::QT_8bit,
+                                                                       metric.value(), true, f_cfg);
             }
-            std::unique_ptr<T> auto_delete_host_index(host_index);
-            gpu_index = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, host_index);
-            gpu_index->train(rows, reinterpret_cast<const float*>(tensor));
 
+            gpu_index->train(rows, reinterpret_cast<const float*>(tensor));
+            res_ = gpu_res;
         } catch (std::exception& e) {
             if (gpu_index) {
                 delete gpu_index;
@@ -136,6 +126,7 @@ class GpuIvfIndexNode : public IndexNode {
         auto rows = dataset.GetRows();
         auto tensor = dataset.GetTensor();
         try {
+            ResScope rs(res_, false);
             gpu_index_->add(rows, (const float*)tensor);
         } catch (std::exception& e) {
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
@@ -147,16 +138,11 @@ class GpuIvfIndexNode : public IndexNode {
     virtual expected<DataSetPtr, Status>
     Search(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override {
         auto ivf_gpu_cfg = static_cast<const typename KnowhereConfigType<T>::Type&>(cfg);
-        if (auto ix = dynamic_cast<faiss::IndexReplicas*>(gpu_index_)) {
-            for (int i = 0; i < ix->count(); ++i) {
-                auto idx = dynamic_cast<faiss::gpu::GpuIndexIVF*>(ix->at(i));
-                assert(idx != nullptr);
-                idx->setNumProbes(ivf_gpu_cfg.nprobe);
-            }
-        }
         if (auto ix = dynamic_cast<faiss::gpu::GpuIndexIVF*>(gpu_index_)) {
             ix->setNumProbes(ivf_gpu_cfg.nprobe);
         }
+        ResScope rs(res_, false);
+
         constexpr int64_t block_size = 2048;
         auto rows = dataset.GetRows();
         auto k = ivf_gpu_cfg.k;
@@ -205,19 +191,12 @@ class GpuIvfIndexNode : public IndexNode {
         try {
             MemoryIOWriter writer;
             {
-                faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(this->gpu_index_);
+                faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(gpu_index_);
                 faiss::write_index(host_index, &writer);
                 delete host_index;
             }
             std::shared_ptr<uint8_t[]> data(writer.data_);
-
             binset.Append("IVF", data, writer.rp);
-            size_t dev_s = this->devs_.size();
-            uint8_t* buf = new uint8_t[sizeof(dev_s) + sizeof(int) * dev_s];
-            auto device_id_ = std::shared_ptr<uint8_t[]>(buf);
-            memcpy(buf, &dev_s, sizeof(dev_s));
-            memcpy(buf + sizeof(dev_s), this->devs_.data(), sizeof(devs_[0]) * dev_s);
-            binset.Append("device_ids", device_id_, sizeof(size_t) + sizeof(int) * dev_s);
         } catch (std::exception& e) {
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
             return Status::faiss_inner_error;
@@ -235,14 +214,10 @@ class GpuIvfIndexNode : public IndexNode {
             reader.data_ = binary->data.get();
 
             std::unique_ptr<faiss::Index> index(faiss::read_index(&reader));
-            size_t dev_s = 1;
-            auto device_ids = binset.GetByName("device_ids");
-            memcpy(&dev_s, device_ids->data.get(), sizeof(dev_s));
-            this->devs_.resize(dev_s);
-            memcpy(this->devs_.data(), device_ids->data.get() + sizeof(size_t), sizeof(int) * dev_s);
-            for (size_t i = 0; i < dev_s; ++i)
-                this->res_.push_back(new (std::nothrow) faiss::gpu::StandardGpuResources);
-            gpu_index_ = faiss::gpu::index_cpu_to_gpu_multiple(this->res_, this->devs_, index.get());
+            auto gpu_res = GPUResMgr::GetInstance().GetRes();
+            ResScope rs(gpu_res, true);
+            gpu_index_ = faiss::gpu::index_cpu_to_gpu(gpu_res->faiss_res_.get(), gpu_res->gpu_id_, index.get());
+            res_ = gpu_res;
         } catch (std::exception& e) {
             LOG_KNOWHERE_WARNING_ << "faiss inner error, " << e.what();
             return Status::faiss_inner_error;
@@ -277,13 +252,13 @@ class GpuIvfIndexNode : public IndexNode {
     virtual std::string
     Type() const override {
         if constexpr (std::is_same<faiss::IndexIVFFlat, T>::value) {
-            return "GPUIVFFLAT";
+            return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT;
         }
         if constexpr (std::is_same<faiss::IndexIVFPQ, T>::value) {
-            return "GPUIVFPQ";
+            return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ;
         }
         if constexpr (std::is_same<faiss::IndexIVFScalarQuantizer, T>::value) {
-            return "GPUIVFSQ";
+            return knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8;
         }
     }
 
@@ -291,25 +266,21 @@ class GpuIvfIndexNode : public IndexNode {
         if (gpu_index_) {
             delete gpu_index_;
         }
-        for (auto&& p : res_) {
-            delete p;
-        }
     }
 
  private:
-    std::vector<int32_t> devs_;
-    std::vector<faiss::gpu::GpuResourcesProvider*> res_;
+    mutable ResWPtr res_;
     faiss::Index* gpu_index_;
 };
 
-KNOWHERE_REGISTER_GLOBAL(GPUIVFFLAT, [](const Object& object) {
-    return Index<IndexNodeThreadPoolWrapper>::Create(std::make_unique<GpuIvfIndexNode<faiss::IndexIVFFlat>>(object));
+KNOWHERE_REGISTER_GLOBAL(GPU_IVF_FLAT, [](const Object& object) {
+    return Index<GpuIvfIndexNode<faiss::IndexIVFFlat>>::Create(object);
 });
-KNOWHERE_REGISTER_GLOBAL(GPUIVFPQ, [](const Object& object) {
-    return Index<IndexNodeThreadPoolWrapper>::Create(std::make_unique<GpuIvfIndexNode<faiss::IndexIVFPQ>>(object));
+KNOWHERE_REGISTER_GLOBAL(GPU_IVF_PQ, [](const Object& object) {
+    return Index<GpuIvfIndexNode<faiss::IndexIVFPQ>>::Create(object);
 });
-KNOWHERE_REGISTER_GLOBAL(GPUIVFSQ, [](const Object& object) {
-    return Index<IndexNodeThreadPoolWrapper>::Create(
-        std::make_unique<GpuIvfIndexNode<faiss::IndexIVFScalarQuantizer>>(object));
+KNOWHERE_REGISTER_GLOBAL(GPU_IVF_SQ8, [](const Object& object) {
+    return Index<GpuIvfIndexNode<faiss::IndexIVFScalarQuantizer>>::Create(object);
 });
+
 }  // namespace knowhere
diff --git a/src/index/ivf_gpu/ivf_gpu_config.h b/src/index/ivf_gpu/ivf_gpu_config.h
index 6a3d4b7cb..63b31de46 100644
--- a/src/index/ivf_gpu/ivf_gpu_config.h
+++ b/src/index/ivf_gpu/ivf_gpu_config.h
@@ -15,40 +15,25 @@ namespace knowhere {
 
 class GpuIvfFlatConfig : public IvfFlatConfig {
  public:
-    CFG_LIST gpu_ids;
+    CFG_INT gpu_id;  // single GPU device id; declared below with default 0 and for_train()
     KNOHWERE_DECLARE_CONFIG(GpuIvfFlatConfig) {
-        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids)
-            .description("gpu device ids")
-            .set_default({
-                0,
-            })
-            .for_train();
+        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train();
     }
 };
 
 class GpuIvfPqConfig : public IvfPqConfig {
  public:
-    CFG_LIST gpu_ids;
+    CFG_INT gpu_id;  // single GPU device id; declared below with default 0 and for_train()
     KNOHWERE_DECLARE_CONFIG(GpuIvfPqConfig) {
-        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids)
-            .description("gpu device ids")
-            .set_default({
-                0,
-            })
-            .for_train();
+        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train();
     }
 };
 
 class GpuIvfSqConfig : public IvfSqConfig {
  public:
-    CFG_LIST gpu_ids;
+    CFG_INT gpu_id;  // single GPU device id; declared below with default 0 and for_train()
     KNOHWERE_DECLARE_CONFIG(GpuIvfSqConfig) {
-        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids)
-            .description("gpu device ids")
-            .set_default({
-                0,
-            })
-            .for_train();
+        KNOWHERE_CONFIG_DECLARE_FIELD(gpu_id).description("gpu device id").set_default(0).for_train();
     }
 };
 
diff --git a/tests/ut/CMakeLists.txt b/tests/ut/CMakeLists.txt
index 43613c0d9..e884cc1ab 100644
--- a/tests/ut/CMakeLists.txt
+++ b/tests/ut/CMakeLists.txt
@@ -26,9 +26,6 @@ set_target_properties(knowhere_tests PROPERTIES
   CXX_STANDARD 17
 )
 
-if(USE_CUDA)
-  add_definitions(-DUSE_CUDA)
-endif()
 if(WITH_DISKANN)
     target_link_libraries(knowhere_tests PRIVATE Catch2::Catch2WithMain knowhere stdc++fs)
 else()
diff --git a/tests/ut/test_gpu_search.cc b/tests/ut/test_gpu_search.cc
new file mode 100644
index 000000000..dd070a178
--- /dev/null
+++ b/tests/ut/test_gpu_search.cc
@@ -0,0 +1,121 @@
+// Copyright (C) 2019-2023 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include "catch2/catch_approx.hpp"
+#include "catch2/catch_test_macros.hpp"
+#include "catch2/generators/catch_generators.hpp"
+#include "knowhere/comp/index_param.h"
+#include "knowhere/comp/knowhere_config.h"
+#include "knowhere/factory.h"
+#include "utils.h"
+
+#ifdef USE_CUDA
+TEST_CASE("Test All GPU Index", "[search]") {
+    using Catch::Approx;
+
+    knowhere::KnowhereConfig::InitGPUResource(0);  // paired with FreeGPUResource() at the end of this test case
+
+    int64_t nb = 10000, nq = 1000;  // row counts of the base (train) set and the query set
+    int64_t dim = 128;
+    int64_t seed = 42;  // the same seed is passed to GenDataSet for both the base and the query set
+
+    auto base_gen = [&]() {  // parameters shared by every index type in this test case
+        knowhere::Json json;
+        json[knowhere::meta::DIM] = dim;
+        json[knowhere::meta::METRIC_TYPE] = knowhere::metric::L2;
+        json[knowhere::meta::TOPK] = 1;
+        json[knowhere::meta::RADIUS] = 10.0;
+        json[knowhere::meta::RANGE_FILTER] = 0.0;
+        return json;
+    };
+
+    auto ivfflat_gen = [&base_gen]() {  // base parameters plus NLIST / NPROBE
+        knowhere::Json json = base_gen();
+        json[knowhere::indexparam::NLIST] = 16;
+        json[knowhere::indexparam::NPROBE] = 4;
+        return json;
+    };
+
+    auto ivfsq_gen = ivfflat_gen;  // IVF_SQ8 is configured with the same parameters as IVF_FLAT
+
+    auto ivfpq_gen = [&ivfflat_gen]() {  // IVF_FLAT parameters plus M / NBITS
+        knowhere::Json json = ivfflat_gen();
+        json[knowhere::indexparam::M] = 4;
+        json[knowhere::indexparam::NBITS] = 8;
+        return json;
+    };
+
+    auto gpu_flat_gen = [&base_gen]() {  // GPU_FLAT adds nothing to the base parameters
+        auto json = base_gen();
+        return json;
+    };
+
+    SECTION("Test Gpu Index Search") {
+        using std::make_tuple;
+        auto [name, gen] = GENERATE_REF(table<std::string, std::function<knowhere::Json()>>({
+            // GPU_FLAT is excluded from this test because its Train() and Add() actually run on the CPU,
+            // so "res_" in gpu_index_ is not set correctly.
+            // make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP, gpu_flat_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT, ivfflat_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ, ivfpq_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen),
+        }));
+        auto idx = knowhere::IndexFactory::Instance().Create(name);
+        auto cfg_json = gen().dump();
+        CAPTURE(name, cfg_json);
+        knowhere::Json json = knowhere::Json::parse(cfg_json);  // config is rebuilt from its dumped string
+        auto train_ds = GenDataSet(nb, dim, seed);
+        auto query_ds = GenDataSet(nq, dim, seed);
+        REQUIRE(idx.Type() == name);
+        auto res = idx.Build(*train_ds, json);
+        REQUIRE(res == knowhere::Status::success);
+        auto results = idx.Search(*query_ds, json, nullptr);
+        REQUIRE(results.has_value());
+        auto ids = results.value()->GetIds();
+        for (int i = 0; i < nq; ++i) {  // TOPK is 1; assumes query i equals base row i (same seed) - TODO confirm
+            CHECK(ids[i] == i);
+        }
+    }
+
+    SECTION("Test Gpu Index Serialize/Deserialize") {
+        using std::make_tuple;
+        auto [name, gen] = GENERATE_REF(table<std::string, std::function<knowhere::Json()>>({
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IDMAP, gpu_flat_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFFLAT, ivfflat_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFPQ, ivfpq_gen),
+            make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen),
+        }));
+
+        auto idx = knowhere::IndexFactory::Instance().Create(name);
+        auto cfg_json = gen().dump();
+        CAPTURE(name, cfg_json);
+        knowhere::Json json = knowhere::Json::parse(cfg_json);  // config is rebuilt from its dumped string
+        auto train_ds = GenDataSet(nb, dim, seed);
+        auto query_ds = GenDataSet(nq, dim, seed);
+        REQUIRE(idx.Type() == name);
+        auto res = idx.Build(*train_ds, json);
+        REQUIRE(res == knowhere::Status::success);
+        knowhere::BinarySet bs;
+        idx.Serialize(bs);  // NOTE(review): the result of Serialize() is not checked here
+
+        auto idx_ = knowhere::IndexFactory::Instance().Create(name);  // second index, filled only from bs
+        idx_.Deserialize(bs);  // NOTE(review): the result of Deserialize() is not checked here
+        auto results = idx_.Search(*query_ds, json, nullptr);
+        REQUIRE(results.has_value());
+        auto ids = results.value()->GetIds();
+        for (int i = 0; i < nq; ++i) {  // TOPK is 1; assumes query i equals base row i (same seed) - TODO confirm
+            CHECK(ids[i] == i);
+        }
+    }
+
+    knowhere::KnowhereConfig::FreeGPUResource();
+}
+#endif
diff --git a/tests/ut/test_search.cc b/tests/ut/test_search.cc
index 94e681520..52cee24c9 100644
--- a/tests/ut/test_search.cc
+++ b/tests/ut/test_search.cc
@@ -13,6 +13,7 @@
 #include "catch2/catch_test_macros.hpp"
 #include "catch2/generators/catch_generators.hpp"
 #include "knowhere/comp/index_param.h"
+#include "knowhere/comp/knowhere_config.h"
 #include "knowhere/factory.h"
 #include "utils.h"
 
@@ -82,12 +83,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") {
         REQUIRE(res == knowhere::Status::success);
     };
 
-#ifdef USE_CUDA
-    auto gpu_flat_gen = [&base_gen]() {
-        auto json = base_gen();
-        return json;
-    };
-#endif
     SECTION("Test Cpu Index Search") {
         using std::make_tuple;
         auto [name, gen] = GENERATE_REF(table<std::string, std::function<knowhere::Json()>>({
@@ -97,12 +92,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") {
             make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen),
             make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen),
             make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen),
-#ifdef USE_CUDA
-            make_tuple("GPUFLAT", gpu_flat_gen),
-            make_tuple("GPUIVFFLAT", ivfflat_gen),
-            make_tuple("GPUIVFPQ", ivfpq_gen),
-            make_tuple("GPUIVFSQ", ivfsq_gen),
-#endif
         }));
         auto idx = knowhere::IndexFactory::Instance().Create(name);
         auto cfg_json = gen().dump();
@@ -163,12 +152,6 @@ TEST_CASE("Test All Mem Index Search", "[search]") {
             make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen),
             make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen),
             make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen),
-#ifdef USE_CUDA
-            make_tuple("GPUFLAT", gpu_flat_gen),
-            make_tuple("GPUIVFFLAT", ivfflat_gen),
-            make_tuple("GPUIVFPQ", ivfpq_gen),
-            make_tuple("GPUIVFSQ", ivfsq_gen),
-#endif
         }));
 
         auto idx = knowhere::IndexFactory::Instance().Create(name);