add SONG (#72)

* SONG * SONG --------- Co-authored-by: MingqiWang-coder <[email protected]>
intellistream · Feb 13, 2025 · bb330f0 · bb330f0
1 parent 2127b19
commit bb330f0
Show file tree

Hide file tree

Showing 21 changed files with 2,010 additions and 9 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,8 +1,15 @@
-
-cmake_minimum_required(VERSION 3.14)
+#set(CMAKE_C_COMPILER "/usr/bin/gcc-11")
+#set(CMAKE_CXX_COMPILER "/usr/bin/g++-11")
+#set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
+cmake_minimum_required(VERSION 3.27)
 project(CANDYBENCH CXX)
+
+option(ENABLE_CUDA "Enable cuda" ON)
+message(STATUS "1.0 CUDA enabled: ${ENABLE_CUDA}")
 include (cmake/FindCuda.cmake)
 include (cmake/FindTorch.cmake)
+#set(CMAKE_CUDA_ARCHITECTURES "70;75;80")
+set(CMAKE_CUDA_ARCHITECTURES ALL)
 find_package(Torch REQUIRED)
 find_package(Python3 REQUIRED COMPONENTS Development)
 include_directories(${Python3_INCLUDE_DIRS})
@@ -27,6 +34,18 @@ include(cmake/default.cmake)
 #set(CMAKE_CXX_FLAGS "-fno-openmp")
 #test avx2
 
+# Turn on the CUDA language and the ENABLE_CUDA compile definition when the
+# ENABLE_CUDA switch is still ON at this point.
+message(STATUS "2.0 CUDA enabled: ${ENABLE_CUDA}")
+if (ENABLE_CUDA)
+    enable_language(CUDA)
+    set(CMAKE_CUDA_STANDARD 20)
+    # NOTE(review): this replaces the CMAKE_CUDA_ARCHITECTURES value set near
+    # the top of this file (ALL) with OFF -- confirm which one is intended.
+    set(CMAKE_CUDA_ARCHITECTURES OFF)
+    # Directory-scoped definition; it also reaches subdirectories added after
+    # this line.
+    add_definitions(-DENABLE_CUDA=1)
+    message(STATUS "CUDA is enabled")
+else()
+    message(STATUS "CUDA is not enabled")
+endif ()
+
 add_subdirectory(thirdparty/faiss)
 
 # Set specific options for Faiss compilation
@@ -263,10 +282,10 @@ else ()
     find_package(glog)
     #set(GFLAGS_LIBRARIES /usr/local/lib/libgflags.so)
     #set(GLOG_LIBRARIES /usr/local/lib/libglog.so)
-#
+    #
     add_compile_definitions(GLOG_USE_GLOG_EXPORT)
     set(LIBRARIES ${LIBRARIES} gflags)
-#
+    #
     set(LIBRARIES ${LIBRARIES} glog)
 
 
@@ -323,11 +342,32 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/)
 # Add Library
 get_sources(CANDY_SOURCE_FILES)
 get_headers(CANDY_HEADER_FILES)
-add_library(CANDYBENCH SHARED ${CANDY_SOURCE_FILES} ${CANDY_HEADER_FILES} ${CMAKE_CURRENT_BINARY_DIR})
+
+# With CUDA enabled, mark IndexTable.cpp as a CUDA source even though it
+# keeps its .cpp extension.
+if (ENABLE_CUDA)
+    set_source_files_properties(src/CANDY/IndexTable.cpp
+            PROPERTIES
+            LANGUAGE CUDA
+    )
+endif ()
+
+# NOTE(review): IndexTable.cpp is listed explicitly here in addition to
+# ${CANDY_SOURCE_FILES}; verify whether get_sources() already collects it.
+add_library(CANDYBENCH SHARED ${CANDY_SOURCE_FILES} ${CANDY_HEADER_FILES} ${CMAKE_CURRENT_BINARY_DIR}
+        src/CANDY/IndexTable.cpp)
 
 set_property(TARGET CANDYBENCH PROPERTY CXX_STANDARD 20)
 target_include_directories(CANDYBENCH PUBLIC "include")
 
+# CUDA builds append cublas and cudart to LIBRARIES and request CUDA 20 on the
+# target; the C++ standard is 20 in either configuration.
+if (ENABLE_CUDA)
+    set(LIBRARIES ${LIBRARIES} cublas cudart)
+    set_property(TARGET CANDYBENCH PROPERTY CUDA_STANDARD 20)
+endif ()
+set_property(TARGET CANDYBENCH PROPERTY CXX_STANDARD 20)
+
 # Set the MKL library paths
 set(MKL_INCLUDE_DIR "/usr/include/mkl")
 set(MKL_LIB_DIR "/usr/lib/x86_64-linux-gnu")
@@ -428,4 +468,4 @@ message(STATUS "build path:" ${CMAKE_CURRENT_BINARY_DIR})
 # copy files needed for real world matrix loader
 set(source_directory "${CMAKE_SOURCE_DIR}/benchmark/datasets/")
 set(destination_directory "${CMAKE_BINARY_DIR}/benchmark/datasets/")
-file(COPY ${source_directory} DESTINATION ${destination_directory})
+file(COPY ${source_directory} DESTINATION ${destination_directory})
diff --git a/cmake/FindCuda.cmake b/cmake/FindCuda.cmake
@@ -42,6 +42,7 @@ function(find_valid_cuda MIN_CUDA_VERSION MAX_CUDA_VERSION)
 
     # If no valid CUDA was found, print a warning
     if (NOT VALID_CUDA_FOUND)
+        set(ENABLE_CUDA OFF PARENT_SCOPE)
         message(WARNING "No valid CUDA compiler found in the range ${MIN_CUDA_VERSION} - ${MAX_CUDA_VERSION}. I don't think you can use CUDA...")
     else()
         set(ENV{CUDACXX} ${CUDA_COMPILER_PATH})
@@ -50,4 +51,4 @@ function(find_valid_cuda MIN_CUDA_VERSION MAX_CUDA_VERSION)
 endfunction()
 
 # Call the function to find valid CUDA compilers with a specific version range
-find_valid_cuda("11.0" "12.5")
+find_valid_cuda("11.0" "12.6")
diff --git a/cmake/macros.cmake b/cmake/macros.cmake
@@ -53,7 +53,8 @@ macro(get_sources SOURCE_FILES)
 endmacro()
 
 macro(get_headers HEADER_FILES)
-    file(GLOB_RECURSE ${HEADER_FILES} "include/*.h" "include/*.hpp")
+    # Recursively glob the .h, .hpp and .cuh files under include/ into the
+    # variable named by HEADER_FILES; being a macro, it is set in the caller's scope.
+    file(GLOB_RECURSE ${HEADER_FILES} "include/*.h" "include/*.hpp" "include/*.cuh")
 endmacro()
 
 # Define the function to detect AVX-512 support

diff --git a/include/CANDY/SONG/SONG.hpp b/include/CANDY/SONG/SONG.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2024 by the INTELLI team
+ * Created by: Ziao Wang
+ * Created on: 2024/11/18
+ * Description: Declares CANDY::SONG, an AbstractIndex subclass built on the SONG_KERNEL data and graph types.
+ */
+
+// Include guard: the tested and defined macro must be the same name, otherwise
+// the guard never engages. The name matches the comment on the closing #endif.
+#ifndef CANDY_INCLUDE_CANDY_SONG_HPP
+#define CANDY_INCLUDE_CANDY_SONG_HPP
+
+#include <CANDY/AbstractIndex.h>
+#include <Utils/ConfigMap.hpp>
+#include <memory>
+#include <vector>
+#include "config.hpp"
+#include "data.hpp"
+#include "kernelgraph.cuh"
+
+namespace CANDY{
+
+/**
+ * @class SONG
+ * @brief Index class derived from AbstractIndex whose state is held in
+ * SONG_KERNEL::Data and SONG_KERNEL::GraphWrapper objects.
+ * @note Only declarations are visible in this header; the behaviour of the
+ * out-of-line members is defined in the implementation file.
+ */
+class SONG : public AbstractIndex {
+ protected:
+  // Configuration handle, null until assigned (presumably by setConfig -- confirm).
+  INTELLI::ConfigMapPtr myCfg = nullptr;
+  // Tensor members; their roles are not visible from this header.
+  torch::Tensor dbTensor, objTensor;
+  // Defaults to 768; presumably the vector dimensionality -- TODO confirm.
+  int64_t vecDim = 768;
+  // Defaults to 1000000; presumably the expected number of vectors -- TODO confirm.
+  int64_t vecVolume = 1000000;
+  // Value reported by size(); starts at 0.
+  int64_t idx = 0;
+  // Distance metric selector; defaults to L2.
+  faiss::MetricType Metric = faiss::METRIC_L2;
+  // Owned SONG_KERNEL objects; both are null until created by the implementation.
+  std::unique_ptr<SONG_KERNEL::Data> data = nullptr;
+  std::unique_ptr<SONG_KERNEL::GraphWrapper> graph = nullptr;
+
+  /**
+   * @brief convert a query tensor to a vector of pairs
+   * @param[in] t the query tensor
+   * @param[out] res the result vector
+   */
+  static void convertTensorToVectorPair(
+      torch::Tensor& t, std::vector<std::pair<int, SONG_KERNEL::value_t>>& res);
+
+  /**
+   * @brief convert a batch of query tensors to a batch of vectors of pairs
+   * @param[in] ts the query tensors
+   * @param[out] res the result vector
+   */
+  static void convertTensorToVectorPairBatch(
+      torch::Tensor& ts,
+      std::vector<std::vector<std::pair<int, SONG_KERNEL::value_t>>>& res);
+
+ public:
+  SONG() = default;
+
+  ~SONG() = default;
+
+  // Public counters initialised to 0; the names suggest microseconds of GPU
+  // compute and host/device transfer time -- TODO confirm in the implementation.
+  int64_t gpuComputingUs = 0;
+  int64_t gpuCommunicationUs = 0;
+
+  /**
+   * @brief apply a configuration map to this index
+   * @param cfg the configuration map
+   * @return bool; presumably true on success -- confirm in the implementation
+   */
+  virtual bool setConfig(INTELLI::ConfigMapPtr cfg);
+
+  /**
+   * @brief insert a tensor into the index
+   * @param t the tensor to insert (layout not visible here -- TODO confirm)
+   * @return bool; presumably true on success -- confirm in the implementation
+   */
+  virtual bool insertTensor(torch::Tensor &t);
+
+  /**
+   * @brief delete a tensor from the index
+   * @param t the tensor to delete
+   * @param k defaults to 1; its meaning is defined by the implementation
+   * @return bool; presumably true on success -- confirm in the implementation
+   */
+  virtual bool deleteTensor(torch::Tensor& t, int64_t k = 1);
+
+  /**
+   * @brief revise a tensor in the index
+   * @param t the tensor to be revised
+   * @param w the second tensor argument; presumably the replacement -- TODO confirm
+   * @return bool; presumably true on success -- confirm in the implementation
+   */
+  virtual bool reviseTensor(torch::Tensor& t, torch::Tensor& w);
+
+  /**
+   * @brief search the index with query tensor q
+   * @param q the query tensor
+   * @param k presumably the number of results per query -- TODO confirm
+   * @return a vector of tensors
+   */
+  virtual std::vector<torch::Tensor> searchTensor(torch::Tensor& q,int64_t k);
+
+  /**
+   * @brief report the current value of idx
+   * @return the idx member
+   */
+  [[nodiscard]] int64_t size() const { return idx; }
+
+  /**
+   * @brief reset the index statistics
+   * @return bool; presumably true on success -- confirm in the implementation
+   */
+  virtual bool resetIndexStatistics();
+
+  /**
+   * @brief get the index statistics as a configuration map
+   * @return the statistics map
+   */
+  virtual INTELLI::ConfigMapPtr getIndexStatistics();
+};
+
+/**
+ * @ingroup  CANDY_lib_bottom
+ * @typedef SONGPtr
+ * @brief The class to describe a shared pointer to @ref  SONG
+
+ */
+typedef std::shared_ptr<class CANDY::SONG> SONGPtr;
+#define newSONG std::make_shared<CANDY::SONG>
+}  // namespace CANDY
+
+#endif  //CANDY_INCLUDE_CANDY_SONG_HPP
diff --git a/include/CANDY/SONG/bin_heap.hpp b/include/CANDY/SONG/bin_heap.hpp
@@ -0,0 +1,51 @@
+#pragma once
+#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BINHEAP_HPP
+#define CANDY_INCLUDE_ALGORITHMS_SONG_BINHEAP_HPP
+
+namespace SONG_KERNEL {
+// [begin,end)
+/**
+ * @brief Sift the last element of [begin, end) up towards the root.
+ *
+ * A parent is swapped with its child whenever parent < child, so the larger
+ * element (by operator<) moves towards index 0.
+ * @param begin first element of the heap storage
+ * @param end one past the element that was just appended
+ */
+template <class T>
+__device__ void push_heap(T* begin, T* end) {
+  int child = static_cast<int>(end - begin) - 1;
+  int parent = (child - 1) / 2;
+  // At the root, (0 - 1) / 2 truncates to 0, so the loop ends through the
+  // comparison of the root with itself rather than through this condition.
+  while (parent >= 0) {
+    if (!(begin[parent] < begin[child])) {
+      break;
+    }
+    T moved = begin[child];
+    begin[child] = begin[parent];
+    begin[parent] = moved;
+    child = parent;
+    parent = (parent - 1) / 2;
+  }
+}
+
+/**
+ * @brief Remove and return the root of the heap stored in [begin, end).
+ *
+ * The last element is copied to the root and sifted down, always following the
+ * larger child (by operator<). The element count is not reduced here and the
+ * sift-down considers all end - begin slots; callers presumably shrink their
+ * range by one afterwards -- confirm at the call sites.
+ * @param begin first element of the heap storage
+ * @param end one past the last element
+ * @return the value that was at the root on entry
+ */
+template <class T>
+__device__ T pop_heap(T* begin, T* end) {
+  T top = begin[0];
+  begin[0] = *(end - 1);
+  const int count = end - begin;
+  int cur = 0;
+  while (cur + 1 < count) {
+    const int lhs = cur * 2 + 1;
+    const int rhs = cur * 2 + 2;
+    int pick;
+    if (rhs < count) {
+      pick = (begin[lhs] < begin[rhs]) ? rhs : lhs;
+    } else if (lhs < count) {
+      pick = lhs;
+    } else {
+      break;  // no children left
+    }
+    if (!(begin[cur] < begin[pick])) {
+      break;
+    }
+    T held = begin[cur];
+    begin[cur] = begin[pick];
+    begin[pick] = held;
+    cur = pick;
+  }
+  return top;
+}
+}  // namespace SONG_KERNEL
+#endif
diff --git a/include/CANDY/SONG/blocked_bloomfilter.hpp b/include/CANDY/SONG/blocked_bloomfilter.hpp
@@ -0,0 +1,83 @@
+#pragma once
+#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BLOCKEDBLOOMFILTER_HPP
+#define CANDY_INCLUDE_ALGORITHMS_SONG_BLOCKEDBLOOMFILTER_HPP
+
+#define GPU_CACHE_LINE_SIZE64 1
+#define GPU_CACHE_LINE_SHIFT 0
+#define BLOOMFILTER_DATA_T uint32_t
+#define BLOOMFILTER_SIZE64MULT 2
+#define BLOOMFILTER_SIZE_SHIFT 5
+
+namespace SONG_KERNEL {
+/**
+ * @brief Bloom filter over idx_t keys in which all probes of one key fall
+ * into a single block selected by get_offset().
+ *
+ * With the macro values defined above (GPU_CACHE_LINE_SIZE64 == 1,
+ * GPU_CACHE_LINE_SHIFT == 0, BLOOMFILTER_SIZE_SHIFT == 5) a block is one
+ * 32-bit word: hash() yields a bit position in [0, 31] and set_bit()/test_bit()
+ * address data[offset].
+ *
+ * @tparam size64 number of blocks addressed by get_offset(); the mask
+ *         (size64 - 1) assumes a power of two -- TODO confirm with callers
+ * @tparam shift not referenced inside this struct
+ * @tparam num_hash number of probes per key; probe i uses random_number[2i]
+ *         and [2i + 1], and index 9 is used by get_offset(), so values above 9
+ *         would read past the table or reuse the offset hash
+ */
+template <const int size64, const int shift, const int num_hash>
+struct BlockedBloomFilter {
+  BLOOMFILTER_DATA_T data[size64 * BLOOMFILTER_SIZE64MULT];
+
+  // Two 64-bit multipliers per hash index h: entries [2h] and [2h + 1].
+  const uint64_t random_number[10 * 2] = {
+      0x4bcb391f924ed183ULL, 0xa0ab69ccd854fc0aULL, 0x91086b9cecf5e3b7ULL,
+      0xc68e01641bead407ULL, 0x3a7b976128a30449ULL, 0x6d122efabfc4d99fULL,
+      0xe6700ef8715030e2ULL, 0x80dd0c3bffcfb45bULL, 0xe80f45af6e4ce166ULL,
+      0x6cf43e5aeb53c362ULL, 0x31a27265a93c4f40ULL, 0x743de943cecde0a4ULL,
+      0x5ed25dba0288592dULL, 0xa69eb51a362c37bcULL, 0x9a558fed9d4824f0ULL,
+      0xf75678c2fdbdd68bULL, 0x34423f0963258c85ULL, 0x3532778d6726905cULL,
+      0x6fef7cbe609500f9ULL, 0xb4419d54de48422ULL};
+
+  /// Clear every word of data, including the part beyond the first size64 entries.
+  __device__ BlockedBloomFilter() {
+    for (int i = 0; i < size64 * BLOOMFILTER_SIZE64MULT; ++i)
+      data[i] = 0;
+  }
+
+  /**
+   * @brief Mix x with the multiplier pair of hash index h.
+   * @param h hash index, valid for 0..9
+   * @param x key to mix
+   * @return the mixed value converted to int (unreduced)
+   */
+  __device__ int pure_hash(int h, idx_t x) {
+    x ^= x >> 33;
+    x *= random_number[h << 1];
+    x ^= x >> 33;
+    x *= random_number[(h << 1) + 1];
+    x ^= x >> 33;
+    return x;
+  }
+
+  /**
+   * @brief Bit position of probe h for key x inside its block.
+   * @return pure_hash(h, x) reduced to the low bits selected by
+   *         (GPU_CACHE_LINE_SIZE64 << BLOOMFILTER_SIZE_SHIFT) - 1
+   */
+  __device__ int hash(int h, idx_t x) {
+    // Same mixing as pure_hash; only the low bits survive the mask.
+    return pure_hash(h, x) &
+           ((GPU_CACHE_LINE_SIZE64 << BLOOMFILTER_SIZE_SHIFT) - 1);
+  }
+
+  /// Set bit x of the block starting at data[offset].
+  __device__ void set_bit(int offset, int x) {
+    data[offset + (x & (GPU_CACHE_LINE_SIZE64 - 1))] |=
+        (1ULL << (x >> GPU_CACHE_LINE_SHIFT));
+  }
+
+  /// Return whether bit x of the block starting at data[offset] is set.
+  __device__ bool test_bit(int offset, int x) {
+    return ((data[offset + (x & (GPU_CACHE_LINE_SIZE64 - 1))] >>
+             (x >> GPU_CACHE_LINE_SHIFT)) &
+            1);
+  }
+
+  /// Index into data of the block that key x maps to (uses hash index 9).
+  __device__ int get_offset(idx_t x) {
+    return (pure_hash(9, x) & ((size64 >> GPU_CACHE_LINE_SHIFT) - 1)) *
+           GPU_CACHE_LINE_SIZE64;
+  }
+
+  /// Insert key x by setting its num_hash probe bits.
+  __device__ void add(idx_t x) {
+    int offset = get_offset(x);
+    for (int i = 0; i < num_hash; ++i)
+      set_bit(offset, hash(i, x));
+  }
+
+  /**
+   * @brief Membership query for key x.
+   * @return true when all num_hash probe bits are set (false positives are
+   *         possible); false when at least one is clear
+   */
+  __device__ bool test(idx_t x) {
+    int offset = get_offset(x);
+    bool ok = true;
+    for (int i = 0; i < num_hash; ++i)
+      ok &= test_bit(offset, hash(i, x));
+    return ok;
+  }
+};
+}  // namespace SONG_KERNEL
+#endif
diff --git a/include/CANDY/SONG/bloomfilter.hpp b/include/CANDY/SONG/bloomfilter.hpp
@@ -0,0 +1,57 @@
+#pragma once
+#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BLOOMFILTER_HPP
+#define CANDY_INCLUDE_ALGORITHMS_SONG_BLOOMFILTER_HPP
+
+namespace SONG_KERNEL {
+/**
+ * @brief Bloom filter over idx_t keys backed by size64 64-bit words.
+ *
+ * @tparam size64 number of 64-bit words in data (size64 * 64 bits in total)
+ * @tparam shift not referenced inside this struct
+ * @tparam num_hash number of probes per key; probe i uses random_number[2i]
+ *         and [2i + 1], so at most 10 probes are backed by the table
+ */
+template <const int size64, const int shift, const int num_hash>
+struct BloomFilter {
+  uint64_t data[size64];
+
+  // Two 64-bit multipliers per hash index h: entries [2h] and [2h + 1].
+  const uint64_t random_number[10 * 2] = {
+      0x4bcb391f924ed183ULL, 0xa0ab69ccd854fc0aULL, 0x91086b9cecf5e3b7ULL,
+      0xc68e01641bead407ULL, 0x3a7b976128a30449ULL, 0x6d122efabfc4d99fULL,
+      0xe6700ef8715030e2ULL, 0x80dd0c3bffcfb45bULL, 0xe80f45af6e4ce166ULL,
+      0x6cf43e5aeb53c362ULL, 0x31a27265a93c4f40ULL, 0x743de943cecde0a4ULL,
+      0x5ed25dba0288592dULL, 0xa69eb51a362c37bcULL, 0x9a558fed9d4824f0ULL,
+      0xf75678c2fdbdd68bULL, 0x34423f0963258c85ULL, 0x3532778d6726905cULL,
+      0x6fef7cbe609500f9ULL, 0xb4419d54de48422ULL};
+
+  /// Start with every word cleared.
+  __device__ BloomFilter() {
+    int word = 0;
+    while (word < size64) {
+      data[word] = 0;
+      ++word;
+    }
+  }
+
+  /**
+   * @brief Bit position of probe h for key x.
+   * @param h hash index, valid for 0..9
+   * @param x key to hash
+   * @return the mixed key reduced modulo size64 * 64
+   */
+  __device__ int hash(int h, idx_t x) {
+    const int first = h << 1;
+    x ^= x >> 33;
+    x *= random_number[first];
+    x ^= x >> 33;
+    x *= random_number[first + 1];
+    x ^= x >> 33;
+    return x % ((size64 << 6));
+  }
+
+  /// Set bit position x: word x % size64, bit x / size64.
+  __device__ void set_bit(int x) {
+    const int word = x % size64;
+    const int bit = x / size64;
+    data[word] |= (1ULL << bit);
+  }
+
+  /// Return whether bit position x (word x % size64, bit x / size64) is set.
+  __device__ bool test_bit(int x) {
+    const int word = x % size64;
+    const int bit = x / size64;
+    return ((data[word] >> bit) & 1);
+  }
+
+  /// Insert key x by setting its num_hash probe bits.
+  __device__ void add(idx_t x) {
+    for (int round = 0; round < num_hash; ++round) {
+      const int pos = hash(round, x);
+      set_bit(pos);
+    }
+  }
+
+  /**
+   * @brief Membership query for key x; every probe is evaluated.
+   * @return true when all num_hash probe bits are set, false otherwise
+   */
+  __device__ bool test(idx_t x) {
+    bool present = true;
+    for (int round = 0; round < num_hash; ++round) {
+      const int pos = hash(round, x);
+      present &= test_bit(pos);
+    }
+    return present;
+  }
+};
+}  // namespace SONG_KERNEL
+#endif