Skip to content

Commit

Permalink
add SONG (#72)
Browse files Browse the repository at this point in the history
* SONG

* SONG

---------

Co-authored-by: MingqiWang-coder <[email protected]>
  • Loading branch information
MingqiWang-coder and wangyaostar authored Feb 13, 2025
1 parent 2127b19 commit bb330f0
Show file tree
Hide file tree
Showing 21 changed files with 2,010 additions and 9 deletions.
52 changes: 46 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@

# Optional toolchain overrides; uncomment to pin a specific host compiler.
#set(CMAKE_C_COMPILER "/usr/bin/gcc-11")
#set(CMAKE_CXX_COMPILER "/usr/bin/g++-11")
#set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")

# A single minimum-version declaration: an earlier, lower one would be
# overridden by this call anyway.
cmake_minimum_required(VERSION 3.27)
project(CANDYBENCH CXX)

# ENABLE_CUDA defaults to ON; cmake/FindCuda.cmake switches it OFF when no
# CUDA compiler in the supported version range is found.
option(ENABLE_CUDA "Enable cuda" ON)
message(STATUS "1.0 CUDA enabled: ${ENABLE_CUDA}")
include (cmake/FindCuda.cmake)
include (cmake/FindTorch.cmake)
#set(CMAKE_CUDA_ARCHITECTURES "70;75;80")
set(CMAKE_CUDA_ARCHITECTURES ALL)
find_package(Torch REQUIRED)
find_package(Python3 REQUIRED COMPONENTS Development)
include_directories(${Python3_INCLUDE_DIRS})
Expand All @@ -27,6 +34,18 @@ include(cmake/default.cmake)
#set(CMAKE_CXX_FLAGS "-fno-openmp")
#test avx2

# Turn on the CUDA language and expose ENABLE_CUDA=1 to the compiled sources
# when CUDA support is requested.
message(STATUS "2.0 CUDA enabled: ${ENABLE_CUDA}")
if (ENABLE_CUDA)
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 20)
    # A false value means CMake passes no architecture flags to the CUDA
    # compiler for targets created after this point.
    set(CMAKE_CUDA_ARCHITECTURES OFF)
    # add_compile_definitions() is the current form of add_definitions(-D...)
    # and matches the command used for the glog definition in this file.
    add_compile_definitions(ENABLE_CUDA=1)
    message(STATUS "CUDA is enabled")
else ()
    message(STATUS "CUDA is not enabled")
endif ()

add_subdirectory(thirdparty/faiss)

# Set specific options for Faiss compilation
Expand Down Expand Up @@ -263,10 +282,10 @@ else ()
find_package(glog)
#set(GFLAGS_LIBRARIES /usr/local/lib/libgflags.so)
#set(GLOG_LIBRARIES /usr/local/lib/libglog.so)
#
#
add_compile_definitions(GLOG_USE_GLOG_EXPORT)
set(LIBRARIES ${LIBRARIES} gflags)
#
#
set(LIBRARIES ${LIBRARIES} glog)


Expand Down Expand Up @@ -323,11 +342,32 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/)
# Add Library
get_sources(CANDY_SOURCE_FILES)
get_headers(CANDY_HEADER_FILES)

# With CUDA enabled, IndexTable.cpp is compiled by the CUDA compiler even
# though it keeps its .cpp extension.
if (ENABLE_CUDA)
    set_source_files_properties(src/CANDY/IndexTable.cpp
            PROPERTIES
            LANGUAGE CUDA
    )
endif ()

# The target is defined exactly once: a second add_library() with the same
# name is a configure-time error.
# NOTE(review): ${CMAKE_CURRENT_BINARY_DIR} is a directory passed in the
# source list; kept as-is -- confirm it is intentional.
add_library(CANDYBENCH SHARED ${CANDY_SOURCE_FILES} ${CANDY_HEADER_FILES} ${CMAKE_CURRENT_BINARY_DIR}
        src/CANDY/IndexTable.cpp)

set_property(TARGET CANDYBENCH PROPERTY CXX_STANDARD 20)
target_include_directories(CANDYBENCH PUBLIC "include")

# CANDYBENCH is built as C++20 whether or not CUDA is enabled.
set_target_properties(CANDYBENCH PROPERTIES
        CXX_STANDARD 20
)

# With CUDA enabled, additionally request CUDA C++20 for the target and append
# the cuBLAS and CUDA runtime library names to LIBRARIES.
if (ENABLE_CUDA)
    set_target_properties(CANDYBENCH PROPERTIES
            CUDA_STANDARD 20
    )
    set(LIBRARIES ${LIBRARIES} cublas cudart)
endif ()

# Set the MKL library paths (header directory and library directory).
# NOTE(review): these are fixed absolute paths -- presumably a Debian/Ubuntu
# x86_64 layout; confirm before building on another platform.
set(MKL_INCLUDE_DIR "/usr/include/mkl")
set(MKL_LIB_DIR "/usr/lib/x86_64-linux-gnu")
Expand Down Expand Up @@ -428,4 +468,4 @@ message(STATUS "build path:" ${CMAKE_CURRENT_BINARY_DIR})
# copy files needed for real world matrix loader
set(source_directory "${CMAKE_SOURCE_DIR}/benchmark/datasets/")
set(destination_directory "${CMAKE_BINARY_DIR}/benchmark/datasets/")
# One copy at configure time is sufficient; repeating the identical command
# only repeats the same work.
file(COPY ${source_directory} DESTINATION ${destination_directory})
3 changes: 2 additions & 1 deletion cmake/FindCuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ function(find_valid_cuda MIN_CUDA_VERSION MAX_CUDA_VERSION)

# If no valid CUDA was found, print a warning
if (NOT VALID_CUDA_FOUND)
set(ENABLE_CUDA OFF PARENT_SCOPE)
message(WARNING "No valid CUDA compiler found in the range ${MIN_CUDA_VERSION} - ${MAX_CUDA_VERSION}. I don't think you can use CUDA...")
else()
set(ENV{CUDACXX} ${CUDA_COMPILER_PATH})
Expand All @@ -50,4 +51,4 @@ function(find_valid_cuda MIN_CUDA_VERSION MAX_CUDA_VERSION)
endfunction()

# Call the function to find valid CUDA compilers with a specific version range.
# Probe once, with the widest supported range: a failed probe sets ENABLE_CUDA
# to OFF in this scope, so probing a narrower range first could disable CUDA
# for an otherwise supported toolkit.
find_valid_cuda("11.0" "12.6")
3 changes: 2 additions & 1 deletion cmake/macros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ macro(get_sources SOURCE_FILES)
endmacro()

# get_headers(<out-var>)
# Recursively collects every .h, .hpp and .cuh file under include/ into the
# variable whose name is passed as HEADER_FILES.
macro(get_headers HEADER_FILES)
    # Single glob covering all header kinds; an earlier glob without *.cuh
    # would simply be overwritten by this one.
    file(GLOB_RECURSE ${HEADER_FILES} "include/*.h" "include/*.hpp" "include/*.cuh")
endmacro()

# Define the function to detect AVX-512 support
Expand Down
84 changes: 84 additions & 0 deletions include/CANDY/SONG/SONG.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright (C) 2024 by the INTELLI team
* Created by: Ziao Wang
* Created on: 2024/11/18
* Description: Declares CANDY::SONG, an AbstractIndex implementation built on the SONG_KERNEL data and graph wrappers.
*/

// Include guard: the tested and the defined macro must be the same name,
// otherwise a second inclusion re-declares everything in this header.
#ifndef CANDY_INCLUDE_CANDY_SONG_HPP
#define CANDY_INCLUDE_CANDY_SONG_HPP

#include <CANDY/AbstractIndex.h>
#include <Utils/ConfigMap.hpp>
#include <memory>
#include <vector>
#include "config.hpp"
#include "data.hpp"
#include "kernelgraph.cuh"

namespace CANDY{

/**
* @class SONG
* @brief Index class derived from @ref AbstractIndex that owns a
* SONG_KERNEL::Data object and a SONG_KERNEL::GraphWrapper object.
* @note Only declarations are visible here; the member functions are defined
* in a separate translation unit.
*/
class SONG : public AbstractIndex {
protected:
// Configuration map pointer, null until assigned (presumably by setConfig -- confirm in the .cpp).
INTELLI::ConfigMapPtr myCfg = nullptr;
// Tensors held by the index; NOTE(review): names suggest database rows and attached objects -- confirm.
torch::Tensor dbTensor, objTensor;
// Vector dimensionality, 768 unless changed.
int64_t vecDim = 768;
// Vector capacity/volume setting, 1000000 unless changed.
int64_t vecVolume = 1000000;
// Counter reported by size(); starts at 0.
int64_t idx = 0;
// Distance metric, L2 unless changed.
faiss::MetricType Metric = faiss::METRIC_L2;
// Exclusively owned kernel-side data store; null until created.
std::unique_ptr<SONG_KERNEL::Data> data = nullptr;
// Exclusively owned kernel-side graph wrapper; null until created.
std::unique_ptr<SONG_KERNEL::GraphWrapper> graph = nullptr;

/**
* @brief convert a query tensor to a vector of pairs
* @param[in] t the query tensor
* @param[out] res the result vector of (int, SONG_KERNEL::value_t) pairs
*/
static void convertTensorToVectorPair(
torch::Tensor& t, std::vector<std::pair<int, SONG_KERNEL::value_t>>& res);

/**
* @brief convert a batch of query tensors to a batch of vectors of pairs
* @param[in] ts the query tensors
* @param[out] res the result vector, one inner vector of pairs per query
*/
static void convertTensorToVectorPairBatch(
torch::Tensor& ts,
std::vector<std::vector<std::pair<int, SONG_KERNEL::value_t>>>& res);

public:
SONG() = default;

~SONG() = default;

// Public timing counters, both starting at 0.
// NOTE(review): names suggest microseconds spent in GPU compute and in
// host/device transfer respectively -- confirm against the implementation.
int64_t gpuComputingUs = 0;
int64_t gpuCommunicationUs = 0;

/**
* @brief configure the index from a config map
* @param cfg the configuration map
* @return bool status reported by the implementation
*/
virtual bool setConfig(INTELLI::ConfigMapPtr cfg);

/**
* @brief insert the content of a tensor into the index
* @param t the tensor to insert
* @return bool status reported by the implementation
*/
virtual bool insertTensor(torch::Tensor &t);

/**
* @brief delete entries related to a tensor
* @param t the tensor describing what to delete
* @param k count parameter, 1 by default
* @return bool status reported by the implementation
*/
virtual bool deleteTensor(torch::Tensor& t, int64_t k = 1);

/**
* @brief revise entries of the index
* @param t the tensor identifying what to revise
* @param w the tensor carrying the revision
* @return bool status reported by the implementation
*/
virtual bool reviseTensor(torch::Tensor& t, torch::Tensor& w);

/**
* @brief search the index with a query tensor
* @param q the query tensor
* @param k the number of results requested (presumably per query row -- confirm)
* @return a vector of result tensors
*/
virtual std::vector<torch::Tensor> searchTensor(torch::Tensor& q,int64_t k);

/**
* @brief report the current value of the internal counter idx
* @return the value of idx
*/
[[nodiscard]] int64_t size() const { return idx; }

/**
* @brief reset the statistics kept by the index
* @return bool status reported by the implementation
*/
virtual bool resetIndexStatistics();

/**
* @brief obtain the statistics kept by the index as a config map
* @return a config map pointer holding the statistics
*/
virtual INTELLI::ConfigMapPtr getIndexStatistics();
};

/**
* @ingroup CANDY_lib_bottom
* @typedef SONGPtr
* @brief The class to describe a shared pointer to @ref SONG
*/
typedef std::shared_ptr<class CANDY::SONG> SONGPtr;
// Shorthand for creating a shared SONG instance: newSONG() expands to std::make_shared<CANDY::SONG>().
#define newSONG std::make_shared<CANDY::SONG>
} // namespace CANDY

#endif //CANDY_INCLUDE_CANDY_SONG_HPP
51 changes: 51 additions & 0 deletions include/CANDY/SONG/bin_heap.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#pragma once
#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BINHEAP_HPP
#define CANDY_INCLUDE_ALGORITHMS_SONG_BINHEAP_HPP

namespace SONG_KERNEL {
// [begin,end)
/**
 * @brief Sift the last element of [begin, end) up into a max-heap.
 *
 * Expects [begin, end - 1) to already be a max-heap under operator< and the
 * new element to sit at end - 1. After the call the whole range [begin, end)
 * is a max-heap.
 *
 * @tparam T element type; must be copyable and provide operator<
 * @param begin pointer to the first element of the heap
 * @param end pointer one past the newly appended element
 */
template <class T>
__device__ void push_heap(T* begin, T* end) {
  // An empty range has no last element to sift; bail out instead of
  // touching begin[-1].
  if (end - begin <= 0) {
    return;
  }
  int child = static_cast<int>(end - begin) - 1;
  // Stop explicitly at the root rather than relying on (0 - 1) / 2 == 0 and
  // a self-comparison being false.
  while (child > 0) {
    const int parent = (child - 1) / 2;
    if (!(begin[parent] < begin[child])) {
      break;
    }
    T tmp = begin[child];
    begin[child] = begin[parent];
    begin[parent] = tmp;
    child = parent;
  }
}

/**
 * @brief Remove and return the top of the max-heap stored in [begin, end).
 *
 * The last element is copied into the root slot and sifted down using
 * operator<. The slot at end - 1 is not cleared and still takes part in the
 * child look-ups; the caller is expected to shrink its range by one.
 *
 * @tparam T element type; must be copyable and provide operator<
 * @param begin pointer to the first element of the heap
 * @param end pointer one past the last element of the heap
 * @return the element that was at the top before the call
 */
template <class T>
__device__ T pop_heap(T* begin, T* end) {
  T top = begin[0];
  begin[0] = end[-1];
  const int count = end - begin;
  int cur = 0;
  while (cur + 1 < count) {
    const int lhs = cur * 2 + 1;
    const int rhs = cur * 2 + 2;
    int pick;
    if (rhs < count) {
      // Both children exist: follow the larger one.
      pick = (begin[lhs] < begin[rhs]) ? rhs : lhs;
    } else if (lhs < count) {
      pick = lhs;
    } else {
      // Leaf reached.
      break;
    }
    if (!(begin[cur] < begin[pick])) {
      break;
    }
    T held = begin[cur];
    begin[cur] = begin[pick];
    begin[pick] = held;
    cur = pick;
  }
  return top;
}
} // namespace SONG_KERNEL
#endif
83 changes: 83 additions & 0 deletions include/CANDY/SONG/blocked_bloomfilter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#pragma once
#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BLOCKEDBLOOMFILTER_HPP
#define CANDY_INCLUDE_ALGORITHMS_SONG_BLOCKEDBLOOMFILTER_HPP

#define GPU_CACHE_LINE_SIZE64 1
#define GPU_CACHE_LINE_SHIFT 0
#define BLOOMFILTER_DATA_T uint32_t
#define BLOOMFILTER_SIZE64MULT 2
#define BLOOMFILTER_SIZE_SHIFT 5

namespace SONG_KERNEL {
/**
 * @brief Bloom filter whose probes for one key all land in a single block.
 *
 * A key first selects a block of `data` through hash function 9
 * (get_offset), then sets or tests `num_hash` bit positions inside that block.
 * With the macros defined above, a block is one BLOOMFILTER_DATA_T word and
 * bit positions lie in [0, 32).
 *
 * @tparam size64 number of selectable blocks; the offset mask is size64 - 1
 * @tparam shift unused by this implementation
 * @tparam num_hash number of bit positions per key, at most 10
 */
template <const int size64, const int shift, const int num_hash>
struct BlockedBloomFilter {
  // hash(i, x) reads random_number[2 * i] and [2 * i + 1]; only 10 pairs exist.
  static_assert(num_hash >= 0 && num_hash <= 10,
                "BlockedBloomFilter supports at most 10 hash functions");

  BLOOMFILTER_DATA_T data[size64 * BLOOMFILTER_SIZE64MULT];

  // Ten pairs of 64-bit multipliers, one pair per hash function.
  const uint64_t random_number[10 * 2] = {
      0x4bcb391f924ed183ULL, 0xa0ab69ccd854fc0aULL, 0x91086b9cecf5e3b7ULL,
      0xc68e01641bead407ULL, 0x3a7b976128a30449ULL, 0x6d122efabfc4d99fULL,
      0xe6700ef8715030e2ULL, 0x80dd0c3bffcfb45bULL, 0xe80f45af6e4ce166ULL,
      0x6cf43e5aeb53c362ULL, 0x31a27265a93c4f40ULL, 0x743de943cecde0a4ULL,
      0x5ed25dba0288592dULL, 0xa69eb51a362c37bcULL, 0x9a558fed9d4824f0ULL,
      0xf75678c2fdbdd68bULL, 0x34423f0963258c85ULL, 0x3532778d6726905cULL,
      0x6fef7cbe609500f9ULL, 0xb4419d54de48422ULL};

  /** @brief Clear every word of the bit array, not just the first size64. */
  __device__ BlockedBloomFilter() {
    for (int i = 0; i < size64 * BLOOMFILTER_SIZE64MULT; ++i)
      data[i] = 0;
  }

  /**
   * @brief 64-bit xor-shift / multiply mixer shared by all hash functions.
   *
   * The key is widened to uint64_t first so that the 33-bit shifts are well
   * defined and the result does not depend on the width or signedness of
   * idx_t.
   *
   * @param h index of the multiplier pair, 0..9
   * @param x key to hash
   * @return mixed 64-bit value
   */
  __device__ uint64_t mix(int h, idx_t x) {
    uint64_t v = static_cast<uint64_t>(x);
    v ^= v >> 33;
    v *= random_number[h << 1];
    v ^= v >> 33;
    v *= random_number[(h << 1) + 1];
    v ^= v >> 33;
    return v;
  }

  /** @brief Unmasked hash: the mixed value truncated to int. */
  __device__ int pure_hash(int h, idx_t x) {
    return static_cast<int>(mix(h, x));
  }

  /** @brief Hash reduced to a bit position inside one block. */
  __device__ int hash(int h, idx_t x) {
    return static_cast<int>(
        mix(h, x) & ((GPU_CACHE_LINE_SIZE64 << BLOOMFILTER_SIZE_SHIFT) - 1));
  }

  /** @brief Set bit position x of the block starting at data[offset]. */
  __device__ void set_bit(int offset, int x) {
    data[offset + (x & (GPU_CACHE_LINE_SIZE64 - 1))] |=
        (1ULL << (x >> GPU_CACHE_LINE_SHIFT));
  }

  /** @brief Read bit position x of the block starting at data[offset]. */
  __device__ bool test_bit(int offset, int x) {
    return ((data[offset + (x & (GPU_CACHE_LINE_SIZE64 - 1))] >>
             (x >> GPU_CACHE_LINE_SHIFT)) &
            1);
  }

  /** @brief Pick the block for key x using hash function 9. */
  __device__ int get_offset(idx_t x) {
    return (pure_hash(9, x) & ((size64 >> GPU_CACHE_LINE_SHIFT) - 1)) *
           GPU_CACHE_LINE_SIZE64;
  }

  /** @brief Insert key x by setting its num_hash bits. */
  __device__ void add(idx_t x) {
    int offset = get_offset(x);
    for (int i = 0; i < num_hash; ++i)
      set_bit(offset, hash(i, x));
  }

  /**
   * @brief Membership query.
   * @return true when all num_hash bits of x are set (x may have been added),
   * false when at least one is clear (x was never added)
   */
  __device__ bool test(idx_t x) {
    int offset = get_offset(x);
    bool ok = true;
    for (int i = 0; i < num_hash; ++i)
      ok &= test_bit(offset, hash(i, x));
    return ok;
  }
};
} // namespace SONG_KERNEL
#endif
57 changes: 57 additions & 0 deletions include/CANDY/SONG/bloomfilter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once
#ifndef CANDY_INCLUDE_ALGORITHMS_SONG_BLOOMFILTER_HPP
#define CANDY_INCLUDE_ALGORITHMS_SONG_BLOOMFILTER_HPP

namespace SONG_KERNEL {
/**
 * @brief Bloom filter over size64 64-bit words (size64 * 64 bits in total).
 *
 * Each key sets or tests `num_hash` bit positions, one per hash function.
 *
 * @tparam size64 number of 64-bit words in the bit array
 * @tparam shift unused by this implementation
 * @tparam num_hash number of hash functions per key, at most 10
 */
template <const int size64, const int shift, const int num_hash>
struct BloomFilter {
  // hash(i, x) reads random_number[2 * i] and [2 * i + 1]; only 10 pairs exist.
  static_assert(num_hash >= 0 && num_hash <= 10,
                "BloomFilter supports at most 10 hash functions");

  uint64_t data[size64];

  // Ten pairs of 64-bit multipliers, one pair per hash function.
  const uint64_t random_number[10 * 2] = {
      0x4bcb391f924ed183ULL, 0xa0ab69ccd854fc0aULL, 0x91086b9cecf5e3b7ULL,
      0xc68e01641bead407ULL, 0x3a7b976128a30449ULL, 0x6d122efabfc4d99fULL,
      0xe6700ef8715030e2ULL, 0x80dd0c3bffcfb45bULL, 0xe80f45af6e4ce166ULL,
      0x6cf43e5aeb53c362ULL, 0x31a27265a93c4f40ULL, 0x743de943cecde0a4ULL,
      0x5ed25dba0288592dULL, 0xa69eb51a362c37bcULL, 0x9a558fed9d4824f0ULL,
      0xf75678c2fdbdd68bULL, 0x34423f0963258c85ULL, 0x3532778d6726905cULL,
      0x6fef7cbe609500f9ULL, 0xb4419d54de48422ULL};

  /** @brief Start with an empty filter: every word cleared. */
  __device__ BloomFilter() {
    for (int i = 0; i < size64; ++i)
      data[i] = 0;
  }

  /**
   * @brief Map key x to a bit position in [0, size64 * 64) with hash h.
   *
   * The mixing runs on an unsigned 64-bit copy of the key, so the shifts are
   * well defined and the modulo can never produce a negative position,
   * whatever the width or signedness of idx_t.
   *
   * @param h index of the multiplier pair, 0..9
   * @param x key to hash
   * @return bit position for this key and hash function
   */
  __device__ int hash(int h, idx_t x) {
    uint64_t v = static_cast<uint64_t>(x);
    v ^= v >> 33;
    v *= random_number[h << 1];
    v ^= v >> 33;
    v *= random_number[(h << 1) + 1];
    v ^= v >> 33;
    return static_cast<int>(v % (static_cast<uint64_t>(size64) << 6));
  }

  /** @brief Set bit position x: word x % size64, bit x / size64. */
  __device__ void set_bit(int x) { data[x % size64] |= (1ULL << (x / size64)); }

  /** @brief Read bit position x: word x % size64, bit x / size64. */
  __device__ bool test_bit(int x) {
    return ((data[x % size64] >> (x / size64)) & 1);
  }

  /** @brief Insert key x by setting its num_hash bits. */
  __device__ void add(idx_t x) {
    for (int i = 0; i < num_hash; ++i)
      set_bit(hash(i, x));
  }

  /**
   * @brief Membership query.
   * @return true when all num_hash bits of x are set (x may have been added),
   * false when at least one is clear (x was never added)
   */
  __device__ bool test(idx_t x) {
    bool ok = true;
    for (int i = 0; i < num_hash; ++i)
      ok &= test_bit(hash(i, x));
    return ok;
  }
};
} // namespace SONG_KERNEL
#endif
Loading

0 comments on commit bb330f0

Please sign in to comment.