Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sptag2 #36

Merged
merged 13 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 2 additions & 1 deletion .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_PREFIX_PATH='/usr/local/lib/python3.10/dist-packages/torch/share/cmake' -DENABLE_HDF5=ON -DENABLE_PYBIND=ON
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_PREFIX_PATH='/usr/local/lib/python3.10/dist-packages/torch/share/cmake' -DENABLE_HDF5=ON -DENABLE_PYBIND=ON -DENABLE_SPTAG=ON
- name: Build
# Build your program with the given configuration
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
Expand All @@ -49,5 +49,6 @@ jobs:
./flatIndex_test "--success"
./ppIndex_test "--success"
./onlineIVFLSH_test "--success"
./sptagIndex_test "--success"


3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
*.exe
*.out
*.app
*build
/build
thirdparty/papi_build
__pycache*/
doc/
/cmake-build-debug/
Expand Down
38 changes: 35 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

cmake_minimum_required(VERSION 3.14)
project(CANDY CXX)


set(CMAKE_VERBOSE_MAKEFILE OFF)
set(CMAKE_RULE_MESSAGES OFF)
# Custom CMake find instructions and macros
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
include(cmake/macros.cmake)
Expand All @@ -27,7 +27,7 @@ find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
set(LIBRARIES ${LIBRARIES} ${TORCH_LIBRARIES})
# Set Optimization Flags
set(CMAKE_CXX_FLAGS "-std=c++20 -Wall -Werror=return-type")
set(CMAKE_CXX_FLAGS "-std=c++20 -Wall -Werror=return-type -Wno-interference-size")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DNO_RACE_CHECK -DCANDY_DEBUG_MODE=1")
set(CMAKE_CXX_FLAGS_RELEASE "-Wno-ignored-qualifiers -Wno-sign-compare -O3")
set(PROJECT_BINARY_DIR_RAW ${PROJECT_BINARY_DIR})
Expand Down Expand Up @@ -72,10 +72,15 @@ option(ENABLE_HDF5
"Enable load data from hdf5 file"
OFF
)
option(ENABLE_SPTAG
"Integrate SPTAG LIBS"
OFF
)
option(ENABLE_PYBIND
"Enable original pybind and build CANDY python"
OFF
)

#OPTIONAL OPENCL
if (NOT ENABLE_OPENCL)
message(STATUS "I will NOT include opencl support ")
Expand Down Expand Up @@ -155,7 +160,31 @@ configure_file(
"${PROJECT_SOURCE_DIR}/include/hdf5_config.h.in"
"${PROJECT_BINARY_DIR}/include/hdf5_config.h"
)
# Optional SPTAG integration.
# SPTAG_BUILD_DIR is the directory (inside the build tree) used for SPTAG's build outputs.
set(SPTAG_BUILD_DIR ${CMAKE_BINARY_DIR}/SPTAG_build)

# Point the global output directories at SPTAG_BUILD_DIR while the SPTAG section below is processed.
# NOTE(review): these three overrides are applied unconditionally, i.e. also when ENABLE_SPTAG is OFF.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${SPTAG_BUILD_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${SPTAG_BUILD_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${SPTAG_BUILD_DIR}/lib)
# CANDY_SPTAG (0 or 1) mirrors the ENABLE_SPTAG option and is consumed by configure_file() below.
if (NOT ENABLE_SPTAG)
message(STATUS "I will NOT include SPTAG LIBS")
set(CANDY_SPTAG 0)
else ()
set(CANDY_SPTAG 1)
message(STATUS "I will include support for SPTAG")
# Build the bundled SPTAG sources and add SPTAGLib to the link list.
# NOTE(review): SPTAGLib is presumably a target defined by thirdparty/SPTAG - confirm.
add_subdirectory(thirdparty/SPTAG)
set(LIBRARIES ${LIBRARIES} SPTAGLib)
endif ()
# Generate sptag_config.h from its template; done in both branches.
configure_file(
"${PROJECT_SOURCE_DIR}/include/sptag_config.h.in"
"${PROJECT_BINARY_DIR}/include/sptag_config.h"
)

# Set the output directories used by the rest of the project.
# NOTE(review): this assigns fixed paths under CMAKE_BINARY_DIR; it does not restore whatever values
# the three variables held before the SPTAG section - confirm that is intended.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

message(STATUS "CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}")
Expand Down Expand Up @@ -195,6 +224,9 @@ set_property(TARGET CANDY PROPERTY CXX_STANDARD 20)
target_include_directories(CANDY PUBLIC "include")
target_include_directories(CANDY PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(CANDY PUBLIC "thirdparty/papi_build/include")
target_include_directories(CANDY PUBLIC "thirdparty/")
target_include_directories(CANDY PUBLIC "thirdparty/SPTAG")
target_include_directories(CANDY PUBLIC "thirdparty/SPTAG/AnnService")
target_link_options(CANDY PUBLIC "-Wl,-rpath,./")
target_link_options(CANDY PUBLIC "-Wl,-rpath,./")
target_link_libraries(CANDY PUBLIC ${LIBRARIES})
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,20 @@ When developing in CLion, you must manually configure:

1. **CMake Prefix Path:**


### Requires BLAS, LAPACK, boost and swig

```shell
sudo apt install liblapack-dev libblas-dev libboost-all-dev swig
```

- Run the following command in your terminal to get the CMake prefix path:

```shell
python3 -c 'import torch; print(torch.utils.cmake_prefix_path)'
```


- Copy the output path and set it in CLion's CMake settings as:

```
Expand Down
2 changes: 1 addition & 1 deletion buildCPUOnly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ echo "First, make sure you have sudo"
sudo ls
echo "Installing others..."
sudo apt install -y liblapack-dev libblas-dev
sudo apt-get install -y graphviz libboost-dev
sudo apt-get install -y graphviz libboost-all-dev swig
pip install matplotlib pandas==2.0.0
pip install torch>=1.13.0 --index-url https://download.pytorch.org/whl/cpu
echo "Build CANDY and PyCandy"
Expand Down
2 changes: 1 addition & 1 deletion buildWithCuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ else
fi
echo "Installing others..."
sudo apt install -y liblapack-dev libblas-dev
sudo apt-get install -y graphviz libboost-dev
sudo apt-get install -y graphviz libboost-all-dev swig
sudo apt-get install -y libcudnn8 libcudnn8-dev
pip install matplotlib pandas==2.0.0
pip install torch>=1.13.0
Expand Down
114 changes: 114 additions & 0 deletions include/CANDY/SPTAGIndex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*! \file SPTAGIndex.h*/
//
// Created by tony on 04/01/24.
//

#ifndef CANDY_INCLUDE_CANDY_SPTAGIndex_H_
#define CANDY_INCLUDE_CANDY_SPTAGIndex_H_

#include <Utils/AbstractC20Thread.hpp>
#include <Utils/ConfigMap.hpp>
#include <memory>
#include <vector>
#include <Utils/IntelliTensorOP.hpp>
#include <faiss/IndexFlat.h>
#include <CANDY/FlatIndex.h>
#include <SPTAG/AnnService/inc/Core/VectorIndex.h>
namespace CANDY {

/**
* @ingroup CANDY_lib_bottom The main body and interfaces of library function
* @{
*/
/**
 * @class SPTAGIndex CANDY/SPTAGIndex.h
 * @brief An index that performs k-NN search through an SPTAG vector index, built on top of @ref FlatIndex
 * @todo revising and deleting vectors is not implemented yet
 * @note single-threaded by default
 * @note config parameters
 * - vecDim, the dimension of vectors, default 768, I64
 * - initialVolume, the initial volume of inline database tensor, default 1000, I64
 * - expandStep, the step of expanding inline database, default 100, I64
 * - SPTAGThreads, the number of involved threads, default 1, I64
 * - SPTAGNumberOfInitialDynamicPivots, the number of pivots used for partitioning the data into clusters during
 *   tree construction (relevant for BKT). Pivots are the points the algorithm uses to split the data into
 *   clusters. Default 32, I64
 * - SPTAGMaxCheck, the number of nodes to examine during a query. A higher value means more nodes are checked,
 *   resulting in better accuracy but slower queries. Default 8192, I64
 * - SPTAGGraphNeighborhoodSize, the size of the neighborhood graph during graph construction, used for neighbor
 *   search in the proximity graph. Default 32, I64
 * - SPTAGGraphNeighborhoodScale, controls how the neighborhood size grows as the algorithm progresses through
 *   the stages of tree construction. Default 2.0, DOUBLE
 * - SPTAGRefineIterations, the number of iterations used during graph refinement. Refinement improves the
 *   quality of the nearest neighbor graph by updating the edges iteratively. Default 3, I64
 */
class SPTAGIndex : public FlatIndex {
 protected:
  /// Handle to the underlying SPTAG index; empty until one is created.
  std::shared_ptr<SPTAG::VectorIndex> sptag;
  /// Value of the SPTAGThreads config entry.
  int64_t SPTAGThreads = 1;
  /// Bookkeeping flag for whether the initial tensors have been loaded.
  bool isInitialized = true;
  // Tuning knobs. They carry the documented defaults so that they never hold
  // indeterminate values when read before a configuration has been applied.
  int64_t SPTAGNumberOfInitialDynamicPivots = 32;
  int64_t SPTAGMaxCheck = 8192;
  int64_t SPTAGGraphNeighborhoodSize = 32;
  int64_t SPTAGRefineIterations = 3;
  double SPTAGGraphNeighborhoodScale = 2.0;

 public:
  SPTAGIndex() {

  }

  ~SPTAGIndex() {

  }

  /**
   * @brief load the initial tensors of a data base, use this BEFORE @ref insertTensor
   * @note This is majorly an offline function, and may be different from @ref insertTensor for some indexes
   * @param t the tensor, some index need to be single row
   * @return bool whether the loading is successful
   */
  virtual bool loadInitialTensor(torch::Tensor &t);
  /**
   * @brief reset this index to its initial status
   */
  virtual void reset();
  /**
   * @brief set the index-specific config related to one index
   * @param cfg the config of this class
   * @return bool whether the configuration is successful
   */
  virtual bool setConfig(INTELLI::ConfigMapPtr cfg);

  /**
   * @brief insert a tensor
   * @param t the tensor, accept multiple rows
   * @return bool whether the insertion is successful
   */
  virtual bool insertTensor(torch::Tensor &t);

  /**
   * @brief search the k-NN of a query tensor, return the result tensors
   * @param q the query tensor, allow multiple rows
   * @param k the returned neighbors
   * @return std::vector<torch::Tensor> the result tensor for each row of query
   */
  virtual std::vector<torch::Tensor> searchTensor(torch::Tensor &q, int64_t k);
  /**
   * @brief return the size of ingested tensors
   * @return the number of ingested rows, computed as lastNNZ + 1
   */
  virtual int64_t size() {
    return lastNNZ + 1;
  }
};

/**
 * @ingroup CANDY_lib_bottom
 * @typedef SPTAGIndexPtr
 * @brief Alias for a shared pointer that holds a @ref SPTAGIndex
 */
using SPTAGIndexPtr = std::shared_ptr<class CANDY::SPTAGIndex>;
/**
 * @ingroup CANDY_lib_bottom
 * @def newSPTAGIndex
 * @brief (Macro) Creates a new @ref SPTAGIndex held by a shared pointer.
 */
#define newSPTAGIndex std::make_shared<CANDY::SPTAGIndex>
}
/**
* @}
*/

#endif //CANDY_INCLUDE_CANDY_SPTAGIndex_H_
8 changes: 8 additions & 0 deletions include/sptag_config.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//
// Created by tony on 04/06/22.
//

#ifndef CANDY_SPTAG_CONFIG_H_IN_H_
#define CANDY_SPTAG_CONFIG_H_IN_H_
#define CANDY_SPTAG @CANDY_SPTAG@
#endif
3 changes: 3 additions & 0 deletions src/CANDY/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,7 @@ add_subdirectory(LSHAPGIndex)
if (ENABLE_RAY)
add_subdirectory(DistributedPartitionIndex)
add_sources(DistributedPartitionIndex.cpp)
endif ()
if (ENABLE_SPTAG)
add_sources(SPTAGIndex.cpp)
endif ()
1 change: 1 addition & 0 deletions src/CANDY/FaissIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ std::vector<torch::Tensor> CANDY::FaissIndex::getTensorByIndex(std::vector<faiss
ru[i] = torch::zeros({k, vecDim});
for (int64_t j = 0; j < k; j++) {
int64_t tempIdx = idx[i * k + j];
printf("%ld%ld=%ld\n", i,j,tempIdx);
float tempSlice[vecDim];
// if(index_type=="FaissIVFPQ" || index_type == "FaissPQ"){
// if(vecDim=100 || vecDim == 420){
Expand Down
7 changes: 7 additions & 0 deletions src/CANDY/IndexTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@
#include <CANDY/YinYangGraphSimpleIndex.h>
#include <include/opencl_config.h>
#include <include/ray_config.h>
#include <include/sptag_config.h>
#if CANDY_CL == 1
//#include <CPPAlgos/CLMMCPPAlgo.h>
#endif
#if CANDY_RAY == 1
#include <CANDY/DistributedPartitionIndex.h>
#endif
#if CANDY_SPTAG == 1
#include <CANDY/SPTAGIndex.h>
#endif
namespace CANDY {
CANDY::IndexTable::IndexTable() {
indexMap["null"] = newAbstractIndex();
Expand Down Expand Up @@ -59,5 +63,8 @@ CANDY::IndexTable::IndexTable() {
#if CANDY_RAY == 1
indexMap["distributedPartition"] = newDistributedPartitionIndex();
#endif
#if CANDY_SPTAG == 1
indexMap["SPTAG"] = newSPTAGIndex();
#endif
}
} // namespace CANDY
85 changes: 85 additions & 0 deletions src/CANDY/SPTAGIndex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*! \file SPTAGIndex.cpp*/
//
// Created by tony on 25/05/23.
//

#include <CANDY/SPTAGIndex.h>
#include <Utils/UtilityFunctions.h>
#include <time.h>
#include <chrono>
#include <assert.h>

/**
 * @brief Configure the base index, create a fresh SPTAG instance and forward the SPTAG* settings to it.
 * @param cfg the config map; see the class documentation for the recognised keys and their defaults
 * @return bool false when the base configuration fails or no SPTAG instance could be created, true otherwise
 */
bool CANDY::SPTAGIndex::setConfig(INTELLI::ConfigMapPtr cfg) {
  // Inherited state (e.g. vecDim, faissMetric) is set up by the base class;
  // do not build on top of a configuration it rejected.
  if (!FlatIndex::setConfig(cfg)) {
    return false;
  }
  sptag = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::BKT,
                                             SPTAG::VectorValueType::Float);
  if (sptag == nullptr) {
    // Nothing to configure; report the failure instead of dereferencing a null handle.
    isInitialized = false;
    return false;
  }
  if (faissMetric == faiss::METRIC_INNER_PRODUCT) {
    INTELLI_INFO("Using inner product for SPTAG");
    sptag->SetParameter("DistCalcMethod",
                        SPTAG::Helper::Convert::ConvertToString(SPTAG::DistCalcMethod::InnerProduct));
  } else {
    INTELLI_INFO("Using l2 for SPTAG");
    sptag->SetParameter("DistCalcMethod",
                        SPTAG::Helper::Convert::ConvertToString(SPTAG::DistCalcMethod::L2));
  }
  // Read the tuning knobs, keeping a copy in the members.
  SPTAGThreads = cfg->tryI64("SPTAGThreads", 1, false);
  SPTAGNumberOfInitialDynamicPivots = cfg->tryI64("SPTAGNumberOfInitialDynamicPivots", 32, false);
  SPTAGMaxCheck = cfg->tryI64("SPTAGMaxCheck", 8192, false);
  SPTAGGraphNeighborhoodSize = cfg->tryI64("SPTAGGraphNeighborhoodSize", 32, false);
  SPTAGGraphNeighborhoodScale = cfg->tryDouble("SPTAGGraphNeighborhoodScale", 2.0, false);
  SPTAGRefineIterations = cfg->tryI64("SPTAGRefineIterations", 3, false);
  // SPTAG takes its parameters as strings.
  sptag->SetParameter("NumberOfThreads", std::to_string(SPTAGThreads));
  sptag->SetParameter("NumberOfInitialDynamicPivots", std::to_string(SPTAGNumberOfInitialDynamicPivots));
  sptag->SetParameter("MaxCheck", std::to_string(SPTAGMaxCheck));
  sptag->SetParameter("GraphNeighborhoodSize", std::to_string(SPTAGGraphNeighborhoodSize));
  sptag->SetParameter("GraphNeighborhoodScale", std::to_string(SPTAGGraphNeighborhoodScale));
  sptag->SetParameter("RefineIterations", std::to_string(SPTAGRefineIterations));
  isInitialized = false;
  return true;
}
void CANDY::SPTAGIndex::reset() {
lastNNZ = -1;
isInitialized = false;
sptag.reset();
}
/**
 * @brief Store the initial rows in the base index and build the SPTAG index from them.
 * @param t the tensor of initial vectors, one vector per row; read as float with vecDim values per row
 * @return bool true only when the base insertion and the SPTAG build both succeed
 */
bool CANDY::SPTAGIndex::loadInitialTensor(torch::Tensor &t) {
  // Refuse before touching the base storage so both sides stay in step.
  if (sptag == nullptr) {
    return false;
  }
  if (!FlatIndex::insertTensor(t)) {
    return false;
  }
  // Keep the contiguous tensor in a named variable: taking data_ptr() from the
  // temporary returned by contiguous() leaves a dangling pointer whenever a
  // copy had to be made.
  const torch::Tensor rows = t.contiguous();
  const int64_t num_vectors = rows.size(0);
  const float *dbData = rows.data_ptr<float>();
  const auto status = sptag->BuildIndex(dbData, num_vectors, vecDim);
  isInitialized = (status == SPTAG::ErrorCode::Success);
  return isInitialized;
}
/**
 * @brief Append rows to the base index and add the same rows to the SPTAG index.
 * @param t the tensor of vectors to insert, one vector per row; read as float with vecDim values per row
 * @return bool true only when the base insertion and the SPTAG insertion both succeed
 */
bool CANDY::SPTAGIndex::insertTensor(torch::Tensor &t) {
  // Refuse before touching the base storage so both sides stay in step.
  if (sptag == nullptr) {
    return false;
  }
  if (!FlatIndex::insertTensor(t)) {
    return false;
  }
  // Keep the contiguous tensor in a named variable: taking data_ptr() from the
  // temporary returned by contiguous() leaves a dangling pointer whenever a
  // copy had to be made.
  const torch::Tensor rows = t.contiguous();
  const int64_t num_vectors = rows.size(0);
  const float *dbData = rows.data_ptr<float>();
  // No metadata is attached to the inserted vectors.
  const auto status = sptag->AddIndex(dbData, num_vectors, vecDim, nullptr);
  return status == SPTAG::ErrorCode::Success;
}


/**
 * @brief Search the k nearest neighbours of every query row through SPTAG.
 * @param q the query tensor, one query per row; read as float
 * @param k the number of neighbours to return per query
 * @return std::vector<torch::Tensor> one {k, vecDim} tensor per query row; rows for which no valid
 * neighbour was obtained stay zero
 */
std::vector<torch::Tensor> CANDY::SPTAGIndex::searchTensor(torch::Tensor &q, int64_t k) {
  const int64_t num_queries = q.size(0);
  std::vector<torch::Tensor> ru(num_queries);
  for (int64_t i = 0; i < num_queries; ++i) {
    ru[i] = torch::zeros({k, vecDim});
    if (sptag == nullptr) {
      // No index to ask; keep the zero-filled result.
      continue;
    }
    // rowI stays alive for the whole iteration, so queryRaw remains valid.
    auto rowI = q.slice(0, i, i + 1).contiguous();
    float *queryRaw = rowI.data_ptr<float>();
    // Prepare query result container for SPTAG
    SPTAG::QueryResult query_result(queryRaw, k, true);
    // Perform the search for the i-th query vector
    if (sptag->SearchIndex(query_result) != SPTAG::ErrorCode::Success) {
      continue;
    }
    // Copy the stored vector of every returned id into the output tensor
    for (int64_t j = 0; j < k; ++j) {
      const int64_t tempIdx = query_result.GetResult(j)->VID;
      // Skip ids outside the stored rows (presumably -1 marks an unfilled slot - confirm).
      if (tempIdx < 0 || tempIdx > lastNNZ) {
        continue;
      }
      ru[i].slice(0, j, j + 1) = dbTensor.slice(0, tempIdx, tempIdx + 1);
    }
  }
  return ru;
}
Loading
Loading