From cc14a65a500957721b47148dc986e01ad0a3f216 Mon Sep 17 00:00:00 2001 From: GuyAv46 <47632673+GuyAv46@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:33:13 +0300 Subject: [PATCH] Feature - HNSW refactor - vector blocks - [MOD-5302] (#397) * HNSW refactor 3 - [MOD-5302] (#389) * moved vector_block * make DataBlock available to use in vectors * some general improvements * implement data blocks in HNSW * disabled serializer benchmarks and some tests * more disable * enabled serialization (for current implementation) * added prefetch for range * move generic meta of element to a separated vector * added prefetch for metadata * enabled save and load from bindings * [TMP] return of the old HNSW REVERT ME * fix bm * some changes in prefetch * shortened BM * reverted prefetches to by as before * shorten BM * small improvement for range * packing structs * unrelated performance improvement * fix * revert adding origin hnsw, remove support for v1 and v2 serialization * update for BM file * some fixes and test updates * [TEMP] disable 2 tests so coverage will run * fix flow test * more test fixes * improved tests * fix for bach iterator scan (needs a benchmark) * for benchmark * reverting some temporary changes * more reverting * fix for clang * file name update * another prefetch option * few improvements * some more trying * final change * another update to all but `hnsw.h` * returned `increaseCapacity` responsibility to `addVector` * make `hnsw.h` use blocks * BF comment fix * comment fix * fix some tests * fixed hnsw tests * fixed hnsw-multi tests * fixed almost all tiered HNSW tests * Fix memory bookkeeping tests * Fix memory bookkeeping tests 2 * fixed estimations and their tests * review fixes * fix review fix * more review fixes * move rounding up of initial capacity to a static function * added comments on data blocks * some optimizations (reduce the use of `getDataByInternalId`) --------- Co-authored-by: alon * HNSW blocks refactor - add lock to graph data struct (#390) * Improved serializing code - [MOD-5372] (#391) * improved serializing code * review fixes * HNSW Refactor - benchmarks - [MOD-5371] (#392) * update benchmark files * updated wget links * publish serialization script * another benchmark cleanup iteration * review fixes * Renaming "meta" variables (#394) * renaming "meta" variables * revert temp change * Optimize Distance Functions - [MOD-5434] (#395) * initial templated with masks implementations * format * tidy up * enabled spaces tests back * changed template type and handle residual first * re-enabled benchmarks (keeping old names) * download fix * improved unit testing * improved spaces benchmarks * verify correctness * some cleanup * give up optimizing dim<16 for safety * aligned serialization links * added lots of comments * added a test and small fix * include opts only on x86 machines * remove AVX512DQ references from the project (not in use) * rename qty to dimension * Update AVX_utils.h comments * Optimize - implement align allocation for vector alignment - [MOD-5433] (#399) * aligning query vector * implement aligned allocation * added alignment hing to VecSimIndexAbstract, used it in block allocation * test fix * review fixes * set default value to the alignment hint (1 - any address is valid) * refactor allocation header to have alignment flag, unify free function * use alignment only on vector blocks * changed default alignment value (0) * updated tests * added missing break * improved comment * removed alignment from allocator test --- .gitignore | 3 +- README.md | 6 +- src/VecSim/CMakeLists.txt | 2 +- .../brute_force/bfm_batch_iterator.h | 2 +- .../brute_force/bfs_batch_iterator.h | 2 +- .../algorithms/brute_force/brute_force.h | 136 +- .../brute_force/brute_force_friend_tests.h | 1 + .../brute_force/brute_force_multi.h | 6 +- .../brute_force/brute_force_single.h | 10 +- .../algorithms/brute_force/vector_block.cpp | 32 - .../algorithms/brute_force/vector_block.h | 43 - src/VecSim/algorithms/hnsw/hnsw.h | 1514 ++++++++--------- .../algorithms/hnsw/hnsw_base_tests_friends.h | 4 + .../algorithms/hnsw/hnsw_batch_iterator.h | 67 +- src/VecSim/algorithms/hnsw/hnsw_multi.h | 60 +- src/VecSim/algorithms/hnsw/hnsw_serializer.h | 392 ++--- .../hnsw/hnsw_serializer_declarations.h | 8 +- src/VecSim/algorithms/hnsw/hnsw_single.h | 56 +- src/VecSim/algorithms/hnsw/hnsw_tiered.h | 26 +- .../index_factories/brute_force_factory.cpp | 9 +- src/VecSim/index_factories/hnsw_factory.cpp | 145 +- src/VecSim/index_factories/hnsw_factory.h | 3 +- src/VecSim/index_factories/tiered_factory.h | 12 +- src/VecSim/memory/vecsim_malloc.cpp | 46 +- src/VecSim/memory/vecsim_malloc.h | 1 + src/VecSim/spaces/AVX_utils.h | 28 + src/VecSim/spaces/CMakeLists.txt | 57 - src/VecSim/spaces/IP/IP.cpp | 20 +- src/VecSim/spaces/IP/IP.h | 8 +- src/VecSim/spaces/IP/IP_AVX.h | 12 +- src/VecSim/spaces/IP/IP_AVX512.h | 16 +- src/VecSim/spaces/IP/IP_AVX512DQ.h | 12 - src/VecSim/spaces/IP/IP_AVX512DQ_FP64.cpp | 62 - src/VecSim/spaces/IP/IP_AVX512_FP32.cpp | 100 -- src/VecSim/spaces/IP/IP_AVX512_FP32.h | 43 + src/VecSim/spaces/IP/IP_AVX512_FP64.cpp | 101 -- src/VecSim/spaces/IP/IP_AVX512_FP64.h | 43 + src/VecSim/spaces/IP/IP_AVX_FP32.cpp | 124 -- src/VecSim/spaces/IP/IP_AVX_FP32.h | 56 + src/VecSim/spaces/IP/IP_AVX_FP64.cpp | 120 -- src/VecSim/spaces/IP/IP_AVX_FP64.h | 55 + src/VecSim/spaces/IP/IP_SSE.h | 12 +- src/VecSim/spaces/IP/IP_SSE_FP32.cpp | 145 -- src/VecSim/spaces/IP/IP_SSE_FP32.h | 73 + src/VecSim/spaces/IP/IP_SSE_FP64.cpp | 144 -- src/VecSim/spaces/IP/IP_SSE_FP64.h | 58 + src/VecSim/spaces/IP_space.cpp | 120 +- src/VecSim/spaces/IP_space.h | 7 +- src/VecSim/spaces/L2/L2.cpp | 8 +- src/VecSim/spaces/L2/L2.h | 4 +- src/VecSim/spaces/L2/L2_AVX.h | 13 +- src/VecSim/spaces/L2/L2_AVX512.h | 14 +- src/VecSim/spaces/L2/L2_AVX512DQ.h | 11 - src/VecSim/spaces/L2/L2_AVX512DQ_FP64.cpp | 63 - src/VecSim/spaces/L2/L2_AVX512_FP32.cpp | 99 -- src/VecSim/spaces/L2/L2_AVX512_FP32.h | 46 + src/VecSim/spaces/L2/L2_AVX512_FP64.cpp | 98 -- src/VecSim/spaces/L2/L2_AVX512_FP64.h | 46 + src/VecSim/spaces/L2/L2_AVX_FP32.cpp | 115 -- src/VecSim/spaces/L2/L2_AVX_FP32.h | 56 + src/VecSim/spaces/L2/L2_AVX_FP64.cpp | 113 -- src/VecSim/spaces/L2/L2_AVX_FP64.h | 56 + src/VecSim/spaces/L2/L2_SSE.h | 13 +- src/VecSim/spaces/L2/L2_SSE_FP32.cpp | 143 -- src/VecSim/spaces/L2/L2_SSE_FP32.h | 73 + src/VecSim/spaces/L2/L2_SSE_FP64.cpp | 144 -- src/VecSim/spaces/L2/L2_SSE_FP64.h | 58 + src/VecSim/spaces/L2_space.cpp | 118 +- src/VecSim/spaces/L2_space.h | 7 +- src/VecSim/spaces/implementation_chooser.h | 44 + .../spaces/implementation_chooser_cleanup.h | 17 + src/VecSim/spaces/space_aux.cpp | 3 - src/VecSim/spaces/space_aux.h | 1 - src/VecSim/spaces/space_includes.h | 23 +- src/VecSim/spaces/spaces.cpp | 55 +- src/VecSim/spaces/spaces.h | 31 +- src/VecSim/tombstone_interface.h | 8 +- src/VecSim/utils/alignment.h | 32 + src/VecSim/utils/data_block.cpp | 34 + src/VecSim/utils/data_block.h | 60 + src/VecSim/utils/serializer.cpp | 14 +- src/VecSim/utils/serializer.h | 8 +- src/VecSim/vec_sim.cpp | 3 - src/VecSim/vec_sim.h | 7 +- src/VecSim/vec_sim_common.h | 10 +- src/VecSim/vec_sim_index.h | 74 +- src/VecSim/vec_sim_interface.h | 5 - src/VecSim/vec_sim_tiered_index.h | 31 +- src/python_bindings/bindings.cpp | 12 +- tests/benchmark/CMakeLists.txt | 6 - tests/benchmark/benchmarks.sh | 36 +- tests/benchmark/bm_datasets.py | 20 +- tests/benchmark/bm_files.sh | 6 +- tests/benchmark/bm_updated_index.h | 15 +- tests/benchmark/bm_vecsim_index.h | 21 +- .../hnsw_indices/ hnsw_indices_updated.txt | 2 - .../data/hnsw_indices/hnsw_indices_all.txt | 20 +- .../data/hnsw_indices/hnsw_indices_ann.txt | 12 +- .../hnsw_indices/hnsw_indices_basic_fp32.txt | 8 +- .../hnsw_indices/hnsw_indices_basic_fp64.txt | 8 +- .../hnsw_indices/hnsw_indices_updated.txt | 3 + tests/benchmark/data/serializer.py | 177 ++ .../run_files/bm_basics_multi_fp32.cpp | 4 +- .../run_files/bm_basics_multi_fp64.cpp | 4 +- .../run_files/bm_basics_single_fp32.cpp | 4 +- .../run_files/bm_basics_single_fp64.cpp | 4 +- .../bm_batch_iterator_multi_fp32.cpp | 4 +- .../bm_batch_iterator_multi_fp64.cpp | 4 +- .../bm_batch_iterator_single_fp32.cpp | 4 +- .../bm_batch_iterator_single_fp64.cpp | 4 +- .../bm_updated_index_single_fp32.cpp | 6 +- tests/benchmark/spaces_benchmarks/bm_spaces.h | 36 +- .../spaces_benchmarks/bm_spaces_fp32.cpp | 56 +- .../spaces_benchmarks/bm_spaces_fp64.cpp | 75 +- tests/unit/test_allocator.cpp | 76 +- tests/unit/test_bruteforce.cpp | 111 +- tests/unit/test_bruteforce_multi.cpp | 47 +- tests/unit/test_common.cpp | 35 +- tests/unit/test_hnsw.cpp | 306 ++-- tests/unit/test_hnsw_multi.cpp | 195 +-- tests/unit/test_hnsw_parallel.cpp | 40 +- tests/unit/test_hnsw_tiered.cpp | 80 +- tests/unit/test_spaces.cpp | 368 ++-- tests/unit/test_utils.cpp | 6 +- 124 files changed, 3223 insertions(+), 4352 deletions(-) delete mode 100644 src/VecSim/algorithms/brute_force/vector_block.cpp delete mode 100644 src/VecSim/algorithms/brute_force/vector_block.h create mode 100644 src/VecSim/spaces/AVX_utils.h delete mode 100644 src/VecSim/spaces/IP/IP_AVX512DQ.h delete mode 100644 src/VecSim/spaces/IP/IP_AVX512DQ_FP64.cpp delete mode 100644 src/VecSim/spaces/IP/IP_AVX512_FP32.cpp create mode 100644 src/VecSim/spaces/IP/IP_AVX512_FP32.h delete mode 100644 src/VecSim/spaces/IP/IP_AVX512_FP64.cpp create mode 100644 src/VecSim/spaces/IP/IP_AVX512_FP64.h delete mode 100644 src/VecSim/spaces/IP/IP_AVX_FP32.cpp create mode 100644 src/VecSim/spaces/IP/IP_AVX_FP32.h delete mode 100644 src/VecSim/spaces/IP/IP_AVX_FP64.cpp create mode 100644 src/VecSim/spaces/IP/IP_AVX_FP64.h delete mode 100644 src/VecSim/spaces/IP/IP_SSE_FP32.cpp create mode 100644 src/VecSim/spaces/IP/IP_SSE_FP32.h delete mode 100644 src/VecSim/spaces/IP/IP_SSE_FP64.cpp create mode 100644 src/VecSim/spaces/IP/IP_SSE_FP64.h delete mode 100644 src/VecSim/spaces/L2/L2_AVX512DQ.h delete mode 100644 src/VecSim/spaces/L2/L2_AVX512DQ_FP64.cpp delete mode 100644 src/VecSim/spaces/L2/L2_AVX512_FP32.cpp create mode 100644 src/VecSim/spaces/L2/L2_AVX512_FP32.h delete mode 100644 src/VecSim/spaces/L2/L2_AVX512_FP64.cpp create mode 100644 src/VecSim/spaces/L2/L2_AVX512_FP64.h delete mode 100644 src/VecSim/spaces/L2/L2_AVX_FP32.cpp create mode 100644 src/VecSim/spaces/L2/L2_AVX_FP32.h delete mode 100644 src/VecSim/spaces/L2/L2_AVX_FP64.cpp create mode 100644 src/VecSim/spaces/L2/L2_AVX_FP64.h delete mode 100644 src/VecSim/spaces/L2/L2_SSE_FP32.cpp create mode 100644 src/VecSim/spaces/L2/L2_SSE_FP32.h delete mode 100644 src/VecSim/spaces/L2/L2_SSE_FP64.cpp create mode 100644 src/VecSim/spaces/L2/L2_SSE_FP64.h create mode 100644 src/VecSim/spaces/implementation_chooser.h create mode 100644 src/VecSim/spaces/implementation_chooser_cleanup.h create mode 100644 src/VecSim/utils/alignment.h create mode 100644 src/VecSim/utils/data_block.cpp create mode 100644 src/VecSim/utils/data_block.h delete mode 100644 tests/benchmark/data/hnsw_indices/ hnsw_indices_updated.txt create mode 100644 tests/benchmark/data/hnsw_indices/hnsw_indices_updated.txt create mode 100644 tests/benchmark/data/serializer.py diff --git a/.gitignore b/.gitignore index f694846fc..38d285aff 100644 --- a/.gitignore +++ b/.gitignore @@ -6,13 +6,14 @@ /build/ /dist/ /venv/ -/deps/readies/ +/deps/ /1/ **/build/ # Ignore benchmark fetched data but not the source file /tests/benchmark/data/* !/tests/benchmark/data/hnsw_indices +!/tests/benchmark/data/serializer.py # Prerequisites *.d diff --git a/README.md b/README.md index 6725538e9..15e2bb3b2 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ All of the algorithms in this library is designed to work inside RediSearch and |-----------|--------|---------|-----------------| | FP32 Internal product |SSE, AVX, AVX512 | No SIMD support | No SIMD support | | FP32 L2 distance |SSE, AVX, AVX512| No SIMD support | No SIMD support | -| FP64 Internal product |SSE, AVX, AVX512, AVX512DQ | No SIMD support | No SIMD support | +| FP64 Internal product |SSE, AVX, AVX512 | No SIMD support | No SIMD support | | FP64 L2 distance |SSE, AVX, AVX512 | No SIMD support | No SIMD support | ### Flat (Brute Force) @@ -92,5 +92,5 @@ tox -e flowenv # Benchmark -To benchmark the capabilities of this library, follow the instructions in the [benchmarks user guide](docs/benchmarks.md). -If you'd like to create your own benchmarks, you can find more information in the [developer guide](docs/benchmarks_developer.md). +To benchmark the capabilities of this library, follow the instructions in the [benchmarks user guide](docs/benchmarks.md). +If you'd like to create your own benchmarks, you can find more information in the [developer guide](docs/benchmarks_developer.md). diff --git a/src/VecSim/CMakeLists.txt b/src/VecSim/CMakeLists.txt index 54986b9ff..4998e17be 100644 --- a/src/VecSim/CMakeLists.txt +++ b/src/VecSim/CMakeLists.txt @@ -19,7 +19,6 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE} index_factories/hnsw_factory.cpp index_factories/tiered_factory.cpp index_factories/index_factory.cpp - algorithms/brute_force/vector_block.cpp algorithms/hnsw/visited_nodes_handler.cpp vec_sim.cpp vec_sim_interface.cpp @@ -27,6 +26,7 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE} info_iterator.cpp query_result_struct.cpp utils/vec_utils.cpp + utils/data_block.cpp memory/vecsim_malloc.cpp memory/vecsim_base.cpp ${HEADER_LIST} diff --git a/src/VecSim/algorithms/brute_force/bfm_batch_iterator.h b/src/VecSim/algorithms/brute_force/bfm_batch_iterator.h index ff4bc8e75..f85530d1b 100644 --- a/src/VecSim/algorithms/brute_force/bfm_batch_iterator.h +++ b/src/VecSim/algorithms/brute_force/bfm_batch_iterator.h @@ -24,7 +24,7 @@ class BFM_BatchIterator : public BF_BatchIterator { this->scores.reserve(this->index_label_count); vecsim_stl::unordered_map tmp_scores(this->index_label_count, this->allocator); - vecsim_stl::vector blocks = this->index->getVectorBlocks(); + auto &blocks = this->index->getVectorBlocks(); VecSimQueryResult_Code rc; idType curr_id = 0; diff --git a/src/VecSim/algorithms/brute_force/bfs_batch_iterator.h b/src/VecSim/algorithms/brute_force/bfs_batch_iterator.h index 9e77d1a7e..6328522d9 100644 --- a/src/VecSim/algorithms/brute_force/bfs_batch_iterator.h +++ b/src/VecSim/algorithms/brute_force/bfs_batch_iterator.h @@ -22,7 +22,7 @@ class BFS_BatchIterator : public BF_BatchIterator { inline VecSimQueryResult_Code calculateScores() override { this->index_label_count = this->index->indexLabelCount(); this->scores.reserve(this->index_label_count); - vecsim_stl::vector blocks = this->index->getVectorBlocks(); + auto &blocks = this->index->getVectorBlocks(); VecSimQueryResult_Code rc; idType curr_id = 0; diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 0058f2b2c..5889bc6f9 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -6,7 +6,7 @@ #pragma once -#include "vector_block.h" +#include "VecSim/utils/data_block.h" #include "VecSim/vec_sim_index.h" #include "VecSim/spaces/spaces.h" #include "VecSim/utils/vecsim_stl.h" @@ -29,7 +29,7 @@ template class BruteForceIndex : public VecSimIndexAbstract { protected: vecsim_stl::vector idToLabelMapping; - vecsim_stl::vector vectorBlocks; + vecsim_stl::vector vectorBlocks; idType count; public: @@ -37,12 +37,11 @@ class BruteForceIndex : public VecSimIndexAbstract { size_t indexSize() const override; size_t indexCapacity() const override; - void increaseCapacity() override; - vecsim_stl::vector computeBlockScores(VectorBlock *block, const void *queryBlob, + vecsim_stl::vector computeBlockScores(const DataBlock &block, const void *queryBlob, void *timeoutCtx, VecSimQueryResult_Code *rc) const; inline DataType *getDataByInternalId(idType id) const { - return (DataType *)vectorBlocks.at(id / this->blockSize)->getVector(id % this->blockSize); + return (DataType *)vectorBlocks.at(id / this->blockSize).getElement(id % this->blockSize); } virtual VecSimQueryResult_List topKQuery(const void *queryBlob, size_t k, VecSimQueryParams *queryParams) const override; @@ -56,7 +55,7 @@ class BruteForceIndex : public VecSimIndexAbstract { bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override; inline labelType getVectorLabel(idType id) const { return idToLabelMapping.at(id); } - inline vecsim_stl::vector getVectorBlocks() const { return vectorBlocks; } + inline const vecsim_stl::vector &getVectorBlocks() const { return vectorBlocks; } inline const labelType getLabelByInternalId(idType internal_id) const { return idToLabelMapping.at(internal_id); } @@ -72,7 +71,7 @@ class BruteForceIndex : public VecSimIndexAbstract { // without duplicates in tiered index). Caller should hold the flat buffer lock for read. virtual inline vecsim_stl::set getLabelsSet() const = 0; - virtual ~BruteForceIndex(); + virtual ~BruteForceIndex() = default; #ifdef BUILD_TESTS /** * @brief Used for testing - store vector(s) data associated with a given label. This function @@ -93,7 +92,30 @@ class BruteForceIndex : public VecSimIndexAbstract { // Private internal function that implements generic single vector deletion. virtual void removeVector(idType id); - inline VectorBlock *getVectorVectorBlock(idType id) const { + inline void growByBlock() { + assert(vectorBlocks.size() == 0 || vectorBlocks.back().getLength() == this->blockSize); + vectorBlocks.emplace_back(this->blockSize, this->dataSize, this->allocator, + this->alignment); + idToLabelMapping.resize(idToLabelMapping.size() + this->blockSize); + idToLabelMapping.shrink_to_fit(); + resizeLabelLookup(idToLabelMapping.size()); + } + + inline void shrinkByBlock() { + assert(indexCapacity() > 0); // should not be called when index is empty + + // remove last block (should be empty) + assert(vectorBlocks.size() > 0 && vectorBlocks.back().getLength() == 0); + vectorBlocks.pop_back(); + + // remove a block size of labels. + assert(idToLabelMapping.size() >= this->blockSize); + idToLabelMapping.resize(idToLabelMapping.size() - this->blockSize); + idToLabelMapping.shrink_to_fit(); + resizeLabelLookup(idToLabelMapping.size()); + } + + inline DataBlock &getVectorVectorBlock(idType id) { return vectorBlocks.at(id / this->blockSize); } inline size_t getVectorRelativeIndex(idType id) const { return id % this->blockSize; } @@ -111,6 +133,7 @@ class BruteForceIndex : public VecSimIndexAbstract { // inline label to id setters that need to be implemented by derived class virtual inline void replaceIdOfLabel(labelType label, idType new_id, idType old_id) = 0; virtual inline void setVectorId(labelType label, idType id) = 0; + virtual inline void resizeLabelLookup(size_t new_max_elements) = 0; virtual inline VecSimBatchIterator * newBatchIterator_Instance(void *queryBlob, VecSimQueryParams *queryParams) const = 0; @@ -129,41 +152,35 @@ BruteForceIndex::BruteForceIndex( : VecSimIndexAbstract(abstractInitParams), idToLabelMapping(this->allocator), vectorBlocks(this->allocator), count(0) { assert(VecSimType_sizeof(this->vecType) == sizeof(DataType)); - this->idToLabelMapping.resize(params->initialCapacity); -} - -template -BruteForceIndex::~BruteForceIndex() { - for (auto &vectorBlock : this->vectorBlocks) { - delete vectorBlock; - } + // Round up the initial capacity to the nearest multiple of the block size. + size_t initialCapacity = RoundUpInitialCapacity(params->initialCapacity, this->blockSize); + this->idToLabelMapping.resize(initialCapacity); + this->vectorBlocks.reserve(initialCapacity / this->blockSize); } /******************** Implementation **************/ template void BruteForceIndex::appendVector(const void *vector_data, labelType label) { - assert(indexCapacity() > indexSize()); // Give the vector new id and increase count. idType id = this->count++; - // Get the last vectors block to store the vector in (we assume that it's not full yet). - VectorBlock *vectorBlock = this->vectorBlocks.back(); - assert(vectorBlock == getVectorVectorBlock(id)); - - // add vector data to vectorBlock - vectorBlock->addVector(vector_data); + // Resize the index if needed. + if (indexSize() > indexCapacity()) { + growByBlock(); + } else if (id % this->blockSize == 0) { + // If we we didn't reach the initial capacity but the last block is full, add a new block + // only. + this->vectorBlocks.emplace_back(this->blockSize, this->dataSize, this->allocator, + this->alignment); + } - // if idToLabelMapping is full, - // resize and align idToLabelMapping by blockSize - size_t idToLabelMapping_size = this->idToLabelMapping.size(); + // Get the last vectors block to store the vector in. + DataBlock &vectorBlock = this->vectorBlocks.back(); + assert(&vectorBlock == &getVectorVectorBlock(id)); - if (id >= idToLabelMapping_size) { - size_t last_block_vectors_count = id % this->blockSize; - this->idToLabelMapping.resize( - idToLabelMapping_size + this->blockSize - last_block_vectors_count, 0); - this->idToLabelMapping.shrink_to_fit(); - } + // add vector data to vectorBlock + vectorBlock.addElement(vector_data); // add label to idToLabelMapping setVectorLabel(id, label); @@ -180,10 +197,10 @@ void BruteForceIndex::removeVector(idType id_to_delete) { labelType last_idx_label = getVectorLabel(last_idx); // Get last vector data. - VectorBlock *last_vector_block = vectorBlocks.back(); - assert(last_vector_block == getVectorVectorBlock(last_idx)); + DataBlock &last_vector_block = vectorBlocks.back(); + assert(&last_vector_block == &getVectorVectorBlock(last_idx)); - void *last_vector_data = last_vector_block->removeAndFetchLastVector(); + void *last_vector_data = last_vector_block.removeAndFetchLastElement(); // If we are *not* trying to remove the last vector, update mapping and move // the data of the last vector in the index in place of the deleted vector. @@ -198,27 +215,16 @@ void BruteForceIndex::removeVector(idType id_to_delete) { replaceIdOfLabel(last_idx_label, id_to_delete, last_idx); // Get the vectorBlock and the relative index of the deleted id. - VectorBlock *deleted_vectorBlock = getVectorVectorBlock(id_to_delete); + DataBlock &deleted_vectorBlock = getVectorVectorBlock(id_to_delete); size_t id_to_delete_rel_idx = getVectorRelativeIndex(id_to_delete); // Put data of last vector inplace of the deleted vector. - deleted_vectorBlock->updateVector(id_to_delete_rel_idx, last_vector_data); + deleted_vectorBlock.updateElement(id_to_delete_rel_idx, last_vector_data); } // If the last vector block is emtpy. - if (last_vector_block->getLength() == 0) { - delete last_vector_block; - this->vectorBlocks.pop_back(); - - // Resize and align the idToLabelMapping. - size_t idToLabel_size = idToLabelMapping.size(); - // If the new size is smaller by at least one block comparing to the idToLabelMapping - // align to be a multiplication of block size and resize by one block. - if (this->count + this->blockSize <= idToLabel_size) { - size_t vector_to_align_count = idToLabel_size % this->blockSize; - this->idToLabelMapping.resize(idToLabel_size - this->blockSize - vector_to_align_count); - this->idToLabelMapping.shrink_to_fit(); - } + if (last_vector_block.getLength() == 0) { + shrinkByBlock(); } } @@ -229,29 +235,23 @@ size_t BruteForceIndex::indexSize() const { template size_t BruteForceIndex::indexCapacity() const { - return this->blockSize * this->vectorBlocks.size(); -} - -template -void BruteForceIndex::increaseCapacity() { - size_t vector_bytes_count = this->dim * VecSimType_sizeof(this->vecType); - auto *new_vector_block = - new (this->allocator) VectorBlock(this->blockSize, vector_bytes_count, this->allocator); - this->vectorBlocks.push_back(new_vector_block); + return this->idToLabelMapping.size(); } // Compute the score for every vector in the block by using the given distance function. template -vecsim_stl::vector BruteForceIndex::computeBlockScores( - VectorBlock *block, const void *queryBlob, void *timeoutCtx, VecSimQueryResult_Code *rc) const { - size_t len = block->getLength(); +vecsim_stl::vector +BruteForceIndex::computeBlockScores(const DataBlock &block, + const void *queryBlob, void *timeoutCtx, + VecSimQueryResult_Code *rc) const { + size_t len = block.getLength(); vecsim_stl::vector scores(len, this->allocator); for (size_t i = 0; i < len; i++) { if (VECSIM_TIMEOUT(timeoutCtx)) { *rc = VecSim_QueryResult_TimedOut; return scores; } - scores[i] = this->dist_func(block->getVector(i), queryBlob, this->dim); + scores[i] = this->distFunc(block.getElement(i), queryBlob, this->dim); } *rc = VecSim_QueryResult_OK; return scores; @@ -264,7 +264,7 @@ BruteForceIndex::topKQuery(const void *queryBlob, size_t k, VecSimQueryResult_List rl = {0}; void *timeoutCtx = queryParams ? queryParams->timeoutCtx : NULL; - this->last_mode = STANDARD_KNN; + this->lastMode = STANDARD_KNN; if (0 == k) { rl.results = array_new(0); @@ -276,7 +276,7 @@ BruteForceIndex::topKQuery(const void *queryBlob, size_t k, getNewMaxPriorityQueue(); // For every block, compute its vectors scores and update the Top candidates max heap idType curr_id = 0; - for (auto vectorBlock : this->vectorBlocks) { + for (auto &vectorBlock : this->vectorBlocks) { auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rl.code); if (VecSim_OK != rl.code) { delete TopCandidates; @@ -314,7 +314,7 @@ BruteForceIndex::rangeQuery(const void *queryBlob, double ra VecSimQueryParams *queryParams) const { auto rl = (VecSimQueryResult_List){0}; void *timeoutCtx = queryParams ? queryParams->timeoutCtx : nullptr; - this->last_mode = RANGE_QUERY; + this->lastMode = RANGE_QUERY; // Compute scores in every block and save results that are within the range. auto res_container = @@ -323,7 +323,7 @@ BruteForceIndex::rangeQuery(const void *queryBlob, double ra DistType radius_ = DistType(radius); idType curr_id = 0; rl.code = VecSim_QueryResult_OK; - for (auto vectorBlock : this->vectorBlocks) { + for (auto &vectorBlock : this->vectorBlocks) { auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rl.code); if (VecSim_OK != rl.code) { break; @@ -458,7 +458,7 @@ bool BruteForceIndex::preferAdHocSearch(size_t subsetSize, s } } // Set the mode - if this isn't the initial check, we switched mode form batches to ad-hoc. - this->last_mode = + this->lastMode = res ? (initial_check ? HYBRID_ADHOC_BF : HYBRID_BATCHES_TO_ADHOC_BF) : HYBRID_BATCHES; return res; } diff --git a/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h b/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h index 66485cb79..39d21db92 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h +++ b/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h @@ -17,4 +17,5 @@ INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_delete_swap_block_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_dynamic_bf_info_iterator_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_zero_minimal_capacity_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_preferAdHocOptimization_Test) +INDEX_TEST_FRIEND_CLASS(IndexAllocatorTest_test_bf_index_block_size_1_Test) INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics) diff --git a/src/VecSim/algorithms/brute_force/brute_force_multi.h b/src/VecSim/algorithms/brute_force/brute_force_multi.h index 0f963e2ba..73dd22c38 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_multi.h +++ b/src/VecSim/algorithms/brute_force/brute_force_multi.h @@ -56,6 +56,10 @@ class BruteForceIndex_Multi : public BruteForceIndex { inline void replaceIdOfLabel(labelType label, idType new_id, idType old_id) override; + inline void resizeLabelLookup(size_t new_max_elements) override { + labelToIdsLookup.reserve(new_max_elements); + } + inline bool isLabelExists(labelType label) override { return labelToIdsLookup.find(label) != labelToIdsLookup.end(); } @@ -198,7 +202,7 @@ double BruteForceIndex_Multi::getDistanceFrom(labelType labe DistType dist = std::numeric_limits::infinity(); for (auto id : IDs->second) { - DistType d = this->dist_func(this->getDataByInternalId(id), vector_data, this->dim); + DistType d = this->distFunc(this->getDataByInternalId(id), vector_data, this->dim); dist = (dist < d) ? dist : d; } diff --git a/src/VecSim/algorithms/brute_force/brute_force_single.h b/src/VecSim/algorithms/brute_force/brute_force_single.h index 15e018150..17aef57ce 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_single.h +++ b/src/VecSim/algorithms/brute_force/brute_force_single.h @@ -56,11 +56,11 @@ class BruteForceIndex_Single : public BruteForceIndex { inline void updateVector(idType id, const void *vector_data) { // Get the vector block - VectorBlock *vectorBlock = this->getVectorVectorBlock(id); + DataBlock &vectorBlock = this->getVectorVectorBlock(id); size_t index = BruteForceIndex::getVectorRelativeIndex(id); // Update vector data in the block. - vectorBlock->updateVector(index, vector_data); + vectorBlock.updateElement(index, vector_data); } inline void setVectorId(labelType label, idType id) override { @@ -71,6 +71,10 @@ class BruteForceIndex_Single : public BruteForceIndex { labelToIdLookup.at(label) = new_id; } + inline void resizeLabelLookup(size_t new_max_elements) override { + labelToIdLookup.reserve(new_max_elements); + } + inline bool isLabelExists(labelType label) override { return labelToIdLookup.find(label) != labelToIdLookup.end(); } @@ -189,5 +193,5 @@ double BruteForceIndex_Single::getDistanceFrom(labelType lab } idType id = optionalId->second; - return this->dist_func(this->getDataByInternalId(id), vector_data, this->dim); + return this->distFunc(this->getDataByInternalId(id), vector_data, this->dim); } diff --git a/src/VecSim/algorithms/brute_force/vector_block.cpp b/src/VecSim/algorithms/brute_force/vector_block.cpp deleted file mode 100644 index 5dd460adb..000000000 --- a/src/VecSim/algorithms/brute_force/vector_block.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - *Copyright Redis Ltd. 2021 - present - *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or - *the Server Side Public License v1 (SSPLv1). - */ - -#include "vector_block.h" -#include "VecSim/memory/vecsim_malloc.h" -#include - -VectorBlock::VectorBlock(size_t blockSize, size_t vectorBytesCount, - std::shared_ptr allocator) - : VecsimBaseObject(allocator), vector_bytes_count(vectorBytesCount), length(0), - blockSize(blockSize) { - this->vectors = (char *)this->allocator->allocate(vectorBytesCount * blockSize); -} - -VectorBlock::~VectorBlock() { - this->allocator->deallocate(vectors, vector_bytes_count * blockSize); -} - -void VectorBlock::addVector(const void *vectorData) { - - // Copy vector data and update block size. - memcpy(this->vectors + (this->length * vector_bytes_count), vectorData, vector_bytes_count); - this->length++; -} - -void VectorBlock::updateVector(size_t index, const void *vector_data) { - char *destinaion = getVector(index); - memcpy(destinaion, vector_data, vector_bytes_count); -} diff --git a/src/VecSim/algorithms/brute_force/vector_block.h b/src/VecSim/algorithms/brute_force/vector_block.h deleted file mode 100644 index 86ed9dec4..000000000 --- a/src/VecSim/algorithms/brute_force/vector_block.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - *Copyright Redis Ltd. 2021 - present - *Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or - *the Server Side Public License v1 (SSPLv1). - */ - -#pragma once -#include -#include "VecSim/memory/vecsim_base.h" -#include "VecSim/utils/vecsim_stl.h" - -#include "VecSim/utils/vec_utils.h" - -struct VectorBlock : public VecsimBaseObject { - -public: - VectorBlock(size_t blockSize, size_t vectorBytesCount, - std::shared_ptr allocator); - - void addVector(const void *vectorData); - - void updateVector(size_t index, const void *vector_data); - - inline char *getVector(size_t index) { return this->vectors + (index * vector_bytes_count); } - - inline char *removeAndFetchLastVector() { - return this->vectors + ((--this->length) * vector_bytes_count); - } - - inline size_t getLength() { return length; } - - virtual ~VectorBlock(); - -private: - // Vector size in bytes (dim * sizeof(data_type)) - size_t vector_bytes_count; - // Current vector block length. - size_t length; - // Vector block size (capacity). - size_t blockSize; - // Vectors hosted in the vector block. - char *vectors; -}; diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index c4726ef66..096dcc985 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -12,6 +12,7 @@ #include "VecSim/memory/vecsim_malloc.h" #include "VecSim/utils/vecsim_stl.h" #include "VecSim/utils/vec_utils.h" +#include "VecSim/utils/data_block.h" #include "VecSim/utils/vecsim_results_container.h" #include "VecSim/query_result_struct.h" #include "VecSim/vec_sim_common.h" @@ -39,7 +40,7 @@ using std::pair; typedef uint16_t linkListSize; -typedef uint16_t elementFlags; +typedef uint8_t elementFlags; template using candidatesMaxHeap = vecsim_stl::max_priority_queue; @@ -47,6 +48,8 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) +////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// + // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph @@ -64,6 +67,55 @@ struct AddVectorCtx { int currMaxLevel; }; +#pragma pack(1) +struct ElementMetaData { + labelType label; + elementFlags flags; + + ElementMetaData(labelType label = SIZE_MAX) noexcept : label(label), flags(IN_PROCESS) {} +}; +#pragma pack() // restore default packing + +struct LevelData { + vecsim_stl::vector *incomingEdges; + linkListSize numLinks; + // Flexible array member - https://en.wikipedia.org/wiki/Flexible_array_member + // Using this trick, we can have the links list as part of the LevelData struct, and avoid + // the need to dereference a pointer to get to the links list. + // We have to calculate the size of the struct manually, as `sizeof(LevelData)` will not include + // this member. We do so in the constructor of the index, under the name `levelDataSize` (and + // `elementGraphDataSize`). Notice that this member must be the last member of the struct and + // all nesting structs. + idType links[]; + + LevelData(std::shared_ptr allocator) + : incomingEdges(new (allocator) vecsim_stl::vector(allocator)), numLinks(0) {} +}; + +struct ElementGraphData { + size_t toplevel; + std::mutex neighborsGuard; + LevelData *others; + LevelData level0; + + ElementGraphData(size_t maxLevel, size_t high_level_size, + std::shared_ptr allocator) + : toplevel(maxLevel), others(nullptr), level0(allocator) { + if (toplevel > 0) { + others = (LevelData *)allocator->callocate(high_level_size * toplevel); + if (others == nullptr) { + throw std::runtime_error("VecSim index low memory error"); + } + for (size_t i = 0; i < maxLevel; i++) { + new ((char *)others + i * high_level_size) LevelData(allocator); + } + } + } + ~ElementGraphData() = delete; // Should be destroyed using `destroyGraphData` +}; + +//////////////////////////////////// HNSW index implementation //////////////////////////////////// + template class HNSWIndex : public VecSimIndexAbstract, public VecSimIndexTombstone @@ -74,46 +126,39 @@ class HNSWIndex : public VecSimIndexAbstract, { protected: // Index build parameters - size_t max_elements_; - size_t M_; - size_t maxM_; - size_t maxM0_; - size_t ef_construction_; + size_t maxElements; + size_t M; + size_t M0; + size_t efConstruction; // Index search parameter - size_t ef_; - double epsilon_; + size_t ef; + double epsilon; // Index meta-data (based on the data dimensionality and index parameters) - size_t size_data_per_element_; - size_t size_links_per_element_; - size_t size_links_level0_; - size_t label_offset_; - size_t offsetData_, offsetLevel0_; - size_t incoming_links_offset0; - size_t incoming_links_offset; - double mult_; + size_t elementGraphDataSize; + size_t levelDataSize; + double mult; // Index level generator of the top level for a new element - std::default_random_engine level_generator_; + std::default_random_engine levelGenerator; - // Index global state - these should be guarded by the index_data_guard_ lock in + // Index global state - these should be guarded by the indexDataGuard lock in // multithreaded scenario. - size_t cur_element_count; - vecsim_stl::vector element_levels_; - idType entrypoint_node_; - size_t max_level_; // this is the top level of the entry point's element + size_t curElementCount; + idType entrypointNode; + size_t maxLevel; // this is the top level of the entry point's element // Index data - char *data_level0_memory_; // neighbors in level 0, element label, flags and data (vector) - char **linkLists_; // neighbors in level higher than 0 + vecsim_stl::vector vectorBlocks; + vecsim_stl::vector graphDataBlocks; + vecsim_stl::vector idToMetaData; // Used for marking the visited nodes in graph scans (the pool supports parallel graph scans). // This is mutable since the object changes upon search operations as well (which are const). - mutable VisitedNodesHandlerPool visited_nodes_handler_pool; + mutable VisitedNodesHandlerPool visitedNodesHandlerPool; - mutable std::shared_mutex index_data_guard_; - mutable vecsim_stl::vector element_neighbors_locks_; + mutable std::shared_mutex indexDataGuard; #ifdef BUILD_TESTS #include "VecSim/algorithms/hnsw/hnsw_base_tests_friends.h" @@ -124,30 +169,22 @@ class HNSWIndex : public VecSimIndexAbstract, protected: HNSWIndex() = delete; // default constructor is disabled. HNSWIndex(const HNSWIndex &) = delete; // default (shallow) copy constructor is disabled. - inline void setExternalLabel(idType internal_id, labelType label); - inline labelType *getExternalLabelPtr(idType internal_id) const; inline size_t getRandomLevel(double reverse_size); - inline vecsim_stl::vector *getIncomingEdgesPtr(idType internal_id, size_t level) const; - inline void setIncomingEdgesPtr(idType internal_id, size_t level, void *edges_ptr); - inline elementFlags *getElementFlags(idType internal_id) const; - inline idType *getNodeNeighborsAtBaseLevel(idType internal_id) const; - inline idType *getNodeNeighborsAtNonBaseLevel(idType internal_id, size_t level) const; - inline void setNodeNeighborsCount(idType *list, linkListSize size); inline void removeExtraLinks(candidatesMaxHeap candidates, size_t Mcurmax, - idType *node_neighbors, const vecsim_stl::vector &bitmap, + LevelData &node_level, const vecsim_stl::vector &bitmap, idType *removed_links, size_t *removed_links_num); template // Either idType or labelType - inline DistType + inline void processCandidate(idType curNodeId, const void *data_point, size_t layer, size_t ef, - tag_t visited_tag, tag_t *elements_tags, + tag_t *elements_tags, tag_t visited_tag, vecsim_stl::abstract_priority_queue &top_candidates, - candidatesMaxHeap &candidates_set, DistType lowerBound) const; + candidatesMaxHeap &candidates_set, DistType &lowerBound) const; template inline void processCandidate_RangeSearch( - idType curNodeId, const void *data_point, size_t layer, double epsilon, tag_t visited_tag, - tag_t *elements_tags, + idType curNodeId, const void *data_point, size_t layer, double epsilon, + tag_t *elements_tags, tag_t visited_tag, std::unique_ptr &top_candidates, - candidatesMaxHeap &candidate_set, DistType lowerBound, double radius) const; + candidatesMaxHeap &candidate_set, DistType lowerBound, DistType radius) const; template candidatesMaxHeap searchLayer(idType ep_id, const void *data_point, size_t layer, size_t ef) const; @@ -157,7 +194,7 @@ class HNSWIndex : public VecSimIndexAbstract, void *timeoutCtx, VecSimQueryResult_Code *rc) const; template VecSimQueryResult *searchRangeBottomLayer_WithTimeout(idType ep_id, const void *data_point, - double epsilon, double radius, + double epsilon, DistType radius, void *timeoutCtx, VecSimQueryResult_Code *rc) const; void getNeighborsByHeuristic2(candidatesMaxHeap &top_candidates, size_t M); @@ -167,10 +204,7 @@ class HNSWIndex : public VecSimIndexAbstract, // *Note that node_lock and neighbor_lock should be locked upon calling this function* void revisitNeighborConnections(size_t level, idType new_node_id, const std::pair &neighbor_data, - idType *new_node_neighbors_list, - idType *neighbor_neighbors_list, - std::unique_lock &node_lock, - std::unique_lock &neighbor_lock); + LevelData &new_node_level, LevelData &neighbor_level); inline idType mutuallyConnectNewElement(idType new_node_id, candidatesMaxHeap &top_candidates, size_t level); @@ -184,18 +218,25 @@ class HNSWIndex : public VecSimIndexAbstract, void greedySearchLevel(const void *vector_data, size_t level, idType &curObj, DistType &curDist, void *timeoutCtx = nullptr, VecSimQueryResult_Code *rc = nullptr) const; void repairConnectionsForDeletion(idType element_internal_id, idType neighbour_id, - idType *neighbours_list, idType *neighbour_neighbours_list, + LevelData &node_level, LevelData &neighbor_level, size_t level, vecsim_stl::vector &neighbours_bitmap); + inline void destroyGraphData(ElementGraphData *em); inline void replaceEntryPoint(); - inline void resizeIndexInternal(size_t new_max_elements); template - inline void SwapLastIdWithDeletedId(idType element_internal_id); + inline void SwapLastIdWithDeletedId(idType element_internal_id, ElementGraphData *last_element, + void *last_element_data); // Protected internal function that implements generic single vector insertion. void appendVector(const void *vector_data, labelType label, AddVectorCtx *auxiliaryCtx = nullptr); + // Protected internal functions for index resizing. + inline void growByBlock(); + inline void shrinkByBlock(); + // DO NOT USE DIRECTLY. Use `[grow|shrink]ByBlock` instead. + inline void resizeIndexCommon(size_t new_max_elements); + // Protected internal function that implements generic single vector deletion. void removeVectorInPlace(idType id); @@ -210,6 +251,22 @@ class HNSWIndex : public VecSimIndexAbstract, template void removeAndSwap(idType internalId); + inline size_t getVectorRelativeIndex(idType id) const { return id % this->blockSize; } + + // Flagging API + template + inline void markAs(idType internalId) { + __atomic_fetch_or(&idToMetaData[internalId].flags, FLAG, 0); + } + template + inline void unmarkAs(idType internalId) { + __atomic_fetch_and(&idToMetaData[internalId].flags, ~FLAG, 0); + } + template + inline bool isMarkedAs(idType internalId) const { + return idToMetaData[internalId].flags & FLAG; + } + public: HNSWIndex(const HNSWParams *params, const AbstractIndexInitParams &abstractInitParams, size_t random_seed = 100, size_t initial_pool_size = 1); @@ -225,7 +282,9 @@ class HNSWIndex : public VecSimIndexAbstract, inline size_t getM() const; inline size_t getMaxLevel() const; inline labelType getEntryPointLabel() const; - inline labelType getExternalLabel(idType internal_id) const; + inline labelType getExternalLabel(idType internal_id) const { + return idToMetaData[internal_id].label; + } // Check if the given label exists in the labels lookup while holding the index data lock. // Optionally validate that the associated vector(s) are not in process and done indexing // (this option is used currently for tests). @@ -236,15 +295,18 @@ class HNSWIndex : public VecSimIndexAbstract, inline void unlockIndexDataGuard() const; inline void lockNodeLinks(idType node_id) const; inline void unlockNodeLinks(idType node_id) const; + inline void lockNodeLinks(ElementGraphData *node_data) const; + inline void unlockNodeLinks(ElementGraphData *node_data) const; inline VisitedNodesHandler *getVisitedList() const; inline void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const; VecSimIndexInfo info() const override; VecSimIndexBasicInfo basicInfo() const override; VecSimInfoIterator *infoIterator() const override; bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override; - char *getDataByInternalId(idType internal_id) const; - inline idType *getNodeNeighborsAtLevel(idType internal_id, size_t level) const; - inline linkListSize getNodeNeighborsCount(const idType *list) const; + inline const char *getDataByInternalId(idType internal_id) const; + inline ElementGraphData *getGraphDataByInternalId(idType internal_id) const; + inline LevelData &getLevelData(idType internal_id, size_t level) const; + inline LevelData &getLevelData(ElementGraphData *element, size_t level) const; inline idType searchBottomLayerEP(const void *query_data, void *timeoutCtx, VecSimQueryResult_Code *rc) const; @@ -256,15 +318,15 @@ class HNSWIndex : public VecSimIndexAbstract, inline void markDeletedInternal(idType internalId); inline bool isMarkedDeleted(idType internalId) const; inline bool isInProcess(idType internalId) const; - inline void markInProcess(idType internalId); inline void unmarkInProcess(idType internalId); - void increaseCapacity() override; - AddVectorCtx storeNewElement(labelType label); + AddVectorCtx storeNewElement(labelType label, const void *vector_data); void removeAndSwapDeletedElement(idType internalId); void repairNodeConnections(idType node_id, size_t level); - inline size_t getElementTopLevel(idType internalId); - vecsim_stl::vector safeCollectAllNodeIncomingNeighbors(idType node_id, - size_t node_top_level); + // For prefetching only. + inline const ElementMetaData *getMetaDataAddress(idType internal_id) const { + return idToMetaData.data() + internal_id; + } + vecsim_stl::vector safeCollectAllNodeIncomingNeighbors(idType node_id) const; // Return all the labels in the index - this should be used for computing the number of distinct // labels in a tiered index, and caller should hold the index data guard. virtual inline vecsim_stl::set getLabelsSet() const = 0; @@ -285,6 +347,7 @@ class HNSWIndex : public VecSimIndexAbstract, virtual void getDataByLabel(labelType label, std::vector> &vectors_output) const = 0; #endif + protected: // inline label to id setters that need to be implemented by derived class virtual inline std::unique_ptr @@ -300,233 +363,163 @@ class HNSWIndex : public VecSimIndexAbstract, template void HNSWIndex::setEf(size_t ef) { - ef_ = ef; + this->ef = ef; } template size_t HNSWIndex::getEf() const { - return ef_; + return this->ef; } template void HNSWIndex::setEpsilon(double epsilon) { - epsilon_ = epsilon; + this->epsilon = epsilon; } template double HNSWIndex::getEpsilon() const { - return epsilon_; + return this->epsilon; } template size_t HNSWIndex::indexSize() const { - return cur_element_count; + return this->curElementCount; } template size_t HNSWIndex::indexCapacity() const { - return max_elements_; + return this->maxElements; } template size_t HNSWIndex::getEfConstruction() const { - return ef_construction_; + return this->efConstruction; } template size_t HNSWIndex::getM() const { - return M_; + return this->M; } template size_t HNSWIndex::getMaxLevel() const { - return max_level_; + return this->maxLevel; } template labelType HNSWIndex::getEntryPointLabel() const { - if (entrypoint_node_ != INVALID_ID) - return getExternalLabel(entrypoint_node_); + if (entrypointNode != INVALID_ID) + return getExternalLabel(entrypointNode); return SIZE_MAX; } template -labelType HNSWIndex::getExternalLabel(idType internal_id) const { - labelType return_label; - memcpy(&return_label, - (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), - sizeof(labelType)); - return return_label; -} - -template -void HNSWIndex::setExternalLabel(idType internal_id, labelType label) { - memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label, - sizeof(labelType)); -} - -template -labelType *HNSWIndex::getExternalLabelPtr(idType internal_id) const { - return (labelType *)(data_level0_memory_ + internal_id * size_data_per_element_ + - label_offset_); +const char *HNSWIndex::getDataByInternalId(idType internal_id) const { + return vectorBlocks[internal_id / this->blockSize].getElement(internal_id % this->blockSize); } template -char *HNSWIndex::getDataByInternalId(idType internal_id) const { - return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); +ElementGraphData * +HNSWIndex::getGraphDataByInternalId(idType internal_id) const { + return (ElementGraphData *)graphDataBlocks[internal_id / this->blockSize].getElement( + internal_id % this->blockSize); } template size_t HNSWIndex::getRandomLevel(double reverse_size) { std::uniform_real_distribution distribution(0.0, 1.0); - double r = -log(distribution(level_generator_)) * reverse_size; + double r = -log(distribution(levelGenerator)) * reverse_size; return (size_t)r; } template -vecsim_stl::vector *HNSWIndex::getIncomingEdgesPtr(idType internal_id, - size_t level) const { - if (level == 0) { - return reinterpret_cast *>( - *(void **)(data_level0_memory_ + internal_id * size_data_per_element_ + - incoming_links_offset0)); - } - return reinterpret_cast *>( - *(void **)(linkLists_[internal_id] + (level - 1) * size_links_per_element_ + - incoming_links_offset)); +LevelData &HNSWIndex::getLevelData(idType internal_id, size_t level) const { + return getLevelData(getGraphDataByInternalId(internal_id), level); } template -void HNSWIndex::setIncomingEdgesPtr(idType internal_id, size_t level, - void *edges_ptr) { +LevelData &HNSWIndex::getLevelData(ElementGraphData *elem, size_t level) const { + assert(level <= elem->toplevel); if (level == 0) { - memcpy(data_level0_memory_ + internal_id * size_data_per_element_ + incoming_links_offset0, - &edges_ptr, sizeof(void *)); + return elem->level0; } else { - memcpy(linkLists_[internal_id] + (level - 1) * size_links_per_element_ + - incoming_links_offset, - &edges_ptr, sizeof(void *)); + return *(LevelData *)((char *)elem->others + (level - 1) * this->levelDataSize); } } -template -elementFlags *HNSWIndex::getElementFlags(idType internal_id) const { - // elementFlags offset is 0 from the start of the element metadata - return (elementFlags *)(data_level0_memory_ + internal_id * size_data_per_element_ + - offsetLevel0_); -} - -template -idType *HNSWIndex::getNodeNeighborsAtBaseLevel(idType internal_id) const { - // links offset at level 0 is `sizeof(elementFlags) + sizeof(linkListSize)` from the start of - // the element metadata - return (idType *)(data_level0_memory_ + internal_id * size_data_per_element_ + - sizeof(elementFlags) + sizeof(linkListSize) + offsetLevel0_); -} - -template -idType *HNSWIndex::getNodeNeighborsAtNonBaseLevel(idType internal_id, - size_t level) const { - // links offset at level >0 is `sizeof(linkListSize)` from the start of the element metadata - return (idType *)(linkLists_[internal_id] + (level - 1) * size_links_per_element_ + - sizeof(linkListSize)); -} - -template -idType *HNSWIndex::getNodeNeighborsAtLevel(idType internal_id, - size_t level) const { - return level == 0 ? getNodeNeighborsAtBaseLevel(internal_id) - : getNodeNeighborsAtNonBaseLevel(internal_id, level); -} - -template -linkListSize HNSWIndex::getNodeNeighborsCount(const idType *list) const { - return *(((linkListSize *)list) - 1); -} - -template -void HNSWIndex::setNodeNeighborsCount(idType *list, const linkListSize size) { - *(((linkListSize *)list) - 1) = size; -} - template VisitedNodesHandler *HNSWIndex::getVisitedList() const { - return visited_nodes_handler_pool.getAvailableVisitedNodesHandler(); + return visitedNodesHandlerPool.getAvailableVisitedNodesHandler(); } template void HNSWIndex::returnVisitedList( VisitedNodesHandler *visited_nodes_handler) const { - visited_nodes_handler_pool.returnVisitedNodesHandlerToPool(visited_nodes_handler); + visitedNodesHandlerPool.returnVisitedNodesHandlerToPool(visited_nodes_handler); } template void HNSWIndex::markDeletedInternal(idType internalId) { // Here we are holding the global index data guard (and the main index lock of the tiered index // for shared ownership). - assert(internalId < this->cur_element_count); + assert(internalId < this->curElementCount); if (!isMarkedDeleted(internalId)) { - if (internalId == entrypoint_node_) { + if (internalId == entrypointNode) { // Internally, we hold and release the entrypoint neighbors lock. replaceEntryPoint(); } // Atomically set the deletion mark flag (note that other parallel threads may set the flags // at the same time (for changing the IN_PROCESS flag). - __atomic_fetch_or(getElementFlags(internalId), DELETE_MARK, 0); - this->num_marked_deleted++; + markAs(internalId); + this->numMarkedDeleted++; } } template bool HNSWIndex::isMarkedDeleted(idType internalId) const { - elementFlags *flags = getElementFlags(internalId); - return *flags & DELETE_MARK; + return isMarkedAs(internalId); } template bool HNSWIndex::isInProcess(idType internalId) const { - elementFlags *flags = getElementFlags(internalId); - return *flags & IN_PROCESS; -} - -template -void HNSWIndex::markInProcess(idType internalId) { - // Atomically set the IN_PROCESS mark flag. Even though other threads shouldn't modify the flags - // at that time (we're holding index global data guard, so this element cannot be marked as - // deleted in parallel), we do it for safety. - __atomic_fetch_or(getElementFlags(internalId), IN_PROCESS, 0); + return isMarkedAs(internalId); } template void HNSWIndex::unmarkInProcess(idType internalId) { // Atomically unset the IN_PROCESS mark flag (note that other parallel threads may set the flags // at the same time (for marking the element with MARK_DELETE flag). - __atomic_fetch_and(getElementFlags(internalId), ~IN_PROCESS, 0); + unmarkAs(internalId); } template void HNSWIndex::lockIndexDataGuard() const { - index_data_guard_.lock(); + indexDataGuard.lock(); } template void HNSWIndex::unlockIndexDataGuard() const { - index_data_guard_.unlock(); + indexDataGuard.unlock(); } template -void HNSWIndex::lockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].lock(); +void HNSWIndex::lockNodeLinks(ElementGraphData *node_data) const { + node_data->neighborsGuard.lock(); } template -void HNSWIndex::unlockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].unlock(); +void HNSWIndex::unlockNodeLinks(ElementGraphData *node_data) const { + node_data->neighborsGuard.unlock(); +} + +template +void HNSWIndex::lockNodeLinks(idType node_id) const { + lockNodeLinks(getGraphDataByInternalId(node_id)); } template -inline size_t HNSWIndex::getElementTopLevel(idType internalId) { - return element_levels_[internalId]; +void HNSWIndex::unlockNodeLinks(idType node_id) const { + unlockNodeLinks(getGraphDataByInternalId(node_id)); } /** @@ -534,7 +527,7 @@ inline size_t HNSWIndex::getElementTopLevel(idType internalI */ template void HNSWIndex::removeExtraLinks( - candidatesMaxHeap candidates, size_t Mcurmax, idType *node_neighbors, + candidatesMaxHeap candidates, size_t Mcurmax, LevelData &node_level, const vecsim_stl::vector &neighbors_bitmap, idType *removed_links, size_t *removed_links_num) { @@ -554,12 +547,12 @@ void HNSWIndex::removeExtraLinks( } orig_candidates.pop(); } else { - node_neighbors[link_idx++] = candidates.top().second; + node_level.links[link_idx++] = candidates.top().second; candidates.pop(); orig_candidates.pop(); } } - setNodeNeighborsCount(node_neighbors, link_idx); + node_level.numLinks = link_idx; *removed_links_num = removed_idx; } @@ -580,100 +573,153 @@ void HNSWIndex::emplaceToHeap( // overloading to emplace correctly for both cases. template template -DistType HNSWIndex::processCandidate( - idType curNodeId, const void *data_point, size_t layer, size_t ef, tag_t visited_tag, - tag_t *elements_tags, vecsim_stl::abstract_priority_queue &top_candidates, - candidatesMaxHeap &candidate_set, DistType lowerBound) const { +void HNSWIndex::processCandidate( + idType curNodeId, const void *query_data, size_t layer, size_t ef, tag_t *elements_tags, + tag_t visited_tag, vecsim_stl::abstract_priority_queue &top_candidates, + candidatesMaxHeap &candidate_set, DistType &lowerBound) const { + + ElementGraphData *cur_element = getGraphDataByInternalId(curNodeId); + lockNodeLinks(cur_element); + LevelData &node_level = getLevelData(cur_element, layer); + + if (node_level.numLinks > 0) { + + const char *cur_data, *next_data; + // Pre-fetch first candidate tag address. + __builtin_prefetch(elements_tags + node_level.links[0]); + // Pre-fetch first candidate data block address. + next_data = getDataByInternalId(node_level.links[0]); + __builtin_prefetch(next_data); + + for (linkListSize j = 0; j < node_level.numLinks - 1; j++) { + idType candidate_id = node_level.links[j]; + cur_data = next_data; + + // Pre-fetch next candidate tag address. + __builtin_prefetch(elements_tags + node_level.links[j + 1]); + // Pre-fetch next candidate data block address. + next_data = getDataByInternalId(node_level.links[j + 1]); + __builtin_prefetch(next_data); + + if (elements_tags[candidate_id] == visited_tag || isInProcess(candidate_id)) + continue; - std::unique_lock lock(element_neighbors_locks_[curNodeId]); - idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); - linkListSize links_num = getNodeNeighborsCount(node_links); + elements_tags[candidate_id] = visited_tag; - __builtin_prefetch(elements_tags + *node_links); - __builtin_prefetch(getDataByInternalId(*node_links)); + DistType cur_dist = this->distFunc(query_data, cur_data, this->dim); + if (lowerBound > cur_dist || top_candidates.size() < ef) { - for (size_t j = 0; j < links_num; j++) { - idType *candidate_pos = node_links + j; - idType candidate_id = *candidate_pos; - idType *next_candidate_pos = node_links + j + 1; + candidate_set.emplace(-cur_dist, candidate_id); - __builtin_prefetch(elements_tags + *next_candidate_pos); - __builtin_prefetch(getDataByInternalId(*next_candidate_pos)); + // Insert the candidate to the top candidates heap only if it is not marked as + // deleted. + if (!has_marked_deleted || !isMarkedDeleted(candidate_id)) + emplaceToHeap(top_candidates, cur_dist, candidate_id); - if (elements_tags[candidate_id] == visited_tag || isInProcess(candidate_id)) - continue; + if (top_candidates.size() > ef) + top_candidates.pop(); + + // If we have marked deleted elements, we need to verify that `top_candidates` is + // not empty (since we might have not added any non-deleted element yet). + if (!has_marked_deleted || !top_candidates.empty()) + lowerBound = top_candidates.top().first; + } + } + + // Running the last neighbor outside the loop to avoid prefetching invalid neighbor + idType candidate_id = node_level.links[node_level.numLinks - 1]; + cur_data = next_data; - elements_tags[candidate_id] = visited_tag; - char *currObj1 = (getDataByInternalId(candidate_id)); + if (elements_tags[candidate_id] != visited_tag && !isInProcess(candidate_id)) { - DistType dist1 = this->dist_func(data_point, currObj1, this->dim); - if (lowerBound > dist1 || top_candidates.size() < ef) { - candidate_set.emplace(-dist1, candidate_id); + elements_tags[candidate_id] = visited_tag; - // Insert the candidate to the top candidates heap only if it is not marked as deleted. - if (!has_marked_deleted || !isMarkedDeleted(candidate_id)) - emplaceToHeap(top_candidates, dist1, candidate_id); + DistType cur_dist = this->distFunc(query_data, cur_data, this->dim); + if (lowerBound > cur_dist || top_candidates.size() < ef) { + candidate_set.emplace(-cur_dist, candidate_id); - if (top_candidates.size() > ef) - top_candidates.pop(); + // Insert the candidate to the top candidates heap only if it is not marked as + // deleted. + if (!has_marked_deleted || !isMarkedDeleted(candidate_id)) + emplaceToHeap(top_candidates, cur_dist, candidate_id); - // If we have marked deleted elements, we need to verify that `top_candidates` is not - // empty (since we might have not added any non-deleted element yet). - if (!has_marked_deleted || !top_candidates.empty()) - lowerBound = top_candidates.top().first; + if (top_candidates.size() > ef) + top_candidates.pop(); + + // If we have marked deleted elements, we need to verify that `top_candidates` is + // not empty (since we might have not added any non-deleted element yet). + if (!has_marked_deleted || !top_candidates.empty()) + lowerBound = top_candidates.top().first; + } } } - // Pre-fetch the neighbours list of the top candidate (the one that is going - // to be processed in the next iteration) into memory cache, to improve performance. - __builtin_prefetch(getNodeNeighborsAtLevel(candidate_set.top().second, layer)); - - return lowerBound; + unlockNodeLinks(cur_element); } template template void HNSWIndex::processCandidate_RangeSearch( - idType curNodeId, const void *query_data, size_t layer, double epsilon, tag_t visited_tag, - tag_t *elements_tags, std::unique_ptr &results, - candidatesMaxHeap &candidate_set, DistType dyn_range, double radius) const { + idType curNodeId, const void *query_data, size_t layer, double epsilon, tag_t *elements_tags, + tag_t visited_tag, std::unique_ptr &results, + candidatesMaxHeap &candidate_set, DistType dyn_range, DistType radius) const { + + auto *cur_element = getGraphDataByInternalId(curNodeId); + lockNodeLinks(cur_element); + LevelData &node_level = getLevelData(cur_element, layer); + if (node_level.numLinks > 0) { + + const char *cur_data, *next_data; + // Pre-fetch first candidate tag address. + __builtin_prefetch(elements_tags + node_level.links[0]); + // Pre-fetch first candidate data block address. + next_data = getDataByInternalId(node_level.links[0]); + __builtin_prefetch(next_data); + + for (linkListSize j = 0; j < node_level.numLinks - 1; j++) { + idType candidate_id = node_level.links[j]; + cur_data = next_data; + + // Pre-fetch next candidate tag address. + __builtin_prefetch(elements_tags + node_level.links[j + 1]); + // Pre-fetch next candidate data block address. + next_data = getDataByInternalId(node_level.links[j + 1]); + __builtin_prefetch(next_data); + + if (elements_tags[candidate_id] == visited_tag || isInProcess(candidate_id)) + continue; - std::unique_lock lock(element_neighbors_locks_[curNodeId]); - idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); - linkListSize links_num = getNodeNeighborsCount(node_links); + elements_tags[candidate_id] = visited_tag; - __builtin_prefetch(elements_tags + *node_links); - __builtin_prefetch(getDataByInternalId(*node_links)); + DistType cur_dist = this->distFunc(query_data, cur_data, this->dim); + if (cur_dist < dyn_range) { + candidate_set.emplace(-cur_dist, candidate_id); - // Cast radius once instead of each time we check that candidate_dist <= radius_ - DistType radius_ = DistType(radius); - for (size_t j = 0; j < links_num; j++) { - idType *candidate_pos = node_links + j; - idType candidate_id = *candidate_pos; + // If the new candidate is in the requested radius, add it to the results set. + if (cur_dist <= radius && (!has_marked_deleted || !isMarkedDeleted(candidate_id))) { + results->emplace(getExternalLabel(candidate_id), cur_dist); + } + } + } + // Running the last candidate outside the loop to avoid prefetching invalid candidate + idType candidate_id = node_level.links[node_level.numLinks - 1]; + cur_data = next_data; - // Pre-fetch the next candidate data into memory cache, to improve performance. - idType *next_candidate_pos = node_links + j + 1; - __builtin_prefetch(elements_tags + *next_candidate_pos); - __builtin_prefetch(getDataByInternalId(*next_candidate_pos)); + if (elements_tags[candidate_id] != visited_tag && !isInProcess(candidate_id)) { - if (elements_tags[candidate_id] == visited_tag || isInProcess(candidate_id)) - continue; - elements_tags[candidate_id] = visited_tag; - char *candidate_data = getDataByInternalId(candidate_id); + elements_tags[candidate_id] = visited_tag; - DistType candidate_dist = this->dist_func(query_data, candidate_data, this->dim); - if (candidate_dist < dyn_range) { - candidate_set.emplace(-candidate_dist, candidate_id); + DistType cur_dist = this->distFunc(query_data, cur_data, this->dim); + if (cur_dist < dyn_range) { + candidate_set.emplace(-cur_dist, candidate_id); - // If the new candidate is in the requested radius, add it to the results set. - if (candidate_dist <= radius_ && - (!has_marked_deleted || !isMarkedDeleted(candidate_id))) { - results->emplace(getExternalLabel(candidate_id), candidate_dist); + // If the new candidate is in the requested radius, add it to the results set. + if (cur_dist <= radius && (!has_marked_deleted || !isMarkedDeleted(candidate_id))) { + results->emplace(getExternalLabel(candidate_id), cur_dist); + } } } } - // Pre-fetch the neighbours list of the top candidate (the one that is going - // to be processed in the next iteration) into memory cache, to improve performance. - __builtin_prefetch(getNodeNeighborsAtLevel(candidate_set.top().second, layer)); + unlockNodeLinks(cur_element); } template @@ -690,7 +736,7 @@ HNSWIndex::searchLayer(idType ep_id, const void *data_point, DistType lowerBound; if (!has_marked_deleted || !isMarkedDeleted(ep_id)) { - DistType dist = this->dist_func(data_point, getDataByInternalId(ep_id), this->dim); + DistType dist = this->distFunc(data_point, getDataByInternalId(ep_id), this->dim); lowerBound = dist; top_candidates.emplace(dist, ep_id); candidate_set.emplace(-dist, ep_id); @@ -703,17 +749,18 @@ HNSWIndex::searchLayer(idType ep_id, const void *data_point, while (!candidate_set.empty()) { pair curr_el_pair = candidate_set.top(); + if ((-curr_el_pair.first) > lowerBound && top_candidates.size() >= ef) { break; } candidate_set.pop(); - lowerBound = processCandidate( - curr_el_pair.second, data_point, layer, ef, visited_tag, - visited_nodes_handler->getElementsTags(), top_candidates, candidate_set, lowerBound); + processCandidate(curr_el_pair.second, data_point, layer, ef, + visited_nodes_handler->getElementsTags(), visited_tag, + top_candidates, candidate_set, lowerBound); } - returnVisitedList(visited_nodes_handler); + returnVisitedList(visited_nodes_handler); return top_candidates; } @@ -726,6 +773,9 @@ void HNSWIndex::getNeighborsByHeuristic2( candidatesMaxHeap queue_closest(this->allocator); vecsim_stl::vector> return_list(this->allocator); + vecsim_stl::vector cached_vectors(this->allocator); + return_list.reserve(M); + cached_vectors.reserve(M); while (top_candidates.size() > 0) { // the distance is saved negatively to have the queue ordered such that first is closer // (higher). @@ -733,27 +783,26 @@ void HNSWIndex::getNeighborsByHeuristic2( top_candidates.pop(); } - while (queue_closest.size()) { - if (return_list.size() >= M) - break; + while (queue_closest.size() && return_list.size() < M) { pair current_pair = queue_closest.top(); DistType candidate_to_query_dist = -current_pair.first; queue_closest.pop(); bool good = true; + const void *curr_vector = getDataByInternalId(current_pair.second); // a candidate is "good" to become a neighbour, unless we find // another item that was already selected to the neighbours set which is closer // to both q and the candidate than the distance between the candidate and q. - for (pair second_pair : return_list) { + for (size_t i = 0; i < return_list.size(); i++) { DistType candidate_to_selected_dist = - this->dist_func(getDataByInternalId(second_pair.second), - getDataByInternalId(current_pair.second), this->dim); + this->distFunc(cached_vectors[i], curr_vector, this->dim); if (candidate_to_selected_dist < candidate_to_query_dist) { good = false; break; } } if (good) { + cached_vectors.push_back(curr_vector); return_list.push_back(current_pair); } } @@ -766,8 +815,7 @@ void HNSWIndex::getNeighborsByHeuristic2( template void HNSWIndex::revisitNeighborConnections( size_t level, idType new_node_id, const std::pair &neighbor_data, - idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - std::unique_lock &node_lock, std::unique_lock &neighbor_lock) { + LevelData &new_node_level, LevelData &neighbor_level) { // Note - expect that node_lock and neighbor_lock are locked at that point. // Collect the existing neighbors and the new node as the neighbor's neighbors candidates. @@ -776,17 +824,18 @@ void HNSWIndex::revisitNeighborConnections( candidates.emplace(neighbor_data.first, new_node_id); idType selected_neighbor = neighbor_data.second; - for (size_t j = 0; j < getNodeNeighborsCount(neighbor_neighbors_list); j++) { - candidates.emplace(this->dist_func(getDataByInternalId(neighbor_neighbors_list[j]), - getDataByInternalId(selected_neighbor), this->dim), - neighbor_neighbors_list[j]); + const void *selected_neighbor_data = getDataByInternalId(selected_neighbor); + for (size_t j = 0; j < neighbor_level.numLinks; j++) { + candidates.emplace(this->distFunc(getDataByInternalId(neighbor_level.links[j]), + selected_neighbor_data, this->dim), + neighbor_level.links[j]); } std::vector nodes_to_update; auto orig_candidates = candidates; // Candidates will store the newly selected neighbours (for the neighbor). - size_t max_M_cur = level ? maxM_ : maxM0_; + size_t max_M_cur = level ? M : M0; getNeighborsByHeuristic2(candidates, max_M_cur); // Go over the original candidates set, and save the ones chosen to be removed to update later @@ -816,41 +865,36 @@ void HNSWIndex::revisitNeighborConnections( // Acquire all relevant locks for making the updates for the selected neighbor - all its removed // neighbors, along with the neighbors itself and the cur node. // but first, we release the node and neighbors lock to avoid deadlocks. - node_lock.unlock(); - neighbor_lock.unlock(); + unlockNodeLinks(new_node_id); + unlockNodeLinks(selected_neighbor); nodes_to_update.push_back(selected_neighbor); nodes_to_update.push_back(new_node_id); std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + lockNodeLinks(nodes_to_update[i]); } - - auto *neighbour_incoming_edges = getIncomingEdgesPtr(selected_neighbor, level); - size_t neighbor_neighbors_count = getNodeNeighborsCount(neighbor_neighbors_list); - size_t neighbour_neighbours_idx = 0; bool update_cur_node_required = true; - for (size_t i = 0; i < neighbor_neighbors_count; i++) { + for (size_t i = 0; i < neighbor_level.numLinks; i++) { if (!std::binary_search(nodes_to_update.begin(), nodes_to_update.end(), - neighbor_neighbors_list[i])) { + neighbor_level.links[i])) { // The neighbor is not in the "to_update" nodes list - leave it as is. - neighbor_neighbors_list[neighbour_neighbours_idx++] = neighbor_neighbors_list[i]; + neighbor_level.links[neighbour_neighbours_idx++] = neighbor_level.links[i]; continue; - } else if (neighbor_neighbors_list[i] == new_node_id) { + } else if (neighbor_level.links[i] == new_node_id) { // The new node got into the neighbor's neighbours - this means there was an update in // another thread during between we released and reacquire the locks - leave it // as is. - neighbor_neighbors_list[neighbour_neighbours_idx++] = neighbor_neighbors_list[i]; + neighbor_level.links[neighbour_neighbours_idx++] = neighbor_level.links[i]; update_cur_node_required = false; continue; } // Now we know that we are looking at a node to be removed from the neighbor's neighbors. - auto removed_node = neighbor_neighbors_list[i]; - auto *removed_node_incoming_edges = getIncomingEdgesPtr(removed_node, level); + auto removed_node = neighbor_level.links[i]; + LevelData &removed_node_level = getLevelData(removed_node, level); // Perform the mutual update: // if the removed node id (the neighbour's neighbour to be removed) // wasn't pointing to the neighbour (i.e., the edge was uni-directional), @@ -859,27 +903,28 @@ void HNSWIndex::revisitNeighborConnections( // neighbour's incoming edges set. Note: we assume that every update is performed atomically // mutually, so it should be sufficient to look at the removed node's incoming edges set // alone. - if (!removeIdFromList(*removed_node_incoming_edges, selected_neighbor)) { - neighbour_incoming_edges->push_back(removed_node); + if (!removeIdFromList(*removed_node_level.incomingEdges, selected_neighbor)) { + neighbor_level.incomingEdges->push_back(removed_node); } } - size_t cur_node_neighbors_count = getNodeNeighborsCount(new_node_neighbors_list); - if (update_cur_node_required && cur_node_neighbors_count < max_M_cur && + if (update_cur_node_required && new_node_level.numLinks < max_M_cur && !isMarkedDeleted(new_node_id) && !isMarkedDeleted(selected_neighbor)) { // update the connection between the new node and the neighbor. - new_node_neighbors_list[cur_node_neighbors_count++] = selected_neighbor; - setNodeNeighborsCount(new_node_neighbors_list, cur_node_neighbors_count); + new_node_level.links[new_node_level.numLinks++] = selected_neighbor; if (cur_node_chosen && neighbour_neighbours_idx < max_M_cur) { // connection is mutual - both new node and the selected neighbor in each other's list. - neighbor_neighbors_list[neighbour_neighbours_idx++] = new_node_id; + neighbor_level.links[neighbour_neighbours_idx++] = new_node_id; } else { // unidirectional connection - put the new node in the neighbour's incoming edges. - neighbour_incoming_edges->push_back(new_node_id); + neighbor_level.incomingEdges->push_back(new_node_id); } } // Done updating the neighbor's neighbors. - setNodeNeighborsCount(neighbor_neighbors_list, neighbour_neighbours_idx); + neighbor_level.numLinks = neighbour_neighbours_idx; + for (size_t i = 0; i < nodes_to_update_count; i++) { + unlockNodeLinks(nodes_to_update[i]); + } } template @@ -887,16 +932,16 @@ idType HNSWIndex::mutuallyConnectNewElement( idType new_node_id, candidatesMaxHeap &top_candidates, size_t level) { // The maximum number of neighbors allowed for an existing neighbor (not new). - size_t max_M_cur = level ? maxM_ : maxM0_; + size_t max_M_cur = level ? M : M0; // Filter the top candidates to the selected neighbors by the algorithm heuristics. - getNeighborsByHeuristic2(top_candidates, M_); - assert(top_candidates.size() <= M_ && - "Should be not be more than M_ candidates returned by the heuristic"); + getNeighborsByHeuristic2(top_candidates, M); + assert(top_candidates.size() <= M && + "Should be not be more than M candidates returned by the heuristic"); // Hold (distance_from_new_node_id, neighbor_id) pair for every selected neighbor. vecsim_stl::vector> selected_neighbors(this->allocator); - selected_neighbors.reserve(M_); + selected_neighbors.reserve(M); while (!top_candidates.empty()) { selected_neighbors.push_back(top_candidates.top()); top_candidates.pop(); @@ -905,151 +950,141 @@ idType HNSWIndex::mutuallyConnectNewElement( // The closest vector that has found to be returned (and start the scan from it in the next // level). idType next_closest_entry_point = selected_neighbors.back().second; - idType *new_node_neighbors_list = getNodeNeighborsAtLevel(new_node_id, level); - assert(getNodeNeighborsCount(new_node_neighbors_list) == 0 && + auto *new_node_level = getGraphDataByInternalId(new_node_id); + LevelData &new_node_level_data = getLevelData(new_node_level, level); + assert(new_node_level_data.numLinks == 0 && "The newly inserted element should have blank link list"); - // Create the incoming edges for the new node in the current level. - auto *incoming_edges = new (this->allocator) vecsim_stl::vector(this->allocator); - setIncomingEdgesPtr(new_node_id, level, (void *)incoming_edges); - for (auto &neighbor_data : selected_neighbors) { idType selected_neighbor = neighbor_data.second; // neighbor's id - std::unique_lock node_lock; - std::unique_lock neighbor_lock; - idType lower_id = (new_node_id < selected_neighbor) ? new_node_id : selected_neighbor; - if (lower_id == new_node_id) { - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); - neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); + auto *neighbor_graph_data = getGraphDataByInternalId(selected_neighbor); + if (new_node_id < selected_neighbor) { + lockNodeLinks(new_node_level); + lockNodeLinks(neighbor_graph_data); } else { - neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); + lockNodeLinks(neighbor_graph_data); + lockNodeLinks(new_node_level); } - // get the updated count - this may change between iterations due to releasing the lock. - linkListSize cur_node_neighbors_count = getNodeNeighborsCount(new_node_neighbors_list); - idType *neighbor_neighbors_list = getNodeNeighborsAtLevel(selected_neighbor, level); - linkListSize neighbor_neighbors_count = getNodeNeighborsCount(neighbor_neighbors_list); - // validations... - assert(cur_node_neighbors_count <= max_M_cur && "Neighbors number exceeds limit"); + assert(new_node_level_data.numLinks <= max_M_cur && "Neighbors number exceeds limit"); assert(selected_neighbor != new_node_id && "Trying to connect an element to itself"); - if (cur_node_neighbors_count == max_M_cur) { + // Revalidate the updated count - this may change between iterations due to releasing the + // lock. + if (new_node_level_data.numLinks == max_M_cur) { // The new node cannot add more neighbors + this->log("Couldn't add all chosen neighbors upon inserting a new node"); + unlockNodeLinks(new_node_level); + unlockNodeLinks(neighbor_graph_data); break; } // If one of the two nodes has already deleted - skip the operation. if (isMarkedDeleted(new_node_id) || isMarkedDeleted(selected_neighbor)) { + unlockNodeLinks(new_node_level); + unlockNodeLinks(neighbor_graph_data); continue; } + LevelData &neighbor_level_data = getLevelData(neighbor_graph_data, level); + // if the neighbor's neighbors list has the capacity to add the new node, make the update // and finish. - if (neighbor_neighbors_count < max_M_cur) { - new_node_neighbors_list[cur_node_neighbors_count] = selected_neighbor; - setNodeNeighborsCount(new_node_neighbors_list, cur_node_neighbors_count + 1); - neighbor_neighbors_list[neighbor_neighbors_count] = new_node_id; - setNodeNeighborsCount(neighbor_neighbors_list, neighbor_neighbors_count + 1); + if (neighbor_level_data.numLinks < max_M_cur) { + new_node_level_data.links[new_node_level_data.numLinks++] = selected_neighbor; + neighbor_level_data.links[neighbor_level_data.numLinks++] = new_node_id; + unlockNodeLinks(new_node_level); + unlockNodeLinks(neighbor_graph_data); continue; } // Otherwise - we need to re-evaluate the neighbor's neighbors. // We collect all the existing neighbors and the new node as candidates, and mutually update - // the neighbor's neighbors. - revisitNeighborConnections(level, new_node_id, neighbor_data, new_node_neighbors_list, - neighbor_neighbors_list, node_lock, neighbor_lock); + // the neighbor's neighbors. We also release the acquired locks inside this call. + revisitNeighborConnections(level, new_node_id, neighbor_data, new_node_level_data, + neighbor_level_data); } return next_closest_entry_point; } template void HNSWIndex::repairConnectionsForDeletion( - idType element_internal_id, idType neighbour_id, idType *neighbours, - idType *neighbour_neighbours, size_t level, vecsim_stl::vector &neighbours_bitmap) { + idType element_internal_id, idType neighbour_id, LevelData &node_level, + LevelData &neighbor_level, size_t level, vecsim_stl::vector &neighbours_bitmap) { // put the deleted element's neighbours in the candidates. candidatesMaxHeap candidates(this->allocator); - linkListSize neighbours_count = getNodeNeighborsCount(neighbours); - for (size_t j = 0; j < neighbours_count; j++) { + auto neighbours_data = getDataByInternalId(neighbour_id); + for (size_t j = 0; j < node_level.numLinks; j++) { // Don't put the neighbor itself in his own candidates - if (neighbours[j] == neighbour_id) { + if (node_level.links[j] == neighbour_id) { continue; } - candidates.emplace(this->dist_func(getDataByInternalId(neighbours[j]), - getDataByInternalId(neighbour_id), this->dim), - neighbours[j]); + candidates.emplace( + this->distFunc(getDataByInternalId(node_level.links[j]), neighbours_data, this->dim), + node_level.links[j]); } // add the deleted element's neighbour's original neighbors in the candidates. - vecsim_stl::vector neighbour_orig_neighbours_set(cur_element_count, false, - this->allocator); - linkListSize neighbour_neighbours_count = getNodeNeighborsCount(neighbour_neighbours); + vecsim_stl::vector neighbour_orig_neighbours_set(curElementCount, false, this->allocator); - for (size_t j = 0; j < neighbour_neighbours_count; j++) { - neighbour_orig_neighbours_set[neighbour_neighbours[j]] = true; + for (size_t j = 0; j < neighbor_level.numLinks; j++) { + neighbour_orig_neighbours_set[neighbor_level.links[j]] = true; // Don't add the removed element to the candidates, nor nodes that are already in the // candidates set. - if (neighbours_bitmap[neighbour_neighbours[j]] || - neighbour_neighbours[j] == element_internal_id) { + if (neighbours_bitmap[neighbor_level.links[j]] || + neighbor_level.links[j] == element_internal_id) { continue; } - candidates.emplace(this->dist_func(getDataByInternalId(neighbour_id), - getDataByInternalId(neighbour_neighbours[j]), this->dim), - neighbour_neighbours[j]); + candidates.emplace(this->distFunc(neighbours_data, + getDataByInternalId(neighbor_level.links[j]), this->dim), + neighbor_level.links[j]); } - size_t Mcurmax = level ? maxM_ : maxM0_; + size_t Mcurmax = level ? M : M0; size_t removed_links_num; - idType removed_links[neighbour_neighbours_count]; - removeExtraLinks(candidates, Mcurmax, neighbour_neighbours, neighbour_orig_neighbours_set, + idType removed_links[neighbor_level.numLinks]; + removeExtraLinks(candidates, Mcurmax, neighbor_level, neighbour_orig_neighbours_set, removed_links, &removed_links_num); // remove neighbour id from the incoming list of nodes for his // neighbours that were chosen to remove - auto *neighbour_incoming_edges = getIncomingEdgesPtr(neighbour_id, level); - for (size_t i = 0; i < removed_links_num; i++) { idType node_id = removed_links[i]; - auto *node_incoming_edges = getIncomingEdgesPtr(node_id, level); + LevelData &node_level = getLevelData(node_id, level); // if the node id (the neighbour's neighbour to be removed) // wasn't pointing to the neighbour (edge was one directional), // we should remove it from the node's incoming edges. // otherwise, edge turned from bidirectional to one directional, // and it should be saved in the neighbor's incoming edges. - if (!removeIdFromList(*node_incoming_edges, neighbour_id)) { - neighbour_incoming_edges->push_back(node_id); + if (!removeIdFromList(*node_level.incomingEdges, neighbour_id)) { + neighbor_level.incomingEdges->push_back(node_id); } } // updates for the new edges created - linkListSize updated_links_num = getNodeNeighborsCount(neighbour_neighbours); - for (size_t i = 0; i < updated_links_num; i++) { - idType node_id = neighbour_neighbours[i]; + for (size_t i = 0; i < neighbor_level.numLinks; i++) { + idType node_id = neighbor_level.links[i]; if (!neighbour_orig_neighbours_set[node_id]) { - auto *node_incoming_edges = getIncomingEdgesPtr(node_id, level); + LevelData &node_level = getLevelData(node_id, level); // if the node has an edge to the neighbour as well, remove it // from the incoming nodes of the neighbour // otherwise, need to update the edge as incoming. - idType *node_links = getNodeNeighborsAtLevel(node_id, level); - unsigned short node_links_size = getNodeNeighborsCount(node_links); bool bidirectional_edge = false; - for (size_t j = 0; j < node_links_size; j++) { - if (node_links[j] == neighbour_id) { + for (size_t j = 0; j < node_level.numLinks; j++) { + if (node_level.links[j] == neighbour_id) { // Swap the last element with the current one (equivalent to removing the // neighbor from the list) - this should always succeed and return true. - removeIdFromList(*neighbour_incoming_edges, node_id); + removeIdFromList(*neighbor_level.incomingEdges, node_id); bidirectional_edge = true; break; } } if (!bidirectional_edge) { - node_incoming_edges->push_back(neighbour_id); + node_level.incomingEdges->push_back(neighbour_id); } } } @@ -1057,49 +1092,58 @@ void HNSWIndex::repairConnectionsForDeletion( template void HNSWIndex::replaceEntryPoint() { - idType old_entry = entrypoint_node_; + idType old_entry_point_id = entrypointNode; + auto *old_entry_point = getGraphDataByInternalId(old_entry_point_id); + // Sets an (arbitrary) new entry point, after deleting the current entry point. - while (old_entry == entrypoint_node_) { + while (old_entry_point_id == entrypointNode) { // Use volatile for this variable, so that in case we would have to busy wait for this // element to finish its indexing, the compiler will not use optimizations. Otherwise, // the compiler might evaluate 'isInProcess(candidate_in_process)' once instead of calling // it multiple times in a busy wait manner, and we'll run into an infinite loop if the // candidate is in process when we reach the loop. volatile idType candidate_in_process = INVALID_ID; - { - // Go over the entry point's neighbors at the top level. - std::unique_lock lock(this->element_neighbors_locks_[entrypoint_node_]); - idType *top_level_list = getNodeNeighborsAtLevel(old_entry, max_level_); - auto neighbors_count = getNodeNeighborsCount(top_level_list); - // Tries to set the (arbitrary) first neighbor as the entry point which is not deleted, - // if exists. - for (size_t i = 0; i < neighbors_count; i++) { - if (!isMarkedDeleted(top_level_list[i])) { - if (!isInProcess(top_level_list[i])) { - entrypoint_node_ = top_level_list[i]; - return; - } else { - // Store this candidate which is currently being inserted into the graph in - // case we won't find other candidate at the top level. - candidate_in_process = top_level_list[i]; - } + + // Go over the entry point's neighbors at the top level. + lockNodeLinks(old_entry_point); + LevelData &old_ep_level = getLevelData(old_entry_point, maxLevel); + // Tries to set the (arbitrary) first neighbor as the entry point which is not deleted, + // if exists. + for (size_t i = 0; i < old_ep_level.numLinks; i++) { + if (!isMarkedDeleted(old_ep_level.links[i])) { + if (!isInProcess(old_ep_level.links[i])) { + entrypointNode = old_ep_level.links[i]; + unlockNodeLinks(old_entry_point); + return; + } else { + // Store this candidate which is currently being inserted into the graph in + // case we won't find other candidate at the top level. + candidate_in_process = old_ep_level.links[i]; } } } + unlockNodeLinks(old_entry_point); + // If there is no neighbors in the current level, check for any vector at // this level to be the new entry point. - for (idType cur_id = 0; cur_id < cur_element_count; cur_id++) { - if (element_levels_[cur_id] == max_level_ && cur_id != old_entry && - !isMarkedDeleted(cur_id)) { - // Found a non element in the current max level. - if (!isInProcess(cur_id)) { - entrypoint_node_ = cur_id; - return; - } else if (candidate_in_process == INVALID_ID) { - // This element is still in process, and there hasn't been another candidate in - // process that has found in this level. - candidate_in_process = cur_id; + idType cur_id = 0; + for (DataBlock &graph_data_block : graphDataBlocks) { + size_t size = graph_data_block.getLength(); + for (size_t i = 0; i < size; i++) { + auto cur_element = (ElementGraphData *)graph_data_block.getElement(i); + if (cur_element->toplevel == maxLevel && cur_id != old_entry_point_id && + !isMarkedDeleted(cur_id)) { + // Found a non element in the current max level. + if (!isInProcess(cur_id)) { + entrypointNode = cur_id; + return; + } else if (candidate_in_process == INVALID_ID) { + // This element is still in process, and there hasn't been another candidate + // in process that has found in this level. + candidate_in_process = cur_id; + } } + cur_id++; } } // If we only found candidates which are in process at this level, do busy wait until they @@ -1109,49 +1153,47 @@ void HNSWIndex::replaceEntryPoint() { if (candidate_in_process != INVALID_ID) { while (isInProcess(candidate_in_process)) ; - entrypoint_node_ = candidate_in_process; + entrypointNode = candidate_in_process; return; } - // If we didn't find any vector at the top level, decrease the max_level_ and try again, + // If we didn't find any vector at the top level, decrease the maxLevel and try again, // until we find a new entry point, or the index is empty. - assert(old_entry == entrypoint_node_); - max_level_--; - if ((int)max_level_ < 0) { - max_level_ = HNSW_INVALID_LEVEL; - entrypoint_node_ = INVALID_ID; + assert(old_entry_point_id == entrypointNode); + maxLevel--; + if ((int)maxLevel < 0) { + maxLevel = HNSW_INVALID_LEVEL; + entrypointNode = INVALID_ID; } } } template template -void HNSWIndex::SwapLastIdWithDeletedId(idType element_internal_id) { +void HNSWIndex::SwapLastIdWithDeletedId(idType element_internal_id, + ElementGraphData *last_element, + void *last_element_data) { // Swap label - this is relevant when the last element's label exists (it is not marked as // deleted). For inplace delete, this is always the case. - if (!has_marked_deleted || !isMarkedDeleted(cur_element_count)) { - replaceIdOfLabel(getExternalLabel(cur_element_count), element_internal_id, - cur_element_count); + if (!has_marked_deleted || !isMarkedDeleted(curElementCount)) { + replaceIdOfLabel(getExternalLabel(curElementCount), element_internal_id, curElementCount); } // Swap neighbours - size_t last_element_top_level = element_levels_[cur_element_count]; - for (size_t level = 0; level <= last_element_top_level; level++) { - idType *neighbours = getNodeNeighborsAtLevel(cur_element_count, level); - linkListSize neighbours_count = getNodeNeighborsCount(neighbours); + for (size_t level = 0; level <= last_element->toplevel; level++) { + auto &cur_level = getLevelData(last_element, level); // Go over the neighbours that also points back to the last element whose is going to // change, and update the id. - for (size_t i = 0; i < neighbours_count; i++) { - idType neighbour_id = neighbours[i]; - idType *neighbour_neighbours = getNodeNeighborsAtLevel(neighbour_id, level); - linkListSize neighbour_neighbours_count = getNodeNeighborsCount(neighbour_neighbours); + for (size_t i = 0; i < cur_level.numLinks; i++) { + idType neighbour_id = cur_level.links[i]; + LevelData &neighbor_level = getLevelData(neighbour_id, level); bool bidirectional_edge = false; - for (size_t j = 0; j < neighbour_neighbours_count; j++) { + for (size_t j = 0; j < neighbor_level.numLinks; j++) { // if the edge is bidirectional, update for this neighbor - if (neighbour_neighbours[j] == cur_element_count) { + if (neighbor_level.links[j] == curElementCount) { bidirectional_edge = true; - neighbour_neighbours[j] = element_internal_id; + neighbor_level.links[j] = element_internal_id; break; } } @@ -1159,47 +1201,50 @@ void HNSWIndex::SwapLastIdWithDeletedId(idType element_inter // If this edge is uni-directional, we should update the id in the neighbor's // incoming edges. if (!bidirectional_edge) { - auto *neighbour_incoming_edges = getIncomingEdgesPtr(neighbour_id, level); - // This should always succeed and return true. - removeIdFromList(*neighbour_incoming_edges, cur_element_count); - neighbour_incoming_edges->push_back(element_internal_id); + auto it = std::find(neighbor_level.incomingEdges->begin(), + neighbor_level.incomingEdges->end(), curElementCount); + // This should always succeed + assert(it != neighbor_level.incomingEdges->end()); + *it = element_internal_id; } } // Next, go over the rest of incoming edges (the ones that are not bidirectional) and make // updates. - auto *incoming_edges = getIncomingEdgesPtr(cur_element_count, level); - for (auto incoming_edge : *incoming_edges) { - idType *incoming_neighbour_neighbours = getNodeNeighborsAtLevel(incoming_edge, level); - linkListSize incoming_neighbour_neighbours_count = - getNodeNeighborsCount(incoming_neighbour_neighbours); - for (size_t j = 0; j < incoming_neighbour_neighbours_count; j++) { - if (incoming_neighbour_neighbours[j] == cur_element_count) { - incoming_neighbour_neighbours[j] = element_internal_id; + for (auto incoming_edge : *cur_level.incomingEdges) { + LevelData &incoming_neighbor_level = getLevelData(incoming_edge, level); + for (size_t j = 0; j < incoming_neighbor_level.numLinks; j++) { + if (incoming_neighbor_level.links[j] == curElementCount) { + incoming_neighbor_level.links[j] = element_internal_id; break; } } } } - // Swap the last_id level 0 data, and invalidate the deleted id's data. - memcpy(data_level0_memory_ + element_internal_id * size_data_per_element_ + offsetLevel0_, - data_level0_memory_ + cur_element_count * size_data_per_element_ + offsetLevel0_, - size_data_per_element_); - memset(data_level0_memory_ + cur_element_count * size_data_per_element_ + offsetLevel0_, 0, - size_data_per_element_); + // Move the last element's data to the deleted element's place + auto element = getGraphDataByInternalId(element_internal_id); + memcpy((void *)element, last_element, this->elementGraphDataSize); + + auto data = getDataByInternalId(element_internal_id); + memcpy((void *)data, last_element_data, this->dataSize); - // Swap pointer of higher levels links. - linkLists_[element_internal_id] = linkLists_[cur_element_count]; - linkLists_[cur_element_count] = nullptr; + this->idToMetaData[element_internal_id] = this->idToMetaData[curElementCount]; - // Swap top element level. - element_levels_[element_internal_id] = element_levels_[cur_element_count]; - element_levels_[cur_element_count] = HNSW_INVALID_LEVEL; + if (curElementCount == this->entrypointNode) { + this->entrypointNode = element_internal_id; + } +} - if (cur_element_count == this->entrypoint_node_) { - this->entrypoint_node_ = element_internal_id; +template +void HNSWIndex::destroyGraphData(ElementGraphData *egd) { + delete egd->level0.incomingEdges; + LevelData *cur_ld = egd->others; + for (size_t i = 0; i < egd->toplevel; i++) { + delete cur_ld->incomingEdges; + cur_ld = (LevelData *)((char *)cur_ld + this->levelDataSize); } + this->allocator->free_allocation(egd->others); } // This function is greedily searching for the closest candidate to the given data point at the @@ -1227,17 +1272,17 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s } changed = false; - std::unique_lock lock(element_neighbors_locks_[bestCand]); - idType *node_links = getNodeNeighborsAtNonBaseLevel(bestCand, level); - linkListSize links_count = getNodeNeighborsCount(node_links); + auto *element = getGraphDataByInternalId(bestCand); + lockNodeLinks(element); + LevelData &node_level_data = getLevelData(element, level); - for (int i = 0; i < links_count; i++) { - idType candidate = node_links[i]; - assert(candidate < this->cur_element_count && "candidate error: out of index range"); + for (int i = 0; i < node_level_data.numLinks; i++) { + idType candidate = node_level_data.links[i]; + assert(candidate < this->curElementCount && "candidate error: out of index range"); if (isInProcess(candidate)) { continue; } - DistType d = this->dist_func(vector_data, getDataByInternalId(candidate), this->dim); + DistType d = this->distFunc(vector_data, getDataByInternalId(candidate), this->dim); if (d < curDist) { curDist = d; bestCand = candidate; @@ -1250,6 +1295,7 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s } } } + unlockNodeLinks(element); } while (changed); if (!running_query) { bestCand = bestNonDeletedCand; @@ -1258,77 +1304,88 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s template vecsim_stl::vector -HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_id, - size_t node_top_level) { +HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_id) const { vecsim_stl::vector incoming_neighbors(this->allocator); - for (size_t level = 0; level <= node_top_level; level++) { + auto element = getGraphDataByInternalId(node_id); + for (size_t level = 0; level <= element->toplevel; level++) { // Save the node neighbor's in the current level while holding its neighbors lock. std::vector neighbors_copy; - std::unique_lock element_lock(element_neighbors_locks_[node_id]); - auto *neighbours = getNodeNeighborsAtLevel(node_id, level); - unsigned short neighbours_count = getNodeNeighborsCount(neighbours); + lockNodeLinks(element); + auto &node_level_data = getLevelData(element, level); // Store the deleted element's neighbours. - neighbors_copy.assign(neighbours, neighbours + neighbours_count); - element_lock.unlock(); + neighbors_copy.assign(node_level_data.links, + node_level_data.links + node_level_data.numLinks); + unlockNodeLinks(element); // Go over the neighbours and collect tho ones that also points back to the removed node. for (auto neighbour_id : neighbors_copy) { // Hold the neighbor's lock while we are going over its neighbors. - std::unique_lock neighbor_lock(element_neighbors_locks_[neighbour_id]); - auto *neighbour_neighbours = getNodeNeighborsAtLevel(neighbour_id, level); - unsigned short neighbour_neighbours_count = getNodeNeighborsCount(neighbour_neighbours); - for (size_t j = 0; j < neighbour_neighbours_count; j++) { + auto *neighbor = getGraphDataByInternalId(neighbour_id); + lockNodeLinks(neighbor); + LevelData &neighbour_level_data = getLevelData(neighbor, level); + + for (size_t j = 0; j < neighbour_level_data.numLinks; j++) { // A bidirectional edge was found - this connection should be repaired. - if (neighbour_neighbours[j] == node_id) { + if (neighbour_level_data.links[j] == node_id) { incoming_neighbors.emplace_back(neighbour_id, (ushort)level); break; } } + unlockNodeLinks(neighbor); } // Next, collect the rest of incoming edges (the ones that are not bidirectional) in the // current level to repair them. - element_lock.lock(); - auto *incoming_edges = getIncomingEdgesPtr(node_id, level); - // Note that the deleted element might be in the process of indexing into the graph in the - // meantime (in async mode). Since the incoming_edges lists in every level are allocated - // while the element is being indexed into that level (in lazy mode), we may find ourselves - // in a situation where the incoming edges was not allocated yet in this level (but we do - // guarantee that the pointer is NULL in that case). In which case, we just continue. We - // also validate that we won't add new edges to a deleted node later on. - if (!incoming_edges) - continue; - for (auto incoming_edge : *incoming_edges) { + lockNodeLinks(element); + for (auto incoming_edge : *node_level_data.incomingEdges) { incoming_neighbors.emplace_back(incoming_edge, (ushort)level); } + unlockNodeLinks(element); } return incoming_neighbors; } template -void HNSWIndex::resizeIndexInternal(size_t new_max_elements) { - element_levels_.resize(new_max_elements); - element_levels_.shrink_to_fit(); +void HNSWIndex::resizeIndexCommon(size_t new_max_elements) { + assert(new_max_elements % this->blockSize == 0 && + "new_max_elements must be a multiple of blockSize"); resizeLabelLookup(new_max_elements); - visited_nodes_handler_pool.resize(new_max_elements); - vecsim_stl::vector(new_max_elements, this->allocator) - .swap(element_neighbors_locks_); - // Reallocate base layer - char *data_level0_memory_new = (char *)this->allocator->reallocate( - data_level0_memory_, new_max_elements * size_data_per_element_); - if (data_level0_memory_new == nullptr) - throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); - data_level0_memory_ = data_level0_memory_new; + visitedNodesHandlerPool.resize(new_max_elements); + idToMetaData.resize(new_max_elements); + idToMetaData.shrink_to_fit(); + + maxElements = new_max_elements; +} + +template +void HNSWIndex::growByBlock() { + size_t new_max_elements = maxElements + this->blockSize; + + // Validations + assert(vectorBlocks.size() == graphDataBlocks.size()); + assert(vectorBlocks.size() == 0 || vectorBlocks.back().getLength() == this->blockSize); + + vectorBlocks.emplace_back(this->blockSize, this->dataSize, this->allocator, this->alignment); + graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize, this->allocator); + + resizeIndexCommon(new_max_elements); +} + +template +void HNSWIndex::shrinkByBlock() { + assert(maxElements >= this->blockSize); + size_t new_max_elements = maxElements - this->blockSize; + + // Validations + assert(vectorBlocks.size() == graphDataBlocks.size()); + assert(vectorBlocks.size() > 0); + assert(vectorBlocks.back().getLength() == 0); - // Reallocate all other layers - char **linkLists_new = - (char **)this->allocator->reallocate(linkLists_, sizeof(void *) * new_max_elements); - if (linkLists_new == nullptr) - throw std::runtime_error("Not enough memory: resizeIndex failed to allocate other layers"); - linkLists_ = linkLists_new; + vectorBlocks.pop_back(); + graphDataBlocks.pop_back(); - max_elements_ = new_max_elements; + resizeIndexCommon(new_max_elements); } template @@ -1344,36 +1401,33 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( nodes_to_update.push_back(node_id); std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + lockNodeLinks(nodes_to_update[i]); } - idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); - linkListSize node_neighbors_count = getNodeNeighborsCount(node_neighbors); - auto *node_incoming_edges = getIncomingEdgesPtr(node_id, level); + LevelData &node_level = getLevelData(node_id, level); // Perform mutual updates: go over the node's neighbors and overwrite the neighbors to remove // that are still exist. size_t node_neighbors_idx = 0; - for (size_t i = 0; i < node_neighbors_count; i++) { + for (size_t i = 0; i < node_level.numLinks; i++) { if (!std::binary_search(nodes_to_update.begin(), nodes_to_update.end(), - node_neighbors[i])) { + node_level.links[i])) { // The repaired node added a new neighbor that we didn't account for before in the // meantime - leave it as is. - node_neighbors[node_neighbors_idx++] = node_neighbors[i]; + node_level.links[node_neighbors_idx++] = node_level.links[i]; continue; } // Check if the current neighbor is in the chosen neighbors list, and remove it from there // if so. - if (removeIdFromList(chosen_neighbors, node_neighbors[i])) { + if (removeIdFromList(chosen_neighbors, node_level.links[i])) { // A chosen neighbor is already connected to the node - leave it as is. - node_neighbors[node_neighbors_idx++] = node_neighbors[i]; + node_level.links[node_neighbors_idx++] = node_level.links[i]; continue; } // Now we know that we are looking at a neighbor that needs to be removed. - auto removed_node = node_neighbors[i]; - auto *removed_node_incoming_edges = getIncomingEdgesPtr(removed_node, level); + auto removed_node = node_level.links[i]; + LevelData &removed_node_level = getLevelData(removed_node, level); // Perform the mutual update: // if the removed node id (the node's neighbour to be removed) // wasn't pointing to the node (i.e., the edge was uni-directional), @@ -1382,8 +1436,8 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( // neighbour's incoming edges set. Note: we assume that every update is performed atomically // mutually, so it should be sufficient to look at the removed node's incoming edges set // alone. - if (!removeIdFromList(*removed_node_incoming_edges, node_id)) { - node_incoming_edges->push_back(removed_node); + if (!removeIdFromList(*removed_node_level.incomingEdges, node_id)) { + node_level.incomingEdges->push_back(removed_node); } } @@ -1410,19 +1464,21 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( if (isMarkedDeleted(chosen_id) || isInProcess(chosen_id)) { continue; } - auto *new_neighbor_incoming_edges = getIncomingEdgesPtr(chosen_id, level); - node_neighbors[node_neighbors_idx++] = chosen_id; + node_level.links[node_neighbors_idx++] = chosen_id; // If the node is in the chosen new node incoming edges, there is a unidirectional // connection from the chosen node to the repaired node that turns into bidirectional. Then, // remove it from the incoming edges set. Otherwise, the edge is created unidirectional, so // we add it to the unidirectional edges set. Note: we assume that all updates occur // mutually and atomically, then can rely on this assumption. - if (!removeIdFromList(*node_incoming_edges, chosen_id)) { - new_neighbor_incoming_edges->push_back(node_id); + if (!removeIdFromList(*node_level.incomingEdges, chosen_id)) { + getLevelData(chosen_id, level).incomingEdges->push_back(node_id); } } // Done updating the node's neighbors. - setNodeNeighborsCount(node_neighbors, node_neighbors_idx); + node_level.numLinks = node_neighbors_idx; + for (size_t i = 0; i < nodes_to_update_count; i++) { + unlockNodeLinks(nodes_to_update[i]); + } } template @@ -1430,35 +1486,35 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t candidatesMaxHeap neighbors_candidates(this->allocator); // Use bitmaps for fast accesses: - // node_orig_neighbours_set is used to diffrentiate between the neighboes that will *not* be - // selected by the heuritics - only the ones that were originally neighbors should be removed. - vecsim_stl::vector node_orig_neighbours_set(max_elements_, false, this->allocator); + // node_orig_neighbours_set is used to differentiate between the neighbors that will *not* be + // selected by the heuristics - only the ones that were originally neighbors should be removed. + vecsim_stl::vector node_orig_neighbours_set(maxElements, false, this->allocator); // neighbors_candidates_set is used to store the nodes that were already collected as // candidates, so we will not collect them again as candidates if we run into them from another // path. - vecsim_stl::vector neighbors_candidates_set(max_elements_, false, this->allocator); + vecsim_stl::vector neighbors_candidates_set(maxElements, false, this->allocator); vecsim_stl::vector deleted_neighbors(this->allocator); // Go over the repaired node neighbors, collect the non-deleted ones to be neighbors candidates // after the repair as well. - { - std::unique_lock node_lock(element_neighbors_locks_[node_id]); - idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); - linkListSize node_neighbors_count = getNodeNeighborsCount(node_neighbors); - for (size_t j = 0; j < node_neighbors_count; j++) { - node_orig_neighbours_set[node_neighbors[j]] = true; - // Don't add the removed element to the candidates. - if (isMarkedDeleted(node_neighbors[j])) { - deleted_neighbors.push_back(node_neighbors[j]); - continue; - } - neighbors_candidates_set[node_neighbors[j]] = true; - neighbors_candidates.emplace(this->dist_func(getDataByInternalId(node_id), - getDataByInternalId(node_neighbors[j]), - this->dim), - node_neighbors[j]); + const void *node_data = getDataByInternalId(node_id); + auto *element = getGraphDataByInternalId(node_id); + lockNodeLinks(element); + LevelData &node_level_data = getLevelData(element, level); + for (size_t j = 0; j < node_level_data.numLinks; j++) { + node_orig_neighbours_set[node_level_data.links[j]] = true; + // Don't add the removed element to the candidates. + if (isMarkedDeleted(node_level_data.links[j])) { + deleted_neighbors.push_back(node_level_data.links[j]); + continue; } + neighbors_candidates_set[node_level_data.links[j]] = true; + neighbors_candidates.emplace( + this->distFunc(node_data, getDataByInternalId(node_level_data.links[j]), this->dim), + node_level_data.links[j]); } + unlockNodeLinks(element); + // If there are not deleted neighbors at that point the repair job has already been made by // another parallel job, and there is no need to repair the node anymore. if (deleted_neighbors.empty()) { @@ -1477,32 +1533,32 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - std::unique_lock neighbor_lock( - this->element_neighbors_locks_[deleted_neighbor_id]); - idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); - linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); + auto *neighbor = getGraphDataByInternalId(deleted_neighbor_id); + lockNodeLinks(neighbor); + LevelData &neighbor_level_data = getLevelData(neighbor, level); - for (size_t j = 0; j < neighbor_neighbours_count; j++) { + for (size_t j = 0; j < neighbor_level_data.numLinks; j++) { // Don't add removed elements to the candidates, nor nodes that are already in the // candidates set, nor the original node to repair itself. - if (isMarkedDeleted(neighbor_neighbours[j]) || - neighbors_candidates_set[neighbor_neighbours[j]] || - neighbor_neighbours[j] == node_id) { + if (isMarkedDeleted(neighbor_level_data.links[j]) || + neighbors_candidates_set[neighbor_level_data.links[j]] || + neighbor_level_data.links[j] == node_id) { continue; } - neighbors_candidates_set[neighbor_neighbours[j]] = true; + neighbors_candidates_set[neighbor_level_data.links[j]] = true; neighbors_candidates.emplace( - this->dist_func(getDataByInternalId(node_id), - getDataByInternalId(neighbor_neighbours[j]), this->dim), - neighbor_neighbours[j]); + this->distFunc(node_data, getDataByInternalId(neighbor_level_data.links[j]), + this->dim), + neighbor_level_data.links[j]); } + unlockNodeLinks(neighbor); } // Copy the original candidates, and run the heuristics. Afterwards, neighbors_candidates will // store the newly selected neighbours (for the node), while candidates which were originally // neighbors and are not going to be selected, are going to be removed. auto orig_candidates = neighbors_candidates; - size_t max_M_cur = level ? maxM_ : maxM0_; + size_t max_M_cur = level ? M : M0; getNeighborsByHeuristic2(neighbors_candidates, max_M_cur); while (!orig_candidates.empty()) { @@ -1561,154 +1617,104 @@ HNSWIndex::HNSWIndex(const HNSWParams *params, const AbstractIndexInitParams &abstractInitParams, size_t random_seed, size_t pool_initial_size) : VecSimIndexAbstract(abstractInitParams), VecSimIndexTombstone(), - max_elements_(params->initialCapacity), element_levels_(max_elements_, this->allocator), - visited_nodes_handler_pool(pool_initial_size, max_elements_, this->allocator), - element_neighbors_locks_(max_elements_, this->allocator) { - size_t M = params->M ? params->M : HNSW_DEFAULT_M; - if (M > UINT16_MAX / 2) + maxElements(RoundUpInitialCapacity(params->initialCapacity, this->blockSize)), + vectorBlocks(this->allocator), graphDataBlocks(this->allocator), + idToMetaData(maxElements, this->allocator), + visitedNodesHandlerPool(pool_initial_size, maxElements, this->allocator) { + + M = params->M ? params->M : HNSW_DEFAULT_M; + M0 = M * 2; + if (M0 > UINT16_MAX) throw std::runtime_error("HNSW index parameter M is too large: argument overflow"); - M_ = M; - maxM_ = M_; - maxM0_ = M_ * 2; - size_t ef_construction = params->efConstruction ? params->efConstruction : HNSW_DEFAULT_EF_C; - ef_construction_ = std::max(ef_construction, M_); - ef_ = params->efRuntime ? params->efRuntime : HNSW_DEFAULT_EF_RT; - epsilon_ = params->epsilon > 0.0 ? params->epsilon : HNSW_DEFAULT_EPSILON; + efConstruction = params->efConstruction ? params->efConstruction : HNSW_DEFAULT_EF_C; + efConstruction = std::max(efConstruction, M); + ef = params->efRuntime ? params->efRuntime : HNSW_DEFAULT_EF_RT; + epsilon = params->epsilon > 0.0 ? params->epsilon : HNSW_DEFAULT_EPSILON; - cur_element_count = 0; - num_marked_deleted = 0; + curElementCount = 0; + numMarkedDeleted = 0; // initializations for special treatment of the first node - entrypoint_node_ = INVALID_ID; - max_level_ = HNSW_INVALID_LEVEL; + entrypointNode = INVALID_ID; + maxLevel = HNSW_INVALID_LEVEL; if (M <= 1) throw std::runtime_error("HNSW index parameter M cannot be 1"); - mult_ = 1 / log(1.0 * M_); - level_generator_.seed(random_seed); - - // data_level0_memory will look like this: - // | ---2--- | -----2----- | -----4*M0----------- | ---------8-------- |-data_size-| ---8--- | - // | | | ... || |