From 1591029d9dcbb549c5fca58e498bcbbcecbe2af3 Mon Sep 17 00:00:00 2001 From: rhdong Date: Mon, 24 Feb 2025 14:39:23 -0800 Subject: [PATCH] [Fix] Various fixes for 25.02.01 point release (#695) - https://github.com/rapidsai/cuvs/issues/694 - https://github.com/rapidsai/cuvs/issues/626 - https://github.com/rapidsai/cuvs/issues/680 - https://github.com/rapidsai/cuvs/pull/706 Fixes https://github.com/rapidsai/cuvs/issues/694 Fixes https://github.com/rapidsai/cuvs/issues/626 Fixes https://github.com/rapidsai/cuvs/issues/680 Closes https://github.com/rapidsai/cuvs/pull/706 --------- Co-authored-by: Ishan Chattopadhyaya Co-authored-by: Vivek Narang Co-authored-by: Dante Gama Dessavre --- conda/recipes/cuvs-bench-cpu/meta.yaml | 1 + conda/recipes/cuvs-bench/meta.yaml | 1 + cpp/include/cuvs/neighbors/cagra.hpp | 8 +- .../neighbors/detail/cagra/cagra_merge.cuh | 9 +- cpp/src/neighbors/detail/nn_descent.cuh | 24 ++- cpp/tests/neighbors/ann_cagra.cuh | 24 +++ .../java/com/nvidia/cuvs/BruteForceQuery.java | 43 +++-- .../cuvs/internal/BruteForceIndexImpl.java | 24 ++- .../com/nvidia/cuvs/internal/common/Util.java | 26 +++ .../nvidia/cuvs/BruteForceAndSearchIT.java | 62 +++--- .../nvidia/cuvs/BruteForceRandomizedIT.java | 19 +- .../nvidia/cuvs/CagraBuildAndSearchIT.java | 179 ++++++++++++------ .../com/nvidia/cuvs/CagraRandomizedIT.java | 2 +- .../java/com/nvidia/cuvs/CuVSTestCase.java | 43 ++++- .../com/nvidia/cuvs/HnswRandomizedIT.java | 2 +- java/internal/src/cuvs_java.c | 37 ++-- 16 files changed, 358 insertions(+), 146 deletions(-) diff --git a/conda/recipes/cuvs-bench-cpu/meta.yaml b/conda/recipes/cuvs-bench-cpu/meta.yaml index ddc13d568..f5ca3cba6 100644 --- a/conda/recipes/cuvs-bench-cpu/meta.yaml +++ b/conda/recipes/cuvs-bench-cpu/meta.yaml @@ -64,6 +64,7 @@ requirements: - pyyaml - python - requests + - sklearn>=1.5 about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index 1b151a16b..21050e1cf 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ b/conda/recipes/cuvs-bench/meta.yaml @@ -101,6 +101,7 @@ requirements: - python - requests - rmm ={{ minor_version }} + - sklearn>=1.5 about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 1d0acbe35..fc558e3f3 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -337,6 +337,8 @@ struct index : cuvs::neighbors::index { using search_params_type = cagra::search_params; using index_type = IdxT; using value_type = T; + using dataset_index_type = int64_t; + static_assert(!raft::is_narrowing_v, "IdxT must be able to represent all values of uint32_t"); @@ -510,14 +512,14 @@ struct index : cuvs::neighbors::index { */ template auto update_dataset(raft::resources const& res, DatasetT&& dataset) - -> std::enable_if_t, DatasetT>> + -> std::enable_if_t, DatasetT>> { dataset_ = std::make_unique(std::move(dataset)); } template auto update_dataset(raft::resources const& res, std::unique_ptr&& dataset) - -> std::enable_if_t, DatasetT>> + -> std::enable_if_t, DatasetT>> { dataset_ = std::move(dataset); } @@ -561,7 +563,7 @@ struct index : cuvs::neighbors::index { cuvs::distance::DistanceType metric_; raft::device_matrix graph_; raft::device_matrix_view graph_view_; - std::unique_ptr> dataset_; + std::unique_ptr> dataset_; }; /** * @} diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh index bc29cb206..d85866161 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh @@ -43,14 +43,16 @@ index merge(raft::resources const& handle, const cagra::merge_params& params, std::vector*>& indices) { + using cagra_index_t = cuvs::neighbors::cagra::index; + using ds_idx_type = typename cagra_index_t::dataset_index_type; + std::size_t dim = 0; std::size_t new_dataset_size = 0; int64_t stride = -1; - for (auto index : indices) { + for (cagra_index_t* index : indices) { RAFT_EXPECTS(index != nullptr, "Null pointer detected in 'indices'. Ensure all elements are valid before usage."); - using ds_idx_type = decltype(index->data().n_rows()); if (auto* strided_dset = dynamic_cast*>(&index->data()); strided_dset != nullptr) { if (dim == 0) { @@ -74,8 +76,7 @@ index merge(raft::resources const& handle, IdxT offset = 0; auto merge_dataset = [&](T* dst) { - for (auto index : indices) { - using ds_idx_type = decltype(index->data().n_rows()); + for (cagra_index_t* index : indices) { auto* strided_dset = dynamic_cast*>(&index->data()); RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst + offset * dim, diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 310d4e7a6..dffc94f06 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -1047,24 +1047,32 @@ void GnndGraph::init_random_graph() for (size_t seg_idx = 0; seg_idx < static_cast(num_segments); seg_idx++) { // random sequence (range: 0~nrow) // segment_x stores neighbors which id % num_segments == x - std::vector rand_seq(nrow / num_segments); + std::vector rand_seq((nrow + num_segments - 1) / num_segments); std::iota(rand_seq.begin(), rand_seq.end(), 0); auto gen = std::default_random_engine{seg_idx}; std::shuffle(rand_seq.begin(), rand_seq.end(), gen); #pragma omp parallel for for (size_t i = 0; i < nrow; i++) { - size_t base_idx = i * node_degree + seg_idx * segment_size; - auto h_neighbor_list = h_graph + base_idx; - auto h_dist_list = h_dists.data_handle() + base_idx; + size_t base_idx = i * node_degree + seg_idx * segment_size; + auto h_neighbor_list = h_graph + base_idx; + auto h_dist_list = h_dists.data_handle() + base_idx; + size_t idx = base_idx; + size_t self_in_this_seg = 0; for (size_t j = 0; j < static_cast(segment_size); j++) { - size_t idx = base_idx + j; Index_t id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx; if ((size_t)id == i) { - id = rand_seq[(idx + segment_size) % rand_seq.size()] * num_segments + seg_idx; + idx++; + id = rand_seq[idx % rand_seq.size()] * num_segments + seg_idx; + self_in_this_seg = 1; } - h_neighbor_list[j].id_with_flag() = id; - h_dist_list[j] = std::numeric_limits::max(); + + h_neighbor_list[j].id_with_flag() = + j < (rand_seq.size() - self_in_this_seg) && size_t(id) < nrow + ? id + : std::numeric_limits::max(); + h_dist_list[j] = std::numeric_limits::max(); + idx++; } } } diff --git a/cpp/tests/neighbors/ann_cagra.cuh b/cpp/tests/neighbors/ann_cagra.cuh index 1e695f9a8..e9408264e 100644 --- a/cpp/tests/neighbors/ann_cagra.cuh +++ b/cpp/tests/neighbors/ann_cagra.cuh @@ -952,6 +952,12 @@ class AnnCagraIndexMergeTest : public ::testing::TestWithParam { (ps.k * ps.dim * 8 / 5 /*(=magic number)*/ < ps.n_rows)) GTEST_SKIP(); + // Avoid splitting datasets with a size of 0 + if (ps.n_rows <= 3) GTEST_SKIP(); + + // IVF_PQ requires the `n_rows >= n_lists`. + if (ps.n_rows < 8 && ps.build_algo == graph_build_algo::IVF_PQ) GTEST_SKIP(); + size_t queries_size = ps.n_queries * ps.k; std::vector indices_Cagra(queries_size); std::vector indices_naive(queries_size); @@ -1161,6 +1167,24 @@ inline std::vector generate_inputs() {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + // Corner cases for small datasets + inputs2 = raft::util::itertools::product( + {2}, + {3, 5, 31, 32, 64, 101}, + {1, 10}, + {2}, // k + {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, + {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, + {0}, // query size + {0}, + {256}, + {1}, + {cuvs::distance::DistanceType::L2Expanded}, + {false}, + {true}, + {0.995}); + inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); + // Varying dim and build algo. inputs2 = raft::util::itertools::product( {100}, diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java index 7febc3ba3..019e27dcd 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceQuery.java @@ -17,6 +17,7 @@ package com.nvidia.cuvs; import java.util.Arrays; +import java.util.BitSet; import java.util.List; /** @@ -28,7 +29,8 @@ public class BruteForceQuery { private List mapping; private float[][] queryVectors; - private long[] prefilter; + private BitSet[] prefilters; + private int numDocs = -1; private int topK; /** @@ -40,12 +42,15 @@ public class BruteForceQuery { * @param topK the top k results to return * @param prefilter the prefilter data to use while searching the BRUTEFORCE * index + * @param numDocs Maximum of bits in each prefilter, representing number of documents in this index. + * Used only when prefilter(s) is/are passed. */ - public BruteForceQuery(float[][] queryVectors, List mapping, int topK, long[] prefilter) { + public BruteForceQuery(float[][] queryVectors, List mapping, int topK, BitSet[] prefilters, int numDocs) { this.queryVectors = queryVectors; this.mapping = mapping; this.topK = topK; - this.prefilter = prefilter; + this.prefilters = prefilters; + this.numDocs = numDocs; } /** @@ -78,16 +83,25 @@ public int getTopK() { /** * Gets the prefilter long array * - * @return a long array + * @return an array of bitsets */ - public long[] getPrefilter() { - return prefilter; + public BitSet[] getPrefilters() { + return prefilters; + } + + /** + * Gets the number of documents supposed to be in this index, as used for prefilters + * + * @return number of documents as an integer + */ + public int getNumDocs() { + return numDocs; } @Override public String toString() { return "BruteForceQuery [mapping=" + mapping + ", queryVectors=" + Arrays.toString(queryVectors) + ", prefilter=" - + Arrays.toString(prefilter) + ", topK=" + topK + "]"; + + Arrays.toString(prefilters) + ", topK=" + topK + "]"; } /** @@ -96,7 +110,8 @@ public String toString() { public static class Builder { private float[][] queryVectors; - private long[] prefilter; + private BitSet[] prefilters; + private int numDocs; private List mapping; private int topK = 2; @@ -134,13 +149,15 @@ public Builder withTopK(int topK) { } /** - * Sets the prefilter data for building the {@link BruteForceQuery}. + * Sets the prefilters data for building the {@link BruteForceQuery}. * - * @param prefilter a one-dimensional long array + * @param prefilters array of bitsets, as many as queries, each containing as + * many bits as there are vectors in the index * @return an instance of this Builder */ - public Builder withPrefilter(long[] prefilter) { - this.prefilter = prefilter; + public Builder withPrefilter(BitSet[] prefilters, int numDocs) { + this.prefilters = prefilters; + this.numDocs = numDocs; return this; } @@ -150,7 +167,7 @@ public Builder withPrefilter(long[] prefilter) { * @return an instance of {@link BruteForceQuery} */ public BruteForceQuery build() { - return new BruteForceQuery(queryVectors, mapping, topK, prefilter); + return new BruteForceQuery(queryVectors, mapping, topK, prefilters, numDocs); } } } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java index 7ea2801f3..decb13133 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java @@ -26,8 +26,12 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SequenceLayout; import java.lang.invoke.MethodHandle; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; +import java.util.BitSet; import java.util.Objects; import java.util.UUID; @@ -59,7 +63,7 @@ public class BruteForceIndexImpl implements BruteForceIndex{ FunctionDescriptor.of(ADDRESS, ADDRESS, C_LONG, C_LONG, ADDRESS, ADDRESS, C_INT)); private static final MethodHandle searchMethodHandle = downcallHandle("search_brute_force_index", - FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, C_INT, C_LONG, C_INT, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_LONG, C_LONG)); + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, C_INT, C_LONG, C_INT, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_LONG)); private static final MethodHandle destroyIndexMethodHandle = downcallHandle("destroy_brute_force_index", FunctionDescriptor.ofVoid(ADDRESS, ADDRESS)); @@ -169,16 +173,24 @@ public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { long numQueries = cuvsQuery.getQueryVectors().length; long numBlocks = cuvsQuery.getTopK() * numQueries; int vectorDimension = numQueries > 0 ? cuvsQuery.getQueryVectors()[0].length : 0; - long prefilterDataLength = cuvsQuery.getPrefilter() != null ? cuvsQuery.getPrefilter().length : 0; long numRows = dataset != null ? dataset.length : 0; SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_LONG); SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); MemorySegment neighborsMemorySegment = resources.getArena().allocate(neighborsSequenceLayout); MemorySegment distancesMemorySegment = resources.getArena().allocate(distancesSequenceLayout); - MemorySegment prefilterDataMemorySegment = cuvsQuery.getPrefilter() != null - ? Util.buildMemorySegment(resources.getArena(), cuvsQuery.getPrefilter()) - : MemorySegment.NULL; + + // prepare the prefiltering data + long prefilterDataLength = 0; + MemorySegment prefilterDataMemorySegment = MemorySegment.NULL; + BitSet[] prefilters = cuvsQuery.getPrefilters(); + if (prefilters != null && prefilters.length > 0) { + BitSet concatenatedFilters = Util.concatenate(prefilters, cuvsQuery.getNumDocs()); + long filters[] = concatenatedFilters.toLongArray(); + prefilterDataMemorySegment = Util.buildMemorySegment(resources.getArena(), filters); + prefilterDataLength = cuvsQuery.getNumDocs() * prefilters.length; + } + MemorySegment querySeg = Util.buildMemorySegment(resources.getArena(), cuvsQuery.getQueryVectors()); try (var localArena = Arena.ofConfined()) { MemorySegment returnValue = localArena.allocate(C_INT); @@ -193,7 +205,7 @@ public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { distancesMemorySegment, returnValue, prefilterDataMemorySegment, - prefilterDataLength, numRows + prefilterDataLength ); checkError(returnValue.get(C_INT, 0L), "searchMethodHandle"); } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java index 5bc695ae3..bc8f3bbcc 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java @@ -25,6 +25,8 @@ import java.lang.invoke.MethodHandle; import java.lang.invoke.VarHandle; import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; import java.util.List; import com.nvidia.cuvs.GPUInfo; @@ -184,6 +186,14 @@ public static MemorySegment buildMemorySegment(Arena arena, long[] data) { return dataMemorySegment; } + public static MemorySegment buildMemorySegment(Arena arena, byte[] data) { + int cells = data.length; + MemoryLayout dataMemoryLayout = MemoryLayout.sequenceLayout(cells, C_CHAR); + MemorySegment dataMemorySegment = arena.allocate(dataMemoryLayout); + MemorySegment.copy(data, 0, dataMemorySegment, C_CHAR, 0, cells); + return dataMemorySegment; + } + /** * A utility method for building a {@link MemorySegment} for a 2D float array. * @@ -201,4 +211,20 @@ public static MemorySegment buildMemorySegment(Arena arena, float[][] data) { } return dataMemorySegment; } + + public static BitSet concatenate(BitSet[] arr, int maxSizeOfEachBitSet) { + BitSet ret = new BitSet(maxSizeOfEachBitSet * arr.length); + for (int i = 0; i < arr.length; i++) { + BitSet b = arr[i]; + if (b == null || b.length() == 0) { + ret.set(i * maxSizeOfEachBitSet, (i + 1) * maxSizeOfEachBitSet); + } else { + for (int j = 0; j < maxSizeOfEachBitSet; j++) { + ret.set(i * maxSizeOfEachBitSet + j, b.get(j)); + } + } + } + return ret; + } + } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java index 3a1ea25dd..888f4ed69 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java @@ -16,12 +16,15 @@ package com.nvidia.cuvs; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; + import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStream; import java.lang.invoke.MethodHandles; import java.util.Arrays; +import java.util.BitSet; import java.util.List; import java.util.Map; import java.util.UUID; @@ -31,9 +34,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; -import static org.junit.Assert.assertEquals; - public class BruteForceAndSearchIT extends CuVSTestCase{ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -57,14 +57,14 @@ public void testIndexingAndSearchingFlow() throws Throwable { { 0.03902049f, 0.9689629f }, { 0.92514056f, 0.4463501f }, { 0.6673192f, 0.10993068f } - }; + }; List map = List.of(0, 1, 2, 3); float[][] queries = { { 0.48216683f, 0.0428398f }, { 0.5084142f, 0.6545497f }, { 0.51260436f, 0.2643005f }, { 0.05198065f, 0.5789965f } - }; + }; // Expected search results List> expectedResults = Arrays.asList( @@ -72,19 +72,26 @@ public void testIndexingAndSearchingFlow() throws Throwable { Map.of(0, 0.12472606f, 2, 0.21700788f, 1, 0.3191862f), Map.of(3, 0.047766685f, 2, 0.20332813f, 0, 0.48305476f), Map.of(1, 0.15224183f, 0, 0.5906347f, 3, 0.5986643f) - ); + ); + + // pre-filtering + BitSet prefilter = new BitSet(); + prefilter.set(0); + prefilter.set(1); + prefilter.clear(2); + prefilter.set(3); + + final List> expectedResultsWithFiltering = Arrays.asList( + Map.of(0, 0.83774555f, 1, 1.0540828f, 3, 0.038782537f), + Map.of(0, 0.12472606f, 1, 0.3191862f, 3, 0.32186073f), + Map.of(0, 0.48305476f, 1, 0.7208309f, 3, 0.047766685f), + Map.of(0, 0.5906347f, 1, 0.15224195f, 3, 0.5986643f) + ); for (int j = 0; j < 10; j++) { try (CuVSResources resources = CuVSResources.create()) { - // Create a query object with the query vectors - BruteForceQuery cuvsQuery = new BruteForceQuery.Builder() - .withTopK(3) - .withQueryVectors(queries) - .withMapping(map) - .build(); - // Set index parameters BruteForceIndexParams indexParams = new BruteForceIndexParams.Builder() .withNumWriterThreads(32) @@ -108,18 +115,29 @@ public void testIndexingAndSearchingFlow() throws Throwable { .build(); // Perform the search - SearchResults resultsFromLoadedIndex = loadedIndex.search(cuvsQuery); + BruteForceQuery cuvsQuery = new BruteForceQuery.Builder() + .withTopK(3) + .withQueryVectors(queries) + .withMapping(map) + .build(); + BruteForceQuery cuvsQueryWithFiltering = new BruteForceQuery.Builder() + .withTopK(3) + .withQueryVectors(queries) + .withPrefilter(new BitSet[] {prefilter, prefilter, prefilter, prefilter}, dataset.length) + .withMapping(map) + .build(); - // Check results - log.info(resultsFromLoadedIndex.getResults().toString()); - assertEquals(expectedResults, resultsFromLoadedIndex.getResults()); + // search the loaded index + SearchResults results = loadedIndex.search(cuvsQuery); + checkResults(expectedResults, results.getResults()); - // Perform the search - SearchResults results = index.search(cuvsQuery); + // search the first index + results = index.search(cuvsQuery); + checkResults(expectedResults, results.getResults()); - // Check results - log.info(results.getResults().toString()); - assertEquals(expectedResults, results.getResults()); + // search with pre-filtering + results = index.search(cuvsQueryWithFiltering); + checkResults(expectedResultsWithFiltering, results.getResults()); // Cleanup index.destroyIndex(); diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java index 030b3b022..1db86db43 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java @@ -17,6 +17,8 @@ package com.nvidia.cuvs; import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.BitSet; import java.util.List; import org.junit.Before; @@ -58,10 +60,22 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { int dimensions = random.nextInt(DIMENSIONS_LIMIT) + 1; int numQueries = random.nextInt(NUM_QUERIES_LIMIT) + 1; int topK = Math.min(random.nextInt(TOP_K_LIMIT) + 1, datasetSize); - + boolean usePrefilter = random.nextBoolean(); if (datasetSize < topK) datasetSize = topK; + BitSet[] prefilters = null; + if (usePrefilter) { + prefilters = new BitSet[numQueries]; + for (int i = 0; i < numQueries; i++) { + BitSet randomFilter = new BitSet(datasetSize); + for (int j = 0; j < datasetSize; j++) { + randomFilter.set(j, random.nextBoolean()); + } + prefilters[i] = randomFilter; + } + } + // Generate a random dataset float[][] dataset = generateData(random, datasetSize, dimensions); @@ -90,7 +104,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { assert topK > 0 && topK <= datasetSize : "Invalid topK value."; // Generate expected results using brute force - List> expected = generateExpectedResults(topK, dataset, queries, log); + List> expected = generateExpectedResults(topK, dataset, queries, prefilters, log); // Create CuVS index and query try (CuVSResources resources = CuVSResources.create()) { @@ -98,6 +112,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { BruteForceQuery query = new BruteForceQuery.Builder() .withTopK(topK) .withQueryVectors(queries) + .withPrefilter(prefilters, dataset.length) .build(); BruteForceIndexParams indexParams = new BruteForceIndexParams.Builder() diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java index 874c3e525..75b21aed8 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.InputStream; import java.lang.invoke.MethodHandles; @@ -28,15 +29,21 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.carrotsearch.randomizedtesting.RandomizedRunner; import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo; import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType; +@RunWith(RandomizedRunner.class) public class CagraBuildAndSearchIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -44,6 +51,8 @@ public class CagraBuildAndSearchIT extends CuVSTestCase { @Before public void setup() { assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); + initializeRandom(); + log.info("Random context initialized for test."); } /** @@ -60,14 +69,14 @@ public void testIndexingAndSearchingFlow() throws Throwable { { 0.03902049f, 0.9689629f }, { 0.92514056f, 0.4463501f }, { 0.6673192f, 0.10993068f } - }; + }; List map = List.of(0, 1, 2, 3); float[][] queries = { { 0.48216683f, 0.0428398f }, { 0.5084142f, 0.6545497f }, { 0.51260436f, 0.2643005f }, { 0.05198065f, 0.5789965f } - }; + }; // Expected search results List> expectedResults = Arrays.asList( @@ -76,68 +85,114 @@ public void testIndexingAndSearchingFlow() throws Throwable { Map.of(3, 0.047766715f, 2, 0.20332818f, 0, 0.48305473f), Map.of(1, 0.15224178f, 0, 0.59063464f, 3, 0.5986642f)); - for (int j = 0; j < 10; j++) { - - try (CuVSResources resources = CuVSResources.create()) { - - // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder() - .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) - .withGraphDegree(1) - .withIntermediateGraphDegree(2) - .withNumWriterThreads(32) - .withMetric(CuvsDistanceType.L2Expanded) - .build(); - - // Create the index with the dataset - CagraIndex index = CagraIndex.newBuilder(resources) - .withDataset(dataset) - .withIndexParams(indexParams) - .build(); - - // Saving the index on to the disk. - String indexFileName = UUID.randomUUID().toString() + ".cag"; - index.serialize(new FileOutputStream(indexFileName)); - - // Loading a CAGRA index from disk. - File indexFile = new File(indexFileName); - InputStream inputStream = new FileInputStream(indexFile); - CagraIndex loadedIndex = CagraIndex.newBuilder(resources) - .from(inputStream) - .build(); - - // Configure search parameters - CagraSearchParams searchParams = new CagraSearchParams.Builder(resources) - .build(); - - // Create a query object with the query vectors - CagraQuery cuvsQuery = new CagraQuery.Builder() - .withTopK(3) - .withSearchParams(searchParams) - .withQueryVectors(queries) - .withMapping(map) - .build(); - - // Perform the search - SearchResults results = index.search(cuvsQuery); - - // Check results - log.info(results.getResults().toString()); - assertEquals(expectedResults, results.getResults()); - - // Search from deserialized index - results = loadedIndex.search(cuvsQuery); - - // Check results - log.info(results.getResults().toString()); - assertEquals(expectedResults, results.getResults()); - - // Cleanup - if (indexFile.exists()) { - indexFile.delete(); + int numTestsRuns = 10; + + try (CuVSResources resources = CuVSResources.create()) { + // sometimes run this test using different threads? + boolean runTestInDifferentThreads = random.nextBoolean(); + // if running in different threads, run concurrently or one after the other? + boolean runConcurrently = runTestInDifferentThreads ? random.nextBoolean(): false; + + log.info("Running in different threads? " + runTestInDifferentThreads); + log.info("Running concurrently? " + runConcurrently); + + ExecutorService parallelExecutor = runConcurrently ? Executors.newFixedThreadPool(numTestsRuns): null; + + for (int j = 0; j < numTestsRuns; j++) { + Runnable testLogic = indexAndQueryOnce(dataset, map, queries, expectedResults, resources); + if (runTestInDifferentThreads) { + if (runConcurrently) { + parallelExecutor.submit(testLogic); + } else { + ExecutorService singleExecutor = Executors.newSingleThreadExecutor(); + singleExecutor.submit(testLogic); + singleExecutor.shutdown(); + singleExecutor.awaitTermination(2000, TimeUnit.SECONDS); + } + } else { + // run the test logic in the main thread + testLogic.run(); } - index.destroyIndex(); } + if (parallelExecutor != null) { + parallelExecutor.shutdown(); + parallelExecutor.awaitTermination(2000, TimeUnit.SECONDS); + } + } } + + private Runnable indexAndQueryOnce(float[][] dataset, List map, float[][] queries, + List> expectedResults, CuVSResources resources) throws Throwable, FileNotFoundException { + + Runnable thread = new Runnable() { + + @Override + public void run() { + try { + + // Configure index parameters + CagraIndexParams indexParams = new CagraIndexParams.Builder() + .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) + .withGraphDegree(1) + .withIntermediateGraphDegree(2) + .withNumWriterThreads(32) + .withMetric(CuvsDistanceType.L2Expanded) + .build(); + + // Create the index with the dataset + CagraIndex index = CagraIndex.newBuilder(resources) + .withDataset(dataset) + .withIndexParams(indexParams) + .build(); + + // Saving the index on to the disk. + String indexFileName = UUID.randomUUID().toString() + ".cag"; + index.serialize(new FileOutputStream(indexFileName)); + + // Loading a CAGRA index from disk. + File indexFile = new File(indexFileName); + InputStream inputStream = new FileInputStream(indexFile); + CagraIndex loadedIndex = CagraIndex.newBuilder(resources) + .from(inputStream) + .build(); + + // Configure search parameters + CagraSearchParams searchParams = new CagraSearchParams.Builder(resources) + .build(); + + // Create a query object with the query vectors + CagraQuery cuvsQuery = new CagraQuery.Builder() + .withTopK(3) + .withSearchParams(searchParams) + .withQueryVectors(queries) + .withMapping(map) + .build(); + + // Perform the search + SearchResults results = index.search(cuvsQuery); + + // Check results + log.info(results.getResults().toString()); + assertEquals(expectedResults, results.getResults()); + + // Search from deserialized index + results = loadedIndex.search(cuvsQuery); + + // Check results + log.info(results.getResults().toString()); + assertEquals(expectedResults, results.getResults()); + + // Cleanup + if (indexFile.exists()) { + indexFile.delete(); + } + index.destroyIndex(); + } catch (Throwable ex) { + throw new RuntimeException("Exception during indexing/querying", ex); + } + } + }; + return thread; + } } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java index 811866dc6..34665d3eb 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java @@ -91,7 +91,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { assert topK > 0 && topK <= datasetSize : "Invalid topK value."; // Generate expected results using brute force - List> expected = generateExpectedResults(topK, dataset, queries, log); + List> expected = generateExpectedResults(topK, dataset, queries, null, log); // Create CuVS index and query try (CuVSResources resources = CuVSResources.create()) { diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java index 8a7d84a19..5e40ff579 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java @@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; import java.util.Random; @@ -50,16 +51,21 @@ protected float[][] generateData(Random random, int rows, int cols) { return data; } - protected List> generateExpectedResults(int topK, float[][] dataset, float[][] queries, Logger log) { + protected List> generateExpectedResults(int topK, float[][] dataset, float[][] queries, BitSet[] prefilters, Logger log) { List> neighborsResult = new ArrayList<>(); int dimensions = dataset[0].length; - for (float[] query : queries) { + for (int q = 0; q < queries.length; q++) { + float[] query = queries[q]; Map distances = new TreeMap<>(); for (int j = 0; j < dataset.length; j++) { double distance = 0; - for (int k = 0; k < dimensions; k++) { - distance += (query[k] - dataset[j][k]) * (query[k] - dataset[j][k]); + if (prefilters != null && prefilters[q].get(j) == false) { + distance = Double.POSITIVE_INFINITY; + } else { + for (int k = 0; k < dimensions; k++) { + distance += (query[k] - dataset[j][k]) * (query[k] - dataset[j][k]); + } } distances.put(j, Math.sqrt(distance)); } @@ -85,7 +91,6 @@ protected void compareResults(SearchResults results, List> expecte // actual vs. expected results for (int i = 0; i < results.getResults().size(); i++) { Map result = results.getResults().get(i); - assertEquals("TopK mismatch for query.", Math.min(topK, datasetSize), result.size()); // Sort result by values (distances) and extract keys List sortedResultKeys = result.entrySet().stream().sorted(Map.Entry.comparingByValue()) @@ -101,6 +106,34 @@ protected void compareResults(SearchResults results, List> expecte } } + protected static void checkResults(List> expected, List> actual) { + List> sortedExpected = new ArrayList>(); + List> sortedActual = new ArrayList>(); + for (Map map: expected) { + sortedExpected.add(new TreeMap(map) { + @Override + public boolean equals(Object o) { + Map map = (Map) o; + if (this.size() != map.size()) return false; + for (Integer key: map.keySet()) { + try { + if (Math.abs((float)map.get(key) - ((float)get(key))) < 0.0001f == false) { + return false; + } + } catch (Exception ex) { + return false; + } + } + return true; + } + }); + } + for (Map map: actual) { + sortedActual.add(new TreeMap(map)); + } + assertEquals(sortedExpected, sortedActual); + } + protected static boolean isLinuxAmd64() { String name = System.getProperty("os.name"); return (name.startsWith("Linux")) && System.getProperty("os.arch").equals("amd64"); diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java index 6d367efc4..07033391b 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java @@ -97,7 +97,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { assert topK > 0 && topK <= datasetSize : "Invalid topK value."; // Generate expected results using brute force - List> expected = generateExpectedResults(topK, dataset, queries, log); + List> expected = generateExpectedResults(topK, dataset, queries, null, log); // Create CuVS index and query try (CuVSResources resources = CuVSResources.create()) { diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c index febde8463..025b0f6e9 100644 --- a/java/internal/src/cuvs_java.c +++ b/java/internal/src/cuvs_java.c @@ -96,7 +96,6 @@ cuvsCagraIndex_t build_cagra_index(float *dataset, long rows, long dimensions, c cuvsStreamGet(cuvs_resources, &stream); omp_set_num_threads(n_writer_threads); - cuvsRMMPoolMemoryResourceEnable(95, 95, false); int64_t dataset_shape[2] = {rows, dimensions}; DLManagedTensor dataset_tensor = prepare_tensor(dataset, dataset_shape, kDLFloat, 32, 2, kDLCUDA); @@ -226,7 +225,6 @@ cuvsBruteForceIndex_t build_brute_force_index(float *dataset, long rows, long di int *return_value, int n_writer_threads) { omp_set_num_threads(n_writer_threads); - cuvsRMMPoolMemoryResourceEnable(95, 95, false); cudaStream_t stream; cuvsStreamGet(cuvs_resources, &stream); @@ -266,26 +264,22 @@ cuvsBruteForceIndex_t build_brute_force_index(float *dataset, long rows, long di * @param[in] n_rows number of rows in the dataset */ void search_brute_force_index(cuvsBruteForceIndex_t index, float *queries, int topk, long n_queries, int dimensions, - cuvsResources_t cuvs_resources, int64_t *neighbors_h, float *distances_h, int *return_value, long *prefilter_data, - long prefilter_data_length, long n_rows) { + cuvsResources_t cuvs_resources, int64_t *neighbors_h, float *distances_h, int *return_value, uint32_t *prefilter_data, + long prefilter_data_length) { cudaStream_t stream; cuvsStreamGet(cuvs_resources, &stream); int64_t *neighbors; float *distances, *queries_d; - long *prefilter_data_d; - - long prefilter_data_32_size = sizeof(uint32_t) * prefilter_data_length * 2; - uint32_t *prefilter_data_32 = (uint32_t *)malloc(prefilter_data_32_size); + uint32_t *prefilter_d = NULL; + int64_t prefilter_len = 0; cuvsRMMAlloc(cuvs_resources, (void**) &queries_d, sizeof(float) * n_queries * dimensions); cuvsRMMAlloc(cuvs_resources, (void**) &neighbors, sizeof(int64_t) * n_queries * topk); cuvsRMMAlloc(cuvs_resources, (void**) &distances, sizeof(float) * n_queries * topk); - cuvsRMMAlloc(cuvs_resources, (void**) &prefilter_data_d, prefilter_data_32_size); cudaMemcpy(queries_d, queries, sizeof(float) * n_queries * dimensions, cudaMemcpyDefault); - cudaMemcpy(prefilter_data_d, prefilter_data_32, prefilter_data_32_size, cudaMemcpyDefault); int64_t queries_shape[2] = {n_queries, dimensions}; DLManagedTensor queries_tensor = prepare_tensor(queries_d, queries_shape, kDLFloat, 32, 2, kDLCUDA); @@ -296,20 +290,22 @@ void search_brute_force_index(cuvsBruteForceIndex_t index, float *queries, int t int64_t distances_shape[2] = {n_queries, topk}; DLManagedTensor distances_tensor = prepare_tensor(distances, distances_shape, kDLFloat, 32, 2, kDLCUDA); - // unpack the incoming long into two 32bit ints - for (long i = 0; i < prefilter_data_length; i++) { - *(prefilter_data_32 + (2 * i)) = (int)(*(prefilter_data + i) >> 32); - *(prefilter_data_32 + ((2 * i) + 1)) = (int)*(prefilter_data + i); - //long l = (((long)*(prefilter_data_32 + (2 * i))) << 32) | (*(prefilter_data_32 + ((2 * i) + 1)) & 0xffffffffL); - } - cuvsFilter prefilter; + DLManagedTensor prefilter_tensor; if (prefilter_data == NULL) { prefilter.type = NO_FILTER; prefilter.addr = (uintptr_t)NULL; } else { - int64_t prefilter_shape[1] = {(n_queries * n_rows + 31) / 32}; - DLManagedTensor prefilter_tensor = prepare_tensor(prefilter_data_d, prefilter_shape, kDLUInt, 32, 1, kDLCUDA); + // Parse the filters data + int num_integers = (prefilter_data_length+63)/64 * 2; + int extraPaddingByteExists = prefilter_data_length % 64 > 32? 0: 1; + int64_t prefilter_shape[1] = {(prefilter_data_length + 31) / 32}; + + prefilter_len = prefilter_shape[0]; + cuvsRMMAlloc(cuvs_resources, (void**) &prefilter_d, sizeof(uint32_t) * prefilter_len); + cudaMemcpy(prefilter_d, prefilter_data, sizeof(uint32_t) * prefilter_len, cudaMemcpyHostToDevice); + + prefilter_tensor = prepare_tensor(prefilter_d, prefilter_shape, kDLUInt, 32, 1, kDLCUDA); prefilter.type = BITMAP; prefilter.addr = (uintptr_t)&prefilter_tensor; } @@ -323,6 +319,9 @@ void search_brute_force_index(cuvsBruteForceIndex_t index, float *queries, int t cuvsRMMFree(cuvs_resources, neighbors, sizeof(int64_t) * n_queries * topk); cuvsRMMFree(cuvs_resources, distances, sizeof(float) * n_queries * topk); cuvsRMMFree(cuvs_resources, queries_d, sizeof(float) * n_queries * dimensions); + if(prefilter_d != NULL) { + cuvsRMMFree(cuvs_resources, prefilter_d, sizeof(uint32_t) * prefilter_len); + } } /**