From d7c258e1649a90fd8df2606fc170b5829140150a Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 5 Feb 2025 08:51:01 -0500 Subject: [PATCH 01/12] Add telemetry (#652) Enables telemetry during CUVS CI runs. This is done by parsing GitHub Actions run log metadata and should have no impact on build or test times. xref rapidsai/build-infra#139 Authors: - Gil Forsyth (https://github.com/gforsyth) Approvers: - Mike Sarahan (https://github.com/msarahan) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/652 --- .github/workflows/pr.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a5981fe1f..233b82f94 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,12 +26,23 @@ jobs: - wheel-build-cuvs - wheel-tests-cuvs - devcontainer + - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} + telemetry-setup: + continue-on-error: true + runs-on: ubuntu-latest + env: + OTEL_SERVICE_NAME: 'pr-cuvs' + steps: + - name: Telemetry setup + if: ${{ vars.TELEMETRY_ENABLED == 'true' }} + uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main check-nightly-ci: + needs: telemetry-setup # Switch to ubuntu-latest once it defaults to a version of Ubuntu that # provides at least Python 3.11 (see # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) @@ -44,6 +55,7 @@ jobs: with: repo: cuvs changed-files: + needs: telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: @@ -77,10 +89,12 @@ jobs: - '!rust/**' - '!thirdparty/LICENSES/**' checks: + needs: telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false + ignored_pr_jobs: 
"telemetry-summarize" conda-cpp-build: needs: checks secrets: inherit @@ -162,6 +176,7 @@ jobs: script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit + needs: telemetry-setup uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' @@ -170,3 +185,13 @@ jobs: sccache -z; build-all --verbose; sccache -s; + + telemetry-summarize: + # This job must use a self-hosted runner to record telemetry traces. + runs-on: linux-amd64-cpu4 + needs: pr-builder + if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} + continue-on-error: true + steps: + - name: Telemetry summarize + uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main From 228d949bcb8d4625710a8780a04c89c199e4164e Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 6 Feb 2025 02:48:55 +0000 Subject: [PATCH 02/12] cuvs-java: Rework the api to be Java 21 friendly (#628) This change reworks the api to allow it to be used with Java 21. The implementation is moved to an internal package, compiled with JDK 22, and packaged as an mrjar. The benefit of this structure is that the api can be used in environments that compile to a minimum of Java 21, but run on more recent JDKs like 22 and 23 - which is exactly what Elasticsearch and Lucene do. In fact, a minimum compilation target of Java 21 is common, since 21, at the time of writing, is the most recent LTS Java release. The most significant change is that the non-trivial api types are now, for the most part, interfaces. Instances can be created by one of the factory methods, which look up an SPI to find the implementation. When running on a release later than Java 21, a functioning implementation is returned. Otherwise, a no-op implementation is returned. This is a reasonably standard way for a Java api to behave, and allows the developer to handle the case where the platform does not have a functioning implementation. 
This change also refactors the native downcall method handles so that they are static final constants - which are optimised better by the JVM. It's also the generally accepted pattern, where the handles are tied to the lifetime of the class, which effectively mediates access - by virtue of reachability. Another thing that I added is the ability to programmatically set the temporary directory used for intermediate operations - this is important to how both Lucene and Elasticsearch work - since they commonly only have permission to write to certain parts of the disk. Additionally, 1. the error codes from native calls are plumbed in and checked, as well as `cuvsGetLastErrorText`. 2. a state is added to any classes that hold a reference to native resources that could be released. 3. a local arena is used for memory allocation only needed per downcall invocation, e.g. the return value. 4. I moved the tests to be integration tests, since they need to run on the jar (rather than the exploded classes). They can be run by any of: `mvn verify`, or `mvn integration-test`, or `mvn -Dit.test="*Hnsw*" verify` 5. I refactored the entry-points to the api to be static methods and added an `spi` layer. You can see the minimal impact on the tests. 6. I moved the native library out of the top-level directory in the jar and into an os/arch position in the META-INF. 7. I added service provider support for custom implementations. Authors: - Chris Hegarty (https://github.com/ChrisHegarty) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/628 --- java/build.sh | 2 +- java/cuvs-java/pom.xml | 61 +- .../java/com/nvidia/cuvs/BruteForceIndex.java | 288 +-------- .../nvidia/cuvs/CagraCompressionParams.java | 29 - .../main/java/com/nvidia/cuvs/CagraIndex.java | 579 +++++------------- .../com/nvidia/cuvs/CagraIndexParams.java | 32 +- .../com/nvidia/cuvs/CagraSearchParams.java | 47 +- .../java/com/nvidia/cuvs/CuVSResources.java | 134 +--- .../main/java/com/nvidia/cuvs/GPUInfo.java | 74 +-- .../main/java/com/nvidia/cuvs/HnswIndex.java | 263 ++------ .../java/com/nvidia/cuvs/HnswIndexParams.java | 37 +- .../com/nvidia/cuvs/HnswSearchParams.java | 90 +-- .../com/nvidia/cuvs/LibraryException.java | 16 + .../java/com/nvidia/cuvs/SearchResults.java | 30 + .../java/com/nvidia/cuvs/common/Util.java | 265 -------- .../com/nvidia/cuvs/spi/CuVSProvider.java | 69 +++ .../nvidia/cuvs/spi/CuVSServiceProvider.java | 68 ++ .../nvidia/cuvs/spi/UnsupportedProvider.java | 50 ++ java/cuvs-java/src/main/java/module-info.java | 3 + .../cuvs/internal/BruteForceIndexImpl.java | 380 ++++++++++++ .../internal}/BruteForceSearchResults.java | 8 +- .../nvidia/cuvs/internal/CagraIndexImpl.java | 515 ++++++++++++++++ .../cuvs/internal}/CagraSearchResults.java | 6 +- .../cuvs/internal/CuVSResourcesImpl.java | 154 +++++ .../nvidia/cuvs/internal/HnswIndexImpl.java | 303 +++++++++ .../cuvs/internal}/HnswSearchResults.java | 8 +- .../cuvs/internal/common/LinkerHelper.java | 64 ++ .../cuvs/internal/common/LoaderUtils.java | 100 +++ .../internal/common/SearchResultsImpl.java} | 14 +- .../com/nvidia/cuvs/internal/common/Util.java | 204 ++++++ .../cuvs/internal}/panama/BruteForceH.java | 2 +- .../nvidia/cuvs/internal}/panama/CagraH.java | 2 +- .../internal}/panama/CuVSBruteForceIndex.java | 2 +- .../panama/CuVSCagraCompressionParams.java | 2 +- .../cuvs/internal}/panama/CuVSCagraIndex.java | 2 +- .../panama/CuVSCagraIndexParams.java | 2 +- 
.../panama/CuVSCagraSearchParams.java | 2 +- .../cuvs/internal}/panama/CuVSFilter.java | 2 +- .../panama/CuVSHnswExtendParams.java | 2 +- .../cuvs/internal}/panama/CuVSHnswIndex.java | 2 +- .../internal}/panama/CuVSHnswIndexParams.java | 2 +- .../panama/CuVSHnswSearchParams.java | 2 +- .../cuvs/internal}/panama/DLDataType.java | 2 +- .../cuvs/internal}/panama/DLDevice.java | 2 +- .../internal}/panama/DLManagedTensor.java | 2 +- .../panama/DLManagedTensorVersioned.java | 2 +- .../cuvs/internal}/panama/DLPackVersion.java | 2 +- .../cuvs/internal}/panama/DLTensor.java | 2 +- .../cuvs/internal}/panama/DistanceH.java | 2 +- .../nvidia/cuvs/internal}/panama/DlpackH.java | 2 +- .../nvidia/cuvs/internal}/panama/Fsidt.java | 2 +- .../nvidia/cuvs/internal}/panama/GpuInfo.java | 2 +- .../cuvs/internal}/panama/GpuInfoH.java | 2 +- .../nvidia/cuvs/internal}/panama/HnswH.java | 2 +- .../cuvs/internal}/panama/IvfFlatH.java | 2 +- .../nvidia/cuvs/internal}/panama/IvfPqH.java | 2 +- .../cuvs/internal}/panama/MaxAlignT.java | 2 +- .../com/nvidia/cuvs/spi/JDKProvider.java | 60 ++ ...chTest.java => BruteForceAndSearchIT.java} | 19 +- ...dTest.java => BruteForceRandomizedIT.java} | 27 +- ...chTest.java => CagraBuildAndSearchIT.java} | 18 +- ...omizedTest.java => CagraRandomizedIT.java} | 53 +- .../java/com/nvidia/cuvs/CuVSTestCase.java | 22 +- ...rchTest.java => HnswBuildAndSearchIT.java} | 23 +- ...domizedTest.java => HnswRandomizedIT.java} | 37 +- .../nvidia/cuvs/internal/common/UtilIT.java | 37 ++ java/examples/pom.xml | 8 +- .../cuvs/examples/BruteForceExample.java | 8 +- .../nvidia/cuvs/examples/CagraExample.java | 10 +- .../com/nvidia/cuvs/examples/HnswExample.java | 16 +- 70 files changed, 2600 insertions(+), 1685 deletions(-) create mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/SearchResults.java delete mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java create mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSProvider.java create 
mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSServiceProvider.java create mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/UnsupportedProvider.java create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/BruteForceSearchResults.java (93%) create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/CagraSearchResults.java (93%) create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/HnswSearchResults.java (93%) create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LinkerHelper.java create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LoaderUtils.java rename java/cuvs-java/src/main/{java/com/nvidia/cuvs/common/SearchResults.java => java22/com/nvidia/cuvs/internal/common/SearchResultsImpl.java} (80%) create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/BruteForceH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CagraH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSBruteForceIndex.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSCagraCompressionParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSCagraIndex.java (99%) rename 
java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSCagraIndexParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSCagraSearchParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSFilter.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSHnswExtendParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSHnswIndex.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSHnswIndexParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/CuVSHnswSearchParams.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLDataType.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLDevice.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLManagedTensor.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLManagedTensorVersioned.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLPackVersion.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DLTensor.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DistanceH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/DlpackH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/Fsidt.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => 
java22/com/nvidia/cuvs/internal}/panama/GpuInfo.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/GpuInfoH.java (98%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/HnswH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/IvfFlatH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/IvfPqH.java (99%) rename java/cuvs-java/src/main/{java/com/nvidia/cuvs => java22/com/nvidia/cuvs/internal}/panama/MaxAlignT.java (99%) create mode 100644 java/cuvs-java/src/main/java22/com/nvidia/cuvs/spi/JDKProvider.java rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{BruteForceAndSearchTest.java => BruteForceAndSearchIT.java} (88%) rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{BruteForceRandomizedTest.java => BruteForceRandomizedIT.java} (75%) rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{CagraBuildAndSearchTest.java => CagraBuildAndSearchIT.java} (89%) rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{CagraRandomizedTest.java => CagraRandomizedIT.java} (64%) rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{HnswBuildAndSearchTest.java => HnswBuildAndSearchIT.java} (87%) rename java/cuvs-java/src/test/java/com/nvidia/cuvs/{HnswRandomizedTest.java => HnswRandomizedIT.java} (79%) create mode 100644 java/cuvs-java/src/test/java/com/nvidia/cuvs/internal/common/UtilIT.java diff --git a/java/build.sh b/java/build.sh index 05b47de77..3483c544e 100755 --- a/java/build.sh +++ b/java/build.sh @@ -10,5 +10,5 @@ cd internal && cmake . && cmake --build . \ && cd .. 
\ && mvn install:install-file -DgroupId=$GROUP_ID -DartifactId=cuvs-java-internal -Dversion=$VERSION -Dpackaging=so -Dfile=$SO_FILE_PATH/libcuvs_java.so \ && cd cuvs-java \ - && mvn package \ + && mvn verify \ && mvn install:install-file -Dfile=./target/cuvs-java-$VERSION-jar-with-dependencies.jar -DgroupId=$GROUP_ID -DartifactId=cuvs-java -Dversion=$VERSION -Dpackaging=jar diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml index c8d31638c..c036c6c66 100644 --- a/java/cuvs-java/pom.xml +++ b/java/cuvs-java/pom.xml @@ -69,16 +69,53 @@ org.apache.maven.plugins - maven-surefire-plugin + maven-compiler-plugin + 3.13.0 + + + default-compile + + compile + + + 21 + + ${project.basedir}/src/main/java + + + + + compile-java-22 + compile + + compile + + + 22 + + ${project.basedir}/src/main/java22 + + true + + + + + + org.apache.maven.plugins + maven-failsafe-plugin 3.5.2 - suites - 1 - false - - ${project.build.directory}/classes - + 1 + --enable-native-access=com.nvidia.cuvs + + + + integration-test + verify + + + org.apache.maven.plugins @@ -101,7 +138,7 @@ so false - ${project.build.directory}/classes + ${project.build.directory}/classes/META-INF/native/linux_x64 libcuvs_java.so @@ -117,6 +154,7 @@ jar-with-dependencies + merge add @@ -134,14 +172,15 @@ org.apache.maven.plugins maven-jar-plugin - 2.2 + 3.4.2 - + + true true com.nvidia.cuvs.examples.CagraExample - + diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java index 30ffca1cd..e09c0228f 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceIndex.java @@ -16,24 +16,14 @@ package com.nvidia.cuvs; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; +import com.nvidia.cuvs.spi.CuVSProvider; + import java.io.InputStream; import java.io.OutputStream; -import 
java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SequenceLayout; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; +import java.nio.file.Path; +import java.util.Objects; import java.util.UUID; -import com.nvidia.cuvs.common.Util; -import com.nvidia.cuvs.panama.CuVSBruteForceIndex; - /** * * {@link BruteForceIndex} encapsulates a BRUTEFORCE index, along with methods @@ -41,123 +31,13 @@ * * @since 25.02 */ -public class BruteForceIndex { - - private final float[][] dataset; - private final CuVSResources resources; - private MethodHandle indexMethodHandle; - private MethodHandle searchMethodHandle; - private MethodHandle destroyIndexMethodHandle; - private MethodHandle serializeMethodHandle; - private MethodHandle deserializeMethodHandle; - private IndexReference bruteForceIndexReference; - private BruteForceIndexParams bruteForceIndexParams; - private MemoryLayout longMemoryLayout; - private MemoryLayout intMemoryLayout; - private MemoryLayout floatMemoryLayout; - - /** - * Constructor for building the index using specified dataset - * - * @param dataset the dataset used for creating the BRUTEFORCE - * index - * @param resources an instance of {@link CuVSResources} - * @param bruteForceIndexParams an instance of {@link BruteForceIndexParams} - * holding the index parameters - */ - private BruteForceIndex(float[][] dataset, CuVSResources resources, BruteForceIndexParams bruteForceIndexParams) - throws Throwable { - this.dataset = dataset; - this.resources = resources; - this.bruteForceIndexParams = bruteForceIndexParams; - - longMemoryLayout = resources.linker.canonicalLayouts().get("long"); - intMemoryLayout = resources.linker.canonicalLayouts().get("int"); - floatMemoryLayout = resources.linker.canonicalLayouts().get("float"); - - initializeMethodHandles(); - this.bruteForceIndexReference = build(); - } 
- - /** - * Constructor for loading the index from an {@link InputStream} - * - * @param inputStream an instance of stream to read the index bytes from - * @param resources an instance of {@link CuVSResources} - */ - private BruteForceIndex(InputStream inputStream, CuVSResources resources) throws Throwable { - this.bruteForceIndexParams = null; - this.dataset = null; - this.resources = resources; - - longMemoryLayout = resources.linker.canonicalLayouts().get("long"); - intMemoryLayout = resources.linker.canonicalLayouts().get("int"); - floatMemoryLayout = resources.linker.canonicalLayouts().get("float"); - - initializeMethodHandles(); - this.bruteForceIndexReference = deserialize(inputStream); - } - - /** - * Initializes the {@link MethodHandles} for invoking native methods. - * - * @throws IOException @{@link IOException} is unable to load the native library - */ - private void initializeMethodHandles() throws IOException { - indexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("build_brute_force_index").get(), - FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.ADDRESS, longMemoryLayout, longMemoryLayout, - ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout)); - - searchMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("search_brute_force_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout, longMemoryLayout, - intMemoryLayout, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, - ValueLayout.ADDRESS, longMemoryLayout, longMemoryLayout)); - - destroyIndexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("destroy_brute_force_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - serializeMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("serialize_brute_force_index").get(), - 
FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - deserializeMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("deserialize_brute_force_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - } +public interface BruteForceIndex { /** * Invokes the native destroy_brute_force_index function to de-allocate * BRUTEFORCE index */ - public void destroyIndex() throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - destroyIndexMethodHandle.invokeExact(bruteForceIndexReference.getMemorySegment(), returnValueMemorySegment); - } - - /** - * Invokes the native build_brute_force_index function via the Panama API to - * build the {@link BruteForceIndex} - * - * @return an instance of {@link IndexReference} that holds the pointer to the - * index - */ - private IndexReference build() throws Throwable { - long rows = dataset.length; - long cols = rows > 0 ? 
dataset[0].length : 0; - - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - - IndexReference indexReference = new IndexReference((MemorySegment) indexMethodHandle.invokeExact( - Util.buildMemorySegment(resources.linker, resources.arena, dataset), rows, cols, resources.getMemorySegment(), - returnValueMemorySegment, bruteForceIndexParams.getNumWriterThreads())); - - return indexReference; - } + void destroyIndex() throws Throwable; /** * Invokes the native search_brute_force_index via the Panama API for searching @@ -165,33 +45,9 @@ private IndexReference build() throws Throwable { * * @param cuvsQuery an instance of {@link BruteForceQuery} holding the query * vectors and other parameters - * @return an instance of {@link BruteForceSearchResults} containing the results + * @return an instance of {@link SearchResults} containing the results */ - public BruteForceSearchResults search(BruteForceQuery cuvsQuery) throws Throwable { - long numQueries = cuvsQuery.getQueryVectors().length; - long numBlocks = cuvsQuery.getTopK() * numQueries; - int vectorDimension = numQueries > 0 ? cuvsQuery.getQueryVectors()[0].length : 0; - long prefilterDataLength = cuvsQuery.getPrefilter() != null ? cuvsQuery.getPrefilter().length : 0; - long numRows = dataset != null ? 
dataset.length : 0; - - SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, longMemoryLayout); - SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout); - MemorySegment neighborsMemorySegment = resources.arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = resources.arena.allocate(distancesSequenceLayout); - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - MemorySegment prefilterDataMemorySegment = cuvsQuery.getPrefilter() != null - ? Util.buildMemorySegment(resources.linker, resources.arena, cuvsQuery.getPrefilter()) - : MemorySegment.NULL; - - searchMethodHandle.invokeExact(bruteForceIndexReference.getMemorySegment(), - Util.buildMemorySegment(resources.linker, resources.arena, cuvsQuery.getQueryVectors()), cuvsQuery.getTopK(), - numQueries, vectorDimension, resources.getMemorySegment(), neighborsMemorySegment, distancesMemorySegment, - returnValueMemorySegment, prefilterDataMemorySegment, prefilterDataLength, numRows); - - return new BruteForceSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment, - distancesMemorySegment, cuvsQuery.getTopK(), cuvsQuery.getMapping(), numQueries); - } + SearchResults search(BruteForceQuery cuvsQuery) throws Throwable; /** * A method to persist a BRUTEFORCE index using an instance of @@ -200,83 +56,33 @@ public BruteForceSearchResults search(BruteForceQuery cuvsQuery) throws Throwabl * @param outputStream an instance of {@link OutputStream} to write the index * bytes into */ - public void serialize(OutputStream outputStream) throws Throwable { - serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".bf")); - } - + void serialize(OutputStream outputStream) throws Throwable; /** * A method to persist a BRUTEFORCE index using an instance of * {@link OutputStream} and path to the 
intermediate temporary file. * * @param outputStream an instance of {@link OutputStream} to write the index * bytes to - * @param tempFile an intermediate {@link File} where BRUTEFORCE index is + * @param tempFile an intermediate {@link Path} where BRUTEFORCE index is * written temporarily */ - public void serialize(OutputStream outputStream, File tempFile) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - serializeMethodHandle.invokeExact(resources.getMemorySegment(), bruteForceIndexReference.getMemorySegment(), - returnValueMemorySegment, - Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath())); - FileInputStream fileInputStream = new FileInputStream(tempFile); - byte[] chunk = new byte[1024]; // TODO: Make this configurable - int chunkLength = 0; - while ((chunkLength = fileInputStream.read(chunk)) != -1) { - outputStream.write(chunk, 0, chunkLength); - } - fileInputStream.close(); - tempFile.delete(); - } + void serialize(OutputStream outputStream, Path tempFile) throws Throwable; /** - * Gets an instance of {@link IndexReference} by deserializing a BRUTEFORCE - * index using an {@link InputStream}. + * Creates a new Builder with an instance of {@link CuVSResources}. * - * @param inputStream an instance of {@link InputStream} - * @return an instance of {@link IndexReference}. 
+ * @param cuvsResources an instance of {@link CuVSResources} + * @throws UnsupportedOperationException if the provider does not cuvs */ - private IndexReference deserialize(InputStream inputStream) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - String tmpIndexFile = "/tmp/" + UUID.randomUUID().toString() + ".bf"; - IndexReference indexReference = new IndexReference(resources); - - File tempFile = new File(tmpIndexFile); - FileOutputStream fileOutputStream = new FileOutputStream(tempFile); - byte[] chunk = new byte[1024]; - int chunkLength = 0; - while ((chunkLength = inputStream.read(chunk)) != -1) { - fileOutputStream.write(chunk, 0, chunkLength); - } - deserializeMethodHandle.invokeExact(resources.getMemorySegment(), indexReference.getMemorySegment(), - returnValueMemorySegment, Util.buildMemorySegment(resources.linker, resources.arena, tmpIndexFile)); - - inputStream.close(); - fileOutputStream.close(); - tempFile.delete(); - - return indexReference; + static Builder newBuilder(CuVSResources cuvsResources) { + Objects.requireNonNull(cuvsResources); + return CuVSProvider.provider().newBruteForceIndexBuilder(cuvsResources); } /** * Builder helps configure and create an instance of {@link BruteForceIndex}. */ - public static class Builder { - - private float[][] dataset; - private CuVSResources cuvsResources; - private BruteForceIndexParams bruteForceIndexParams; - private InputStream inputStream; - - /** - * Constructs this Builder with an instance of {@link CuVSResources}. 
- * - * @param cuvsResources an instance of {@link CuVSResources} - */ - public Builder(CuVSResources cuvsResources) { - this.cuvsResources = cuvsResources; - } + interface Builder { /** * Registers an instance of configured {@link BruteForceIndexParams} with this @@ -285,10 +91,7 @@ public Builder(CuVSResources cuvsResources) { * @param bruteForceIndexParams An instance of BruteForceIndexParams * @return An instance of this Builder */ - public Builder withIndexParams(BruteForceIndexParams bruteForceIndexParams) { - this.bruteForceIndexParams = bruteForceIndexParams; - return this; - } + Builder withIndexParams(BruteForceIndexParams bruteForceIndexParams); /** * Sets an instance of InputStream typically used when index deserialization is @@ -297,10 +100,7 @@ public Builder withIndexParams(BruteForceIndexParams bruteForceIndexParams) { * @param inputStream an instance of {@link InputStream} * @return an instance of this Builder */ - public Builder from(InputStream inputStream) { - this.inputStream = inputStream; - return this; - } + Builder from(InputStream inputStream); /** * Sets the dataset for building the {@link BruteForceIndex}. @@ -308,57 +108,13 @@ public Builder from(InputStream inputStream) { * @param dataset a two-dimensional float array * @return an instance of this Builder */ - public Builder withDataset(float[][] dataset) { - this.dataset = dataset; - return this; - } + Builder withDataset(float[][] dataset); /** * Builds and returns an instance of {@link BruteForceIndex}. * * @return an instance of {@link BruteForceIndex} */ - public BruteForceIndex build() throws Throwable { - if (inputStream != null) { - return new BruteForceIndex(inputStream, cuvsResources); - } else { - return new BruteForceIndex(dataset, cuvsResources, bruteForceIndexParams); - } - } - } - - /** - * Holds the memory reference to a BRUTEFORCE index. 
- */ - protected static class IndexReference { - - private final MemorySegment memorySegment; - - /** - * Constructs CagraIndexReference and allocate the MemorySegment. - */ - protected IndexReference(CuVSResources resources) { - memorySegment = CuVSBruteForceIndex.allocate(resources.arena); - } - - /** - * Constructs BruteForceIndexReference with an instance of MemorySegment passed - * as a parameter. - * - * @param indexMemorySegment the MemorySegment instance to use for containing - * index reference - */ - protected IndexReference(MemorySegment indexMemorySegment) { - this.memorySegment = indexMemorySegment; - } - - /** - * Gets the instance of index MemorySegment. - * - * @return index MemorySegment - */ - protected MemorySegment getMemorySegment() { - return memorySegment; - } + BruteForceIndex build() throws Throwable; } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java index 09f6bae8b..83c9c9acb 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraCompressionParams.java @@ -16,10 +16,6 @@ package com.nvidia.cuvs; -import java.lang.foreign.MemorySegment; - -import com.nvidia.cuvs.panama.CuVSCagraCompressionParams; - /** * Supplemental compression parameters to build CAGRA Index. * @@ -27,7 +23,6 @@ */ public class CagraCompressionParams { - private final MemorySegment memorySegment; private CuVSResources resources; private final int pqBits; private final int pqDim; @@ -63,30 +58,6 @@ private CagraCompressionParams(CuVSResources resources, int pqBits, int pqDim, i this.kmeansNIters = kmeansNIters; this.vqKmeansTrainsetFraction = vqKmeansTrainsetFraction; this.pqKmeansTrainsetFraction = pqKmeansTrainsetFraction; - this.memorySegment = initMemorySegment(); - } - - /** - * Allocates the configured compression parameters in the MemorySegment. 
- */ - private MemorySegment initMemorySegment() { - MemorySegment compressionParamsMemorySegment = CuVSCagraCompressionParams.allocate(resources.arena); - CuVSCagraCompressionParams.pq_bits(compressionParamsMemorySegment, pqBits); - CuVSCagraCompressionParams.pq_dim(compressionParamsMemorySegment, pqDim); - CuVSCagraCompressionParams.vq_n_centers(compressionParamsMemorySegment, vqNCenters); - CuVSCagraCompressionParams.kmeans_n_iters(compressionParamsMemorySegment, kmeansNIters); - CuVSCagraCompressionParams.vq_kmeans_trainset_fraction(compressionParamsMemorySegment, vqKmeansTrainsetFraction); - CuVSCagraCompressionParams.pq_kmeans_trainset_fraction(compressionParamsMemorySegment, pqKmeansTrainsetFraction); - return compressionParamsMemorySegment; - } - - /** - * Gets the memory segment instance containing the compression parameters. - * - * @return the memory segment instance containing the compression parameters. - */ - protected MemorySegment getMemorySegment() { - return memorySegment; } /** diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java index 21eea9783..f92d97edd 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndex.java @@ -16,23 +16,13 @@ package com.nvidia.cuvs; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SequenceLayout; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; +import java.nio.file.Path; +import java.util.Objects; import java.util.UUID; -import com.nvidia.cuvs.common.Util; -import com.nvidia.cuvs.panama.CuVSCagraIndex; +import 
com.nvidia.cuvs.spi.CuVSProvider; /** * {@link CagraIndex} encapsulates a CAGRA index, along with methods to interact @@ -46,472 +36,185 @@ * * @since 25.02 */ -public class CagraIndex { +public interface CagraIndex { - private final float[][] dataset; - private final CuVSResources resources; - private MethodHandle indexMethodHandle; - private MethodHandle searchMethodHandle; - private MethodHandle serializeMethodHandle; - private MethodHandle deserializeMethodHandle; - private MethodHandle destroyIndexMethodHandle; - private MethodHandle serializeCAGRAIndexToHNSWMethodHandle; - private CagraIndexParams cagraIndexParameters; - private CagraCompressionParams cagraCompressionParams; - private IndexReference cagraIndexReference; - private MemoryLayout longMemoryLayout; - private MemoryLayout intMemoryLayout; - private MemoryLayout floatMemoryLayout; - - /** - * Constructor for building the index using specified dataset - * - * @param indexParameters an instance of {@link CagraIndexParams} holding - * the index parameters - * @param cagraCompressionParams an instance of {@link CagraCompressionParams} - * holding the compression parameters - * @param dataset the dataset for indexing - * @param resources an instance of {@link CuVSResources} - */ - private CagraIndex(CagraIndexParams indexParameters, CagraCompressionParams cagraCompressionParams, float[][] dataset, - CuVSResources resources) throws Throwable { - this.cagraIndexParameters = indexParameters; - this.cagraCompressionParams = cagraCompressionParams; - this.dataset = dataset; - this.resources = resources; - - longMemoryLayout = resources.linker.canonicalLayouts().get("long"); - intMemoryLayout = resources.linker.canonicalLayouts().get("int"); - floatMemoryLayout = resources.linker.canonicalLayouts().get("float"); - - initializeMethodHandles(); - this.cagraIndexReference = build(); - } - - /** - * Constructor for loading the index from an {@link InputStream} - * - * @param inputStream an instance of stream to 
read the index bytes from - * @param resources an instance of {@link CuVSResources} - */ - private CagraIndex(InputStream inputStream, CuVSResources resources) throws Throwable { - this.cagraIndexParameters = null; - this.cagraCompressionParams = null; - this.dataset = null; - this.resources = resources; - - longMemoryLayout = resources.linker.canonicalLayouts().get("long"); - intMemoryLayout = resources.linker.canonicalLayouts().get("int"); - floatMemoryLayout = resources.linker.canonicalLayouts().get("float"); - - initializeMethodHandles(); - this.cagraIndexReference = deserialize(inputStream); - } - - /** - * Initializes the {@link MethodHandles} for invoking native methods. - * - * @throws IOException @{@link IOException} is unable to load the native library - */ - private void initializeMethodHandles() throws IOException { - indexMethodHandle = resources.linker.downcallHandle(resources.getSymbolLookup().find("build_cagra_index").get(), - FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.ADDRESS, longMemoryLayout, longMemoryLayout, - ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout)); - - searchMethodHandle = resources.linker.downcallHandle(resources.getSymbolLookup().find("search_cagra_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout, longMemoryLayout, - intMemoryLayout, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, - ValueLayout.ADDRESS)); - - serializeMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("serialize_cagra_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - deserializeMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("deserialize_cagra_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, 
ValueLayout.ADDRESS)); - - destroyIndexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("destroy_cagra_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - serializeCAGRAIndexToHNSWMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("serialize_cagra_index_to_hnsw").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - } - - /** - * Invokes the native destroy_cagra_index to de-allocate the CAGRA index - */ - public void destroyIndex() throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - destroyIndexMethodHandle.invokeExact(cagraIndexReference.getMemorySegment(), returnValueMemorySegment); - } - - /** - * Invokes the native build_cagra_index function via the Panama API to build the - * {@link CagraIndex} - * - * @return an instance of {@link IndexReference} that holds the pointer to the - * index - */ - private IndexReference build() throws Throwable { - long rows = dataset.length; - long cols = rows > 0 ? dataset[0].length : 0; - - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - - MemorySegment indexParamsMemorySegment = cagraIndexParameters != null ? cagraIndexParameters.getMemorySegment() - : MemorySegment.NULL; - - int numWriterThreads = cagraIndexParameters != null ? cagraIndexParameters.getNumWriterThreads() : 1; - - MemorySegment compressionParamsMemorySegment = cagraCompressionParams != null - ? 
cagraCompressionParams.getMemorySegment() - : MemorySegment.NULL; - - IndexReference indexReference = new IndexReference((MemorySegment) indexMethodHandle.invokeExact( - Util.buildMemorySegment(resources.linker, resources.arena, dataset), rows, cols, resources.getMemorySegment(), - returnValueMemorySegment, indexParamsMemorySegment, compressionParamsMemorySegment, numWriterThreads)); - - return indexReference; - } - - /** - * Invokes the native search_cagra_index via the Panama API for searching a - * CAGRA index. - * - * @param query an instance of {@link CagraQuery} holding the query vectors and - * other parameters - * @return an instance of {@link CagraSearchResults} containing the results - */ - public CagraSearchResults search(CagraQuery query) throws Throwable { - int topK = query.getMapping() != null ? Math.min(query.getMapping().size(), query.getTopK()) : query.getTopK(); - long numQueries = query.getQueryVectors().length; - long numBlocks = topK * numQueries; - int vectorDimension = numQueries > 0 ? 
query.getQueryVectors()[0].length : 0; - - SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, intMemoryLayout); - SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout); - MemorySegment neighborsMemorySegment = resources.arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = resources.arena.allocate(distancesSequenceLayout); - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - - searchMethodHandle.invokeExact(cagraIndexReference.getMemorySegment(), - Util.buildMemorySegment(resources.linker, resources.arena, query.getQueryVectors()), topK, numQueries, - vectorDimension, resources.getMemorySegment(), neighborsMemorySegment, distancesMemorySegment, - returnValueMemorySegment, query.getCagraSearchParameters().getMemorySegment()); - - return new CagraSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment, - distancesMemorySegment, topK, query.getMapping(), numQueries); - } - - /** - * A method to persist a CAGRA index using an instance of {@link OutputStream} - * for writing index bytes. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes into - */ - public void serialize(OutputStream outputStream) throws Throwable { - serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".cag"), 1024); - } - - /** - * A method to persist a CAGRA index using an instance of {@link OutputStream} - * for writing index bytes. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes into - * @param bufferLength the length of buffer to use for writing bytes. 
Default - * value is 1024 - */ - public void serialize(OutputStream outputStream, int bufferLength) throws Throwable { - serialize(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".cag"), bufferLength); - } - - /** - * A method to persist a CAGRA index using an instance of {@link OutputStream} - * for writing index bytes. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes into - * @param tempFile an intermediate {@link File} where CAGRA index is written - * temporarily - */ - public void serialize(OutputStream outputStream, File tempFile) throws Throwable { - serialize(outputStream, tempFile, 1024); - } - - /** - * A method to persist a CAGRA index using an instance of {@link OutputStream} - * and path to the intermediate temporary file. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes to - * @param tempFile an intermediate {@link File} where CAGRA index is written - * temporarily - * @param bufferLength the length of buffer to use for writing bytes. 
Default - * value is 1024 - */ - public void serialize(OutputStream outputStream, File tempFile, int bufferLength) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - serializeMethodHandle.invokeExact(resources.getMemorySegment(), cagraIndexReference.getMemorySegment(), - returnValueMemorySegment, - Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath())); - FileInputStream fileInputStream = new FileInputStream(tempFile); - byte[] chunk = new byte[bufferLength]; - int chunkLength = 0; - while ((chunkLength = fileInputStream.read(chunk)) != -1) { - outputStream.write(chunk, 0, chunkLength); - } - fileInputStream.close(); - tempFile.delete(); - } - - /** - * A method to create and persist HNSW index from CAGRA index using an instance - * of {@link OutputStream} and path to the intermediate temporary file. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes to - */ - public void serializeToHNSW(OutputStream outputStream) throws Throwable { - serializeToHNSW(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".hnsw"), 1024); - } - - /** - * A method to create and persist HNSW index from CAGRA index using an instance - * of {@link OutputStream} and path to the intermediate temporary file. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes to - * @param bufferLength the length of buffer to use for writing bytes. Default - * value is 1024 - */ - public void serializeToHNSW(OutputStream outputStream, int bufferLength) throws Throwable { - serializeToHNSW(outputStream, File.createTempFile(UUID.randomUUID().toString(), ".hnsw"), bufferLength); - } - - /** - * A method to create and persist HNSW index from CAGRA index using an instance - * of {@link OutputStream} and path to the intermediate temporary file. 
- * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes to - * @param tempFile an intermediate {@link File} where CAGRA index is written - * temporarily - */ - public void serializeToHNSW(OutputStream outputStream, File tempFile) throws Throwable { - serializeToHNSW(outputStream, tempFile, 1024); - } - - /** - * A method to create and persist HNSW index from CAGRA index using an instance - * of {@link OutputStream} and path to the intermediate temporary file. - * - * @param outputStream an instance of {@link OutputStream} to write the index - * bytes to - * @param tempFile an intermediate {@link File} where CAGRA index is written - * temporarily - * @param bufferLength the length of buffer to use for writing bytes. Default - * value is 1024 - */ - public void serializeToHNSW(OutputStream outputStream, File tempFile, int bufferLength) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - serializeCAGRAIndexToHNSWMethodHandle.invokeExact(resources.getMemorySegment(), - Util.buildMemorySegment(resources.linker, resources.arena, tempFile.getAbsolutePath()), - cagraIndexReference.getMemorySegment(), returnValueMemorySegment); - FileInputStream fileInputStream = new FileInputStream(tempFile); - byte[] chunk = new byte[bufferLength]; - int chunkLength = 0; - while ((chunkLength = fileInputStream.read(chunk)) != -1) { - outputStream.write(chunk, 0, chunkLength); - } - fileInputStream.close(); - tempFile.delete(); - } - - /** - * Gets an instance of {@link IndexReference} by deserializing a CAGRA index - * using an {@link InputStream}. - * - * @param inputStream an instance of {@link InputStream} - * @return an instance of {@link IndexReference}. 
- */ - private IndexReference deserialize(InputStream inputStream) throws Throwable { - return deserialize(inputStream, 1024); - } - - /** - * Gets an instance of {@link IndexReference} by deserializing a CAGRA index - * using an {@link InputStream}. - * - * @param inputStream an instance of {@link InputStream} - * @param bufferLength the length of the buffer to use while reading the bytes - * from the stream. Default value is 1024. - * @return an instance of {@link IndexReference}. - */ - private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - String tmpIndexFile = "/tmp/" + UUID.randomUUID().toString() + ".cag"; - IndexReference indexReference = new IndexReference(resources); - - File tempFile = new File(tmpIndexFile); - FileOutputStream fileOutputStream = new FileOutputStream(tempFile); - byte[] chunk = new byte[bufferLength]; - int chunkLength = 0; - while ((chunkLength = inputStream.read(chunk)) != -1) { - fileOutputStream.write(chunk, 0, chunkLength); - } - deserializeMethodHandle.invokeExact(resources.getMemorySegment(), indexReference.getMemorySegment(), - returnValueMemorySegment, Util.buildMemorySegment(resources.linker, resources.arena, tmpIndexFile)); - - inputStream.close(); - fileOutputStream.close(); - tempFile.delete(); - - return indexReference; - } - - /** - * Gets an instance of {@link CagraIndexParams} - * - * @return an instance of {@link CagraIndexParams} - */ - public CagraIndexParams getCagraIndexParameters() { - return cagraIndexParameters; - } - - /** - * Gets an instance of {@link CuVSResources} - * - * @return an instance of {@link CuVSResources} - */ - public CuVSResources getCuVSResources() { - return resources; - } + /** + * Invokes the native destroy_cagra_index to de-allocate the CAGRA index + */ + void destroyIndex() throws Throwable; - /** 
- * Builder helps configure and create an instance of {@link CagraIndex}. - */ - public static class Builder { + /** + * Invokes the native search_cagra_index via the Panama API for searching a + * CAGRA index. + * + * @param query an instance of {@link CagraQuery} holding the query vectors and + * other parameters + * @return an instance of {@link SearchResults} containing the results + */ + SearchResults search(CagraQuery query) throws Throwable; - private float[][] dataset; - private CagraIndexParams cagraIndexParams; - private CagraCompressionParams cagraCompressionParams; - private CuVSResources cuvsResources; - private InputStream inputStream; + /** + * A method to persist a CAGRA index using an instance of {@link OutputStream} + * for writing index bytes. + * + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes into + */ + void serialize(OutputStream outputStream) throws Throwable; /** - * Constructs this Builder with an instance of {@link CuVSResources}. + * A method to persist a CAGRA index using an instance of {@link OutputStream} + * for writing index bytes. * - * @param cuvsResources an instance of {@link CuVSResources} + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes into + * @param bufferLength the length of buffer to use for writing bytes. Default + * value is 1024 */ - public Builder(CuVSResources cuvsResources) { - this.cuvsResources = cuvsResources; - } + void serialize(OutputStream outputStream, int bufferLength) throws Throwable; /** - * Sets an instance of InputStream typically used when index deserialization is - * needed. + * A method to persist a CAGRA index using an instance of {@link OutputStream} + * for writing index bytes. 
* - * @param inputStream an instance of {@link InputStream} - * @return an instance of this Builder + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes into + * @param tempFile an intermediate {@link Path} where CAGRA index is written + * temporarily */ - public Builder from(InputStream inputStream) { - this.inputStream = inputStream; - return this; + default void serialize(OutputStream outputStream, Path tempFile) throws Throwable { + serialize(outputStream, tempFile, 1024); } /** - * Sets the dataset for building the {@link CagraIndex}. + * A method to persist a CAGRA index using an instance of {@link OutputStream} + * and path to the intermediate temporary file. * - * @param dataset a two-dimensional float array - * @return an instance of this Builder + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes to + * @param tempFile an intermediate {@link Path} where CAGRA index is written + * temporarily + * @param bufferLength the length of buffer to use for writing bytes. Default + * value is 1024 */ - public Builder withDataset(float[][] dataset) { - this.dataset = dataset; - return this; - } + void serialize(OutputStream outputStream, Path tempFile, int bufferLength) throws Throwable; /** - * Registers an instance of configured {@link CagraIndexParams} with this - * Builder. + * A method to create and persist HNSW index from CAGRA index using an instance + * of {@link OutputStream} and path to the intermediate temporary file. * - * @param cagraIndexParameters An instance of CagraIndexParams. - * @return An instance of this Builder. 
+ * @param outputStream an instance of {@link OutputStream} to write the index + * bytes to */ - public Builder withIndexParams(CagraIndexParams cagraIndexParameters) { - this.cagraIndexParams = cagraIndexParameters; - return this; - } + void serializeToHNSW(OutputStream outputStream) throws Throwable; /** - * Registers an instance of configured {@link CagraCompressionParams} with this - * Builder. + * A method to create and persist HNSW index from CAGRA index using an instance + * of {@link OutputStream} and path to the intermediate temporary file. * - * @param cagraCompressionParams An instance of CagraCompressionParams. - * @return An instance of this Builder. + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes to + * @param bufferLength the length of buffer to use for writing bytes. Default + * value is 1024 */ - public Builder withCompressionParams(CagraCompressionParams cagraCompressionParams) { - this.cagraCompressionParams = cagraCompressionParams; - return this; - } + void serializeToHNSW(OutputStream outputStream, int bufferLength) throws Throwable; /** - * Builds and returns an instance of CagraIndex. + * A method to create and persist HNSW index from CAGRA index using an instance + * of {@link OutputStream} and path to the intermediate temporary file. * - * @return an instance of CagraIndex + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes to + * @param tempFile an intermediate {@link Path} where CAGRA index is written + * temporarily */ - public CagraIndex build() throws Throwable { - if (inputStream != null) { - return new CagraIndex(inputStream, cuvsResources); - } else { - return new CagraIndex(cagraIndexParams, cagraCompressionParams, dataset, cuvsResources); - } + default void serializeToHNSW(OutputStream outputStream, Path tempFile) throws Throwable { + serializeToHNSW(outputStream, tempFile, 1024); } - } - /** - * Holds the memory reference to a CAGRA index. 
- */ - protected static class IndexReference { + /** + * A method to create and persist HNSW index from CAGRA index using an instance + * of {@link OutputStream} and path to the intermediate temporary file. + * + * @param outputStream an instance of {@link OutputStream} to write the index + * bytes to + * @param tempFile an intermediate {@link Path} where CAGRA index is written + * temporarily + * @param bufferLength the length of buffer to use for writing bytes. Default + * value is 1024 + */ + void serializeToHNSW(OutputStream outputStream, Path tempFile, int bufferLength) throws Throwable; - private final MemorySegment memorySegment; + /** + * Gets an instance of {@link CagraIndexParams} + * + * @return an instance of {@link CagraIndexParams} + */ + CagraIndexParams getCagraIndexParameters(); /** - * Constructs CagraIndexReference and allocate the MemorySegment. + * Gets an instance of {@link CuVSResources} + * + * @return an instance of {@link CuVSResources} */ - protected IndexReference(CuVSResources resources) { - memorySegment = CuVSCagraIndex.allocate(resources.arena); - } + CuVSResources getCuVSResources(); /** - * Constructs CagraIndexReference with an instance of MemorySegment passed as a - * parameter. + * Creates a new Builder with an instance of {@link CuVSResources}. * - * @param indexMemorySegment the MemorySegment instance to use for containing - * index reference + * @param cuvsResources an instance of {@link CuVSResources} + * @throws UnsupportedOperationException if the provider does not support cuVS */ - protected IndexReference(MemorySegment indexMemorySegment) { - this.memorySegment = indexMemorySegment; + static Builder newBuilder(CuVSResources cuvsResources) { + Objects.requireNonNull(cuvsResources); + return CuVSProvider.provider().newCagraIndexBuilder(cuvsResources); } /** - * Gets the instance of index MemorySegment. - * - * @return index MemorySegment + * Builder helps configure and create an instance of {@link CagraIndex}.
*/ - protected MemorySegment getMemorySegment() { - return memorySegment; + interface Builder { + + /** + * Sets an instance of InputStream typically used when index deserialization is + * needed. + * + * @param inputStream an instance of {@link InputStream} + * @return an instance of this Builder + */ + Builder from(InputStream inputStream); + + /** + * Sets the dataset for building the {@link CagraIndex}. + * + * @param dataset a two-dimensional float array + * @return an instance of this Builder + */ + Builder withDataset(float[][] dataset); + + /** + * Registers an instance of configured {@link CagraIndexParams} with this + * Builder. + * + * @param cagraIndexParameters An instance of CagraIndexParams. + * @return An instance of this Builder. + */ + Builder withIndexParams(CagraIndexParams cagraIndexParameters); + + /** + * Registers an instance of configured {@link CagraCompressionParams} with this + * Builder. + * + * @param cagraCompressionParams An instance of CagraCompressionParams. + * @return An instance of this Builder. + */ + public Builder withCompressionParams(CagraCompressionParams cagraCompressionParams); + + /** + * Builds and returns an instance of CagraIndex. + * + * @return an instance of CagraIndex + */ + CagraIndex build() throws Throwable; } - } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java index 767ee281b..11b4f4b90 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraIndexParams.java @@ -16,10 +16,6 @@ package com.nvidia.cuvs; -import java.lang.foreign.MemorySegment; - -import com.nvidia.cuvs.panama.CuVSCagraIndexParams; - /** * Supplemental parameters to build CAGRA Index. 
* @@ -29,8 +25,6 @@ public class CagraIndexParams { private final CagraGraphBuildAlgo cuvsCagraGraphBuildAlgo; private final CuvsDistanceType cuvsDistanceType; - private final MemorySegment memorySegment; - private CuVSResources resources; private final int intermediateGraphDegree; private final int graphDegree; private final int nnDescentNiter; @@ -164,28 +158,15 @@ private CuvsDistanceType(int value) { } - private CagraIndexParams(CuVSResources resources, int intermediateGraphDegree, int graphDegree, + private CagraIndexParams(int intermediateGraphDegree, int graphDegree, CagraGraphBuildAlgo CuvsCagraGraphBuildAlgo, int nnDescentNiter, int writerThreads, CuvsDistanceType cuvsDistanceType) { - this.resources = resources; this.intermediateGraphDegree = intermediateGraphDegree; this.graphDegree = graphDegree; this.cuvsCagraGraphBuildAlgo = CuvsCagraGraphBuildAlgo; this.nnDescentNiter = nnDescentNiter; this.numWriterThreads = writerThreads; this.cuvsDistanceType = cuvsDistanceType; - - this.memorySegment = initMemorySegment(); - } - - private MemorySegment initMemorySegment() { - MemorySegment indexParamsMemorySegment = CuVSCagraIndexParams.allocate(resources.arena); - CuVSCagraIndexParams.intermediate_graph_degree(indexParamsMemorySegment, intermediateGraphDegree); - CuVSCagraIndexParams.graph_degree(indexParamsMemorySegment, graphDegree); - CuVSCagraIndexParams.build_algo(indexParamsMemorySegment, cuvsCagraGraphBuildAlgo.value); - CuVSCagraIndexParams.nn_descent_niter(indexParamsMemorySegment, nnDescentNiter); - CuVSCagraIndexParams.metric(indexParamsMemorySegment, cuvsDistanceType.value); - return indexParamsMemorySegment; } /** @@ -221,10 +202,6 @@ public int getNNDescentNumIterations() { return nnDescentNiter; } - protected MemorySegment getMemorySegment() { - return memorySegment; - } - /** * Gets the {@link CuvsDistanceType} used to build the index. 
*/ @@ -251,7 +228,6 @@ public String toString() { */ public static class Builder { - private CuVSResources resources; private CagraGraphBuildAlgo cuvsCagraGraphBuildAlgo = CagraGraphBuildAlgo.NN_DESCENT; private CuvsDistanceType cuvsDistanceType = CuvsDistanceType.L2Expanded; private int intermediateGraphDegree = 128; @@ -259,9 +235,7 @@ public static class Builder { private int nnDescentNumIterations = 20; private int numWriterThreads = 2; - public Builder(CuVSResources resources) { - this.resources = resources; - } + public Builder() { } /** * Sets the degree of input graph for pruning. @@ -337,7 +311,7 @@ public Builder withNumWriterThreads(int numWriterThreads) { * @return an instance of {@link CagraIndexParams} */ public CagraIndexParams build() { - return new CagraIndexParams(resources, intermediateGraphDegree, graphDegree, cuvsCagraGraphBuildAlgo, + return new CagraIndexParams(intermediateGraphDegree, graphDegree, cuvsCagraGraphBuildAlgo, nnDescentNumIterations, numWriterThreads, cuvsDistanceType); } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java index 54dbb548e..74ce37c24 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchParams.java @@ -16,10 +16,6 @@ package com.nvidia.cuvs; -import java.lang.foreign.MemorySegment; - -import com.nvidia.cuvs.panama.CuVSCagraSearchParams; - /** * CagraSearchParams encapsulates the logic for configuring and holding search * parameters. 
@@ -28,7 +24,6 @@ */ public class CagraSearchParams { - private CuVSResources resources; private int maxQueries; private int iTopKSize; private int maxIterations; @@ -40,7 +35,6 @@ public class CagraSearchParams { private int numRandomSamplings; private float hashMapMaxFillRate; private long randXORMask; - private MemorySegment memorySegment; private SearchAlgo searchAlgo; private HashMapMode hashMapMode; @@ -143,34 +137,6 @@ private CagraSearchParams(CuVSResources resources, int maxQueries, int iTopKSize this.hashMapMaxFillRate = hashmapMaxFillRate; this.numRandomSamplings = numRandomSamplings; this.randXORMask = randXORMask; - this.resources = resources; - - this.memorySegment = allocateMemorySegment(); - } - - /** - * Allocates the configured search parameters in the MemorySegment. - */ - private MemorySegment allocateMemorySegment() { - MemorySegment memorySegment = CuVSCagraSearchParams.allocate(resources.arena); - CuVSCagraSearchParams.max_queries(memorySegment, maxQueries); - CuVSCagraSearchParams.itopk_size(memorySegment, iTopKSize); - CuVSCagraSearchParams.max_iterations(memorySegment, maxIterations); - if (searchAlgo != null) { - CuVSCagraSearchParams.algo(memorySegment, searchAlgo.value); - } - CuVSCagraSearchParams.team_size(memorySegment, teamSize); - CuVSCagraSearchParams.search_width(memorySegment, searchWidth); - CuVSCagraSearchParams.min_iterations(memorySegment, minIterations); - CuVSCagraSearchParams.thread_block_size(memorySegment, threadBlockSize); - if (hashMapMode != null) { - CuVSCagraSearchParams.hashmap_mode(memorySegment, hashMapMode.value); - } - CuVSCagraSearchParams.hashmap_min_bitlen(memorySegment, hashmapMinBitlen); - CuVSCagraSearchParams.hashmap_max_fill_rate(memorySegment, hashMapMaxFillRate); - CuVSCagraSearchParams.num_random_samplings(memorySegment, numRandomSamplings); - CuVSCagraSearchParams.rand_xor_mask(memorySegment, randXORMask); - return memorySegment; } /** @@ -273,15 +239,6 @@ public long getRandXORMask() { return 
randXORMask; } - /** - * Gets the MemorySegment holding CagraSearchParams. - * - * @return the MemorySegment holding CagraSearchParams - */ - protected MemorySegment getMemorySegment() { - return memorySegment; - } - /** * Gets which search implementation is configured. * @@ -302,12 +259,12 @@ public HashMapMode getHashMapMode() { @Override public String toString() { - return "CagraSearchParams [resources=" + resources + ", maxQueries=" + maxQueries + ", itopkSize=" + iTopKSize + return "CagraSearchParams [maxQueries=" + maxQueries + ", itopkSize=" + iTopKSize + ", maxIterations=" + maxIterations + ", cuvsCagraSearchAlgo=" + searchAlgo + ", teamSize=" + teamSize + ", searchWidth=" + searchWidth + ", minIterations=" + minIterations + ", threadBlockSize=" + threadBlockSize + ", hashMapMode=" + hashMapMode + ", hashMapMinBitlen=" + hashmapMinBitlen + ", hashMapMaxFillRate=" + hashMapMaxFillRate + ", numRandomSamplings=" + numRandomSamplings + ", randXORMask=" + randXORMask - + ", memorySegment=" + memorySegment + "]"; + + "]"; } /** diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java index dbaba3ebb..371ff01cc 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java @@ -16,137 +16,49 @@ package com.nvidia.cuvs; -import java.io.File; -import java.lang.foreign.Arena; -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.Linker; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SymbolLookup; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; +import com.nvidia.cuvs.spi.CuVSProvider; -import com.nvidia.cuvs.common.Util; +import java.nio.file.Path; /** * Used for allocating resources for cuVS * * @since 25.02 */ -public class CuVSResources implements AutoCloseable { - - public final Arena arena; 
- public final Linker linker; - public final SymbolLookup symbolLookup; - protected File nativeLibrary; - private final MethodHandle createResourcesMethodHandle; - private final MethodHandle destroyResourcesMethodHandle; - private MemorySegment resourcesMemorySegment; - private MemoryLayout intMemoryLayout; +public interface CuVSResources extends AutoCloseable { /** - * Constructor that allocates the resources needed for cuVS - * - * @throws Throwable exception thrown when native function is invoked + * Closes this resources and releases any resources associated with it. */ - public CuVSResources() throws Throwable { - linker = Linker.nativeLinker(); - arena = Arena.ofShared(); - - nativeLibrary = Util.loadNativeLibrary(); - symbolLookup = SymbolLookup.libraryLookup(nativeLibrary.getAbsolutePath(), arena); - intMemoryLayout = linker.canonicalLayouts().get("int"); - - createResourcesMethodHandle = linker.downcallHandle(symbolLookup.find("create_resources").get(), - FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - destroyResourcesMethodHandle = linker.downcallHandle(symbolLookup.find("destroy_resources").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - - createResources(); - } - - /** - * Creates the resources used internally and returns its reference. 
- * - * @throws Throwable exception thrown when native function is invoked - */ - public void createResources() throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = arena.allocate(returnValueMemoryLayout); - resourcesMemorySegment = (MemorySegment) createResourcesMethodHandle.invokeExact(returnValueMemorySegment); - } - @Override - public void close() { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = arena.allocate(returnValueMemoryLayout); - try { - destroyResourcesMethodHandle.invokeExact(resourcesMemorySegment, returnValueMemorySegment); - } catch (Throwable e) { - e.printStackTrace(); - } - if (!arena.scope().isAlive()) { - arena.close(); - } - nativeLibrary.delete(); - } + void close(); + /** - * Gets the reference to the cuvsResources MemorySegment. - * - * @return cuvsResources MemorySegment + * The temporary directory to use for intermediate operations. + * Defaults to {@systemProperty java.io.tmpdir}. */ - protected MemorySegment getMemorySegment() { - return resourcesMemorySegment; - } + Path tempDirectory(); /** - * Returns the loaded libcuvs_java_cagra.so as a {@link SymbolLookup} + * Creates a new resources. + * Equivalent to + *
{@code
+   *   create(CuVSProvider.tempDirectory())
+   * }
*/ - protected SymbolLookup getSymbolLookup() { - return symbolLookup; + static CuVSResources create() throws Throwable { + return create(CuVSProvider.tempDirectory()); } /** - * Container for GPU information + * Creates a new resources. + * + * @param tempDirectory the temporary directory to use for intermediate operations + * @throws UnsupportedOperationException if the provider does not support cuvs + * @throws LibraryException if the native library cannot be loaded */ - public class GPUInfo { - - private final int gpuId; - private final long freeMemory; - private final long totalMemory; - private final float computeCapability; - - public GPUInfo(int gpuId, long freeMemory, long totalMemory, float computeCapability) { - super(); - this.gpuId = gpuId; - this.freeMemory = freeMemory; - this.totalMemory = totalMemory; - this.computeCapability = computeCapability; - } - - public int getGpuId() { - return gpuId; - } - - public long getFreeMemory() { - return freeMemory; - } - - public long getTotalMemory() { - return totalMemory; - } - - public float getComputeCapability() { - return computeCapability; - } - - @Override - public String toString() { - return "GPUInfo [gpuId=" + gpuId + ", freeMemory=" + freeMemory + ", totalMemory=" + totalMemory - + ", computeCapability=" + computeCapability + "]"; - } - + static CuVSResources create(Path tempDirectory) throws Throwable { + return CuVSProvider.provider().newCuVSResources(tempDirectory); } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java index 48cade179..c9c3bd9f8 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/GPUInfo.java @@ -1,56 +1,28 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.nvidia.cuvs; /** * Contains GPU information + * + * @param gpuId id of the GPU starting from 0 + * @param name ASCII string identifying device + * @param freeMemory returned free memory in bytes + * @param totalMemory returned total memory in bytes + * @param computeCapability the compute capability of the device */ -public class GPUInfo { - private final int gpuId; - private final String name; - private final long freeMemory; - private final long totalMemory; - private final float computeCapability; - - /** - * Constructor for GPUInfo - * - * @param gpuId id of the GPU starting from 0 - * @param name ASCII string identifying device - * @param freeMemory returned free memory in bytes - * @param totalMemory returned total memory in bytes - * @param computeCapability the compute capability of the device - */ - public GPUInfo(int gpuId, String name, long freeMemory, long totalMemory, float computeCapability) { - super(); - this.gpuId = gpuId; - this.name = name; - this.freeMemory = freeMemory; - this.totalMemory = totalMemory; - this.computeCapability = computeCapability; - } - - public int getGpuId() { - return gpuId; - } - - public String getName() { - return name; - } - - public long getFreeMemory() { - return freeMemory; - } - - public long getTotalMemory() { - return totalMemory; - } - - public float getComputeCapability() { - return computeCapability; - } - - @Override - public String toString() { - return "GPUInfo [gpuId=" + gpuId + ", name=" + name + ", freeMemory=" + freeMemory + ", totalMemory=" + totalMemory - + ", 
computeCapability=" + computeCapability + "]"; - } -} +public record GPUInfo(int gpuId, String name, long freeMemory, long totalMemory, float computeCapability) { } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java index d55308a1b..ebd7776a8 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndex.java @@ -16,21 +16,10 @@ package com.nvidia.cuvs; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; import java.io.InputStream; -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SequenceLayout; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.util.UUID; +import java.util.Objects; -import com.nvidia.cuvs.common.Util; -import com.nvidia.cuvs.panama.CuVSHnswIndex; +import com.nvidia.cuvs.spi.CuVSProvider; /** * {@link HnswIndex} encapsulates a HNSW index, along with methods to interact @@ -38,226 +27,62 @@ * * @since 25.02 */ -public class HnswIndex { - - private final CuVSResources resources; - private MethodHandle deserializeHnswIndexMethodHandle; - private MethodHandle searchHnswIndexMethodHandle; - private MethodHandle destroyHnswIndexMethodHandle; - private HnswIndexParams hnswIndexParams; - private IndexReference hnswIndexReference; - private MemoryLayout longMemoryLayout; - private MemoryLayout intMemoryLayout; - private MemoryLayout floatMemoryLayout; - - /** - * Constructor for loading the index from an {@link InputStream} - * - * @param inputStream an instance of stream to read the index bytes from - * @param resources an instance of {@link CuVSResources} - */ - private HnswIndex(InputStream inputStream, CuVSResources resources, HnswIndexParams hnswIndexParams) - throws Throwable { - 
this.hnswIndexParams = hnswIndexParams; - this.resources = resources; - - longMemoryLayout = resources.linker.canonicalLayouts().get("long"); - intMemoryLayout = resources.linker.canonicalLayouts().get("int"); - floatMemoryLayout = resources.linker.canonicalLayouts().get("float"); - - initializeMethodHandles(); - this.hnswIndexReference = deserialize(inputStream); - } - - /** - * Initializes the {@link MethodHandles} for invoking native methods. - * - * @throws IOException @{@link IOException} is unable to load the native library - */ - private void initializeMethodHandles() throws IOException { - deserializeHnswIndexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("deserialize_hnsw_index").get(), FunctionDescriptor.of(ValueLayout.ADDRESS, - ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout)); - - searchHnswIndexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("search_hnsw_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, - ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, intMemoryLayout, intMemoryLayout, - longMemoryLayout)); - - destroyHnswIndexMethodHandle = resources.linker.downcallHandle( - resources.getSymbolLookup().find("destroy_hnsw_index").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - } - - /** - * Invokes the native destroy_hnsw_index to de-allocate the HNSW index - */ - public void destroyIndex() throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - destroyHnswIndexMethodHandle.invokeExact(hnswIndexReference.getMemorySegment(), returnValueMemorySegment); - } - - /** - * Invokes the native search_hnsw_index via the Panama API for searching a HNSW - * index. 
- * - * @param query an instance of {@link HnswQuery} holding the query vectors and - * other parameters - * @return an instance of {@link HnswSearchResults} containing the results - */ - public HnswSearchResults search(HnswQuery query) throws Throwable { - long numQueries = query.getQueryVectors().length; - long numBlocks = query.getTopK() * numQueries; - int vectorDimension = numQueries > 0 ? query.getQueryVectors()[0].length : 0; - - SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, longMemoryLayout); - SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, floatMemoryLayout); - MemorySegment neighborsMemorySegment = resources.arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = resources.arena.allocate(distancesSequenceLayout); - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - - searchHnswIndexMethodHandle.invokeExact(resources.getMemorySegment(), hnswIndexReference.getMemorySegment(), - query.getHnswSearchParams().getHnswSearchParamsMemorySegment(), returnValueMemorySegment, - neighborsMemorySegment, distancesMemorySegment, - Util.buildMemorySegment(resources.linker, resources.arena, query.getQueryVectors()), query.getTopK(), - vectorDimension, numQueries); - - return new HnswSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment, - distancesMemorySegment, query.getTopK(), query.getMapping(), numQueries); - } - - /** - * Gets an instance of {@link IndexReference} by deserializing a HNSW index - * using an {@link InputStream}. - * - * @param inputStream an instance of {@link InputStream} - * @return an instance of {@link IndexReference}. 
- */ - private IndexReference deserialize(InputStream inputStream) throws Throwable { - return deserialize(inputStream, 1024); - } - - /** - * Gets an instance of {@link IndexReference} by deserializing a HNSW index - * using an {@link InputStream}. - * - * @param inputStream an instance of {@link InputStream} - * @param bufferLength the length of the buffer to use while reading the bytes - * from the stream. Default value is 1024. - * @return an instance of {@link IndexReference}. - */ - private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable { - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = resources.arena.allocate(returnValueMemoryLayout); - String tmpIndexFile = "/tmp/" + UUID.randomUUID().toString() + ".hnsw"; - - File tempFile = new File(tmpIndexFile); - FileOutputStream fileOutputStream = new FileOutputStream(tempFile); - byte[] chunk = new byte[bufferLength]; - int chunkLength = 0; - while ((chunkLength = inputStream.read(chunk)) != -1) { - fileOutputStream.write(chunk, 0, chunkLength); - } - - IndexReference indexReference = new IndexReference((MemorySegment) deserializeHnswIndexMethodHandle.invokeExact( - resources.getMemorySegment(), Util.buildMemorySegment(resources.linker, resources.arena, tmpIndexFile), - hnswIndexParams.getHnswIndexParamsMemorySegment(), returnValueMemorySegment, - hnswIndexParams.getVectorDimension())); - - inputStream.close(); - fileOutputStream.close(); - tempFile.delete(); - - return indexReference; - } - - /** - * Builder helps configure and create an instance of {@link HnswIndex}. - */ - public static class Builder { - - private CuVSResources cuvsResources; - private InputStream inputStream; - private HnswIndexParams hnswIndexParams; +public interface HnswIndex { /** - * Constructs this Builder with an instance of {@link CuVSResources}. 
- * - * @param cuvsResources an instance of {@link CuVSResources} + * Invokes the native destroy_hnsw_index to de-allocate the HNSW index */ - public Builder(CuVSResources cuvsResources) { - this.cuvsResources = cuvsResources; - } + void destroyIndex() throws Throwable; /** - * Sets an instance of InputStream typically used when index deserialization is - * needed. + * Invokes the native search_hnsw_index via the Panama API for searching a HNSW + * index. * - * @param inputStream an instance of {@link InputStream} - * @return an instance of this Builder + * @param query an instance of {@link HnswQuery} holding the query vectors and + * other parameters + * @return an instance of {@link SearchResults} containing the results */ - public Builder from(InputStream inputStream) { - this.inputStream = inputStream; - return this; - } + SearchResults search(HnswQuery query) throws Throwable; /** - * Registers an instance of configured {@link HnswIndexParams} with this - * Builder. + * Creates a new Builder with an instance of {@link CuVSResources}. * - * @param hnswIndexParameters An instance of HnswIndexParams. - * @return An instance of this Builder. - */ - public Builder withIndexParams(HnswIndexParams hnswIndexParameters) { - this.hnswIndexParams = hnswIndexParameters; - return this; - } - - /** - * Builds and returns an instance of CagraIndex. - * - * @return an instance of CagraIndex - */ - public HnswIndex build() throws Throwable { - return new HnswIndex(inputStream, cuvsResources, hnswIndexParams); - } - } - - /** - * Holds the memory reference to a HNSW index. - */ - protected static class IndexReference { - - private final MemorySegment memorySegment; - - /** - * Constructs CagraIndexReference and allocate the MemorySegment. - */ - protected IndexReference(CuVSResources resources) { - memorySegment = CuVSHnswIndex.allocate(resources.arena); - } - - /** - * Constructs CagraIndexReference with an instance of MemorySegment passed as a - * parameter. 
- * - * @param indexMemorySegment the MemorySegment instance to use for containing - * index reference + * @param cuvsResources an instance of {@link CuVSResources} + * @throws UnsupportedOperationException if the provider does not support cuvs */ - protected IndexReference(MemorySegment indexMemorySegment) { - this.memorySegment = indexMemorySegment; + static HnswIndex.Builder newBuilder(CuVSResources cuvsResources) { + Objects.requireNonNull(cuvsResources); + return CuVSProvider.provider().newHnswIndexBuilder(cuvsResources); } /** - * Gets the instance of index MemorySegment. - * - * @return index MemorySegment + * Builder helps configure and create an instance of {@link HnswIndex}. */ - protected MemorySegment getMemorySegment() { - return memorySegment; + interface Builder { + + /** + * Sets an instance of InputStream typically used when index deserialization is + * needed. + * + * @param inputStream an instance of {@link InputStream} + * @return an instance of this Builder + */ + Builder from(InputStream inputStream); + + /** + * Registers an instance of configured {@link HnswIndexParams} with this + * Builder. + * + * @param hnswIndexParameters An instance of HnswIndexParams. + * @return An instance of this Builder. + */ + Builder withIndexParams(HnswIndexParams hnswIndexParameters); + + /** + * Builds and returns an instance of HnswIndex. + * + * @return an instance of HnswIndex + */ + HnswIndex build() throws Throwable; } - } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java index ef06adf61..3960d5dae 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswIndexParams.java @@ -16,10 +16,6 @@ package com.nvidia.cuvs; -import java.lang.foreign.MemorySegment; - -import com.nvidia.cuvs.panama.CuVSHnswIndexParams; - /** * Supplemental parameters to build HNSW index.
* @@ -55,35 +51,17 @@ private CuvsHnswHierarchy(int value) { } }; - private CuVSResources resources; - private MemorySegment memorySegment; private CuvsHnswHierarchy hierarchy = CuvsHnswHierarchy.NONE; private int efConstruction = 200; private int numThreads = 2; private int vectorDimension; - private HnswIndexParams(CuVSResources resources, CuvsHnswHierarchy hierarchy, int efConstruction, int numThreads, + private HnswIndexParams(CuvsHnswHierarchy hierarchy, int efConstruction, int numThreads, int vectorDimension) { - this.resources = resources; this.hierarchy = hierarchy; this.efConstruction = efConstruction; this.numThreads = numThreads; this.vectorDimension = vectorDimension; - this.memorySegment = allocateMemorySegment(); - } - - /** - * Allocates the configured search parameters in the MemorySegment. - */ - private MemorySegment allocateMemorySegment() { - MemorySegment memorySegment = CuVSHnswIndexParams.allocate(resources.arena); - CuVSHnswIndexParams.ef_construction(memorySegment, efConstruction); - CuVSHnswIndexParams.num_threads(memorySegment, numThreads); - return memorySegment; - } - - public MemorySegment getHnswIndexParamsMemorySegment() { - return memorySegment; } /** @@ -118,10 +96,6 @@ public int getVectorDimension() { return vectorDimension; } - public CuVSResources getResources() { - return resources; - } - @Override public String toString() { return "HnswIndexParams [hierarchy=" + hierarchy + ", efConstruction=" + efConstruction + ", numThreads=" @@ -133,7 +107,6 @@ public String toString() { */ public static class Builder { - private CuVSResources resources; private CuvsHnswHierarchy hierarchy = CuvsHnswHierarchy.NONE; private int efConstruction = 200; private int numThreads = 2; @@ -141,12 +114,8 @@ public static class Builder { /** * Constructs this Builder with an instance of Arena. 
- * - * @param resources the {@link CuVSResources} instance to use */ - public Builder(CuVSResources resources) { - this.resources = resources; - } + public Builder() { } /** * Sets the hierarchy for HNSW index when converting from CAGRA index. @@ -205,7 +174,7 @@ public Builder withVectorDimension(int vectorDimension) { * @return an instance of {@link HnswIndexParams} */ public HnswIndexParams build() { - return new HnswIndexParams(resources, hierarchy, efConstruction, numThreads, vectorDimension); + return new HnswIndexParams(hierarchy, efConstruction, numThreads, vectorDimension); } } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java index a2725e6de..26b91a5a4 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchParams.java @@ -16,92 +16,40 @@ package com.nvidia.cuvs; -import java.lang.foreign.MemorySegment; - -import com.nvidia.cuvs.panama.CuVSHnswSearchParams; - /** * HnswSearchParams encapsulates the logic for configuring and holding search * parameters for HNSW index. * + * @param ef the ef value + * @param numThreads the number of threads * @since 25.02 */ -public class HnswSearchParams { - - private CuVSResources resources; - private MemorySegment memorySegment; - private int ef = 200; - private int numThreads = 0; - - /** - * Constructs an instance of HnswSearchParams with passed search parameters. - * - * @param resources the resources instance to use - * @param ef the ef value - * @param numThreads the number of threads - * - */ - private HnswSearchParams(CuVSResources resources, int ef, int numThreads) { - this.resources = resources; - this.ef = ef; - this.numThreads = numThreads; - this.memorySegment = allocateMemorySegment(); - } - - /** - * Allocates the configured search parameters in the MemorySegment. 
- */ - private MemorySegment allocateMemorySegment() { - MemorySegment memorySegment = CuVSHnswSearchParams.allocate(resources.arena); - CuVSHnswSearchParams.ef(memorySegment, ef); - CuVSHnswSearchParams.num_threads(memorySegment, numThreads); - return memorySegment; - } - - public MemorySegment getHnswSearchParamsMemorySegment() { - return memorySegment; - } - - /** - * Gets the ef value - * - * @return the integer ef value - */ - public int getEf() { - return ef; - } - - /** - * Gets the number of threads - * - * @return the number of threads - */ - public int getNumThreads() { - return numThreads; - } - - @Override - public String toString() { - return "HnswSearchParams [ef=" + ef + ", numThreads=" + numThreads + "]"; - } +public record HnswSearchParams (int ef, int numThreads) { + + public HnswSearchParams { + if (ef < 0) { + throw new IllegalArgumentException(); + } + if (numThreads < 0) { + throw new IllegalArgumentException(); + } + } /** * Builder configures and creates an instance of HnswSearchParams. */ public static class Builder { - private CuVSResources resources; - private int ef = 200; - private int numThreads = 0; + private static final int DEFAULT_EF_VALUE = 200; + private static final int DEFAULT_NUM_THREADS = 0; + + private int ef = DEFAULT_EF_VALUE; + private int numThreads = DEFAULT_NUM_THREADS; /** * Constructs this Builder with an instance of Arena. 
- * - * @param resources the {@link CuVSResources} instance to use */ - public Builder(CuVSResources resources) { - this.resources = resources; - } + public Builder() { } /** * Sets the ef value @@ -131,7 +79,7 @@ public Builder withNumThreads(int numThreads) { * @return an instance of HnswSearchParams */ public HnswSearchParams build() { - return new HnswSearchParams(resources, ef, numThreads); + return new HnswSearchParams(ef, numThreads); } } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java index 40018be92..ace825a95 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/LibraryException.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.nvidia.cuvs; public class LibraryException extends RuntimeException { diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/SearchResults.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/SearchResults.java new file mode 100644 index 000000000..c01ea04a3 --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/SearchResults.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs; + +import java.util.List; +import java.util.Map; + +public interface SearchResults { + + /** + * Gets a list of results as a map of neighbor IDs to distances. + * + * @return a list of results for each query as a map of neighbor IDs to distance + */ + List> getResults(); +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java deleted file mode 100644 index 163ef3a84..000000000 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package com.nvidia.cuvs.common; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.lang.foreign.Arena; -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.Linker; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemoryLayout.PathElement; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SymbolLookup; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; -import java.lang.invoke.VarHandle; -import java.util.ArrayList; -import java.util.List; - -import com.nvidia.cuvs.GPUInfo; -import com.nvidia.cuvs.LibraryException; -import com.nvidia.cuvs.panama.GpuInfo; - -public class Util { - - private static Arena arena = null; - private static Linker linker = null; - private static SymbolLookup symbolLookup = null; - private static MemoryLayout intMemoryLayout; - private static MethodHandle getGpuInfoMethodHandle = null; - protected static File nativeLibrary; - - static { - try { - linker = Linker.nativeLinker(); - arena = Arena.ofShared(); - nativeLibrary = Util.loadLibraryFromJar("/libcuvs_java.so"); - symbolLookup = SymbolLookup.libraryLookup(nativeLibrary.getAbsolutePath(), arena); - intMemoryLayout = linker.canonicalLayouts().get("int"); - getGpuInfoMethodHandle = linker.downcallHandle(symbolLookup.find("get_gpu_info").get(), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); - } catch (Exception e) { - throw new LibraryException("LibCuVS Java Library Not Loaded", e); - } - } - - /** - * Get the list of compatible GPUs based on compute capability >= 7.0 and total - * memory >= 8GB - * - * @return a list of compatible GPUs. 
See {@link GPUInfo} - */ - public static List compatibleGPUs() throws Throwable { - return compatibleGPUs(7.0, 8192); - } - - /** - * Get the list of compatible GPUs based on given compute capability and total - * memory - * - * @param minComputeCapability the minimum compute capability - * @param minDeviceMemoryMB the minimum total available memory in MB - * @return a list of compatible GPUs. See {@link GPUInfo} - */ - public static List compatibleGPUs(double minComputeCapability, int minDeviceMemoryMB) throws Throwable { - List compatibleGPUs = new ArrayList(); - double minDeviceMemoryB = Math.pow(2, 20) * minDeviceMemoryMB; - for (GPUInfo gpuInfo : availableGPUs()) { - if (gpuInfo.getComputeCapability() >= minComputeCapability && gpuInfo.getTotalMemory() >= minDeviceMemoryB) { - compatibleGPUs.add(gpuInfo); - } - } - return compatibleGPUs; - } - - /** - * Gets all the available GPUs - * - * @return a list of {@link GPUInfo} objects with GPU details - */ - public static List availableGPUs() throws Throwable { - List results = new ArrayList(); - MemoryLayout returnValueMemoryLayout = intMemoryLayout; - MemorySegment returnValueMemorySegment = arena.allocate(returnValueMemoryLayout); - MemoryLayout numGpuMemoryLayout = intMemoryLayout; - MemorySegment numGpuMemorySegment = arena.allocate(numGpuMemoryLayout); - - /* - * Setting a value of 1024 because we cannot predict how much memory to allocate - * before the function is invoked as cudaGetDeviceCount is inside the - * get_gpu_info function. 
- */ - MemorySegment GpuInfoArrayMemorySegment = GpuInfo.allocateArray(1024, arena); - getGpuInfoMethodHandle.invokeExact(returnValueMemorySegment, numGpuMemorySegment, GpuInfoArrayMemorySegment); - int numGPUs = numGpuMemorySegment.get(ValueLayout.JAVA_INT, 0); - MemoryLayout ml = MemoryLayout.sequenceLayout(numGPUs, GpuInfo.layout()); - for (int i = 0; i < numGPUs; i++) { - VarHandle gpuIdVarHandle = ml.varHandle(PathElement.sequenceElement(i), PathElement.groupElement("gpu_id")); - VarHandle freeMemoryVarHandle = ml.varHandle(PathElement.sequenceElement(i), - PathElement.groupElement("free_memory")); - VarHandle totalMemoryVarHandle = ml.varHandle(PathElement.sequenceElement(i), - PathElement.groupElement("total_memory")); - VarHandle ComputeCapabilityVarHandle = ml.varHandle(PathElement.sequenceElement(i), - PathElement.groupElement("compute_capability")); - StringBuilder gpuName = new StringBuilder(); - char b = 1; - int p = 0; - while (b != 0x00) { - VarHandle gpuNameVarHandle = ml.varHandle(PathElement.sequenceElement(i), PathElement.groupElement("name"), - PathElement.sequenceElement(p++)); - b = (char) (byte) gpuNameVarHandle.get(GpuInfoArrayMemorySegment, 0L); - gpuName.append(b); - } - results.add(new GPUInfo((int) gpuIdVarHandle.get(GpuInfoArrayMemorySegment, 0L), gpuName.toString().trim(), - (long) freeMemoryVarHandle.get(GpuInfoArrayMemorySegment, 0L), - (long) totalMemoryVarHandle.get(GpuInfoArrayMemorySegment, 0L), - (float) ComputeCapabilityVarHandle.get(GpuInfoArrayMemorySegment, 0L))); - } - return results; - } - - /** - * A utility method for getting an instance of {@link MemorySegment} for a - * {@link String}. 
- * - * @param str the string for the expected {@link MemorySegment} - * @return an instance of {@link MemorySegment} - */ - public static MemorySegment buildMemorySegment(Linker linker, Arena arena, String str) { - MemoryLayout charMemoryLayout = linker.canonicalLayouts().get("char"); - StringBuilder sb = new StringBuilder(str).append('\0'); - MemoryLayout stringMemoryLayout = MemoryLayout.sequenceLayout(sb.length(), charMemoryLayout); - MemorySegment stringMemorySegment = arena.allocate(stringMemoryLayout); - - for (int i = 0; i < sb.length(); i++) { - VarHandle varHandle = stringMemoryLayout.varHandle(PathElement.sequenceElement(i)); - varHandle.set(stringMemorySegment, 0L, (byte) sb.charAt(i)); - } - return stringMemorySegment; - } - - /** - * A utility method for building a {@link MemorySegment} for a 1D long array. - * - * @param data The 1D long array for which the {@link MemorySegment} is needed - * @return an instance of {@link MemorySegment} - */ - public static MemorySegment buildMemorySegment(Linker linker, Arena arena, long[] data) { - int cells = data.length; - MemoryLayout longMemoryLayout = linker.canonicalLayouts().get("long"); - MemoryLayout dataMemoryLayout = MemoryLayout.sequenceLayout(cells, longMemoryLayout); - MemorySegment dataMemorySegment = arena.allocate(dataMemoryLayout); - MemorySegment.copy(data, 0, dataMemorySegment, (ValueLayout) longMemoryLayout, 0, cells); - return dataMemorySegment; - } - - /** - * A utility method for building a {@link MemorySegment} for a 2D float array. - * - * @param data The 2D float array for which the {@link MemorySegment} is needed - * @return an instance of {@link MemorySegment} - */ - public static MemorySegment buildMemorySegment(Linker linker, Arena arena, float[][] data) { - long rows = data.length; - long cols = rows > 0 ? 
data[0].length : 0; - MemoryLayout floatMemoryLayout = linker.canonicalLayouts().get("float"); - MemoryLayout dataMemoryLayout = MemoryLayout.sequenceLayout(rows * cols, floatMemoryLayout); - MemorySegment dataMemorySegment = arena.allocate(dataMemoryLayout); - long floatByteSize = floatMemoryLayout.byteSize(); - - for (int r = 0; r < rows; r++) { - MemorySegment.copy(data[r], 0, dataMemorySegment, (ValueLayout) floatMemoryLayout, (r * cols * floatByteSize), - (int) cols); - } - - return dataMemorySegment; - } - - /** - * Load the CuVS .so file from environment variable CUVS_JAVA_SO_PATH. If not - * found there, try to load it from the classpath to a temporary file. - */ - public static File loadNativeLibrary() throws IOException { - String libraryPathFromEnvironment = System.getenv("CUVS_JAVA_SO_PATH"); - if (libraryPathFromEnvironment != null) { - File file = new File(libraryPathFromEnvironment); - if (!file.exists()) - throw new RuntimeException( - "Environment variable CUVS_JAVA_SO_PATH points to non-existent file: " + libraryPathFromEnvironment); - return file; - } - return loadLibraryFromJar("/libcuvs_java.so"); - } - - private static File loadLibraryFromJar(String path) throws IOException { - if (!path.startsWith("/")) { - throw new IllegalArgumentException("The path has to be absolute (start with '/')."); - } - // Obtain filename from path - String[] parts = path.split("/"); - String filename = (parts.length > 1) ? parts[parts.length - 1] : null; - - // Split filename to prefix and suffix (extension) - String prefix = ""; - String suffix = null; - if (filename != null) { - parts = filename.split("\\.", 2); - prefix = parts[0]; - suffix = (parts.length > 1) ? "." 
+ parts[parts.length - 1] : null; - } - // Prepare temporary file - File temp = File.createTempFile(prefix, suffix); - InputStream libraryStream = Util.class.getModule().getResourceAsStream(path); // Util.class.getResourceAsStream(path); - streamCopy(libraryStream, new FileOutputStream(temp)); - - return temp; - } - - private static void streamCopy(InputStream is, OutputStream os) throws LibraryException { - if (is == null) { - throw new LibraryException("CuVS Library Not Found in ClassPath"); - } - byte[] buffer = new byte[1024]; - int readBytes; - - try { - while ((readBytes = is.read(buffer)) != -1) { - os.write(buffer, 0, readBytes); - } - } catch (IOException e) { - throw new LibraryException(e); - } finally { - // If read/write fails, close streams safely before throwing an exception - if (os != null) - try { - os.close(); - } catch (IOException e) { - e.printStackTrace(); - } - if (is != null) - try { - is.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } -} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSProvider.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSProvider.java new file mode 100644 index 000000000..04bce4cc7 --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSProvider.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.cuvs.spi; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.HnswIndex; + +import java.nio.file.Path; + +/** + * A provider of low-level cuvs resources and builders. + */ +public interface CuVSProvider { + + Path TMPDIR = Path.of(System.getProperty("java.io.tmpdir")); + + /** + * The temporary directory to use for intermediate operations. + * Defaults to {@systemProperty java.io.tmpdir}. + */ + static Path tempDirectory() { + return TMPDIR; + } + + /** + * The directory where to extract and install the native library. + * Defaults to {@systemProperty java.io.tmpdir}. + */ + default Path nativeLibraryPath() { + return TMPDIR; + } + + /** Creates a new CuVSResources. */ + CuVSResources newCuVSResources(Path tempDirectory) + throws Throwable; + + /** Creates a new BruteForceIndex Builder. */ + BruteForceIndex.Builder newBruteForceIndexBuilder(CuVSResources cuVSResources) + throws UnsupportedOperationException; + + /** Creates a new CagraIndex Builder. */ + CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) + throws UnsupportedOperationException; + + /** Creates a new HnswIndex Builder. */ + HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) + throws UnsupportedOperationException; + + /** Retrieves the system-wide provider. */ + static CuVSProvider provider() { + return CuVSServiceProvider.Holder.INSTANCE; + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSServiceProvider.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSServiceProvider.java new file mode 100644 index 000000000..e81126366 --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/CuVSServiceProvider.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs.spi; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.util.ServiceLoader; + +/** + * Service-provider class for {@linkplain CuVSProvider}. + */ +public abstract class CuVSServiceProvider { + + /** + * Initializes and returns a {@link CuVSProvider} provided by this provider. + * @param builtinProvider the built-in provider. + * @return the CuVSProvider provided by this provider + */ + public abstract CuVSProvider get(CuVSProvider builtinProvider); + + static class Holder { + static final CuVSProvider INSTANCE = loadProvider(); + + private static CuVSProvider loadProvider() { + var builtinProvider = builtinProvider(); + return ServiceLoader.load(CuVSServiceProvider.class) + .findFirst() + .map(p -> p.get(builtinProvider)) + .orElse(builtinProvider); + } + + static CuVSProvider builtinProvider() { + if (Runtime.version().feature() > 21 && isLinuxAmd64()) { + try { + var cls = Class.forName("com.nvidia.cuvs.spi.JDKProvider"); + var ctr = MethodHandles.lookup().findConstructor(cls, MethodType.methodType(void.class)); + return (CuVSProvider) ctr.invoke(); + } catch (Throwable e) { + throw new AssertionError(e); + } + } + return new UnsupportedProvider(); + } + + /** + * Returns true iff the architecture is x64 (amd64) and the OS is Linux + * (the OS we currently support for the native lib).
+ */ + static boolean isLinuxAmd64() { + String name = System.getProperty("os.name"); + return (name.startsWith("Linux")) && System.getProperty("os.arch").equals("amd64"); + } + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/UnsupportedProvider.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/UnsupportedProvider.java new file mode 100644 index 000000000..7dc820750 --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/spi/UnsupportedProvider.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs.spi; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.HnswIndex; + +import java.nio.file.Path; + +/** + * A provider that unconditionally throws UnsupportedOperationException. 
+ */ +final class UnsupportedProvider implements CuVSProvider { + + @Override + public CuVSResources newCuVSResources(Path tempDirectory) { + throw new UnsupportedOperationException(); + } + + @Override + public BruteForceIndex.Builder newBruteForceIndexBuilder(CuVSResources cuVSResources) { + throw new UnsupportedOperationException(); + } + + @Override + public CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) { + throw new UnsupportedOperationException(); + } + + @Override + public HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) { + throw new UnsupportedOperationException(); + } +} diff --git a/java/cuvs-java/src/main/java/module-info.java b/java/cuvs-java/src/main/java/module-info.java index 468252f22..5bcc9990f 100644 --- a/java/cuvs-java/src/main/java/module-info.java +++ b/java/cuvs-java/src/main/java/module-info.java @@ -16,4 +16,7 @@ module com.nvidia.cuvs { exports com.nvidia.cuvs; + exports com.nvidia.cuvs.spi; + + uses com.nvidia.cuvs.spi.CuVSServiceProvider; } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java new file mode 100644 index 000000000..7ea2801f3 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.cuvs.internal; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SequenceLayout; +import java.lang.invoke.MethodHandle; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; +import java.util.UUID; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.BruteForceIndexParams; +import com.nvidia.cuvs.BruteForceQuery; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.SearchResults; +import com.nvidia.cuvs.internal.common.Util; +import com.nvidia.cuvs.internal.panama.CuVSBruteForceIndex; + +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG; +import static com.nvidia.cuvs.internal.common.LinkerHelper.downcallHandle; +import static com.nvidia.cuvs.internal.common.Util.checkError; +import static java.lang.foreign.ValueLayout.ADDRESS; + +/** + * + * {@link BruteForceIndex} encapsulates a BRUTEFORCE index, along with methods + * to interact with it. 
+ * + * @since 25.02 + */ +public class BruteForceIndexImpl implements BruteForceIndex{ + + private static final MethodHandle indexMethodHandle = downcallHandle("build_brute_force_index", + FunctionDescriptor.of(ADDRESS, ADDRESS, C_LONG, C_LONG, ADDRESS, ADDRESS, C_INT)); + + private static final MethodHandle searchMethodHandle = downcallHandle("search_brute_force_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, C_INT, C_LONG, C_INT, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_LONG, C_LONG)); + + private static final MethodHandle destroyIndexMethodHandle = downcallHandle("destroy_brute_force_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS)); + + private static final MethodHandle serializeMethodHandle = downcallHandle("serialize_brute_force_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS)); + + private static final MethodHandle deserializeMethodHandle = downcallHandle("deserialize_brute_force_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS)); + + private final float[][] dataset; + private final CuVSResourcesImpl resources; + private final IndexReference bruteForceIndexReference; + private final BruteForceIndexParams bruteForceIndexParams; + private boolean destroyed; + + /** + * Constructor for building the index using specified dataset + * + * @param dataset the dataset used for creating the BRUTEFORCE + * index + * @param resources an instance of {@link CuVSResourcesImpl} + * @param bruteForceIndexParams an instance of {@link BruteForceIndexParams} + * holding the index parameters + */ + private BruteForceIndexImpl(float[][] dataset, CuVSResourcesImpl resources, BruteForceIndexParams bruteForceIndexParams) + throws Throwable { + this.dataset = dataset; + this.resources = resources; + this.bruteForceIndexParams = bruteForceIndexParams; + this.bruteForceIndexReference = build(); + } + + /** + * Constructor for loading the index from an {@link InputStream} + * + * @param inputStream an instance of stream to 
read the index bytes from + * @param resources an instance of {@link CuVSResourcesImpl} + */ + private BruteForceIndexImpl(InputStream inputStream, CuVSResourcesImpl resources) throws Throwable { + this.bruteForceIndexParams = null; + this.dataset = null; + this.resources = resources; + this.bruteForceIndexReference = deserialize(inputStream); + } + + private void checkNotDestroyed() { + if (destroyed) { + throw new IllegalStateException("destroyed"); + } + } + + /** + * Invokes the native destroy_brute_force_index function to de-allocate + * BRUTEFORCE index + */ + @Override + public void destroyIndex() throws Throwable { + checkNotDestroyed(); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + destroyIndexMethodHandle.invokeExact(bruteForceIndexReference.getMemorySegment(), returnValue); + checkError(returnValue.get(C_INT, 0L), "destroyIndexMethodHandle"); + } finally { + destroyed = true; + } + } + + /** + * Invokes the native build_brute_force_index function via the Panama API to + * build the {@link BruteForceIndex} + * + * @return an instance of {@link IndexReference} that holds the pointer to the + * index + */ + private IndexReference build() throws Throwable { + long rows = dataset.length; + long cols = rows > 0 ? dataset[0].length : 0; + + MemorySegment dataSeg = Util.buildMemorySegment(resources.getArena(), dataset); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + MemorySegment indexSeg = (MemorySegment) indexMethodHandle.invokeExact( + dataSeg, + rows, + cols, + resources.getMemorySegment(), + returnValue, + bruteForceIndexParams.getNumWriterThreads() + ); + checkError(returnValue.get(C_INT, 0L), "indexMethodHandle"); + return new IndexReference(indexSeg); + } + } + + /** + * Invokes the native search_brute_force_index via the Panama API for searching + * a BRUTEFORCE index. 
+ * + * @param cuvsQuery an instance of {@link BruteForceQuery} holding the query + * vectors and other parameters + * @return an instance of {@link BruteForceSearchResults} containing the results + */ + @Override + public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { + checkNotDestroyed(); + long numQueries = cuvsQuery.getQueryVectors().length; + long numBlocks = cuvsQuery.getTopK() * numQueries; + int vectorDimension = numQueries > 0 ? cuvsQuery.getQueryVectors()[0].length : 0; + long prefilterDataLength = cuvsQuery.getPrefilter() != null ? cuvsQuery.getPrefilter().length : 0; + long numRows = dataset != null ? dataset.length : 0; + + SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_LONG); + SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); + MemorySegment neighborsMemorySegment = resources.getArena().allocate(neighborsSequenceLayout); + MemorySegment distancesMemorySegment = resources.getArena().allocate(distancesSequenceLayout); + MemorySegment prefilterDataMemorySegment = cuvsQuery.getPrefilter() != null + ? 
Util.buildMemorySegment(resources.getArena(), cuvsQuery.getPrefilter()) + : MemorySegment.NULL; + MemorySegment querySeg = Util.buildMemorySegment(resources.getArena(), cuvsQuery.getQueryVectors()); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + searchMethodHandle.invokeExact( + bruteForceIndexReference.getMemorySegment(), + querySeg, + cuvsQuery.getTopK(), + numQueries, + vectorDimension, + resources.getMemorySegment(), + neighborsMemorySegment, + distancesMemorySegment, + returnValue, + prefilterDataMemorySegment, + prefilterDataLength, numRows + ); + checkError(returnValue.get(C_INT, 0L), "searchMethodHandle"); + } + return new BruteForceSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment, + distancesMemorySegment, cuvsQuery.getTopK(), cuvsQuery.getMapping(), numQueries); + } + + @Override + public void serialize(OutputStream outputStream) throws Throwable { + Path p = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".bf"); + serialize(outputStream, p); + } + + @Override + public void serialize(OutputStream outputStream, Path tempFile) throws Throwable { + checkNotDestroyed(); + tempFile = tempFile.toAbsolutePath(); + MemorySegment pathSeg = Util.buildMemorySegment(resources.getArena(), tempFile.toString()); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + serializeMethodHandle.invokeExact( + resources.getMemorySegment(), + bruteForceIndexReference.getMemorySegment(), + returnValue, + pathSeg + ); + checkError(returnValue.get(C_INT, 0L), "serializeMethodHandle"); + + try (FileInputStream fileInputStream = new FileInputStream(tempFile.toFile())) { + fileInputStream.transferTo(outputStream); + } finally { + Files.deleteIfExists(tempFile); + } + } + } + + /** + * Gets an instance of {@link IndexReference} by deserializing a BRUTEFORCE + * index using an {@link InputStream}. 
+ * + * @param inputStream an instance of {@link InputStream} + * @return an instance of {@link IndexReference}. + */ + private IndexReference deserialize(InputStream inputStream) throws Throwable { + checkNotDestroyed(); + Path tmpIndexFile = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".bf"); + tmpIndexFile = tmpIndexFile.toAbsolutePath(); + IndexReference indexReference = new IndexReference(resources); + + try (var in = inputStream; + FileOutputStream fileOutputStream = new FileOutputStream(tmpIndexFile.toFile())) { + in.transferTo(fileOutputStream); + MemorySegment pathSeg = Util.buildMemorySegment(resources.getArena(), tmpIndexFile.toString()); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + deserializeMethodHandle.invokeExact( + resources.getMemorySegment(), + indexReference.getMemorySegment(), + returnValue, + pathSeg + ); + checkError(returnValue.get(C_INT, 0L), "deserializeMethodHandle"); + } + } finally { + Files.deleteIfExists(tmpIndexFile); + } + return indexReference; + } + + public static BruteForceIndex.Builder newBuilder(CuVSResources cuvsResources) { + Objects.requireNonNull(cuvsResources); + if (!(cuvsResources instanceof CuVSResourcesImpl)) { + throw new IllegalArgumentException("Unsupported " + cuvsResources); + } + return new Builder((CuVSResourcesImpl)cuvsResources); + } + + /** + * Builder helps configure and create an instance of {@link BruteForceIndex}. + */ + public static class Builder implements BruteForceIndex.Builder { + + private float[][] dataset; + private final CuVSResourcesImpl cuvsResources; + private BruteForceIndexParams bruteForceIndexParams; + private InputStream inputStream; + + /** + * Constructs this Builder with an instance of {@link CuVSResourcesImpl}. 
+ * + * @param cuvsResources an instance of {@link CuVSResources} + */ + public Builder(CuVSResourcesImpl cuvsResources) { + this.cuvsResources = cuvsResources; + } + + /** + * Registers an instance of configured {@link BruteForceIndexParams} with this + * Builder. + * + * @param bruteForceIndexParams An instance of BruteForceIndexParams + * @return An instance of this Builder + */ + @Override + public Builder withIndexParams(BruteForceIndexParams bruteForceIndexParams) { + this.bruteForceIndexParams = bruteForceIndexParams; + return this; + } + + /** + * Sets an instance of InputStream typically used when index deserialization is + * needed. + * + * @param inputStream an instance of {@link InputStream} + * @return an instance of this Builder + */ + @Override + public Builder from(InputStream inputStream) { + this.inputStream = inputStream; + return this; + } + + /** + * Sets the dataset for building the {@link BruteForceIndex}. + * + * @param dataset a two-dimensional float array + * @return an instance of this Builder + */ + @Override + public Builder withDataset(float[][] dataset) { + this.dataset = dataset; + return this; + } + + /** + * Builds and returns an instance of {@link BruteForceIndex}. + * + * @return an instance of {@link BruteForceIndex} + */ + @Override + public BruteForceIndexImpl build() throws Throwable { + if (inputStream != null) { + return new BruteForceIndexImpl(inputStream, cuvsResources); + } else { + return new BruteForceIndexImpl(dataset, cuvsResources, bruteForceIndexParams); + } + } + } + + /** + * Holds the memory reference to a BRUTEFORCE index. + */ + protected static class IndexReference { + + private final MemorySegment memorySegment; + + /** + * Constructs BruteForceIndexReference and allocates the MemorySegment.
+ */ + protected IndexReference(CuVSResourcesImpl resources) { + memorySegment = CuVSBruteForceIndex.allocate(resources.getArena()); + } + + /** + * Constructs BruteForceIndexReference with an instance of MemorySegment passed + * as a parameter. + * + * @param indexMemorySegment the MemorySegment instance to use for containing + * index reference + */ + protected IndexReference(MemorySegment indexMemorySegment) { + this.memorySegment = indexMemorySegment; + } + + /** + * Gets the instance of index MemorySegment. + * + * @return index MemorySegment + */ + protected MemorySegment getMemorySegment() { + return memorySegment; + } + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceSearchResults.java similarity index 93% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceSearchResults.java index b87e1f601..1b872708d 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/BruteForceSearchResults.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceSearchResults.java @@ -14,7 +14,9 @@ * limitations under the License. */ -package com.nvidia.cuvs; +package com.nvidia.cuvs.internal; + +import com.nvidia.cuvs.internal.common.SearchResultsImpl; import java.lang.foreign.MemorySegment; import java.lang.foreign.SequenceLayout; @@ -22,14 +24,12 @@ import java.util.List; import java.util.Map; -import com.nvidia.cuvs.common.SearchResults; - /** * SearchResult encapsulates the logic for reading and holding search results. 
* * @since 25.02 */ -public class BruteForceSearchResults extends SearchResults { +public class BruteForceSearchResults extends SearchResultsImpl { protected BruteForceSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout, MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List mapping, diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java new file mode 100644 index 000000000..0577276d3 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java @@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.cuvs.internal;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SequenceLayout;
+import java.lang.invoke.MethodHandle;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Objects;
+import java.util.UUID;
+
+import com.nvidia.cuvs.CagraCompressionParams;
+import com.nvidia.cuvs.CagraIndex;
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CagraQuery;
+import com.nvidia.cuvs.CagraSearchParams;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.SearchResults;
+import com.nvidia.cuvs.internal.common.Util;
+import com.nvidia.cuvs.internal.panama.CuVSCagraCompressionParams;
+import com.nvidia.cuvs.internal.panama.CuVSCagraIndex;
+import com.nvidia.cuvs.internal.panama.CuVSCagraIndexParams;
+import com.nvidia.cuvs.internal.panama.CuVSCagraSearchParams;
+
+import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT;
+import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT;
+import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG;
+import static com.nvidia.cuvs.internal.common.LinkerHelper.downcallHandle;
+import static com.nvidia.cuvs.internal.common.Util.checkError;
+import static java.lang.foreign.ValueLayout.ADDRESS;
+
+/**
+ * {@link CagraIndex} encapsulates a CAGRA index, along with methods to interact
+ * with it.
+ * <p>
+ * CAGRA is a graph-based nearest neighbors algorithm that was built from the
+ * ground up for GPU acceleration. CAGRA demonstrates state-of-the art index
+ * build and query performance for both small and large-batch sized search. Know
+ * more about this algorithm
+ * <a href="https://arxiv.org/abs/2308.15136" target="_blank">here</a>
+ * <p>
+ * Every native entry point is reached through a Panama downcall handle and
+ * reports its status through an out-parameter that is passed to
+ * {@code checkError} after the call. Once {@link #destroyIndex()} has been
+ * called, the public operations throw {@link IllegalStateException}.
+ *
+ * @since 25.02
+ */
+public class CagraIndexImpl implements CagraIndex {
+
+  private static final MethodHandle indexMethodHandle = downcallHandle("build_cagra_index",
+      FunctionDescriptor.of(ADDRESS, ADDRESS, C_LONG, C_LONG, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_INT));
+
+  private static final MethodHandle searchMethodHandle = downcallHandle("search_cagra_index",
+      FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, C_INT, C_LONG, C_INT, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS));
+
+  private static final MethodHandle serializeMethodHandle = downcallHandle("serialize_cagra_index",
+      FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS));
+
+  private static final MethodHandle deserializeMethodHandle = downcallHandle("deserialize_cagra_index",
+      FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS));
+
+  private static final MethodHandle destroyIndexMethodHandle = downcallHandle("destroy_cagra_index",
+      FunctionDescriptor.ofVoid(ADDRESS, ADDRESS));
+
+  private static final MethodHandle serializeCAGRAIndexToHNSWMethodHandle = downcallHandle("serialize_cagra_index_to_hnsw",
+      FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS));
+
+  /** Buffer length used by the overloads that do not take an explicit one. */
+  private static final int DEFAULT_BUFFER_LENGTH = 1024;
+
+  private final float[][] dataset;
+  private final CuVSResourcesImpl resources;
+  private final CagraIndexParams cagraIndexParameters;
+  private final CagraCompressionParams cagraCompressionParams;
+  private final IndexReference cagraIndexReference;
+  private boolean destroyed;
+
+  /**
+   * Constructor for building the index using specified dataset
+   *
+   * @param indexParameters        an instance of {@link CagraIndexParams} holding
+   *                               the index parameters, may be {@code null}
+   * @param cagraCompressionParams an instance of {@link CagraCompressionParams}
+   *                               holding the compression parameters, may be
+   *                               {@code null}
+   * @param dataset                the dataset for indexing
+   * @param resources              an instance of {@link CuVSResources}
+   */
+  private CagraIndexImpl(CagraIndexParams indexParameters, CagraCompressionParams cagraCompressionParams, float[][] dataset,
+      CuVSResourcesImpl resources) throws Throwable {
+    this.cagraIndexParameters = indexParameters;
+    this.cagraCompressionParams = cagraCompressionParams;
+    this.dataset = dataset;
+    this.resources = resources;
+    this.cagraIndexReference = build();
+  }
+
+  /**
+   * Constructor for loading the index from an {@link InputStream}
+   *
+   * @param inputStream an instance of stream to read the index bytes from
+   * @param resources   an instance of {@link CuVSResources}
+   */
+  private CagraIndexImpl(InputStream inputStream, CuVSResourcesImpl resources) throws Throwable {
+    this.cagraIndexParameters = null;
+    this.cagraCompressionParams = null;
+    this.dataset = null;
+    this.resources = resources;
+    this.cagraIndexReference = deserialize(inputStream);
+  }
+
+  /**
+   * Fails fast when the native index has already been destroyed.
+   *
+   * @throws IllegalStateException if {@link #destroyIndex()} was called before
+   */
+  private void checkNotDestroyed() {
+    if (destroyed) {
+      throw new IllegalStateException("destroyed");
+    }
+  }
+
+  /**
+   * Invokes the native destroy_cagra_index to de-allocate the CAGRA index.
+   * The index is marked as destroyed even when the native call reports an
+   * error.
+   *
+   * @throws IllegalStateException if the index was already destroyed
+   */
+  @Override
+  public void destroyIndex() throws Throwable {
+    checkNotDestroyed();
+    try (var arena = Arena.ofConfined()) {
+      MemorySegment returnValue = arena.allocate(C_INT);
+      destroyIndexMethodHandle.invokeExact(cagraIndexReference.getMemorySegment(), returnValue);
+      checkError(returnValue.get(C_INT, 0L), "destroyIndexMethodHandle");
+    } finally {
+      destroyed = true;
+    }
+  }
+
+  /**
+   * Invokes the native build_cagra_index function via the Panama API to build the
+   * {@link CagraIndex}
+   *
+   * @return an instance of {@link IndexReference} that holds the pointer to the
+   *         index
+   */
+  private IndexReference build() throws Throwable {
+    long rows = dataset.length;
+    long cols = rows > 0 ? dataset[0].length : 0;
+
+    // Absent parameters are passed to the native side as NULL pointers.
+    MemorySegment indexParamsMemorySegment = cagraIndexParameters != null
+        ? segmentFromIndexParams(cagraIndexParameters)
+        : MemorySegment.NULL;
+
+    int numWriterThreads = cagraIndexParameters != null ? cagraIndexParameters.getNumWriterThreads() : 1;
+
+    MemorySegment compressionParamsMemorySegment = cagraCompressionParams != null
+        ? segmentFromCompressionParams(cagraCompressionParams)
+        : MemorySegment.NULL;
+
+    MemorySegment dataSeg = Util.buildMemorySegment(resources.getArena(), dataset);
+
+    try (var localArena = Arena.ofConfined()) {
+      MemorySegment returnValue = localArena.allocate(C_INT);
+      var indexSeg = (MemorySegment) indexMethodHandle.invokeExact(
+        dataSeg,
+        rows,
+        cols,
+        resources.getMemorySegment(),
+        returnValue,
+        indexParamsMemorySegment,
+        compressionParamsMemorySegment,
+        numWriterThreads
+      );
+      checkError(returnValue.get(C_INT, 0L), "indexMethodHandle");
+      return new IndexReference(indexSeg);
+    }
+  }
+
+  /**
+   * Invokes the native search_cagra_index via the Panama API for searching a
+   * CAGRA index.
+   * <p>
+   * When the query carries a mapping, the effective top-k is capped at the
+   * mapping size. The neighbor, distance and query buffers are allocated from
+   * the arena owned by the {@link CuVSResources} instance.
+   *
+   * @param query an instance of {@link CagraQuery} holding the query vectors and
+   *              other parameters
+   * @return an instance of {@link CagraSearchResults} containing the results
+   * @throws IllegalStateException if the index was already destroyed
+   */
+  @Override
+  public SearchResults search(CagraQuery query) throws Throwable {
+    checkNotDestroyed();
+    int topK = query.getMapping() != null ? Math.min(query.getMapping().size(), query.getTopK()) : query.getTopK();
+    long numQueries = query.getQueryVectors().length;
+    long numBlocks = topK * numQueries;
+    int vectorDimension = numQueries > 0 ? query.getQueryVectors()[0].length : 0;
+
+    SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_INT);
+    SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT);
+    MemorySegment neighborsMemorySegment = resources.getArena().allocate(neighborsSequenceLayout);
+    MemorySegment distancesMemorySegment = resources.getArena().allocate(distancesSequenceLayout);
+    MemorySegment floatsSeg = Util.buildMemorySegment(resources.getArena(), query.getQueryVectors());
+
+    try (var localArena = Arena.ofConfined()) {
+      MemorySegment returnValue = localArena.allocate(C_INT);
+      searchMethodHandle.invokeExact(
+        cagraIndexReference.getMemorySegment(),
+        floatsSeg,
+        topK,
+        numQueries,
+        vectorDimension,
+        resources.getMemorySegment(),
+        neighborsMemorySegment,
+        distancesMemorySegment,
+        returnValue,
+        segmentFromSearchParams(query.getCagraSearchParameters())
+      );
+      checkError(returnValue.get(C_INT, 0L), "searchMethodHandle");
+    }
+    return new CagraSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment,
+        distancesMemorySegment, topK, query.getMapping(), numQueries);
+  }
+
+  /**
+   * Serializes the index to the given stream using a freshly created temporary
+   * file and a 1024-byte copy buffer.
+   *
+   * @param outputStream the stream the serialized index is written to
+   */
+  @Override
+  public void serialize(OutputStream outputStream) throws Throwable {
+    serialize(outputStream, DEFAULT_BUFFER_LENGTH);
+  }
+
+  /**
+   * Serializes the index to the given stream using a freshly created temporary
+   * file inside the resources' temp directory.
+   *
+   * @param outputStream the stream the serialized index is written to
+   * @param bufferLength the length of the buffer used to copy the file content
+   */
+  @Override
+  public void serialize(OutputStream outputStream, int bufferLength) throws Throwable {
+    // Check before creating the temp file so a destroyed index leaves nothing behind.
+    checkNotDestroyed();
+    Path p = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".cag");
+    serialize(outputStream, p, bufferLength);
+  }
+
+  /**
+   * Asks the native library to write the index to {@code tempFile}, then copies
+   * that file to {@code outputStream}. The temporary file is deleted afterwards,
+   * whether or not the native call or the copy succeeded.
+   *
+   * @param outputStream the stream the serialized index is written to
+   * @param tempFile     the intermediate file the native library writes to
+   * @param bufferLength the length of the buffer used to copy the file content
+   * @throws IllegalStateException if the index was already destroyed
+   */
+  @Override
+  public void serialize(OutputStream outputStream, Path tempFile, int bufferLength) throws Throwable {
+    checkNotDestroyed();
+    tempFile = tempFile.toAbsolutePath();
+    try (var localArena = Arena.ofConfined()) {
+      // The path is only needed for the duration of the call, so it lives in the local arena.
+      MemorySegment pathSeg = Util.buildMemorySegment(localArena, tempFile.toString());
+      MemorySegment returnValue = localArena.allocate(C_INT);
+      serializeMethodHandle.invokeExact(
+        resources.getMemorySegment(),
+        cagraIndexReference.getMemorySegment(),
+        returnValue,
+        pathSeg
+      );
+      checkError(returnValue.get(C_INT, 0L), "serializeMethodHandle");
+      copyFileTo(tempFile, outputStream, bufferLength);
+    } finally {
+      Files.deleteIfExists(tempFile);
+    }
+  }
+
+  /**
+   * Serializes the index in HNSW format to the given stream using a freshly
+   * created temporary file and a 1024-byte copy buffer.
+   *
+   * @param outputStream the stream the serialized index is written to
+   */
+  @Override
+  public void serializeToHNSW(OutputStream outputStream) throws Throwable {
+    serializeToHNSW(outputStream, DEFAULT_BUFFER_LENGTH);
+  }
+
+  /**
+   * Serializes the index in HNSW format to the given stream using a freshly
+   * created temporary file inside the resources' temp directory.
+   *
+   * @param outputStream the stream the serialized index is written to
+   * @param bufferLength the length of the buffer used to copy the file content
+   */
+  @Override
+  public void serializeToHNSW(OutputStream outputStream, int bufferLength) throws Throwable {
+    // Check before creating the temp file so a destroyed index leaves nothing behind.
+    checkNotDestroyed();
+    Path p = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".hnsw");
+    serializeToHNSW(outputStream, p, bufferLength);
+  }
+
+  /**
+   * Asks the native library to write the index in HNSW format to
+   * {@code tempFile}, then copies that file to {@code outputStream}. The
+   * temporary file is deleted afterwards, whether or not the native call or the
+   * copy succeeded.
+   *
+   * @param outputStream the stream the serialized index is written to
+   * @param tempFile     the intermediate file the native library writes to
+   * @param bufferLength the length of the buffer used to copy the file content
+   * @throws IllegalStateException if the index was already destroyed
+   */
+  @Override
+  public void serializeToHNSW(OutputStream outputStream, Path tempFile, int bufferLength) throws Throwable {
+    checkNotDestroyed();
+    tempFile = tempFile.toAbsolutePath();
+    try (var localArena = Arena.ofConfined()) {
+      MemorySegment pathSeg = Util.buildMemorySegment(localArena, tempFile.toString());
+      MemorySegment returnValue = localArena.allocate(C_INT);
+      // NOTE: unlike serialize_cagra_index, the path precedes the index here.
+      serializeCAGRAIndexToHNSWMethodHandle.invokeExact(
+        resources.getMemorySegment(),
+        pathSeg,
+        cagraIndexReference.getMemorySegment(),
+        returnValue
+      );
+      checkError(returnValue.get(C_INT, 0L), "serializeCAGRAIndexToHNSWMethodHandle");
+      copyFileTo(tempFile, outputStream, bufferLength);
+    } finally {
+      Files.deleteIfExists(tempFile);
+    }
+  }
+
+  /**
+   * Copies the content of {@code source} to {@code outputStream} in chunks of
+   * at most {@code bufferLength} bytes. The output stream is left open.
+   */
+  private static void copyFileTo(Path source, OutputStream outputStream, int bufferLength)
+      throws java.io.IOException {
+    try (FileInputStream fileInputStream = new FileInputStream(source.toFile())) {
+      byte[] chunk = new byte[bufferLength];
+      int chunkLength;
+      while ((chunkLength = fileInputStream.read(chunk)) != -1) {
+        outputStream.write(chunk, 0, chunkLength);
+      }
+    }
+  }
+
+  /**
+   * Gets an instance of {@link IndexReference} by deserializing a CAGRA index
+   * using an {@link InputStream}.
+   *
+   * @param inputStream an instance of {@link InputStream}
+   * @return an instance of {@link IndexReference}.
+   */
+  private IndexReference deserialize(InputStream inputStream) throws Throwable {
+    return deserialize(inputStream, DEFAULT_BUFFER_LENGTH);
+  }
+
+  /**
+   * Gets an instance of {@link IndexReference} by deserializing a CAGRA index
+   * using an {@link InputStream}. The stream is first spooled to a temporary
+   * file, which the native library then reads; the stream is closed and the
+   * temporary file deleted before this method returns.
+   *
+   * @param inputStream  an instance of {@link InputStream}
+   * @param bufferLength the length of the buffer to use while reading the bytes
+   *                     from the stream. Default value is 1024.
+   * @return an instance of {@link IndexReference}.
+   */
+  private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable {
+    Path tmpIndexFile = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".cag")
+        .toAbsolutePath();
+    try {
+      // Close the file before the native reader opens it so all bytes are on disk.
+      try (var in = inputStream;
+          FileOutputStream fileOutputStream = new FileOutputStream(tmpIndexFile.toFile())) {
+        byte[] chunk = new byte[bufferLength];
+        int chunkLength;
+        while ((chunkLength = in.read(chunk)) != -1) {
+          fileOutputStream.write(chunk, 0, chunkLength);
+        }
+      }
+
+      IndexReference indexReference = new IndexReference(resources);
+      try (var localArena = Arena.ofConfined()) {
+        MemorySegment pathSeg = Util.buildMemorySegment(localArena, tmpIndexFile.toString());
+        MemorySegment returnValue = localArena.allocate(C_INT);
+        deserializeMethodHandle.invokeExact(
+          resources.getMemorySegment(),
+          indexReference.getMemorySegment(),
+          returnValue,
+          pathSeg
+        );
+        checkError(returnValue.get(C_INT, 0L), "deserializeMethodHandle");
+      }
+      return indexReference;
+    } finally {
+      Files.deleteIfExists(tmpIndexFile);
+    }
+  }
+
+  /**
+   * Gets an instance of {@link CagraIndexParams}
+   *
+   * @return an instance of {@link CagraIndexParams}, or {@code null} when the
+   *         index was loaded from a stream or built without parameters
+   */
+  @Override
+  public CagraIndexParams getCagraIndexParameters() {
+    return cagraIndexParameters;
+  }
+
+  /**
+   * Gets an instance of {@link CuVSResources}
+   *
+   * @return an instance of {@link CuVSResources}
+   */
+  @Override
+  public CuVSResourcesImpl getCuVSResources() {
+    return resources;
+  }
+
+  /**
+   * Allocates the configured compression parameters in the MemorySegment.
+   */
+  private MemorySegment segmentFromCompressionParams(CagraCompressionParams params) {
+    MemorySegment seg = CuVSCagraCompressionParams.allocate(resources.getArena());
+    CuVSCagraCompressionParams.pq_bits(seg, params.getPqBits());
+    CuVSCagraCompressionParams.pq_dim(seg, params.getPqDim());
+    CuVSCagraCompressionParams.vq_n_centers(seg, params.getVqNCenters());
+    CuVSCagraCompressionParams.kmeans_n_iters(seg, params.getKmeansNIters());
+    CuVSCagraCompressionParams.vq_kmeans_trainset_fraction(seg, params.getVqKmeansTrainsetFraction());
+    CuVSCagraCompressionParams.pq_kmeans_trainset_fraction(seg, params.getPqKmeansTrainsetFraction());
+    return seg;
+  }
+
+  /**
+   * Allocates the configured index parameters in the MemorySegment.
+   */
+  private MemorySegment segmentFromIndexParams(CagraIndexParams params) {
+    MemorySegment seg = CuVSCagraIndexParams.allocate(resources.getArena());
+    CuVSCagraIndexParams.intermediate_graph_degree(seg, params.getIntermediateGraphDegree());
+    CuVSCagraIndexParams.graph_degree(seg, params.getGraphDegree());
+    CuVSCagraIndexParams.build_algo(seg, params.getCagraGraphBuildAlgo().value);
+    CuVSCagraIndexParams.nn_descent_niter(seg, params.getNNDescentNumIterations());
+    CuVSCagraIndexParams.metric(seg, params.getCuvsDistanceType().value);
+    return seg;
+  }
+
+  /**
+   * Allocates the configured search parameters in the MemorySegment. The search
+   * algorithm and hash map mode are only written when they are set.
+   */
+  private MemorySegment segmentFromSearchParams(CagraSearchParams params) {
+    MemorySegment seg = CuVSCagraSearchParams.allocate(resources.getArena());
+    CuVSCagraSearchParams.max_queries(seg, params.getMaxQueries());
+    CuVSCagraSearchParams.itopk_size(seg, params.getITopKSize());
+    CuVSCagraSearchParams.max_iterations(seg, params.getMaxIterations());
+    if (params.getCagraSearchAlgo() != null) {
+      CuVSCagraSearchParams.algo(seg, params.getCagraSearchAlgo().value);
+    }
+    CuVSCagraSearchParams.team_size(seg, params.getTeamSize());
+    CuVSCagraSearchParams.search_width(seg, params.getSearchWidth());
+    CuVSCagraSearchParams.min_iterations(seg, params.getMinIterations());
+    CuVSCagraSearchParams.thread_block_size(seg, params.getThreadBlockSize());
+    if (params.getHashMapMode() != null) {
+      CuVSCagraSearchParams.hashmap_mode(seg, params.getHashMapMode().value);
+    }
+    CuVSCagraSearchParams.hashmap_max_fill_rate(seg, params.getHashMapMaxFillRate());
+    CuVSCagraSearchParams.num_random_samplings(seg, params.getNumRandomSamplings());
+    CuVSCagraSearchParams.rand_xor_mask(seg, params.getRandXORMask());
+    return seg;
+  }
+
+  /**
+   * Creates a builder bound to the given resources.
+   *
+   * @param cuvsResources the resources to use; must be a {@link CuVSResourcesImpl}
+   * @return a new builder
+   * @throws NullPointerException     if {@code cuvsResources} is {@code null}
+   * @throws IllegalArgumentException if {@code cuvsResources} is of any other
+   *                                  implementation type
+   */
+  public static CagraIndex.Builder newBuilder(CuVSResources cuvsResources) {
+    Objects.requireNonNull(cuvsResources);
+    if (!(cuvsResources instanceof CuVSResourcesImpl)) {
+      throw new IllegalArgumentException("Unsupported " + cuvsResources);
+    }
+    return new CagraIndexImpl.Builder((CuVSResourcesImpl)cuvsResources);
+  }
+
+  /**
+   * Builder helps configure and create an instance of {@link CagraIndex}.
+   */
+  public static class Builder implements CagraIndex.Builder {
+
+    private float[][] dataset;
+    private CagraIndexParams cagraIndexParams;
+    private CagraCompressionParams cagraCompressionParams;
+    private final CuVSResourcesImpl cuvsResources;
+    private InputStream inputStream;
+
+    public Builder(CuVSResourcesImpl cuvsResources) {
+      this.cuvsResources = cuvsResources;
+    }
+
+    @Override
+    public Builder from(InputStream inputStream) {
+      this.inputStream = inputStream;
+      return this;
+    }
+
+    @Override
+    public Builder withDataset(float[][] dataset) {
+      this.dataset = dataset;
+      return this;
+    }
+
+    @Override
+    public Builder withIndexParams(CagraIndexParams cagraIndexParameters) {
+      this.cagraIndexParams = cagraIndexParameters;
+      return this;
+    }
+
+    @Override
+    public Builder withCompressionParams(CagraCompressionParams cagraCompressionParams) {
+      this.cagraCompressionParams = cagraCompressionParams;
+      return this;
+    }
+
+    /**
+     * Loads the index from the input stream when one was supplied, otherwise
+     * builds it from the dataset.
+     *
+     * @return the created index
+     * @throws NullPointerException if neither an input stream nor a dataset was
+     *                              supplied
+     */
+    @Override
+    public CagraIndexImpl build() throws Throwable {
+      if (inputStream != null) {
+        return new CagraIndexImpl(inputStream, cuvsResources);
+      }
+      // Same exception type as before, but raised up front with a usable message.
+      Objects.requireNonNull(dataset, "either an input stream or a dataset must be provided");
+      return new CagraIndexImpl(cagraIndexParams, cagraCompressionParams, dataset, cuvsResources);
+    }
+  }
+
+  /**
+   * Holds the memory reference to a CAGRA index.
+   */
+  protected static class IndexReference {
+
+    private final MemorySegment memorySegment;
+
+    /**
+     * Constructs CagraIndexReference and allocate the MemorySegment.
+     */
+    protected IndexReference(CuVSResourcesImpl resources) {
+      memorySegment = CuVSCagraIndex.allocate(resources.getArena());
+    }
+
+    /**
+     * Constructs CagraIndexReference with an instance of MemorySegment passed as a
+     * parameter.
+     *
+     * @param indexMemorySegment the MemorySegment instance to use for containing
+     *                           index reference
+     */
+    protected IndexReference(MemorySegment indexMemorySegment) {
+      this.memorySegment = indexMemorySegment;
+    }
+
+    /**
+     * Gets the instance of index MemorySegment.
+     *
+     * @return index MemorySegment
+     */
+    protected MemorySegment getMemorySegment() {
+      return memorySegment;
+    }
+  }
+}
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraSearchResults.java similarity index 93% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraSearchResults.java index 3473facce..490201307 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CagraSearchResults.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraSearchResults.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs; +package com.nvidia.cuvs.internal; import java.lang.foreign.MemorySegment; import java.lang.foreign.SequenceLayout; @@ -22,14 +22,14 @@ import java.util.List; import java.util.Map; -import com.nvidia.cuvs.common.SearchResults; +import com.nvidia.cuvs.internal.common.SearchResultsImpl; /** * SearchResult encapsulates the logic for reading and holding search results. * * @since 25.02 */ -public class CagraSearchResults extends SearchResults { +public class CagraSearchResults extends SearchResultsImpl { protected CagraSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout, MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List mapping, diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java new file mode 100644 index 000000000..a44d7ede4 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs.internal; + +import com.nvidia.cuvs.CuVSResources; + +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.MemorySegment; +import java.lang.invoke.MethodHandle; +import java.nio.file.Path; + +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.downcallHandle; +import static com.nvidia.cuvs.internal.common.Util.checkError; +import static java.lang.foreign.ValueLayout.ADDRESS; + +/** + * Used for allocating resources for cuVS + * + * @since 25.02 + */ +public class CuVSResourcesImpl implements CuVSResources { + + static final MethodHandle createResourcesMethodHandle = downcallHandle( + "create_resources", FunctionDescriptor.of(ADDRESS, ADDRESS) + ); + + private static final MethodHandle destroyResourcesMethodHandle = downcallHandle( + "destroy_resources", FunctionDescriptor.ofVoid(ADDRESS, ADDRESS) + ); + + private final Path tempDirectory; + private final Arena arena; + private final MemorySegment resourcesMemorySegment; + private boolean destroyed; + + /** + * Constructor that allocates the resources needed for cuVS + * + * @throws Throwable exception thrown when native function is invoked + */ + public CuVSResourcesImpl(Path tempDirectory) throws Throwable { + this.tempDirectory = tempDirectory; + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + resourcesMemorySegment = (MemorySegment) createResourcesMethodHandle.invokeExact(returnValue); 
+ checkError(returnValue.get(C_INT, 0L), "createResourcesMethodHandle"); + } + arena = Arena.ofShared(); + } + + @Override + public void close() { + checkNotDestroyed(); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + destroyResourcesMethodHandle.invokeExact(resourcesMemorySegment, returnValue); + checkError(returnValue.get(C_INT, 0L), "destroyResourcesMethodHandle"); + } catch (Throwable e) { + e.printStackTrace(); + } finally { + destroyed = true; + } + if (!arena.scope().isAlive()) { + arena.close(); + } + } + + @Override + public Path tempDirectory() { + return tempDirectory; + } + + private void checkNotDestroyed() { + if (destroyed) { + throw new IllegalStateException("destroyed"); + } + } + + /** + * Gets the reference to the cuvsResources MemorySegment. + * + * @return cuvsResources MemorySegment + */ + protected MemorySegment getMemorySegment() { + checkNotDestroyed(); + return resourcesMemorySegment; + } + + /** + * The allocation arena used by this resources. 
+ */ + protected Arena getArena() { + checkNotDestroyed(); + return arena; + } + + /** + * Container for GPU information + */ + public class GPUInfo { + + private final int gpuId; + private final long freeMemory; + private final long totalMemory; + private final float computeCapability; + + public GPUInfo(int gpuId, long freeMemory, long totalMemory, float computeCapability) { + super(); + this.gpuId = gpuId; + this.freeMemory = freeMemory; + this.totalMemory = totalMemory; + this.computeCapability = computeCapability; + } + + public int getGpuId() { + return gpuId; + } + + public long getFreeMemory() { + return freeMemory; + } + + public long getTotalMemory() { + return totalMemory; + } + + public float getComputeCapability() { + return computeCapability; + } + + @Override + public String toString() { + return "GPUInfo [gpuId=" + gpuId + ", freeMemory=" + freeMemory + ", totalMemory=" + totalMemory + + ", computeCapability=" + computeCapability + "]"; + } + + } +} diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java new file mode 100644 index 000000000..bb9553e25 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.cuvs.internal; + +import java.io.FileOutputStream; +import java.io.InputStream; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SequenceLayout; +import java.lang.invoke.MethodHandle; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; +import java.util.UUID; + +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.HnswIndex; +import com.nvidia.cuvs.HnswIndexParams; +import com.nvidia.cuvs.HnswQuery; +import com.nvidia.cuvs.HnswSearchParams; +import com.nvidia.cuvs.SearchResults; +import com.nvidia.cuvs.internal.common.Util; +import com.nvidia.cuvs.internal.panama.CuVSHnswIndex; +import com.nvidia.cuvs.internal.panama.CuVSHnswIndexParams; +import com.nvidia.cuvs.internal.panama.CuVSHnswSearchParams; + +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG; +import static com.nvidia.cuvs.internal.common.LinkerHelper.downcallHandle; +import static com.nvidia.cuvs.internal.common.Util.checkError; +import static java.lang.foreign.ValueLayout.ADDRESS; + +/** + * {@link HnswIndex} encapsulates a HNSW index, along with methods to interact + * with it. 
+ * + * @since 25.02 + */ +public class HnswIndexImpl implements HnswIndex { + + private static final MethodHandle deserializeHnswIndexMethodHandle = downcallHandle("deserialize_hnsw_index", + FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_INT)); + + private static final MethodHandle searchHnswIndexMethodHandle = downcallHandle("search_hnsw_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, ADDRESS, C_INT, C_INT, C_LONG)); + + private static final MethodHandle destroyHnswIndexMethodHandle = downcallHandle("destroy_hnsw_index", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS)); + + private final CuVSResourcesImpl resources; + private final HnswIndexParams hnswIndexParams; + private final IndexReference hnswIndexReference; + + /** + * Constructor for loading the index from an {@link InputStream} + * + * @param inputStream an instance of stream to read the index bytes from + * @param resources an instance of {@link CuVSResourcesImpl} + */ + private HnswIndexImpl(InputStream inputStream, CuVSResourcesImpl resources, HnswIndexParams hnswIndexParams) + throws Throwable { + this.hnswIndexParams = hnswIndexParams; + this.resources = resources; + this.hnswIndexReference = deserialize(inputStream); + } + + /** + * Invokes the native destroy_hnsw_index to de-allocate the HNSW index + */ + @Override + public void destroyIndex() throws Throwable { + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + destroyHnswIndexMethodHandle.invokeExact(hnswIndexReference.getMemorySegment(), returnValue); + checkError(returnValue.get(C_INT, 0L), "destroyHnswIndexMethodHandle"); + } + } + + /** + * Invokes the native search_hnsw_index via the Panama API for searching a HNSW + * index. 
+ * + * @param query an instance of {@link HnswQuery} holding the query vectors and + * other parameters + * @return an instance of {@link HnswSearchResults} containing the results + */ + @Override + public SearchResults search(HnswQuery query) throws Throwable { + long numQueries = query.getQueryVectors().length; + long numBlocks = query.getTopK() * numQueries; + int vectorDimension = numQueries > 0 ? query.getQueryVectors()[0].length : 0; + + SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_LONG); + SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); + MemorySegment neighborsMemorySegment = resources.getArena().allocate(neighborsSequenceLayout); + MemorySegment distancesMemorySegment = resources.getArena().allocate(distancesSequenceLayout); + MemorySegment querySeg = Util.buildMemorySegment(resources.getArena(), query.getQueryVectors()); + + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + searchHnswIndexMethodHandle.invokeExact( + resources.getMemorySegment(), + hnswIndexReference.getMemorySegment(), + segmentFromSearchParams(query.getHnswSearchParams()), + returnValue, + neighborsMemorySegment, + distancesMemorySegment, + querySeg, + query.getTopK(), + vectorDimension, + numQueries + ); + checkError(returnValue.get(C_INT, 0L), "searchHnswIndexMethodHandle"); + } + return new HnswSearchResults(neighborsSequenceLayout, distancesSequenceLayout, neighborsMemorySegment, + distancesMemorySegment, query.getTopK(), query.getMapping(), numQueries); + } + + /** + * Gets an instance of {@link IndexReference} by deserializing a HNSW index + * using an {@link InputStream}. + * + * @param inputStream an instance of {@link InputStream} + * @return an instance of {@link IndexReference}. 
+ */ + private IndexReference deserialize(InputStream inputStream) throws Throwable { + return deserialize(inputStream, 1024); + } + + /** + * Gets an instance of {@link IndexReference} by deserializing a HNSW index + * using an {@link InputStream}. + * + * @param inputStream an instance of {@link InputStream} + * @param bufferLength the length of the buffer to use while reading the bytes + * from the stream. Default value is 1024. + * @return an instance of {@link IndexReference}. + */ + private IndexReference deserialize(InputStream inputStream, int bufferLength) throws Throwable { + Path tmpIndexFile = Files.createTempFile(resources.tempDirectory(), UUID.randomUUID().toString(), ".hnsw"); + tmpIndexFile = tmpIndexFile.toAbsolutePath(); + + try (var in = inputStream; + FileOutputStream fileOutputStream = new FileOutputStream(tmpIndexFile.toFile())) { + byte[] chunk = new byte[bufferLength]; + int chunkLength; + while ((chunkLength = in.read(chunk)) != -1) { + fileOutputStream.write(chunk, 0, chunkLength); + } + + MemorySegment pathSeg = Util.buildMemorySegment(resources.getArena(), tmpIndexFile.toString()); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValue = localArena.allocate(C_INT); + MemorySegment deserSeg = (MemorySegment) deserializeHnswIndexMethodHandle.invokeExact( + resources.getMemorySegment(), + pathSeg, + segmentFromIndexParams(hnswIndexParams), + returnValue, + hnswIndexParams.getVectorDimension() + ); + checkError(returnValue.get(C_INT, 0L), "deserializeHnswIndexMethodHandle"); + return new IndexReference(deserSeg); + } + } finally { + Files.deleteIfExists(tmpIndexFile); + } + } + + /** + * Allocates the configured search parameters in the MemorySegment. 
+ */ + private MemorySegment segmentFromIndexParams(HnswIndexParams params) { + MemorySegment seg = CuVSHnswIndexParams.allocate(resources.getArena()); + CuVSHnswIndexParams.ef_construction(seg, params.getEfConstruction()); + CuVSHnswIndexParams.num_threads(seg, params.getNumThreads()); + return seg; + } + + /** + * Allocates the configured search parameters in the MemorySegment. + */ + private MemorySegment segmentFromSearchParams(HnswSearchParams params) { + MemorySegment seg = CuVSHnswSearchParams.allocate(resources.getArena()); + CuVSHnswSearchParams.ef(seg, params.ef()); + CuVSHnswSearchParams.num_threads(seg, params.numThreads()); + return seg; + } + + public static HnswIndex.Builder newBuilder(CuVSResources cuvsResources) { + Objects.requireNonNull(cuvsResources); + if (!(cuvsResources instanceof CuVSResourcesImpl)) { + throw new IllegalArgumentException("Unsupported " + cuvsResources); + } + return new HnswIndexImpl.Builder((CuVSResourcesImpl)cuvsResources); + } + + /** + * Builder helps configure and create an instance of {@link HnswIndex}. + */ + public static class Builder implements HnswIndex.Builder { + + private final CuVSResourcesImpl cuvsResources; + private InputStream inputStream; + private HnswIndexParams hnswIndexParams; + + /** + * Constructs this Builder with an instance of {@link CuVSResources}. + * + * @param cuvsResources an instance of {@link CuVSResources} + */ + public Builder(CuVSResourcesImpl cuvsResources) { + this.cuvsResources = cuvsResources; + } + + /** + * Sets an instance of InputStream typically used when index deserialization is + * needed. + * + * @param inputStream an instance of {@link InputStream} + * @return an instance of this Builder + */ + @Override + public Builder from(InputStream inputStream) { + this.inputStream = inputStream; + return this; + } + + /** + * Registers an instance of configured {@link HnswIndexParams} with this + * Builder. + * + * @param hnswIndexParameters An instance of HnswIndexParams. 
+ * @return An instance of this Builder. + */ + @Override + public Builder withIndexParams(HnswIndexParams hnswIndexParameters) { + this.hnswIndexParams = hnswIndexParameters; + return this; + } + + /** + * Builds and returns an instance of CagraIndex. + * + * @return an instance of CagraIndex + */ + @Override + public HnswIndexImpl build() throws Throwable { + return new HnswIndexImpl(inputStream, cuvsResources, hnswIndexParams); + } + } + + /** + * Holds the memory reference to a HNSW index. + */ + protected static class IndexReference { + + private final MemorySegment memorySegment; + + /** + * Constructs CagraIndexReference and allocate the MemorySegment. + */ + protected IndexReference(CuVSResourcesImpl resources) { + memorySegment = CuVSHnswIndex.allocate(resources.getArena()); + } + + /** + * Constructs CagraIndexReference with an instance of MemorySegment passed as a + * parameter. + * + * @param indexMemorySegment the MemorySegment instance to use for containing + * index reference + */ + protected IndexReference(MemorySegment indexMemorySegment) { + this.memorySegment = indexMemorySegment; + } + + /** + * Gets the instance of index MemorySegment. + * + * @return index MemorySegment + */ + protected MemorySegment getMemorySegment() { + return memorySegment; + } + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswSearchResults.java similarity index 93% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswSearchResults.java index 8cb4d89e0..d2920c717 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/HnswSearchResults.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswSearchResults.java @@ -14,7 +14,9 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs; +package com.nvidia.cuvs.internal; + +import com.nvidia.cuvs.internal.common.SearchResultsImpl; import java.lang.foreign.MemorySegment; import java.lang.foreign.SequenceLayout; @@ -22,14 +24,12 @@ import java.util.List; import java.util.Map; -import com.nvidia.cuvs.common.SearchResults; - /** * SearchResult encapsulates the logic for reading and holding search results. * * @since 25.02 */ -public class HnswSearchResults extends SearchResults { +public class HnswSearchResults extends SearchResultsImpl { protected HnswSearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout, MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List mapping, diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LinkerHelper.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LinkerHelper.java new file mode 100644 index 000000000..176f4f90d --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LinkerHelper.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.cuvs.internal.common; + +import java.lang.foreign.AddressLayout; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/** + * Utility methods for calling into the native linker. + */ +public class LinkerHelper { + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup SYMBOL_LOOKUP; + + public static final ValueLayout.OfByte C_CHAR = (ValueLayout.OfByte) LINKER.canonicalLayouts().get("char"); + + public static final ValueLayout.OfInt C_INT = (ValueLayout.OfInt) LINKER.canonicalLayouts().get("int"); + + public static final ValueLayout.OfLong C_LONG = (ValueLayout.OfLong) LINKER.canonicalLayouts().get("long"); + + public static final ValueLayout.OfFloat C_FLOAT = (ValueLayout.OfFloat) LINKER.canonicalLayouts().get("float"); + + public static final AddressLayout C_POINTER = ((AddressLayout) LINKER.canonicalLayouts().get("void*")) + .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, C_CHAR)); + + static { + var nativeLibrary = LoaderUtils.loadNativeLibrary(); + // we use a global arena here, since the symbols obtained + // from the returned lookup are for the lifetime of the jvm + SYMBOL_LOOKUP = SymbolLookup.libraryLookup(nativeLibrary.toAbsolutePath(), Arena.global()); + } + + static MemorySegment functionAddress(String function) { + return SYMBOL_LOOKUP.find(function).orElseThrow(() -> new LinkageError("Native function " + function + " could not be found")); + } + + public static MethodHandle downcallHandle(String function, FunctionDescriptor functionDescriptor, Linker.Option... 
options) { + return LINKER.downcallHandle(functionAddress(function), functionDescriptor, options); + } + + private LinkerHelper() {} +} diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LoaderUtils.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LoaderUtils.java new file mode 100644 index 000000000..f97696984 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/LoaderUtils.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs.internal.common; + +import com.nvidia.cuvs.LibraryException; +import com.nvidia.cuvs.spi.CuVSProvider; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; + +class LoaderUtils { + + private LoaderUtils() {} + + /** + * Load the CuVS .so file from environment variable CUVS_JAVA_SO_PATH. If not + * found there, try to load it from the classpath to a temporary file. 
+ */ + static Path loadNativeLibrary() throws LibraryException { + String libraryPathFromEnvironment = System.getenv("CUVS_JAVA_SO_PATH"); + if (libraryPathFromEnvironment != null) { + Path file = Path.of(libraryPathFromEnvironment).toAbsolutePath(); + if (Files.notExists(file)) { + throw new LibraryException( + "Environment variable CUVS_JAVA_SO_PATH points to non-existent file: " + libraryPathFromEnvironment); + } + if (Files.isDirectory(file)) { + throw new LibraryException( + "Environment variable CUVS_JAVA_SO_PATH points to a directory: " + libraryPathFromEnvironment); + } + return file; + } + return loadLibraryFromJar("/META-INF/native/linux_x64/libcuvs_java.so"); + } + + static Path loadLibraryFromJar(String path) throws LibraryException { + if (!path.startsWith("/")) { + throw new IllegalArgumentException("The path has to be absolute (start with '/')."); + } + // Obtain filename from path + String filename = path.substring(path.lastIndexOf("/") + 1); + + // Split filename to prefix and suffix (extension) + String[] parts = filename.split("\\.", 2); + String prefix = parts[0]; + String suffix = (parts.length > 1) ? "." 
+ parts[parts.length - 1] : null; + + // Prepare temporary file + try { + Path temp = Files.createTempFile(nativeLibraryPath(), prefix, suffix); + temp.toFile().deleteOnExit(); + InputStream libraryStream = Util.class.getModule().getResourceAsStream(path); + if (libraryStream == null) { + throw new LibraryException("CuVS Library Not Found in ClassPath"); + } + streamCopy(libraryStream, new FileOutputStream(temp.toFile())); + return temp; + } catch (IOException ioe) { + throw new LibraryException(ioe); + } + } + + static void streamCopy(InputStream is, OutputStream os) throws IOException { + Objects.requireNonNull(is); + try (var in = is; + var out = os) { + in.transferTo(out); + } + } + + static Path nativeLibraryPath() { + Path p = CuVSProvider.provider().nativeLibraryPath().toAbsolutePath(); + if (Files.notExists(p)) { + throw new LibraryException("non-existent path: " + p); + } + if (!Files.isDirectory(p)) { + throw new LibraryException("not a directory: " + p); + } + return p; + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/SearchResultsImpl.java similarity index 80% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/SearchResultsImpl.java index 83e98cb8e..dfd82c004 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/SearchResults.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/SearchResultsImpl.java @@ -1,4 +1,6 @@ -package com.nvidia.cuvs.common; +package com.nvidia.cuvs.internal.common; + +import com.nvidia.cuvs.SearchResults; import java.lang.foreign.MemoryLayout.PathElement; import java.lang.foreign.MemorySegment; @@ -8,12 +10,11 @@ import java.util.List; import java.util.Map; -public abstract class SearchResults { +public abstract class SearchResultsImpl implements SearchResults { protected final List> 
results; protected final List mapping; // TODO: Is this performant in a user application? - protected final SequenceLayout neighboursSequenceLayout; - protected final SequenceLayout distancesSequenceLayout; + protected final MemorySegment neighboursMemorySegment; protected final MemorySegment distancesMemorySegment; protected final int topK; @@ -21,13 +22,11 @@ public abstract class SearchResults { protected final VarHandle neighboursVarHandle; protected final VarHandle distancesVarHandle; - protected SearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout, + protected SearchResultsImpl(SequenceLayout neighboursSequenceLayout, SequenceLayout distancesSequenceLayout, MemorySegment neighboursMemorySegment, MemorySegment distancesMemorySegment, int topK, List mapping, long numberOfQueries) { this.topK = topK; this.numberOfQueries = numberOfQueries; - this.neighboursSequenceLayout = neighboursSequenceLayout; - this.distancesSequenceLayout = distancesSequenceLayout; this.neighboursMemorySegment = neighboursMemorySegment; this.distancesMemorySegment = distancesMemorySegment; this.mapping = mapping; @@ -47,6 +46,7 @@ protected SearchResults(SequenceLayout neighboursSequenceLayout, SequenceLayout * * @return a list of results for each query as a map of neighbor IDs to distance */ + @Override public List> getResults() { return results; } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java new file mode 100644 index 000000000..5bc695ae3 --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.cuvs.internal.common; + +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemoryLayout.PathElement; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.VarHandle; +import java.util.ArrayList; +import java.util.List; + +import com.nvidia.cuvs.GPUInfo; +import com.nvidia.cuvs.internal.panama.GpuInfo; + +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_CHAR; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG; +import static com.nvidia.cuvs.internal.common.LinkerHelper.downcallHandle; +import static java.lang.foreign.ValueLayout.ADDRESS; + +public class Util { + + public static final int CUVS_SUCCESS = 1; + + private static final MethodHandle getGpuInfoMethodHandle = downcallHandle("get_gpu_info", + FunctionDescriptor.ofVoid(ADDRESS, ADDRESS, ADDRESS)); + + private static final MethodHandle getLastErrorTextMethodHandle = downcallHandle("cuvsGetLastErrorText", + FunctionDescriptor.of(ADDRESS)); + + private Util() {} + + /** + * Checks the result value of a native method handle call. 
+ * + * @param value the return value + * @param caller the native method handle that was called + */ + public static void checkError(int value, String caller) { + if (value != CUVS_SUCCESS) { + String errorMsg = getLastErrorText(); + throw new RuntimeException(caller + " returned " + value + "[" + errorMsg + "]"); + } + } + + static final long MAX_ERROR_TEXT = 1_000_000L; + + static String getLastErrorText() { + try { + MemorySegment seg = (MemorySegment) getLastErrorTextMethodHandle.invokeExact(); + if (seg.equals(MemorySegment.NULL)) { + return "no last error text"; + } + return seg.reinterpret(MAX_ERROR_TEXT).getString(0L); + } catch (Throwable t) { + throw new RuntimeException(t); + } + } + + /** + * Get the list of compatible GPUs based on compute capability >= 7.0 and total + * memory >= 8GB + * + * @return a list of compatible GPUs. See {@link GPUInfo} + */ + public static List compatibleGPUs() throws Throwable { + return compatibleGPUs(7.0, 8192); + } + + /** + * Get the list of compatible GPUs based on given compute capability and total + * memory + * + * @param minComputeCapability the minimum compute capability + * @param minDeviceMemoryMB the minimum total available memory in MB + * @return a list of compatible GPUs. 
See {@link GPUInfo} + */ + public static List compatibleGPUs(double minComputeCapability, int minDeviceMemoryMB) throws Throwable { + List compatibleGPUs = new ArrayList(); + double minDeviceMemoryB = Math.pow(2, 20) * minDeviceMemoryMB; + for (GPUInfo gpuInfo : availableGPUs()) { + if (gpuInfo.computeCapability() >= minComputeCapability && gpuInfo.totalMemory() >= minDeviceMemoryB) { + compatibleGPUs.add(gpuInfo); + } + } + return compatibleGPUs; + } + + /** + * Gets all the available GPUs + * + * @return a list of {@link GPUInfo} objects with GPU details + */ + public static List availableGPUs() throws Throwable { + List results = new ArrayList<>(); + try (var localArena = Arena.ofConfined()) { + MemorySegment returnValueMemorySegment = localArena.allocate(C_INT); + MemorySegment numGpuMemorySegment = localArena.allocate(C_INT); + + /* + * Setting a value of 1024 because we cannot predict how much memory to allocate + * before the function is invoked as cudaGetDeviceCount is inside the + * get_gpu_info function. 
+ */ + MemorySegment GpuInfoArrayMemorySegment = GpuInfo.allocateArray(1024, localArena); + getGpuInfoMethodHandle.invokeExact(returnValueMemorySegment, numGpuMemorySegment, GpuInfoArrayMemorySegment); + int numGPUs = numGpuMemorySegment.get(ValueLayout.JAVA_INT, 0); + MemoryLayout ml = MemoryLayout.sequenceLayout(numGPUs, GpuInfo.layout()); + for (int i = 0; i < numGPUs; i++) { + VarHandle gpuIdVarHandle = ml.varHandle(PathElement.sequenceElement(i), PathElement.groupElement("gpu_id")); + VarHandle freeMemoryVarHandle = ml.varHandle(PathElement.sequenceElement(i), + PathElement.groupElement("free_memory")); + VarHandle totalMemoryVarHandle = ml.varHandle(PathElement.sequenceElement(i), + PathElement.groupElement("total_memory")); + VarHandle ComputeCapabilityVarHandle = ml.varHandle(PathElement.sequenceElement(i), + PathElement.groupElement("compute_capability")); + StringBuilder gpuName = new StringBuilder(); + char b = 1; + int p = 0; + while (b != 0x00) { + VarHandle gpuNameVarHandle = ml.varHandle(PathElement.sequenceElement(i), PathElement.groupElement("name"), + PathElement.sequenceElement(p++)); + b = (char) (byte) gpuNameVarHandle.get(GpuInfoArrayMemorySegment, 0L); + gpuName.append(b); + } + results.add(new GPUInfo((int) gpuIdVarHandle.get(GpuInfoArrayMemorySegment, 0L), gpuName.toString().trim(), + (long) freeMemoryVarHandle.get(GpuInfoArrayMemorySegment, 0L), + (long) totalMemoryVarHandle.get(GpuInfoArrayMemorySegment, 0L), + (float) ComputeCapabilityVarHandle.get(GpuInfoArrayMemorySegment, 0L))); + } + return results; + } + } + + /** + * A utility method for getting an instance of {@link MemorySegment} for a + * {@link String}. 
+ * + * @param str the string for the expected {@link MemorySegment} + * @return an instance of {@link MemorySegment} + */ + public static MemorySegment buildMemorySegment(Arena arena, String str) { + StringBuilder sb = new StringBuilder(str).append('\0'); + MemoryLayout stringMemoryLayout = MemoryLayout.sequenceLayout(sb.length(), C_CHAR); + MemorySegment stringMemorySegment = arena.allocate(stringMemoryLayout); + + for (int i = 0; i < sb.length(); i++) { + VarHandle varHandle = stringMemoryLayout.varHandle(PathElement.sequenceElement(i)); + varHandle.set(stringMemorySegment, 0L, (byte) sb.charAt(i)); + } + return stringMemorySegment; + } + + /** + * A utility method for building a {@link MemorySegment} for a 1D long array. + * + * @param data The 1D long array for which the {@link MemorySegment} is needed + * @return an instance of {@link MemorySegment} + */ + public static MemorySegment buildMemorySegment(Arena arena, long[] data) { + int cells = data.length; + MemoryLayout dataMemoryLayout = MemoryLayout.sequenceLayout(cells, C_LONG); + MemorySegment dataMemorySegment = arena.allocate(dataMemoryLayout); + MemorySegment.copy(data, 0, dataMemorySegment, C_LONG, 0, cells); + return dataMemorySegment; + } + + /** + * A utility method for building a {@link MemorySegment} for a 2D float array. + * + * @param data The 2D float array for which the {@link MemorySegment} is needed + * @return an instance of {@link MemorySegment} + */ + public static MemorySegment buildMemorySegment(Arena arena, float[][] data) { + long rows = data.length; + long cols = rows > 0 ? 
data[0].length : 0; + MemoryLayout dataMemoryLayout = MemoryLayout.sequenceLayout(rows * cols, C_FLOAT); + MemorySegment dataMemorySegment = arena.allocate(dataMemoryLayout); + for (int r = 0; r < rows; r++) { + MemorySegment.copy(data[r], 0, dataMemorySegment, C_FLOAT, (r * cols * C_FLOAT.byteSize()), + (int) cols); + } + return dataMemorySegment; + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/BruteForceH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/BruteForceH.java index 16603606b..0e34eb2cf 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/BruteForceH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/BruteForceH.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CagraH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CagraH.java index 88ddeb6fe..82ecbdc34 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CagraH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CagraH.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSBruteForceIndex.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSBruteForceIndex.java index 8bf29027e..0a1ac29e2 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSBruteForceIndex.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSBruteForceIndex.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraCompressionParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraCompressionParams.java index 1fe8eca76..923287fe4 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraCompressionParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraCompressionParams.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndex.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndex.java index ec15f6729..7ea435ba0 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndex.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndex.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndexParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndexParams.java index 1823eacfe..ffe88effa 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraIndexParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraIndexParams.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraSearchParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraSearchParams.java index ec59284d9..2a1efbfd7 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSCagraSearchParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSCagraSearchParams.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSFilter.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSFilter.java index 9385660dc..dceaca994 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSFilter.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSFilter.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswExtendParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswExtendParams.java index 8d750d02e..83358e233 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswExtendParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswExtendParams.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndex.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndex.java index 1f879462b..aa12e4400 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndex.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndex.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndexParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndexParams.java index 3e6e45a09..d6b06872f 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswIndexParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswIndexParams.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswSearchParams.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswSearchParams.java index 4a5941a3c..ff8329a57 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/CuVSHnswSearchParams.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/CuVSHnswSearchParams.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDataType.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDataType.java index c7c588676..aaa2f4d2d 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDataType.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDataType.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDevice.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDevice.java index a0d5d89ea..ceadb6a69 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLDevice.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLDevice.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensor.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensor.java index 71bba9fe5..c28dd71c3 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensor.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensor.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensorVersioned.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensorVersioned.java index efdcf9043..f631140e2 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLManagedTensorVersioned.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLManagedTensorVersioned.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLPackVersion.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLPackVersion.java index bc8050766..c65fdd887 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLPackVersion.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLPackVersion.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLTensor.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLTensor.java index 63082b35a..21d928905 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DLTensor.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DLTensor.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DistanceH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DistanceH.java index 3949e1089..b98f95ae2 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DistanceH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DistanceH.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import java.lang.invoke.*; import java.lang.foreign.*; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DlpackH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DlpackH.java index d459dd3bf..6ef78336f 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/DlpackH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/DlpackH.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/Fsidt.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/Fsidt.java index 07f2849a5..fb5f82ed1 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/Fsidt.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/Fsidt.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; import static java.lang.foreign.MemoryLayout.PathElement.sequenceElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfo.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfo.java index c1d13575c..eb8aba223 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfo.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfo.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; import static java.lang.foreign.MemoryLayout.PathElement.sequenceElement; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfoH.java similarity index 98% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfoH.java index ad37e37ec..0457d50be 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuInfoH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/GpuInfoH.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/HnswH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/HnswH.java index 32945c432..b6945030b 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/HnswH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/HnswH.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfFlatH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfFlatH.java index 47353cc9e..32cdeb813 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfFlatH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfFlatH.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfPqH.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfPqH.java index d5f1e3cee..66d0ba5ac 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/IvfPqH.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/IvfPqH.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.ValueLayout.JAVA_BYTE; diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/MaxAlignT.java similarity index 99% rename from java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java rename to java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/MaxAlignT.java index 1216df6f7..d07c524db 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/MaxAlignT.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/panama/MaxAlignT.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.nvidia.cuvs.panama; +package com.nvidia.cuvs.internal.panama; import static java.lang.foreign.MemoryLayout.PathElement.groupElement; diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/spi/JDKProvider.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/spi/JDKProvider.java new file mode 100644 index 000000000..32f0225de --- /dev/null +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/spi/JDKProvider.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.cuvs.spi; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.HnswIndex; +import com.nvidia.cuvs.internal.BruteForceIndexImpl; +import com.nvidia.cuvs.internal.CagraIndexImpl; +import com.nvidia.cuvs.internal.CuVSResourcesImpl; +import com.nvidia.cuvs.internal.HnswIndexImpl; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; + +final class JDKProvider implements CuVSProvider { + + @Override + public CuVSResources newCuVSResources(Path tempDirectory) throws Throwable { + Objects.requireNonNull(tempDirectory); + if (Files.notExists(tempDirectory)) { + throw new IllegalArgumentException("does not exist:" + tempDirectory); + } + if (!Files.isDirectory(tempDirectory)) { + throw new IllegalArgumentException("not a directory:" + tempDirectory); + } + return new CuVSResourcesImpl(tempDirectory); + } + + @Override + public BruteForceIndex.Builder newBruteForceIndexBuilder(CuVSResources cuVSResources) { + return BruteForceIndexImpl.newBuilder(Objects.requireNonNull(cuVSResources)); + } + + @Override + public CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) { + return CagraIndexImpl.newBuilder(Objects.requireNonNull(cuVSResources)); + } + + @Override + public HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) { + return HnswIndexImpl.newBuilder(Objects.requireNonNull(cuVSResources)); + } +} diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java similarity index 88% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java index 91e6825bc..3a1ea25dd 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchTest.java +++ 
b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceAndSearchIT.java @@ -16,8 +16,6 @@ package com.nvidia.cuvs; -import static org.junit.Assert.assertEquals; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -28,16 +26,23 @@ import java.util.Map; import java.util.UUID; +import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.nvidia.cuvs.common.SearchResults; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; +import static org.junit.Assert.assertEquals; -public class BruteForceAndSearchTest { +public class BruteForceAndSearchIT extends CuVSTestCase{ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @Before + public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); + } + /** * A basic test that checks the whole flow - from indexing to search. * @@ -71,7 +76,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { for (int j = 0; j < 10; j++) { - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Create a query object with the query vectors BruteForceQuery cuvsQuery = new BruteForceQuery.Builder() @@ -86,7 +91,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { .build(); // Create the index with the dataset - BruteForceIndex index = new BruteForceIndex.Builder(resources) + BruteForceIndex index = BruteForceIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -98,7 +103,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { // Loading a BRUTEFORCE index from disk. 
File indexFile = new File(indexFileName); InputStream inputStream = new FileInputStream(indexFile); - BruteForceIndex loadedIndex = new BruteForceIndex.Builder(resources) + BruteForceIndex loadedIndex = BruteForceIndex.newBuilder(resources) .from(inputStream) .build(); diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java similarity index 75% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java index 64d1eb86e..030b3b022 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/BruteForceRandomizedIT.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package com.nvidia.cuvs; import java.lang.invoke.MethodHandles; @@ -11,13 +27,16 @@ import com.carrotsearch.randomizedtesting.RandomizedRunner; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; + @RunWith(RandomizedRunner.class) -public class BruteForceRandomizedTest extends CuVSTestCase { +public class BruteForceRandomizedIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @Before public void setup() { + assumeTrue(isLinuxAmd64()); initializeRandom(); log.info("Random context initialized for test."); } @@ -74,7 +93,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { List> expected = generateExpectedResults(topK, dataset, queries, log); // Create CuVS index and query - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { BruteForceQuery query = new BruteForceQuery.Builder() .withTopK(topK) @@ -85,13 +104,13 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { .withNumWriterThreads(32) .build(); - BruteForceIndex index = new BruteForceIndex.Builder(resources) + BruteForceIndex index = BruteForceIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); log.info("Index built successfully. 
Executing search..."); - BruteForceSearchResults results = index.search(query); + SearchResults results = index.search(query); compareResults(results, expected, topK, datasetSize, numQueries); } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java similarity index 89% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java index 934e60b1c..874c3e525 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchIT.java @@ -16,6 +16,7 @@ package com.nvidia.cuvs; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; import static org.junit.Assert.assertEquals; import java.io.File; @@ -28,18 +29,23 @@ import java.util.Map; import java.util.UUID; +import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo; import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType; -import com.nvidia.cuvs.common.SearchResults; -public class CagraBuildAndSearchTest { +public class CagraBuildAndSearchIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @Before + public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); + } + /** * A basic test that checks the whole flow - from indexing to search. 
* @@ -72,10 +78,10 @@ public void testIndexingAndSearchingFlow() throws Throwable { for (int j = 0; j < 10; j++) { - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) .withGraphDegree(1) .withIntermediateGraphDegree(2) @@ -84,7 +90,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { .build(); // Create the index with the dataset - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -96,7 +102,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { // Loading a CAGRA index from disk. File indexFile = new File(indexFileName); InputStream inputStream = new FileInputStream(indexFile); - CagraIndex loadedIndex = new CagraIndex.Builder(resources) + CagraIndex loadedIndex = CagraIndex.newBuilder(resources) .from(inputStream) .build(); diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java similarity index 64% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java index fc7570133..811866dc6 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedIT.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.nvidia.cuvs; import java.lang.invoke.MethodHandles; @@ -12,13 +28,16 @@ import com.carrotsearch.randomizedtesting.RandomizedRunner; import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; + @RunWith(RandomizedRunner.class) -public class CagraRandomizedTest extends CuVSTestCase { +public class CagraRandomizedIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @Before public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); initializeRandom(); log.info("Random context initialized for test."); } @@ -75,27 +94,31 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { List> expected = generateExpectedResults(topK, dataset, queries, log); // Create CuVS index and query - try (CuVSResources resources = new CuVSResources()) { - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + try (CuVSResources resources = CuVSResources.create()) { + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) .build(); - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); log.info("Index built successfully."); - // Execute search and retrieve results - CagraQuery query = new CagraQuery.Builder() - .withQueryVectors(queries) - .withTopK(topK) - 
.withSearchParams(new CagraSearchParams.Builder(resources) - .build()) - .build(); - log.info("Query built successfully. Executing search..."); - CagraSearchResults results = index.search(query); - - compareResults(results, expected, topK, datasetSize, numQueries); + try { + // Execute search and retrieve results + CagraQuery query = new CagraQuery.Builder() + .withQueryVectors(queries) + .withTopK(topK) + .withSearchParams(new CagraSearchParams.Builder(resources) + .build()) + .build(); + log.info("Query built successfully. Executing search..."); + SearchResults results = index.search(query); + + compareResults(results, expected, topK, datasetSize, numQueries); + } finally { + index.destroyIndex(); + } } } } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java index bd50dc759..8a7d84a19 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CuVSTestCase.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package com.nvidia.cuvs; import static org.junit.Assert.assertEquals; @@ -14,7 +30,6 @@ import org.slf4j.LoggerFactory; import com.carrotsearch.randomizedtesting.RandomizedContext; -import com.nvidia.cuvs.common.SearchResults; public abstract class CuVSTestCase { protected Random random; @@ -85,4 +100,9 @@ protected void compareResults(SearchResults results, List> expecte } } } + + protected static boolean isLinuxAmd64() { + String name = System.getProperty("os.name"); + return (name.startsWith("Linux")) && System.getProperty("os.arch").equals("amd64"); + } } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchIT.java similarity index 87% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchIT.java index 712e7edf9..eb4933cf9 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswBuildAndSearchIT.java @@ -16,6 +16,7 @@ package com.nvidia.cuvs; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; import static org.junit.Assert.assertEquals; import java.io.File; @@ -28,6 +29,7 @@ import java.util.Map; import java.util.UUID; +import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,10 +37,15 @@ import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo; import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType; -public class HnswBuildAndSearchTest { +public class HnswBuildAndSearchIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @Before + public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); + } + /** * A basic test that checks the whole flow - from indexing to search. 
* @@ -72,10 +79,10 @@ public void testIndexingAndSearchingFlow() throws Throwable { for (int j = 0; j < 10; j++) { - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.IVF_PQ) .withGraphDegree(64) .withIntermediateGraphDegree(128) @@ -84,7 +91,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { .build(); // Create the index with the dataset - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -93,18 +100,18 @@ public void testIndexingAndSearchingFlow() throws Throwable { String hnswIndexFileName = UUID.randomUUID().toString() + ".hnsw"; index.serializeToHNSW(new FileOutputStream(hnswIndexFileName)); - HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder(resources) + HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder() .withVectorDimension(2) .build(); InputStream inputStreamHNSW = new FileInputStream(hnswIndexFileName); File hnswIndexFile = new File(hnswIndexFileName); - HnswIndex hnswIndex = new HnswIndex.Builder(resources) + HnswIndex hnswIndex = HnswIndex.newBuilder(resources) .from(inputStreamHNSW) .withIndexParams(hnswIndexParams) .build(); - HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources) + HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder() .build(); HnswQuery hnswQuery = new HnswQuery.Builder() @@ -114,7 +121,7 @@ public void testIndexingAndSearchingFlow() throws Throwable { .withTopK(3) .build(); - HnswSearchResults results = hnswIndex.search(hnswQuery); + SearchResults results = hnswIndex.search(hnswQuery); // Check results log.info(results.getResults().toString()); diff --git 
a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java similarity index 79% rename from java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java rename to java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java index c292309f7..6d367efc4 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/HnswRandomizedIT.java @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package com.nvidia.cuvs; import java.io.File; @@ -18,13 +34,16 @@ import com.nvidia.cuvs.CagraIndexParams.CagraGraphBuildAlgo; import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType; +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; + @RunWith(RandomizedRunner.class) -public class HnswRandomizedTest extends CuVSTestCase { +public class HnswRandomizedIT extends CuVSTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @Before public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); initializeRandom(); log.info("Random context initialized for test."); } @@ -81,10 +100,10 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { List> expected = generateExpectedResults(topK, dataset, queries, log); // Create CuVS index and query - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) .withGraphDegree(64) .withIntermediateGraphDegree(128) @@ -93,27 +112,27 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { .build(); // Create the index with the dataset - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); // Saving the HNSW index on to the disk. 
String hnswIndexFileName = UUID.randomUUID().toString() + ".hnsw"; - index.serializeToHNSW(new FileOutputStream(hnswIndexFileName)); + index.serializeToHNSW(new FileOutputStream(hnswIndexFileName)); // fails here - HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder(resources) + HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder() .withVectorDimension(dimensions) .build(); InputStream inputStreamHNSW = new FileInputStream(hnswIndexFileName); File hnswIndexFile = new File(hnswIndexFileName); - HnswIndex hnswIndex = new HnswIndex.Builder(resources) + HnswIndex hnswIndex = HnswIndex.newBuilder(resources) .from(inputStreamHNSW) .withIndexParams(hnswIndexParams) .build(); - HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources) + HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder() .withNumThreads(32) .build(); @@ -124,7 +143,7 @@ private void tmpResultsTopKWithRandomValues() throws Throwable { .build(); log.info("Index built successfully. 
Executing search..."); - HnswSearchResults results = hnswIndex.search(hnswQuery); + SearchResults results = hnswIndex.search(hnswQuery); if (hnswIndexFile.exists()) { hnswIndexFile.delete(); diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/internal/common/UtilIT.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/internal/common/UtilIT.java new file mode 100644 index 000000000..a86d1154c --- /dev/null +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/internal/common/UtilIT.java @@ -0,0 +1,37 @@ +package com.nvidia.cuvs.internal.common; + +import com.nvidia.cuvs.CuVSTestCase; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; + +import static com.carrotsearch.randomizedtesting.RandomizedTest.assumeTrue; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; + +public class UtilIT extends CuVSTestCase { + + private static final Logger log = LoggerFactory.getLogger(UtilIT.class); + + @Before + public void setup() { + assumeTrue("not supported on " + System.getProperty("os.name"), isLinuxAmd64()); + } + + @Test + public void testGetLastErrorText() throws Throwable { + var cls = Class.forName("com.nvidia.cuvs.internal.common.Util"); + var lookup = MethodHandles.lookup(); + var mt = MethodType.methodType(String.class); + var mh = lookup.findStatic(cls, "getLastErrorText", mt); + + // first, ensures that accessing the error text when there is none does not crash! 
+ String errorText = (String) mh.invoke(); + // second, ensures that the default text is returned + assertThat(errorText, equalTo("no last error text")); + } +} diff --git a/java/examples/pom.xml b/java/examples/pom.xml index 58ebef84e..90b51743c 100644 --- a/java/examples/pom.xml +++ b/java/examples/pom.xml @@ -77,6 +77,7 @@ maven-assembly-plugin + 3.4.2 make-jar-with-dependencies @@ -90,9 +91,10 @@ - - com.nvidia.cuvs.examples.CagraExample - + + true + com.nvidia.cuvs.examples.CagraExample + jar-with-dependencies diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java index 5f72d92fc..c85912941 100644 --- a/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java +++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/BruteForceExample.java @@ -7,6 +7,7 @@ import java.lang.invoke.MethodHandles; import java.util.UUID; +import com.nvidia.cuvs.SearchResults; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,7 +15,6 @@ import com.nvidia.cuvs.BruteForceIndexParams; import com.nvidia.cuvs.BruteForceQuery; import com.nvidia.cuvs.CuVSResources; -import com.nvidia.cuvs.common.SearchResults; public class BruteForceExample { @@ -37,7 +37,7 @@ public static void main(String[] args) throws Throwable { { 0.05198065f, 0.5789965f } }; - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Create a query object with the query vectors BruteForceQuery cuvsQuery = new BruteForceQuery.Builder() @@ -50,7 +50,7 @@ public static void main(String[] args) throws Throwable { .build(); // Create the index with the dataset - BruteForceIndex index = new BruteForceIndex.Builder(resources) + BruteForceIndex index = BruteForceIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -62,7 +62,7 @@ public static void main(String[] args) throws Throwable { 
// Loading a BRUTEFORCE index from disk. File indexFile = new File(indexFileName); InputStream inputStream = new FileInputStream(indexFile); - BruteForceIndex loadedIndex = new BruteForceIndex.Builder(resources) + BruteForceIndex loadedIndex = BruteForceIndex.newBuilder(resources) .from(inputStream) .build(); diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java index f561ce69d..9dabcc6bc 100644 --- a/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java +++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/CagraExample.java @@ -7,6 +7,7 @@ import java.lang.invoke.MethodHandles; import java.util.UUID; +import com.nvidia.cuvs.SearchResults; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,7 +18,6 @@ import com.nvidia.cuvs.CagraQuery; import com.nvidia.cuvs.CagraSearchParams; import com.nvidia.cuvs.CuVSResources; -import com.nvidia.cuvs.common.SearchResults; public class CagraExample { @@ -40,10 +40,10 @@ public static void main(String[] args) throws Throwable { { 0.05198065f, 0.5789965f } }; - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.NN_DESCENT) .withGraphDegree(1) .withIntermediateGraphDegree(2) @@ -51,7 +51,7 @@ public static void main(String[] args) throws Throwable { .build(); // Create the index with the dataset - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -63,7 +63,7 @@ public static void main(String[] args) throws Throwable { // Loading a CAGRA index from disk. 
File indexFile = new File(indexFileName); InputStream inputStream = new FileInputStream(indexFile); - CagraIndex loadedIndex = new CagraIndex.Builder(resources) + CagraIndex loadedIndex = CagraIndex.newBuilder(resources) .from(inputStream) .build(); diff --git a/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java b/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java index 0f13be3bc..7fbbccf64 100644 --- a/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java +++ b/java/examples/src/main/java/com/nvidia/cuvs/examples/HnswExample.java @@ -7,6 +7,7 @@ import java.lang.invoke.MethodHandles; import java.util.UUID; +import com.nvidia.cuvs.SearchResults; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -19,7 +20,6 @@ import com.nvidia.cuvs.HnswIndexParams; import com.nvidia.cuvs.HnswQuery; import com.nvidia.cuvs.HnswSearchParams; -import com.nvidia.cuvs.HnswSearchResults; public class HnswExample { @@ -42,10 +42,10 @@ public static void main(String[] args) throws Throwable { { 0.05198065f, 0.5789965f } }; - try (CuVSResources resources = new CuVSResources()) { + try (CuVSResources resources = CuVSResources.create()) { // Configure index parameters - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources) + CagraIndexParams indexParams = new CagraIndexParams.Builder() .withCagraGraphBuildAlgo(CagraGraphBuildAlgo.IVF_PQ) .withGraphDegree(64) .withIntermediateGraphDegree(128) @@ -54,7 +54,7 @@ public static void main(String[] args) throws Throwable { .build(); // Create the index with the dataset - CagraIndex index = new CagraIndex.Builder(resources) + CagraIndex index = CagraIndex.newBuilder(resources) .withDataset(dataset) .withIndexParams(indexParams) .build(); @@ -63,18 +63,18 @@ public static void main(String[] args) throws Throwable { String hnswIndexFileName = UUID.randomUUID().toString() + ".hnsw"; index.serializeToHNSW(new FileOutputStream(hnswIndexFileName)); - HnswIndexParams 
hnswIndexParams = new HnswIndexParams.Builder(resources) + HnswIndexParams hnswIndexParams = new HnswIndexParams.Builder() .withVectorDimension(2) .build(); InputStream inputStreamHNSW = new FileInputStream(hnswIndexFileName); File hnswIndexFile = new File(hnswIndexFileName); - HnswIndex hnswIndex = new HnswIndex.Builder(resources) + HnswIndex hnswIndex = HnswIndex.newBuilder(resources) .from(inputStreamHNSW) .withIndexParams(hnswIndexParams) .build(); - HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder(resources) + HnswSearchParams hnswSearchParams = new HnswSearchParams.Builder() .build(); HnswQuery hnswQuery = new HnswQuery.Builder() @@ -83,7 +83,7 @@ public static void main(String[] args) throws Throwable { .withTopK(3) .build(); - HnswSearchResults results = hnswIndex.search(hnswQuery); + SearchResults results = hnswIndex.search(hnswQuery); // Check results log.info(results.getResults().toString()); From bf86cde5000946e285c58f098dee2f2579f9c5fa Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 5 Feb 2025 18:54:51 -0800 Subject: [PATCH 03/12] Expose binary quantizer to C and Python (#660) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/660 --- cpp/CMakeLists.txt | 1 + cpp/include/cuvs/neighbors/cagra.h | 4 +- .../cuvs/preprocessing/quantize/binary.h | 44 +++++++++ cpp/src/neighbors/cagra_c.cpp | 8 +- cpp/src/preprocessing/quantize/binary_c.cpp | 76 +++++++++++++++ python/cuvs/cuvs/distance/distance.pyx | 1 + python/cuvs/cuvs/distance_type.pxd | 1 + python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 1 + python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 11 ++- python/cuvs/cuvs/preprocessing/CMakeLists.txt | 1 + .../cuvs/preprocessing/quantize/__init__.py | 18 ++++ .../quantize/binary/CMakeLists.txt | 28 ++++++ .../quantize/binary/__init__.pxd | 0 .../preprocessing/quantize/binary/__init__.py | 17 ++++ .../preprocessing/quantize/binary/binary.pxd | 25 +++++ .../preprocessing/quantize/binary/binary.pyx | 94 +++++++++++++++++++ .../preprocessing/quantize/scalar/__init__.py | 8 ++ .../cuvs/cuvs/tests/test_binary_quantizer.py | 52 ++++++++++ python/cuvs/cuvs/tests/test_doctests.py | 4 +- 19 files changed, 389 insertions(+), 5 deletions(-) create mode 100644 cpp/include/cuvs/preprocessing/quantize/binary.h create mode 100644 cpp/src/preprocessing/quantize/binary_c.cpp create mode 100644 python/cuvs/cuvs/preprocessing/quantize/binary/CMakeLists.txt create mode 100644 python/cuvs/cuvs/preprocessing/quantize/binary/__init__.pxd create mode 100644 python/cuvs/cuvs/preprocessing/quantize/binary/__init__.py create mode 100644 python/cuvs/cuvs/preprocessing/quantize/binary/binary.pxd create mode 100644 python/cuvs/cuvs/preprocessing/quantize/binary/binary.pyx create mode 100644 python/cuvs/cuvs/tests/test_binary_quantizer.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fb33c1ab7..b324c6786 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -693,6 +693,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB $<$:src/neighbors/hnsw_c.cpp> src/neighbors/nn_descent_c.cpp src/neighbors/refine/refine_c.cpp + 
src/preprocessing/quantize/binary_c.cpp src/preprocessing/quantize/scalar_c.cpp src/distance/pairwise_distance_c.cpp ) diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index d6d757e0e..f3dc0d5c2 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -42,7 +42,9 @@ enum cuvsCagraGraphBuildAlgo { /* Use IVF-PQ to build all-neighbors knn graph */ IVF_PQ, /* Experimental, use NN-Descent to build all-neighbors knn graph */ - NN_DESCENT + NN_DESCENT, + /* Experimental, use iterative cagra search and optimize to build the knn graph */ + ITERATIVE_CAGRA_SEARCH }; /** Parameters for VPQ compression. */ diff --git a/cpp/include/cuvs/preprocessing/quantize/binary.h b/cpp/include/cuvs/preprocessing/quantize/binary.h new file mode 100644 index 000000000..28ef6f0a8 --- /dev/null +++ b/cpp/include/cuvs/preprocessing/quantize/binary.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Applies binary quantization transform to the given dataset + * + * This applies binary quantization to a dataset, changing any positive + * values to a bitwise 1. This is useful for searching with the + * BitwiseHamming distance type. 
+ * + * @param[in] res raft resource + * @param[in] dataset a row-major host or device matrix to transform + * @param[out] out a row-major host or device matrix to store transformed data + */ +cuvsError_t cuvsBinaryQuantizerTransform(cuvsResources_t res, + DLManagedTensor* dataset, + DLManagedTensor* out); + +#ifdef __cplusplus +} +#endif diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index f2c09f904..9b86072ef 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -55,13 +55,19 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extent); break; } - case cuvsCagraGraphBuildAlgo::NN_DESCENT: + case cuvsCagraGraphBuildAlgo::NN_DESCENT: { cuvs::neighbors::cagra::graph_build_params::nn_descent_params nn_descent_params{}; nn_descent_params = cuvs::neighbors::nn_descent::index_params(index_params.intermediate_graph_degree); nn_descent_params.max_iterations = params.nn_descent_niter; index_params.graph_build_params = nn_descent_params; break; + } + case cuvsCagraGraphBuildAlgo::ITERATIVE_CAGRA_SEARCH: { + cuvs::neighbors::cagra::graph_build_params::iterative_search_params p; + index_params.graph_build_params = p; + break; + } }; if (auto* cparams = params.compression; cparams != nullptr) { diff --git a/cpp/src/preprocessing/quantize/binary_c.cpp b/cpp/src/preprocessing/quantize/binary_c.cpp new file mode 100644 index 000000000..1045f06b3 --- /dev/null +++ b/cpp/src/preprocessing/quantize/binary_c.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +template +void _transform(cuvsResources_t res, DLManagedTensor* dataset_tensor, DLManagedTensor* out_tensor) +{ + auto res_ptr = reinterpret_cast(res); + + auto dataset = dataset_tensor->dl_tensor; + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + using out_mdspan_type = raft::device_matrix_view; + + cuvs::preprocessing::quantize::binary::transform( + *res_ptr, + cuvs::core::from_dlpack(dataset_tensor), + cuvs::core::from_dlpack(out_tensor)); + + } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { + using mdspan_type = raft::host_matrix_view; + using out_mdspan_type = raft::host_matrix_view; + + cuvs::preprocessing::quantize::binary::transform( + *res_ptr, + cuvs::core::from_dlpack(dataset_tensor), + cuvs::core::from_dlpack(out_tensor)); + } else { + RAFT_FAIL("dataset must be accessible on host or device memory"); + } +} + +} // namespace + +extern "C" cuvsError_t cuvsBinaryQuantizerTransform(cuvsResources_t res, + DLManagedTensor* dataset_tensor, + DLManagedTensor* out_tensor) +{ + return cuvs::core::translate_exceptions([=] { + auto dataset = dataset_tensor->dl_tensor; + if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { + _transform(res, dataset_tensor, out_tensor); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) { + _transform(res, dataset_tensor, out_tensor); + } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) { + 
_transform(res, dataset_tensor, out_tensor); + } else { + RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + }); +} diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index 6b80d43b2..d6222b59a 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -46,6 +46,7 @@ DISTANCE_TYPES = { "minkowski": cuvsDistanceType.LpUnexpanded, "russellrao": cuvsDistanceType.RusselRaoExpanded, "dice": cuvsDistanceType.DiceExpanded, + "bitwise_hamming": cuvsDistanceType.BitwiseHamming } DISTANCE_NAMES = {v: k for k, v in DISTANCE_TYPES.items()} diff --git a/python/cuvs/cuvs/distance_type.pxd b/python/cuvs/cuvs/distance_type.pxd index e3cc54568..078745012 100644 --- a/python/cuvs/cuvs/distance_type.pxd +++ b/python/cuvs/cuvs/distance_type.pxd @@ -38,4 +38,5 @@ cdef extern from "cuvs/distance/distance.h" nogil: KLDivergence RusselRaoExpanded DiceExpanded + BitwiseHamming Precomputed diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index fba7e3d1e..41d74dbc7 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -37,6 +37,7 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: ctypedef enum cuvsCagraGraphBuildAlgo: IVF_PQ NN_DESCENT + ITERATIVE_CAGRA_SEARCH ctypedef struct cuvsCagraCompressionParams: uint32_t pq_bits diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 1853292e9..56a7c061b 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -145,11 +145,14 @@ cdef class IndexParams: build_algo: string denoting the graph building algorithm to use, \ default = "ivf_pq" - Valid values for algo: ["ivf_pq", "nn_descent"], where + Valid values for algo: ["ivf_pq", "nn_descent", + "iterative_cagra_search"], where - ivf_pq will use the 
IVF-PQ algorithm for building the knn graph - nn_descent (experimental) will use the NN-Descent algorithm for building the knn graph. It is expected to be generally faster than ivf_pq. + - iterative_cagra_search will iteratively build the knn graph using + CAGRA's search() and optimize() compression: CompressionParams, optional If compression is desired should be a CompressionParams object. If None compression will be disabled. @@ -184,6 +187,12 @@ cdef class IndexParams: self.params.build_algo = cuvsCagraGraphBuildAlgo.IVF_PQ elif build_algo == "nn_descent": self.params.build_algo = cuvsCagraGraphBuildAlgo.NN_DESCENT + elif build_algo == "iterative_cagra_search": + self.params.build_algo = \ + cuvsCagraGraphBuildAlgo.ITERATIVE_CAGRA_SEARCH + else: + raise ValueError(f"Unknown build_algo '{build_algo}'") + self.params.nn_descent_niter = nn_descent_niter if compression is not None: self.compression = compression diff --git a/python/cuvs/cuvs/preprocessing/CMakeLists.txt b/python/cuvs/cuvs/preprocessing/CMakeLists.txt index be31760f9..403dab0cb 100644 --- a/python/cuvs/cuvs/preprocessing/CMakeLists.txt +++ b/python/cuvs/cuvs/preprocessing/CMakeLists.txt @@ -12,4 +12,5 @@ # the License. # ============================================================================= +add_subdirectory(quantize/binary) add_subdirectory(quantize/scalar) diff --git a/python/cuvs/cuvs/preprocessing/quantize/__init__.py b/python/cuvs/cuvs/preprocessing/quantize/__init__.py index e69de29bb..1fa2cbcf0 100644 --- a/python/cuvs/cuvs/preprocessing/quantize/__init__.py +++ b/python/cuvs/cuvs/preprocessing/quantize/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from cuvs.preprocessing.quantize import binary, scalar + +__all__ = ["binary", "scalar"] diff --git a/python/cuvs/cuvs/preprocessing/quantize/binary/CMakeLists.txt b/python/cuvs/cuvs/preprocessing/quantize/binary/CMakeLists.txt new file mode 100644 index 000000000..5e2d3bde3 --- /dev/null +++ b/python/cuvs/cuvs/preprocessing/quantize/binary/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources binary.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX + preprocessing_quantize_scalar_ +) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/preprocessing/quantize/binary/__init__.pxd b/python/cuvs/cuvs/preprocessing/quantize/binary/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/preprocessing/quantize/binary/__init__.py b/python/cuvs/cuvs/preprocessing/quantize/binary/__init__.py new file mode 100644 index 000000000..ce1a9ffee --- /dev/null +++ b/python/cuvs/cuvs/preprocessing/quantize/binary/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .binary import transform + +__all__ = ["transform"] diff --git a/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pxd b/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pxd new file mode 100644 index 000000000..f00b308f5 --- /dev/null +++ b/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pxd @@ -0,0 +1,25 @@ +# +# Copyright (c) 2025, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor + + +cdef extern from "cuvs/preprocessing/quantize/binary.h" nogil: + cuvsError_t cuvsBinaryQuantizerTransform(cuvsResources_t res, + DLManagedTensor* dataset, + DLManagedTensor* out) diff --git a/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pyx b/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pyx new file mode 100644 index 000000000..eb776646f --- /dev/null +++ b/python/cuvs/cuvs/preprocessing/quantize/binary/binary.pyx @@ -0,0 +1,94 @@ +# +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +import numpy as np + +from cuvs.common cimport cydlpack + +from pylibraft.common import auto_convert_output, device_ndarray +from pylibraft.common.cai_wrapper import wrap_array + +from cuvs.common.exceptions import check_cuvs +from cuvs.common.resources import auto_sync_resources +from cuvs.neighbors.common import _check_input_array + + +@auto_sync_resources +@auto_convert_output +def transform(dataset, output=None, resources=None): + """ + Applies binary quantization transform to given dataset + + This applies binary quantization to a dataset, changing any positive + values to a bitwise 1. This is useful for searching with the + BitwiseHamming distance type. + + Parameters + ---------- + dataset : row major host or device dataset to transform + output : optional preallocated output memory, on host or device memory + {resources_docstring} + + Returns + ------- + output : transformed dataset quantized into a uint8 + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.preprocessing.quantize import binary + >>> from cuvs.neighbors import cagra + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.standard_normal((n_samples, n_features), + ... dtype=cp.float32) + >>> transformed = binary.transform(dataset) + >>> + >>> # build a cagra index on the binarized data + >>> params = cagra.IndexParams(metric="bitwise_hamming", + ... 
build_algo="iterative_cagra_search") + >>> idx = cagra.build(params, transformed) + """ + + dataset_ai = wrap_array(dataset) + + _check_input_array(dataset_ai, + [np.dtype("float32"), + np.dtype("float64"), + np.dtype("float16")]) + + if output is None: + on_device = hasattr(dataset, "__cuda_array_interface__") + ndarray = device_ndarray if on_device else np + cols = int(np.ceil(dataset_ai.shape[1] / 8)) + output = ndarray.empty((dataset_ai.shape[0], cols), dtype="uint8") + + output_ai = wrap_array(output) + _check_input_array(output_ai, [np.dtype("uint8")]) + + cdef cuvsResources_t res = resources.get_c_obj() + + cdef cydlpack.DLManagedTensor* dataset_dlpack = \ + cydlpack.dlpack_c(dataset_ai) + cdef cydlpack.DLManagedTensor* output_dlpack = \ + cydlpack.dlpack_c(output_ai) + + check_cuvs(cuvsBinaryQuantizerTransform(res, + dataset_dlpack, + output_dlpack)) + + return output diff --git a/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py b/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py index f24510bab..06b766c21 100644 --- a/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py +++ b/python/cuvs/cuvs/preprocessing/quantize/scalar/__init__.py @@ -19,3 +19,11 @@ train, transform, ) + +__all__ = [ + "Quantizer", + "QuantizerParams", + "inverse_transform", + "train", + "transform", +] diff --git a/python/cuvs/cuvs/tests/test_binary_quantizer.py b/python/cuvs/cuvs/tests/test_binary_quantizer.py new file mode 100644 index 000000000..e9bba0d3f --- /dev/null +++ b/python/cuvs/cuvs/tests/test_binary_quantizer.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from pylibraft.common import device_ndarray + +from cuvs.preprocessing.quantize import binary + + +@pytest.mark.parametrize("n_rows", [50, 100]) +@pytest.mark.parametrize("n_cols", [10, 50]) +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("device_memory", [True, False]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_binary_quantizer(n_rows, n_cols, inplace, device_memory, dtype): + input1 = np.random.random_sample((n_rows, n_cols)).astype(dtype) + + output_cols = int(np.ceil(n_cols / 8)) + output = ( + np.zeros((n_rows, output_cols), dtype="uint8") if inplace else None + ) + + input1_device = device_ndarray(input1) + output_device = device_ndarray(output) if inplace else None + + transformed = binary.transform( + input1_device if device_memory else input1, + output=(output_device if device_memory else output) + if inplace + else None, + ) + if device_memory: + actual = transformed if not inplace else output_device + actual = actual.copy_to_host() + else: + actual = transformed if not inplace else output + + expected = np.packbits(input1 > 0, axis=-1, bitorder="little") + assert np.all(actual == expected) diff --git a/python/cuvs/cuvs/tests/test_doctests.py b/python/cuvs/cuvs/tests/test_doctests.py index 381a077fb..a35ef4eca 100644 --- a/python/cuvs/cuvs/tests/test_doctests.py +++ b/python/cuvs/cuvs/tests/test_doctests.py @@ -23,7 +23,7 @@ import cuvs.distance import cuvs.neighbors -import cuvs.preprocessing.quantize.scalar +import 
cuvs.preprocessing.quantize # Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa @@ -97,7 +97,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.ivf_flat)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.common)) DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.distance)) -DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.preprocessing.quantize.scalar)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.preprocessing.quantize)) def _test_name_from_docstring(docstring): From 47f53686be9121c19a74e952b4fd9cc19a0204d4 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 6 Feb 2025 00:07:28 -0500 Subject: [PATCH 04/12] HNSW GPU hierarchy (#616) Authors: - Divye Gala (https://github.com/divyegala) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/616 --- cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu | 2 + cpp/include/cuvs/neighbors/hnsw.h | 13 +- cpp/include/cuvs/neighbors/hnsw.hpp | 8 +- cpp/src/neighbors/detail/hnsw.hpp | 245 +++++++++++++++++-- python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd | 1 + python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx | 13 +- python/cuvs/cuvs/tests/test_hnsw.py | 9 +- 7 files changed, 261 insertions(+), 30 deletions(-) diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu index e45a3bd5a..ed6c7b20f 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu @@ -33,6 +33,8 @@ void parse_build_param(const nlohmann::json& conf, param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE; } else if (conf.at("hierarchy") == "cpu") { param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU; + } else if (conf.at("hierarchy") == "gpu") { + 
param.hnsw_index_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::GPU; } else { THROW("Invalid value for hierarchy: %s", conf.at("hierarchy").get().c_str()); } diff --git a/cpp/include/cuvs/neighbors/hnsw.h b/cpp/include/cuvs/neighbors/hnsw.h index d88fd3b4e..61426e931 100644 --- a/cpp/include/cuvs/neighbors/hnsw.h +++ b/cpp/include/cuvs/neighbors/hnsw.h @@ -42,7 +42,9 @@ enum cuvsHnswHierarchy { /* Flat hierarchy, search is base-layer only */ NONE, /* Full hierarchy is built using the CPU */ - CPU + CPU, + /* Full hierarchy is built using the GPU */ + GPU }; struct cuvsHnswIndexParams { @@ -50,9 +52,12 @@ struct cuvsHnswIndexParams { enum cuvsHnswHierarchy hierarchy; /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/ int ef_construction; - /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` - When the value is 0, the number of threads is automatically determined to the maximum - number of threads available. + /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` or `GPU`. + When the value is 0, the number of threads is automatically determined to the + maximum number of threads available. + NOTE: When hierarchy is `GPU`, while the majority of the work is done on the GPU, + initialization of the HNSW index itself and some other work + is parallelized with the help of CPU threads. 
+ */ int num_threads; }; diff --git a/cpp/include/cuvs/neighbors/hnsw.hpp b/cpp/include/cuvs/neighbors/hnsw.hpp index 750f1f87f..88cbf88b0 100644 --- a/cpp/include/cuvs/neighbors/hnsw.hpp +++ b/cpp/include/cuvs/neighbors/hnsw.hpp @@ -45,7 +45,8 @@ namespace cuvs::neighbors::hnsw { */ enum class HnswHierarchy { NONE, // base-layer-only index - CPU // full index with CPU-built hierarchy + CPU, // full index with CPU-built hierarchy + GPU // full index with GPU-built hierarchy }; struct index_params : cuvs::neighbors::index_params { @@ -53,9 +54,12 @@ struct index_params : cuvs::neighbors::index_params { HnswHierarchy hierarchy = HnswHierarchy::NONE; /** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/ int ef_construction = 200; - /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` + /** Number of host threads to use to construct hierarchy when hierarchy is `CPU` or `GPU`. When the value is 0, the number of threads is automatically determined to the maximum number of threads available. + NOTE: When hierarchy is `GPU`, while the majority of the work is done on the GPU, + initialization of the HNSW index itself and some other work + is parallelized with the help of CPU threads. */ int num_threads = 0; }; diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 07e012349..6ab8631d4 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,11 @@ namespace cuvs::neighbors::hnsw::detail { +// This is needed as hnswlib hardcodes the distance type to float +// or int32_t in certain places. However, we can solve uint8 or int8 +// natively with the patch cuVS applies. We could potentially remove +// all the hardcodes and propagate templates throughout hnswlib, but +// as of now it's not needed.
template struct hnsw_dist_t { using type = void; @@ -136,7 +142,6 @@ std::enable_if_t>> fro const cuvs::neighbors::cagra::index& cagra_index, std::optional> dataset) { - // auto host_dataset = raft::make_host_matrix(dataset.extent(0), dataset.extent(1)); auto host_dataset = raft::make_host_matrix(0, 0); raft::host_matrix_view host_dataset_view( host_dataset.data_handle(), host_dataset.extent(0), host_dataset.extent(1)); @@ -179,24 +184,29 @@ std::enable_if_t>> fro } appr_algo->base_layer_init = true; // reset to true to allow addition of new points - // move cagra graph to host - auto graph = cagra_index.graph(); - auto host_graph = - raft::make_host_matrix(graph.extent(0), graph.extent(1)); - raft::copy(host_graph.data_handle(), - graph.data_handle(), - graph.size(), - raft::resource::get_cuda_stream(res)); - raft::resource::sync_stream(res); + // move cagra graph to host or access it from host if available + auto host_graph_view = cagra_index.graph(); + auto host_graph = raft::make_host_matrix(0, 0); + if (!raft::is_host_accessible(raft::memory_type_from_pointer(host_graph_view.data_handle()))) { + // copy cagra graph to host + host_graph = raft::make_host_matrix(host_graph_view.extent(0), + host_graph_view.extent(1)); + raft::copy(host_graph.data_handle(), + host_graph_view.data_handle(), + host_graph_view.size(), + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + host_graph_view = host_graph.view(); + } // copy cagra graph to hnswlib base layer -#pragma omp parallel for - for (size_t i = 0; i < static_cast(host_graph.extent(0)); ++i) { +#pragma omp parallel for num_threads(num_threads) + for (size_t i = 0; i < static_cast(host_graph_view.extent(0)); ++i) { auto hnsw_internal_id = appr_algo->label_lookup_.find(i)->second; auto ll_i = appr_algo->get_linklist0(hnsw_internal_id); - appr_algo->setListCount(ll_i, host_graph.extent(1)); + appr_algo->setListCount(ll_i, host_graph_view.extent(1)); auto* data = (uint32_t*)(ll_i + 1); - for 
(size_t j = 0; j < static_cast(host_graph.extent(1)); ++j) { + for (size_t j = 0; j < static_cast(host_graph_view.extent(1)); ++j) { auto neighbor_internal_id = appr_algo->label_lookup_.find(host_graph(i, j))->second; data[j] = neighbor_internal_id; } @@ -206,6 +216,202 @@ std::enable_if_t>> fro return hnsw_index; } +template +int initialize_point_in_hnsw(hnswlib::HierarchicalNSW* appr_algo, + raft::host_matrix_view dataset, + int64_t real_index, + int curlevel) +{ + auto cur_c = appr_algo->cur_element_count++; + appr_algo->element_levels_[cur_c] = curlevel; + memset(appr_algo->data_level0_memory_ + cur_c * appr_algo->size_data_per_element_ + + appr_algo->offsetLevel0_, + 0, + appr_algo->size_data_per_element_); + + // Initialisation of the data and label + memcpy(appr_algo->getExternalLabeLp(cur_c), &real_index, sizeof(hnswlib::labeltype)); + memcpy(appr_algo->getDataByInternalId(cur_c), + dataset.data_handle() + real_index * dataset.extent(1), + appr_algo->data_size_); + + if (curlevel) { + appr_algo->linkLists_[cur_c] = (char*)malloc(appr_algo->size_links_per_element_ * curlevel + 1); + if (appr_algo->linkLists_[cur_c] == nullptr) + throw std::runtime_error("Not enough memory: addPoint failed to allocate linklist"); + memset(appr_algo->linkLists_[cur_c], 0, appr_algo->size_links_per_element_ * curlevel + 1); + } + return cur_c; +} + +template +void all_neighbors_graph(raft::resources const& res, + raft::host_matrix_view dataset, + raft::host_matrix_view neighbors, + cuvs::distance::DistanceType metric) +{ + nn_descent::index_params nn_params; + nn_params.graph_degree = neighbors.extent(1); + nn_params.intermediate_graph_degree = neighbors.extent(1) * 2; + nn_params.metric = metric; + nn_params.return_distances = false; + auto nn_index = nn_descent::build(res, nn_params, dataset, neighbors); +} + +template +std::enable_if_t>> from_cagra( + raft::resources const& res, + const index_params& params, + const cuvs::neighbors::cagra::index& cagra_index, + 
std::optional> dataset) +{ + auto host_dataset = raft::make_host_matrix(0, 0); + raft::host_matrix_view host_dataset_view( + host_dataset.data_handle(), host_dataset.extent(0), host_dataset.extent(1)); + if (dataset.has_value()) { + host_dataset_view = dataset.value(); + } else { + // move dataset to host, remove padding + auto cagra_dataset = cagra_index.dataset(); + host_dataset = + raft::make_host_matrix(cagra_dataset.extent(0), cagra_dataset.extent(1)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), + sizeof(T) * host_dataset.extent(1), + cagra_dataset.data_handle(), + sizeof(T) * cagra_dataset.stride(0), + sizeof(T) * host_dataset.extent(1), + cagra_dataset.extent(0), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(res))); + raft::resource::sync_stream(res); + host_dataset_view = host_dataset.view(); + } + + // initialize hnsw index + auto hnsw_index = + std::make_unique>(host_dataset_view.extent(1), cagra_index.metric(), hierarchy); + auto appr_algo = std::make_unique::type>>( + hnsw_index->get_space(), + host_dataset_view.extent(0), + cagra_index.graph().extent(1) / 2, + params.ef_construction); + + // assign a level to each point and initialize the points in hnsw + std::vector levels(host_dataset_view.extent(0)); + std::vector hnsw_internal_ids(host_dataset_view.extent(0)); + + auto num_threads = params.num_threads == 0 ? 
omp_get_max_threads() : params.num_threads; +#pragma omp parallel for num_threads(num_threads) + for (int64_t i = 0; i < host_dataset_view.extent(0); i++) { + levels[i] = appr_algo->getRandomLevel(appr_algo->mult_) + 1; + hnsw_internal_ids[i] = + initialize_point_in_hnsw(appr_algo.get(), host_dataset_view, i, levels[i] - 1); + } + + // sort the points by levels + // build histogram + std::vector hist; + std::vector order(host_dataset_view.extent(0)); + for (int64_t i = 0; i < host_dataset_view.extent(0); i++) { + auto pt_level = levels[i] - 1; + while (pt_level >= hist.size()) + hist.push_back(0); + hist[pt_level]++; + } + + // accumulate + std::vector offsets(hist.size() + 1, 0); + for (size_t i = 0; i < hist.size() - 1; i++) { + offsets[i + 1] = offsets[i] + hist[i]; + } + + // bucket sort + for (int64_t i = 0; i < host_dataset_view.extent(0); i++) { + auto pt_level = levels[i] - 1; + order[offsets[pt_level]++] = i; + } + + // set last point of the highest level as the entry point + appr_algo->enterpoint_node_ = hnsw_internal_ids[order.back()]; + appr_algo->maxlevel_ = hist.size() - 1; + + // iterate over the points in the descending order of their levels + for (size_t pt_level = hist.size() - 1; pt_level >= 1; pt_level--) { + auto start_idx = offsets[pt_level - 1]; + auto end_idx = offsets[hist.size() - 1]; + auto num_pts = end_idx - start_idx; + auto neighbor_size = num_pts > appr_algo->M_ ? 
appr_algo->M_ : num_pts - 1; + if (num_pts <= 1) { + // this means only 1 point in the level + continue; + } + + // gather points from dataset to form query set on host + auto host_query_set = raft::make_host_matrix(num_pts, host_dataset_view.extent(1)); + // TODO: Use `raft::matrix::gather` when available as a public API + // Issue: https://github.com/rapidsai/raft/issues/2572 +#pragma omp parallel for num_threads(num_threads) + for (auto i = start_idx; i < end_idx; i++) { + auto pt_id = order[i]; + std::copy(&host_dataset_view(pt_id, 0), + &host_dataset_view(pt_id + 1, 0), + &host_query_set(i - start_idx, 0)); + } + + // find neighbors of the query set + auto host_neighbors = raft::make_host_matrix(num_pts, neighbor_size); + all_neighbors_graph(res, + raft::make_const_mdspan(host_query_set.view()), + host_neighbors.view(), + cagra_index.metric()); + + // add points to the HNSW index upper layers +#pragma omp parallel for num_threads(num_threads) + for (auto i = start_idx; i < end_idx; i++) { + auto pt_id = order[i]; + auto internal_id = hnsw_internal_ids[pt_id]; + auto ll_cur = appr_algo->get_linklist(internal_id, pt_level); + appr_algo->setListCount(ll_cur, host_neighbors.extent(1)); + auto* data = (uint32_t*)(ll_cur + 1); + auto neighbors = &host_neighbors(i - start_idx, 0); + for (auto j = 0; j < host_neighbors.extent(1); j++) { + auto neighbor_id = order[neighbors[j] + start_idx]; + auto neighbor_internal_id = hnsw_internal_ids[neighbor_id]; + data[j] = neighbor_internal_id; + } + } + } + + // move cagra graph to host or access it from host if available + auto host_graph_view = cagra_index.graph(); + auto host_graph = raft::make_host_matrix(0, 0); + if (!raft::is_host_accessible(raft::memory_type_from_pointer(host_graph_view.data_handle()))) { + // copy cagra graph to host + host_graph = raft::make_host_matrix(host_graph_view.extent(0), + host_graph_view.extent(1)); + raft::copy(host_graph.data_handle(), + host_graph_view.data_handle(), + 
host_graph_view.size(), + raft::resource::get_cuda_stream(res)); + raft::resource::sync_stream(res); + host_graph_view = host_graph.view(); + } + +// copy cagra graph to hnswlib base layer +#pragma omp parallel for num_threads(num_threads) + for (size_t i = 0; i < static_cast(host_graph_view.extent(0)); ++i) { + auto ll_i = appr_algo->get_linklist0(hnsw_internal_ids[i]); + appr_algo->setListCount(ll_i, host_graph_view.extent(1)); + auto* data = (uint32_t*)(ll_i + 1); + for (size_t j = 0; j < static_cast(host_graph_view.extent(1)); ++j) { + data[j] = hnsw_internal_ids[host_graph_view(i, j)]; + } + } + + hnsw_index->set_index(std::move(appr_algo)); + return hnsw_index; +} + template std::unique_ptr> from_cagra( raft::resources const& res, @@ -217,8 +423,9 @@ std::unique_ptr> from_cagra( return from_cagra(res, params, cagra_index, dataset); } else if (params.hierarchy == HnswHierarchy::CPU) { return from_cagra(res, params, cagra_index, dataset); - } - { + } else if (params.hierarchy == HnswHierarchy::GPU) { + return from_cagra(res, params, cagra_index, dataset); + } else { RAFT_FAIL("Unsupported hierarchy type"); } } @@ -269,9 +476,9 @@ void search(raft::resources const& res, raft::host_matrix_view neighbors, raft::host_matrix_view distances) { - RAFT_EXPECTS( - queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), - "Number of rows in output neighbors and distances matrices must equal the number of queries."); + RAFT_EXPECTS(queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of " + "queries."); RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), "Number of columns in output neighbors and distances matrices must equal k"); diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd index e0c517933..8e478bcd2 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd +++ 
b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pxd @@ -28,6 +28,7 @@ cdef extern from "cuvs/neighbors/hnsw.h" nogil: ctypedef enum cuvsHnswHierarchy: NONE CPU + GPU ctypedef struct cuvsHnswIndexParams: cuvsHnswHierarchy hierarchy diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx index 72a3617bd..789a3d93b 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -49,13 +49,18 @@ cdef class IndexParams: The hierarchy of the HNSW index. Valid values are ["none", "cpu"]. - "none": No hierarchy is built. - "cpu": Hierarchy is built using CPU. + - "gpu": Hierarchy is built using GPU. ef_construction : int, default = 200 (optional) Maximum number of candidate list size used during construction when hierarchy is `cpu`. num_threads : int, default = 0 (optional) Number of CPU threads used to increase construction parallelism - when hierarchy is `cpu`. When the value is 0, the number of threads is - automatically determined to the maximum number of threads available. + when hierarchy is `cpu` or `gpu`. When the value is 0, the number of + threads is automatically determined to the maximum number of threads + available. + NOTE: When hierarchy is `gpu`, while the majority of the work is done + on the GPU, initialization of the HNSW index itself and some other + work is parallelized with the help of CPU threads. """ cdef cuvsHnswIndexParams* params @@ -74,6 +79,8 @@ cdef class IndexParams: self.params.hierarchy = cuvsHnswHierarchy.NONE elif hierarchy == "cpu": self.params.hierarchy = cuvsHnswHierarchy.CPU + elif hierarchy == "gpu": + self.params.hierarchy = cuvsHnswHierarchy.GPU else: raise ValueError("Invalid hierarchy type." 
" Valid values are 'none' and 'cpu'.") @@ -86,6 +93,8 @@ cdef class IndexParams: return "none" elif self.params.hierarchy == cuvsHnswHierarchy.CPU: return "cpu" + elif self.params.hierarchy == cuvsHnswHierarchy.GPU: + return "gpu" @property def ef_construction(self): diff --git a/python/cuvs/cuvs/tests/test_hnsw.py b/python/cuvs/cuvs/tests/test_hnsw.py index 23a0920ef..c24cdf8a1 100644 --- a/python/cuvs/cuvs/tests/test_hnsw.py +++ b/python/cuvs/cuvs/tests/test_hnsw.py @@ -85,7 +85,7 @@ def run_hnsw_build_search_test( @pytest.mark.parametrize("num_threads", [2, 4]) @pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -@pytest.mark.parametrize("hierarchy", ["none", "cpu"]) +@pytest.mark.parametrize("hierarchy", ["none", "cpu", "gpu"]) def test_hnsw(dtype, k, ef, num_threads, metric, build_algo, hierarchy): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. 
@@ -111,6 +111,7 @@ def run_hnsw_extend_test( intermediate_graph_degree=128, graph_degree=64, search_params={}, + hierarchy="cpu", ): dataset = generate_data((n_rows, n_cols), dtype) add_dataset = generate_data((add_rows, n_cols), dtype) @@ -135,7 +136,7 @@ def run_hnsw_extend_test( assert index.trained - hnsw_params = hnsw.IndexParams(hierarchy="cpu") + hnsw_params = hnsw.IndexParams(hierarchy=hierarchy) hnsw_index = hnsw.from_cagra(hnsw_params, index) hnsw.extend(hnsw.ExtendParams(), hnsw_index, add_dataset) @@ -167,7 +168,8 @@ def run_hnsw_extend_test( @pytest.mark.parametrize("num_threads", [2, 4]) @pytest.mark.parametrize("metric", ["sqeuclidean"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -def test_hnsw_extend(dtype, k, ef, num_threads, metric, build_algo): +@pytest.mark.parametrize("hierarchy", ["cpu", "gpu"]) +def test_hnsw_extend(dtype, k, ef, num_threads, metric, build_algo, hierarchy): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. run_hnsw_extend_test( @@ -176,4 +178,5 @@ def test_hnsw_extend(dtype, k, ef, num_threads, metric, build_algo): metric=metric, build_algo=build_algo, search_params={"ef": ef, "num_threads": num_threads}, + hierarchy=hierarchy, ) From 97bd78d03dd8d47b0c44dff1872fbdc265050c9a Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 6 Feb 2025 15:17:05 +0100 Subject: [PATCH 05/12] Fix ann-bench deadlocking on HNSW destruction due to task locks (#667) The ann-bench tool has been observed to deadlock on thread.join() due to unnecessary mutex lock. The problem is that the destructor doesn't release the thread mutex and thus doesn't allow the thread to escape the condition_variable.wait() function. The fix is to just remove the lock in the destructor (which doesn't modify the state of the task anyway). Authors: - Artem M. 
Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/cuvs/pull/667 --- cpp/bench/ann/src/common/thread_pool.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/src/common/thread_pool.hpp b/cpp/bench/ann/src/common/thread_pool.hpp index e2192daea..287ac0a2d 100644 --- a/cpp/bench/ann/src/common/thread_pool.hpp +++ b/cpp/bench/ann/src/common/thread_pool.hpp @@ -60,9 +60,8 @@ class fixed_thread_pool { finished_.store(true, std::memory_order_relaxed); for (unsigned i = 0; i < threads_.size(); ++i) { + // NB: don't lock the task mutex here, may deadlock on .join() otherwise auto& task = tasks_[i]; - std::lock_guard lock(task.mtx); - task.cv.notify_one(); threads_[i].join(); } @@ -105,8 +104,7 @@ class fixed_thread_pool { IdxT start = i * items_per_thread; auto& task = tasks_[i]; { - std::lock_guard lock(task.mtx); - (void)lock; // stop nvcc warning + [[maybe_unused]] std::lock_guard lock(task.mtx); task.task = std::packaged_task([=] { wrapped_f(start, start + items_per_thread); }); futures.push_back(task.task.get_future()); task.has_task = true; From 961e60c34efd55159f764c33a0495bb370abde76 Mon Sep 17 00:00:00 2001 From: Ajit Mistry <55892788+ajit283@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:57:05 +0100 Subject: [PATCH 06/12] Go API - [WIP] (#212) This PR adds a Go API. It's far from completion and still work in progress. Feel free to suggest improvements! Authors: - Ajit Mistry (https://github.com/ajit283) - Corey J. 
Nolet (https://github.com/cjnolet) - Ben Frederickson (https://github.com/benfred) Approvers: - Ben Frederickson (https://github.com/benfred) - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cuvs/pull/212 --- .github/workflows/build.yaml | 13 + .github/workflows/pr.yaml | 14 + build.sh | 10 +- ci/build_go.sh | 39 ++ .../all_cuda-118_arch-aarch64.yaml | 1 + .../all_cuda-118_arch-x86_64.yaml | 1 + .../all_cuda-128_arch-aarch64.yaml | 1 + .../all_cuda-128_arch-x86_64.yaml | 1 + dependencies.yaml | 15 + go/brute_force/brute_force.go | 83 ++++ go/brute_force/brute_force_test.go | 110 +++++ go/cagra/cagra.go | 136 ++++++ go/cagra/cagra_test.go | 327 ++++++++++++++ go/cagra/extend_params.go | 46 ++ go/cagra/index_params.go | 159 +++++++ go/cagra/search_params.go | 163 +++++++ go/distance.go | 70 +++ go/distance_test.go | 49 ++ go/dlpack.go | 420 ++++++++++++++++++ go/dlpack_test.go | 190 ++++++++ go/exceptions.go | 23 + go/go.mod | 3 + go/go.sum | 4 + go/ivf_flat/index_params.go | 87 ++++ go/ivf_flat/ivf_flat.go | 72 +++ go/ivf_flat/ivf_flat_test.go | 123 +++++ go/ivf_flat/search_params.go | 39 ++ go/ivf_pq/index_params.go | 149 +++++++ go/ivf_pq/ivf_pq.go | 73 +++ go/ivf_pq/ivf_pq_test.go | 123 +++++ go/ivf_pq/search_params.go | 107 +++++ go/memory_resource.go | 93 ++++ go/memory_resource_test.go | 19 + go/resources.go | 57 +++ 34 files changed, 2819 insertions(+), 1 deletion(-) create mode 100755 ci/build_go.sh create mode 100644 go/brute_force/brute_force.go create mode 100644 go/brute_force/brute_force_test.go create mode 100644 go/cagra/cagra.go create mode 100644 go/cagra/cagra_test.go create mode 100644 go/cagra/extend_params.go create mode 100644 go/cagra/index_params.go create mode 100644 go/cagra/search_params.go create mode 100644 go/distance.go create mode 100644 go/distance_test.go create mode 100644 go/dlpack.go create mode 100644 go/dlpack_test.go create mode 100644 go/exceptions.go create mode 100644 go/go.mod create mode 100644 
go/go.sum create mode 100644 go/ivf_flat/index_params.go create mode 100644 go/ivf_flat/ivf_flat.go create mode 100644 go/ivf_flat/ivf_flat_test.go create mode 100644 go/ivf_flat/search_params.go create mode 100644 go/ivf_pq/index_params.go create mode 100644 go/ivf_pq/ivf_pq.go create mode 100644 go/ivf_pq/ivf_pq_test.go create mode 100644 go/ivf_pq/search_params.go create mode 100644 go/memory_resource.go create mode 100644 go/memory_resource_test.go create mode 100644 go/resources.go diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59b8e00de..d1f633248 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -47,6 +47,19 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_rust.sh" sha: ${{ inputs.sha }} + go-build: + needs: cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + arch: "amd64" + date: ${{ inputs.date }} + container_image: "rapidsai/ci-conda:latest" + node_type: "gpu-v100-latest-1" + run_script: "ci/build_go.sh" + sha: ${{ inputs.sha }} python-build: needs: [cpp-build] secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 233b82f94..832932d99 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - conda-python-tests - docs-build - rust-build + - go-build - wheel-build-libcuvs - wheel-build-cuvs - wheel-tests-cuvs @@ -70,6 +71,7 @@ jobs: - '!notebooks/**' - '!python/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_notebooks: - '**' @@ -77,6 +79,7 @@ jobs: - '!.pre-commit-config.yaml' - '!README.md' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_python: - '**' @@ -87,6 +90,7 @@ jobs: - '!img/**' - '!notebooks/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' checks: needs: telemetry-setup @@ -150,6 +154,16 @@ jobs: arch: "amd64" 
container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_rust.sh" + go-build: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/build_go.sh" wheel-build-libcuvs: needs: checks secrets: inherit diff --git a/build.sh b/build.sh index 89e1b5a33..4aaa5617c 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust go java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -26,6 +26,7 @@ HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool==0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c6a65e684..cf73f4b74 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -23,6 +23,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml index c508a9dc1..f16197561 100644 --- a/conda/environments/all_cuda-128_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index f043e544b..5056ef185 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/dependencies.yaml b/dependencies.yaml index 7a50c0d46..cfa63250d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,7 @@ files: - rapids_build - run_py_cuvs - rust + - go - test_libcuvs - test_python_common - test_py_cuvs @@ -81,6 +82,14 @@ files: - cuda_version - rapids_build - rust + go: + output: none + includes: + - clang + - cuda + - cuda_version + - rapids_build + - go py_build_libcuvs: output: pyproject pyproject_dir: python/libcuvs @@ -470,6 +479,12 @@ dependencies: packages: - make - rust + go: + common: + - output_types: [conda] + packages: + - go + - dlpack>=0.8,<1.0 build_wheels: common: - output_types: [requirements, pyproject] 
diff --git a/go/brute_force/brute_force.go b/go/brute_force/brute_force.go new file mode 100644 index 000000000..29f1a5d95 --- /dev/null +++ b/go/brute_force/brute_force.go @@ -0,0 +1,83 @@ +package brute_force + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Brute Force KNN Index +type BruteForceIndex struct { + index C.cuvsBruteForceIndex_t + trained bool +} + +// Creates a new empty Brute Force KNN Index +func CreateIndex() (*BruteForceIndex, error) { + var index C.cuvsBruteForceIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &BruteForceIndex{index: index, trained: false}, nil +} + +// Destroys the Brute Force KNN Index +func (index *BruteForceIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Builds a new Brute Force KNN Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `Dataset` - A row-major matrix on either the host or device to index +// * `metric` - Distance type to use for building the index +// * `metric_arg` - Value of `p` for Minkowski distances - set to 2.0 if not applicable +func BuildIndex[T any](Resources cuvs.Resource, Dataset *cuvs.Tensor[T], metric cuvs.Distance, metric_arg float32, index *BruteForceIndex) error { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceBuild(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(Dataset.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg), index.index))) + if err != nil { + return err + } + index.trained = true + + return nil +} + +// Perform a Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `queries` - Tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](resources cuvs.Resource, index BruteForceIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + prefilter := C.cuvsFilter{ + addr: 0, + _type: C.NO_FILTER, + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceSearch(C.ulong(resources.Resource), index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), prefilter))) +} diff --git a/go/brute_force/brute_force_test.go b/go/brute_force/brute_force_test.go new file mode 100644 index 000000000..151c48e6a --- /dev/null +++ 
b/go/brute_force/brute_force_test.go @@ -0,0 +1,110 @@ +package brute_force + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, &dataset, cuvs.DistanceL2, 2.0, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + err = SearchIndex(resource, *index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := 
neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/cagra.go b/go/cagra/cagra.go new file mode 100644 index 000000000..61b9e71d6 --- /dev/null +++ b/go/cagra/cagra.go @@ -0,0 +1,136 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Cagra ANN Index +type CagraIndex struct { + index C.cuvsCagraIndex_t + trained bool +} + +// Creates a new empty Cagra Index +func CreateIndex() (*CagraIndex, error) { + var index C.cuvsCagraIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &CagraIndex{index: index}, nil +} + +// Builds a new Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - CagraIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *CagraIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Extends the index with additional data +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for extending the index +// * `additional_dataset` - A row-major Tensor on the device to extend the index with +// * `return_dataset` - A row-major Tensor on the device that will receive the extended dataset +// * `index` - CagraIndex to extend +func ExtendIndex[T any](Resources cuvs.Resource, params *ExtendParams, additional_dataset *cuvs.Tensor[T], return_dataset *cuvs.Tensor[T], index *CagraIndex) error { + if !index.trained { + return errors.New("index needs to be built before calling extend") + } + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtend(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(additional_dataset.C_tensor)), index.index, (*C.DLManagedTensor)(unsafe.Pointer(return_dataset.C_tensor))))) + if err != nil { + return err + } + return nil +} + +// Destroys the Cagra Index +func (index *CagraIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform an Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor 
in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +// * `allowList` - List of indices to allow in the search, if nil, no filtering is applied +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *CagraIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[uint32], distances *cuvs.Tensor[T], allowList []uint32) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + var filter C.cuvsFilter + bitset := createBitset(allowList) + allowListTensor, err := cuvs.NewVector[uint32](bitset) + if err != nil { + return err + } + defer allowListTensor.Close() + _, err = allowListTensor.ToDevice(&Resources) + if err != nil { + return err + } + if allowList == nil { + filter = C.cuvsFilter{ + _type: C.NO_FILTER, + addr: C.uintptr_t(0), + } + } else { + filter = C.cuvsFilter{ + _type: C.BITSET, + addr: C.uintptr_t(uintptr(unsafe.Pointer(allowListTensor.C_tensor))), + } + } + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), filter))) +} + +func createBitset(allowList []uint32) []uint32 { + // Calculate size needed for the bitset array + // Each uint32 handles 32 bits, so we divide the max ID by 32 (shift right by 5) + maxID := uint32(0) + for _, id := range allowList { + if id > maxID { + maxID = id + } + } + size := (maxID >> 5) + 1 // Division by 32, add 1 to handle remainder + bitset := make([]uint32, size) + for _, id := range allowList { + // Calculate which uint32 in our array (divide by 32) + arrayIndex := id >> 5 + // Calculate bit position within that uint32 (mod 32) + bitPosition := id & 31 // equivalent to id % 32 + // Set the bit + 
bitset[arrayIndex] |= 1 << bitPosition + } + return bitset +} diff --git a/go/cagra/cagra_test.go b/go/cagra/cagra_test.go new file mode 100644 index 000000000..86baf5e60 --- /dev/null +++ b/go/cagra/cagra_test.go @@ -0,0 +1,327 @@ +package cagra + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing 
resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} + +func TestCagraFiltering(t *testing.T) { + const ( + nDataPoints = 1024 + nFilteredOut = 512 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil 
{ + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // Test queries: first 4 points (should be found without filter) + queries1, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries1.Close() + + // Test queries: points 512-515 (should be found with filter, not found without) + queries2, _ := cuvs.NewTensor(testDataset[nFilteredOut:(nFilteredOut + nQueries)]) + defer queries2.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + // Step 1: Search without filter - first 4 points should find themselves + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index without filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := 
resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points found themselves without filter + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("without filter: wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + // Step 2: Search with filter excluding first half - first 4 points should not be found + allowList := make([]uint32, nDataPoints-nFilteredOut) + for i := range allowList { + allowList[i] = uint32(i + nFilteredOut) + } + + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 back to device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points are not in results when filtered + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + for j := range neighborsSlice[i] { + if neighborsSlice[i][j] < uint32(nFilteredOut) { + t.Error("with filter: found point that should be filtered out:", neighborsSlice[i][j]) + } + } + } + + // Step 3: Search points 512-515 with filter - they should find themselves + if _, err := queries2.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries2 to 
device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries2, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter for second query set: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + // Verify points 512-515 find themselves when filtered to second half + for i := range neighborsSlice { + expectedID := uint32(i + nFilteredOut) + if neighborsSlice[i][0] != expectedID { + t.Error("with filter: wrong neighbor for filtered query, expected", expectedID, "got", neighborsSlice[i][0]) + } + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("with filter: distance should be close to 0 for filtered query, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/extend_params.go b/go/cagra/extend_params.go new file mode 100644 index 000000000..7e173c045 --- /dev/null +++ b/go/cagra/extend_params.go @@ -0,0 +1,46 @@ +package cagra + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +// Parameters to extend CAGRA Index +type ExtendParams struct { + params C.cuvsCagraExtendParams_t +} + +// Creates a new ExtendParams +func CreateExtendParams() (*ExtendParams, error) { + 
var params C.cuvsCagraExtendParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + ExtendParams := &ExtendParams{params: params} + + return ExtendParams, nil +} + +// The additional dataset is divided into chunks and added to the graph. +// This is the knob to adjust the tradeoff between the recall and operation throughput. +// Large chunk sizes can result in high throughput, but use more +// working memory (O(max_chunk_size*degree^2)). +// This can also degrade recall because no edges are added between the nodes in the same chunk. +// Auto select when 0. +func (p *ExtendParams) SetMaxChunkSize(max_chunk_size uint32) (*ExtendParams, error) { + p.params.max_chunk_size = C.uint32_t(max_chunk_size) + return p, nil +} + +func (p *ExtendParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/cagra/index_params.go b/go/cagra/index_params.go new file mode 100644 index 000000000..51fdb318f --- /dev/null +++ b/go/cagra/index_params.go @@ -0,0 +1,159 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +type IndexParams struct { + params C.cuvsCagraIndexParams_t +} + +// Supplemental parameters to build CAGRA Index +type CompressionParams struct { + params C.cuvsCagraCompressionParams_t +} + +type BuildAlgo int + +const ( + IvfPq BuildAlgo = iota + NnDescent + AutoSelect +) + +var cBuildAlgos = map[BuildAlgo]int{ + IvfPq: C.IVF_PQ, + NnDescent: C.NN_DESCENT, + AutoSelect: C.AUTO_SELECT, +} + +// Creates a new CompressionParams +func CreateCompressionParams() (*CompressionParams, error) { + var params C.cuvsCagraCompressionParams_t + + if params == nil { + return nil, errors.New("memory allocation failed") + } + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraCompressionParamsCreate(¶ms))) + if err != nil { + return nil, err + 
} + + return &CompressionParams{params: params}, nil +} + +// The bit length of the vector element after compression by PQ. +func (p *CompressionParams) SetPQBits(pq_bits uint32) (*CompressionParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + + return p, nil +} + +// The dimensionality of the vector after compression by PQ. When zero, +// an optimal value is selected using a heuristic. +func (p *CompressionParams) SetPQDim(pq_dim uint32) (*CompressionParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + + return p, nil +} + +// Vector Quantization (VQ) codebook size - number of "coarse cluster +// centers". When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQNCenters(vq_n_centers uint32) (*CompressionParams, error) { + p.params.vq_n_centers = C.uint32_t(vq_n_centers) + + return p, nil +} + +// The number of iterations searching for kmeans centers (both VQ & PQ +// phases). +func (p *CompressionParams) SetKMeansNIters(kmeans_n_iters uint32) (*CompressionParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (VQ +// phase). When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQKMeansTrainsetFraction(vq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.vq_kmeans_trainset_fraction = C.double(vq_kmeans_trainset_fraction) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (PQ +// phase). When zero, an optimal value is selected using a heuristic. 
+func (p *CompressionParams) SetPQKMeansTrainsetFraction(pq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.pq_kmeans_trainset_fraction = C.double(pq_kmeans_trainset_fraction) + + return p, nil +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsCagraIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + IndexParams := &IndexParams{params: params} + + return IndexParams, nil +} + +// Degree of input graph for pruning +func (p *IndexParams) SetIntermediateGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.intermediate_graph_degree = C.size_t(intermediate_graph_degree) + return p, nil +} + +// Degree of output graph +func (p *IndexParams) SetGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.graph_degree = C.size_t(intermediate_graph_degree) + + return p, nil +} + +// ANN algorithm to build knn graph +func (p *IndexParams) SetBuildAlgo(build_algo BuildAlgo) (*IndexParams, error) { + CBuildAlgo, exists := cBuildAlgos[build_algo] + + if !exists { + return nil, errors.New("cuvs: invalid build_algo") + } + p.params.build_algo = uint32(CBuildAlgo) + + return p, nil +} + +// Number of iterations to run if building with NN_DESCENT +func (p *IndexParams) SetNNDescentNiter(nn_descent_niter uint32) (*IndexParams, error) { + p.params.nn_descent_niter = C.ulong(nn_descent_niter) + + return p, nil +} + +// Compression parameters +func (p *IndexParams) SetCompression(compression *CompressionParams) (*IndexParams, error) { + p.params.compression = C.cuvsCagraCompressionParams_t(compression.params) + + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsDestroy(p.params))) + if err != nil { + return err + } + + return nil +} diff --git a/go/cagra/search_params.go 
b/go/cagra/search_params.go new file mode 100644 index 000000000..94207d288 --- /dev/null +++ b/go/cagra/search_params.go @@ -0,0 +1,163 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search CAGRA Index +type SearchParams struct { + params C.cuvsCagraSearchParams_t +} + +type SearchAlgo int + +const ( + SearchAlgoSingleCta SearchAlgo = iota + SearchAlgoMultiCta + SearchAlgoMultiKernel + SearchAlgoAuto +) + +type HashmapMode int + +const ( + HashmapModeHash HashmapMode = iota + HashmapModeSmall + HashmapModeAuto +) + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsCagraSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// Maximum number of queries to search at the same time (batch size). Auto select when 0 +func (p *SearchParams) SetMaxQueries(max_queries uintptr) (*SearchParams, error) { + p.params.max_queries = C.size_t(max_queries) + return p, nil +} + +// Number of intermediate search results retained during the search. +// This is the main knob to adjust trade off between accuracy and search speed. +// Higher values improve the search accuracy +func (p *SearchParams) SetItopkSize(itopk_size uintptr) (*SearchParams, error) { + p.params.itopk_size = C.size_t(itopk_size) + return p, nil +} + +// Upper limit of search iterations. Auto select when 0. +func (p *SearchParams) SetMaxIterations(max_iterations uintptr) (*SearchParams, error) { + p.params.max_iterations = C.size_t(max_iterations) + return p, nil +} + +// Which search implementation to use. 
+func (p *SearchParams) SetAlgo(algo SearchAlgo) (*SearchParams, error) { + CAlgo := C.SINGLE_CTA + + switch algo { + case SearchAlgoSingleCta: + CAlgo = C.SINGLE_CTA + case SearchAlgoMultiCta: + CAlgo = C.MULTI_CTA + case SearchAlgoMultiKernel: + CAlgo = C.MULTI_KERNEL + case SearchAlgoAuto: + CAlgo = C.AUTO + default: + return nil, errors.New("unsupported algo") + } + + p.params.algo = uint32(CAlgo) + + return p, nil +} + +// Number of threads used to calculate a single distance. 4, 8, 16, or 32. +func (p *SearchParams) SetTeamSize(team_size uintptr) (*SearchParams, error) { + p.params.team_size = C.size_t(team_size) + return p, nil +} + +// Lower limit of search iterations. +func (p *SearchParams) SetMinIterations(min_iterations uintptr) (*SearchParams, error) { + p.params.min_iterations = C.size_t(min_iterations) + return p, nil +} + +// How many nodes to search at once. Auto select when 0. +func (p *SearchParams) SetSearchWidth(search_width uintptr) (*SearchParams, error) { + p.params.search_width = C.size_t(search_width) + return p, nil +} + +// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. +func (p *SearchParams) SetThreadBlockSize(thread_block_size uintptr) (*SearchParams, error) { + p.params.thread_block_size = C.size_t(thread_block_size) + return p, nil +} + +// Hashmap type. Auto selection when AUTO. +func (p *SearchParams) SetHashmapMode(hashmap_mode HashmapMode) (*SearchParams, error) { + CHashMode := C.AUTO_HASH + + switch hashmap_mode { + case HashmapModeHash: + CHashMode = C.HASH + case HashmapModeSmall: + CHashMode = C.SMALL + case HashmapModeAuto: + CHashMode = C.AUTO_HASH + default: + return nil, errors.New("unsupported hashmap_mode") + } + + p.params.hashmap_mode = uint32(CHashMode) + + return p, nil +} + +// Lower limit of hashmap bit length. More than 8. 
+func (p *SearchParams) SetHashmapMinBitlen(hashmap_min_bitlen uintptr) (*SearchParams, error) { + p.params.hashmap_min_bitlen = C.size_t(hashmap_min_bitlen) + return p, nil +} + +// Upper limit of hashmap fill rate. More than 0.1, less than 0.9. +func (p *SearchParams) SetHashmapMaxFillRate(hashmap_max_fill_rate float32) (*SearchParams, error) { + p.params.hashmap_max_fill_rate = C.float(hashmap_max_fill_rate) + return p, nil +} + +// Number of iterations of initial random seed node selection. 1 or more. +func (p *SearchParams) SetNumRandomSamplings(num_random_samplings uint32) (*SearchParams, error) { + p.params.num_random_samplings = C.uint32_t(num_random_samplings) + return p, nil +} + +// Bit mask used for initial random seed node selection. +func (p *SearchParams) SetRandXorMask(rand_xor_mask uint64) (*SearchParams, error) { + p.params.rand_xor_mask = C.uint64_t(rand_xor_mask) + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/distance.go b/go/distance.go new file mode 100644 index 000000000..530e1d61b --- /dev/null +++ b/go/distance.go @@ -0,0 +1,70 @@ +package cuvs + +// #include +import "C" + +import ( + "errors" + "unsafe" +) + +type Distance int + +// Supported distance metrics +const ( + DistanceL2 Distance = iota + DistanceSQEuclidean + DistanceEuclidean + DistanceL1 + DistanceCityblock + DistanceInnerProduct + DistanceChebyshev + DistanceCanberra + DistanceCosine + DistanceLp + DistanceCorrelation + DistanceJaccard + DistanceHellinger + DistanceBrayCurtis + DistanceJensenShannon + DistanceHamming + DistanceKLDivergence + DistanceMinkowski + DistanceRusselRao + DistanceDice +) + +// Maps cuvs Go distances to C distances +var CDistances = map[Distance]int{ + DistanceL2: C.L2SqrtExpanded, + DistanceSQEuclidean: C.L2Expanded, + DistanceEuclidean: C.L2SqrtExpanded, + 
DistanceL1: C.L1, + DistanceCityblock: C.L1, + DistanceInnerProduct: C.InnerProduct, + DistanceChebyshev: C.Linf, + DistanceCanberra: C.Canberra, + DistanceCosine: C.CosineExpanded, + DistanceLp: C.LpUnexpanded, + DistanceCorrelation: C.CorrelationExpanded, + DistanceJaccard: C.JaccardExpanded, + DistanceHellinger: C.HellingerExpanded, + DistanceBrayCurtis: C.BrayCurtis, + DistanceJensenShannon: C.JensenShannon, + DistanceHamming: C.HammingUnexpanded, + DistanceKLDivergence: C.KLDivergence, + DistanceMinkowski: C.LpUnexpanded, + DistanceRusselRao: C.RusselRaoExpanded, + DistanceDice: C.DiceExpanded, +} + +// Computes the pairwise distance between two vectors. +func PairwiseDistance[T any](Resources Resource, x *Tensor[T], y *Tensor[T], distances *Tensor[float32], metric Distance, metric_arg float32) error { + CMetric, exists := CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + return CheckCuvs(CuvsError(C.cuvsPairwiseDistance(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(x.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(y.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg)))) +} diff --git a/go/distance_test.go b/go/distance_test.go new file mode 100644 index 000000000..17f40a1ba --- /dev/null +++ b/go/distance_test.go @@ -0,0 +1,49 @@ +package cuvs_test + +import ( + "math/rand" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDistance(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + + rand.Seed(time.Now().UnixNano()) + + NDataPoints := 256 + NFeatures := 16 + + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + TestDataset[i][j] = rand.Float32() + } + } + + dataset, _ := cuvs.NewTensor(TestDataset) + + DistancesDataset := make([][]float32, NDataPoints) + for i := range 
DistancesDataset { + DistancesDataset[i] = make([]float32, NDataPoints) + } + + distances, _ := cuvs.NewTensor(DistancesDataset) + + distances.ToDevice(&resource) + dataset.ToDevice(&resource) + + cuvs.PairwiseDistance(resource, &dataset, &dataset, &distances, cuvs.DistanceL2, 0.0) + + distances.ToHost(&resource) + + resource.Sync() + + arr, _ := distances.Slice() + if arr[0][0] != 0.0 { + t.Error("wrong distance, expected", 0.0, "got", arr[0][0]) + } +} diff --git a/go/dlpack.go b/go/dlpack.go new file mode 100644 index 000000000..6fe619fd3 --- /dev/null +++ b/go/dlpack.go @@ -0,0 +1,420 @@ +package cuvs + +// #include +// #include +// #include +import "C" + +import ( + "errors" + "strconv" + "unsafe" +) + +type TensorNumberType interface { + int64 | uint32 | float32 +} + +// ManagedTensor is a wrapper around a dlpack DLManagedTensor object. +// This lets you pass matrices in device or host memory into cuvs. +type Tensor[T any] struct { + C_tensor *C.DLManagedTensor + shape []int64 +} + +// Creates a new Tensor on the host and copies the data into it. 
+func NewTensor[T TensorNumberType](data [][]T) (Tensor[T], error) { + if len(data) == 0 || len(data[0]) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) * len(data[0]) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + flattenData(data, dataSlice) + + shapePtr := C.malloc(C.size_t(2 * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 2) + shapeSlice[0] = C.int64_t(len(data)) + shapeSlice[1] = C.int64_t(len(data[0])) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 2 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data)), int64(len(data[0]))}, + }, nil +} + +func NewVector[T TensorNumberType](data []T) (Tensor[T], error) { + if len(data) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + copy(dataSlice, data) + + shapePtr := C.malloc(C.size_t(int(unsafe.Sizeof(C.int64_t(0))))) + 
if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 1) + shapeSlice[0] = C.int64_t(len(data)) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 1 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data))}, + }, nil +} + +// Creates a new Tensor with uninitialized data on the current device. +func NewTensorOnDevice[T TensorNumberType](res *Resource, shape []int64) (Tensor[T], error) { + if len(shape) < 2 { + return Tensor[T]{}, errors.New("shape must be at least 2") + } + + shapePtr := C.malloc(C.size_t(len(shape) * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), len(shape)) + for i, dim := range shape { + shapeSlice[i] = C.int64_t(dim) + } + + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + dtype := getDLDataType[T]() + + var deviceDataPtr unsafe.Pointer + bytes := calculateBytes(shape, dtype) + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &deviceDataPtr, C.size_t(bytes)))) + if err != nil { + C.free(unsafe.Pointer(dlm)) + C.free(unsafe.Pointer(shapePtr)) + return Tensor[T]{}, err + } + + dlm.dl_tensor.data = deviceDataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCUDA), 
+ device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = C.int(len(shape)) + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + shapeCopy := make([]int64, len(shape)) + copy(shapeCopy, shape) + + return Tensor[T]{ + C_tensor: dlm, + shape: shapeCopy, + }, nil +} + +// Destroys Tensor, freeing the memory it was allocated on. +func (t *Tensor[T]) Close() error { + if t.C_tensor.dl_tensor.device.device_type == C.kDLCUDA { + bytes := t.sizeInBytes() + res, err := NewResource(nil) + if err != nil { + return err + } + err = CheckCuvs(CuvsError(C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + + return err + } else if t.C_tensor.dl_tensor.device.device_type == C.kDLCPU { + if t.C_tensor.dl_tensor.data != nil { + C.free(t.C_tensor.dl_tensor.data) + t.C_tensor.dl_tensor.data = nil + } + } + + if t.C_tensor.dl_tensor.shape != nil { + C.free(unsafe.Pointer(t.C_tensor.dl_tensor.shape)) + t.C_tensor.dl_tensor.shape = nil + } + + if t.C_tensor != nil { + C.free(unsafe.Pointer(t.C_tensor)) + t.C_tensor = nil + } + + t.C_tensor = nil + return nil +} + +// Transfers the data in the Tensor to the device. +func (t *Tensor[T]) ToDevice(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + var DeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &DeviceDataPointer, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + DeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, DeviceDataPointer, C.size_t(bytes)) + return nil, err + } + t.C_tensor.dl_tensor.device.device_type = C.kDLCUDA + t.C_tensor.dl_tensor.data = DeviceDataPointer + + return t, nil +} + +// Returns the shape of the Tensor. 
+func (t *Tensor[T]) Shape() []int64 { + return t.shape +} + +// Expands the Tensor by adding newData to the end of the current data. +// The Tensor must be on the device. +func (t *Tensor[T]) Expand(res *Resource, newData [][]T) (*Tensor[T], error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCUDA { + return &Tensor[T]{}, errors.New("Tensor must be on GPU") + } + + newShape := []int64{int64(len(newData)), int64(len(newData[0]))} + + flatData := make([]T, len(newData)*len(newData[0])) + for i := range newData { + for j := range newData[i] { + flatData[i*len(newData[0])+j] = newData[i][j] + } + } + + old_shape := unsafe.Slice((*int64)(unsafe.Pointer(t.C_tensor.dl_tensor.shape)), 2) + + if old_shape[1] != newShape[1] { + return &Tensor[T]{}, errors.New("new shape must be same as old shape, old shape: " + strconv.Itoa(int(old_shape[1])) + ", new shape: " + strconv.Itoa(int(newShape[1]))) + } + + newDataSize := newShape[0] * newShape[1] * int64(t.C_tensor.dl_tensor.dtype.bits) / 8 + + bytes := t.sizeInBytes() + + var NewDeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &NewDeviceDataPointer, C.size_t(bytes+newDataSize)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + NewDeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + unsafe.Pointer(uintptr(NewDeviceDataPointer)+uintptr(bytes)), + unsafe.Pointer(&flatData[0]), + C.size_t(newDataSize), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + shape := make([]int64, 2) + shape[0] = 
int64(*t.C_tensor.dl_tensor.shape) + int64(len(newData)) + + shape[1] = newShape[1] + + t.shape = shape + + t.C_tensor.dl_tensor.data = NewDeviceDataPointer + t.C_tensor.dl_tensor.shape = (*C.int64_t)(unsafe.Pointer(&shape[0])) + + return t, nil +} + +// Transfers the data in the Tensor to the host. +func (t *Tensor[T]) ToHost(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + addr := (C.malloc(C.size_t(bytes))) + if addr == nil { + return nil, errors.New("memory allocation failed") + } + + err := CheckCuda( + C.cudaMemcpy( + addr, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToHost, + )) + if err != nil { + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + t.C_tensor.dl_tensor.device.device_type = C.kDLCPU + t.C_tensor.dl_tensor.data = addr + + return t, nil +} + +// Returns a slice of the data in the Tensor. +// The Tensor must be on the host. 
+func (t *Tensor[T]) Slice() ([][]T, error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCPU { + return nil, errors.New("Tensor must be on CPU") + } + + flatData := unsafe.Slice((*T)(t.C_tensor.dl_tensor.data), t.shape[0]*t.shape[1]) + + data := make([][]T, t.shape[0]) + for i := range data { + data[i] = make([]T, t.shape[1]) + for j := range data[i] { + data[i][j] = flatData[i*int(t.shape[1])+j] + } + } + + return data, nil +} + +func getDLDataType[T TensorNumberType]() C.DLDataType { + var zero T + switch any(zero).(type) { + case int64: + return C.DLDataType{ + bits: C.uchar(64), + lanes: C.ushort(1), + code: C.kDLInt, + } + case uint32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLUInt, + } + case float32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLFloat, + } + } + panic("unreachable") // Go compiler ensures this is unreachable +} + +func flattenData[T TensorNumberType](data [][]T, dest []T) { + cols := len(data[0]) + for i, row := range data { + copy(dest[i*cols:], row) + } +} + +func (t *Tensor[T]) sizeInBytes() int64 { + return calculateBytes(t.shape, t.C_tensor.dl_tensor.dtype) +} + +func calculateBytes(shape []int64, dtype C.DLDataType) int64 { + bytes := int64(1) + for dim := range shape { + bytes *= (shape[dim]) + } + + bytes *= int64(dtype.bits) / 8 + + return bytes +} diff --git a/go/dlpack_test.go b/go/dlpack_test.go new file mode 100644 index 000000000..135363f81 --- /dev/null +++ b/go/dlpack_test.go @@ -0,0 +1,190 @@ +package cuvs_test + +import ( + "math/rand" + "reflect" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDlPack(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + rand.Seed(time.Now().UnixNano()) + NDataPoints := 256 + NFeatures := 16 + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + TestDataset[i][j] = float32(i) + 
} + } + + dataset, err := cuvs.NewTensor(TestDataset[:127]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToDevice(&resource) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.Expand(&resource, TestDataset[127:]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToHost(&resource) + if err != nil { + t.Fatal(err) + } + + arr, err := dataset.Slice() + if err != nil { + t.Fatal(err) + } + + for i := range arr { + for j := range arr[i] { + if arr[i][j] != TestDataset[i][j] { + t.Errorf("slices don't match at [%d][%d], expected %f, got %f", + i, j, TestDataset[i][j], arr[i][j]) + } + } + } +} + +func TestShape(t *testing.T) { + // Test cases with different shapes + testCases := []struct { + rows int + cols int + name string + }{ + {10, 5, "small matrix"}, + {100, 20, "medium matrix"}, + {1000, 50, "large matrix"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create test data + data := make([][]float32, tc.rows) + for i := range data { + data[i] = make([]float32, tc.cols) + for j := range data[i] { + data[i][j] = float32(i * j) + } + } + + // Create tensor + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatalf("failed to create tensor: %v", err) + } + defer tensor.Close() + + // Check shape + shape := tensor.Shape() + expectedShape := []int64{int64(tc.rows), int64(tc.cols)} + if !reflect.DeepEqual(shape, expectedShape) { + t.Errorf("incorrect shape: got %v, want %v", shape, expectedShape) + } + }) + } +} + +func TestEmptyTensor(t *testing.T) { + // Test creating tensor with empty data + _, err := cuvs.NewTensor([][]float32{}) + if err == nil { + t.Error("expected error when creating tensor with empty data, got nil") + } +} + +func TestDeviceOperations(t *testing.T) { + resource, err := cuvs.NewResource(nil) + if err != nil { + t.Fatal(err) + } + + // Create test data + data := make([][]float32, 10) + for i := range data { + data[i] = make([]float32, 5) + for j := range data[i] { + data[i][j] = 
float32(i * j) + } + } + + // Test device transfer operations + t.Run("device transfer", func(t *testing.T) { + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + // Transfer to device + deviceTensor, err := tensor.ToDevice(&resource) + if err != nil { + t.Fatalf("failed to transfer to device: %v", err) + } + + // Transfer back to host + hostTensor, err := deviceTensor.ToHost(&resource) + if err != nil { + t.Fatalf("failed to transfer back to host: %v", err) + } + + // Verify data + result, err := hostTensor.Slice() + if err != nil { + t.Fatalf("failed to slice tensor: %v", err) + } + + for i := range data { + for j := range data[i] { + if result[i][j] != data[i][j] { + t.Errorf("data mismatch at [%d][%d]: got %f, want %f", + i, j, result[i][j], data[i][j]) + } + } + } + }) +} + +func TestDifferentDataTypes(t *testing.T) { + // Test int64 tensor + t.Run("int64", func(t *testing.T) { + data := [][]int64{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for int64 tensor: got %v, want [2 2]", shape) + } + }) + + // Test uint32 tensor + t.Run("uint32", func(t *testing.T) { + data := [][]uint32{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for uint32 tensor: got %v, want [2 2]", shape) + } + }) +} diff --git a/go/exceptions.go b/go/exceptions.go new file mode 100644 index 000000000..af73ca991 --- /dev/null +++ b/go/exceptions.go @@ -0,0 +1,23 @@ +package cuvs + +// #include +import "C" +import "errors" + +type CuvsError C.cuvsError_t + +// Wrapper function to convert cuvs error to Go error +func CheckCuvs(error CuvsError) error { + if error == C.CUVS_ERROR { + return 
errors.New(C.GoString(C.cuvsGetLastErrorText())) + } + return nil +} + +// Wrapper function to convert cuda error to Go error +func CheckCuda(error C.cudaError_t) error { + if error != C.cudaSuccess { + return errors.New(C.GoString(C.cudaGetErrorString(error))) + } + return nil +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 000000000..4e1ab39a4 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,3 @@ +module github.com/rapidsai/cuvs/go + +go 1.22.4 diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 000000000..81937f7b1 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,4 @@ +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= +golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= diff --git a/go/ivf_flat/index_params.go b/go/ivf_flat/index_params.go new file mode 100644 index 000000000..f6cbe60fb --- /dev/null +++ b/go/ivf_flat/index_params.go @@ -0,0 +1,87 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to build IVF Flat Index +type IndexParams struct { + params C.cuvsIvfFlatIndexParams_t +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfFlatIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsCreate(&params))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. 
+func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. +func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_flat/ivf_flat.go b/go/ivf_flat/ivf_flat.go new file mode 100644 index 000000000..3330eb95e --- /dev/null +++ b/go/ivf_flat/ivf_flat.go @@ -0,0 +1,72 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF Flat Index +type IvfFlatIndex struct { + index C.cuvsIvfFlatIndex_t + trained bool +} + +// Creates a new empty IvfFlatIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfFlatIndex, error) { + var index C.cuvsIvfFlatIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfFlatIndex{index: index}, nil +} + +// Builds an IvfFlatIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfFlatIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfFlatIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfFlatIndex +func (index *IvfFlatIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform an Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfFlatIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfFlatIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_flat/ivf_flat_test.go b/go/ivf_flat/ivf_flat_test.go new file mode 100644 index 000000000..8bca965f8 --- /dev/null +++ 
b/go/ivf_flat/ivf_flat_test.go @@ -0,0 +1,123 @@ +package ivf_flat + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfFlat(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + 
SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_flat/search_params.go b/go/ivf_flat/search_params.go new file mode 100644 index 000000000..3606e4662 --- /dev/null +++ b/go/ivf_flat/search_params.go @@ -0,0 +1,39 @@ +package ivf_flat + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +type SearchParams struct { + params C.cuvsIvfFlatSearchParams_t +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfFlatSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsCreate(&params))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. 
+func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Destroy SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/index_params.go b/go/ivf_pq/index_params.go new file mode 100644 index 000000000..d04b44621 --- /dev/null +++ b/go/ivf_pq/index_params.go @@ -0,0 +1,149 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +type IndexParams struct { + params C.cuvsIvfPqIndexParams_t +} + +type codebookKind int + +const ( + Subspace codebookKind = iota + Cluster +) + +var cCodebookKinds = map[codebookKind]int{ + Subspace: C.PER_SUBSPACE, + Cluster: C.PER_CLUSTER, +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfPqIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsCreate(&params))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. +func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. 
+func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// The bit length of the vector element after quantization. +func (p *IndexParams) SetPQBits(pq_bits uint32) (*IndexParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + return p, nil +} + +// The dimensionality of the vector after product quantization. +// When zero, an optimal value is selected using a heuristic. Note +// pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim' +// results in a smaller index size and better search performance, but +// lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, +// but multiples of 8 are desirable for good performance. If 'pq_bits' +// is not 8, 'pq_dim' should be a multiple of 8. For good performance, +// it is desirable that 'pq_dim' is a multiple of 32. Ideally, +// 'pq_dim' should also be a divisor of the dataset dim. +func (p *IndexParams) SetPQDim(pq_dim uint32) (*IndexParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + return p, nil +} + +func (p *IndexParams) SetCodebookKind(codebook_kind codebookKind) (*IndexParams, error) { + CCodebookKind, exists := cCodebookKinds[codebook_kind] + + if !exists { + return nil, errors.New("cuvs: invalid codebook_kind") + } + p.params.codebook_kind = uint32(CCodebookKind) + + return p, nil +} + +// Apply a random rotation matrix on the input data and queries even +// if `dim % pq_dim == 0`. 
Note: if `dim` is not a multiple of `pq_dim`, +// a random rotation is always applied to the input data and queries +// to transform the working space from `dim` to `rot_dim`, which may +// be slightly larger than the original space and is a multiple +// of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is +// not necessary when `dim` is a multiple of `pq_dim` (`dim == rot_dim`, +// hence no need to add "extra" data columns / features). By +// default, if `dim == rot_dim`, the rotation transform is +// initialized with the identity matrix. When +// `force_random_rotation == True`, a random orthogonal transform matrix is generated regardless of the values of `dim` and `pq_dim`. +func (p *IndexParams) SetForceRandomRotation(force_random_rotation bool) (*IndexParams, error) { + if force_random_rotation { + p.params.force_random_rotation = C._Bool(true) + } else { + p.params.force_random_rotation = C._Bool(false) + } + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/ivf_pq.go b/go/ivf_pq/ivf_pq.go new file mode 100644 index 000000000..cbbec629d --- /dev/null +++ b/go/ivf_pq/ivf_pq.go @@ -0,0 +1,73 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF PQ Index +type IvfPqIndex struct { + index C.cuvsIvfPqIndex_t + trained bool +} + +// Creates a new empty IvfPqIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfPqIndex, error) { + var index C.cuvsIvfPqIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfPqIndex{index: index}, nil +} + +// Builds an IvfPqIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfPqIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfPqIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfPqIndex +func (index *IvfPqIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform an Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfPqIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfPqIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_pq/ivf_pq_test.go b/go/ivf_pq/ivf_pq_test.go new file mode 100644 index 000000000..39b7727a3 --- /dev/null +++ b/go/ivf_pq/ivf_pq_test.go 
@@ -0,0 +1,123 @@ +package ivf_pq + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfPq(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + SearchParams, err := CreateSearchParams() + if 
err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_pq/search_params.go b/go/ivf_pq/search_params.go new file mode 100644 index 000000000..190ca7036 --- /dev/null +++ b/go/ivf_pq/search_params.go @@ -0,0 +1,107 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search IVF PQ Index +type SearchParams struct { + params C.cuvsIvfPqSearchParams_t +} + +type lutDtype int + +const ( + Lut_Uint8 lutDtype = iota + Lut_Uint16 + Lut_Uint32 + Lut_Uint64 + Lut_Int8 + Lut_Int16 + Lut_Int32 + Lut_Int64 +) + +var cLutDtypes = map[lutDtype]int{ + Lut_Uint8: C.CUDA_R_8U, + Lut_Uint16: C.CUDA_R_16U, + Lut_Uint32: C.CUDA_R_32U, + Lut_Uint64: C.CUDA_R_64U, + Lut_Int8: C.CUDA_R_8I, + Lut_Int16: C.CUDA_R_16I, + Lut_Int32: C.CUDA_R_32I, + Lut_Int64: 
C.CUDA_R_64I, +} + +type internalDistanceDtype int + +const ( + InternalDistance_Float32 internalDistanceDtype = iota + InternalDistance_Float64 +) + +var CInternalDistanceDtypes = map[internalDistanceDtype]int{ + InternalDistance_Float32: C.CUDA_R_32F, + InternalDistance_Float64: C.CUDA_R_64F, +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfPqSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsCreate(&params))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. +func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Data type of look up table to be created dynamically at search +// time. The use of low-precision types reduces the amount of shared +// memory required at search time, so fast shared memory kernels can +// be used even for datasets with large dimensionality. Note that +// the recall is slightly degraded when low-precision type is +// selected. +func (p *SearchParams) SetLutDtype(lut_dtype lutDtype) (*SearchParams, error) { + CLutDtype, exists := cLutDtypes[lutDtype(lut_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid lut_dtype") + } + p.params.lut_dtype = C.cudaDataType_t(CLutDtype) + + return p, nil +} + +// Storage data type for distance/similarity computation.
+func (p *SearchParams) SetInternalDistanceDtype(internal_distance_dtype internalDistanceDtype) (*SearchParams, error) { + CInternalDistanceDtype, exists := CInternalDistanceDtypes[internalDistanceDtype(internal_distance_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid internal_distance_dtype") + } + p.params.internal_distance_dtype = C.cudaDataType_t(CInternalDistanceDtype) + + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/memory_resource.go b/go/memory_resource.go new file mode 100644 index 000000000..df042c821 --- /dev/null +++ b/go/memory_resource.go @@ -0,0 +1,93 @@ +package cuvs + +// #include +import "C" + +import ( + "runtime" +) + +type CuvsMemoryCommand int + +const ( + CuvsMemoryNew = iota + CuvsMemoryRelease +) + +type CuvsPoolMemory struct { + ch chan CuvsMemoryCommand + errCh chan error + initial_pool_size_percent int + max_pool_size_percent int + managed bool +} + +// Creates new CuvsPoolMemory struct +// initial_pool_size_percent is the initial size of the pool in percent of total available device memory +// max_pool_size_percent is the maximum size of the pool in percent of total available device memory +// managed is whether to use CUDA managed memory +func NewCuvsPoolMemory(initial_pool_size_percent int, max_pool_size_percent int, managed bool) (*CuvsPoolMemory, error) { + c := CuvsPoolMemory{ + ch: make(chan CuvsMemoryCommand), + errCh: make(chan error), + initial_pool_size_percent: initial_pool_size_percent, + max_pool_size_percent: max_pool_size_percent, + managed: managed, + } + + c.start() + c.ch <- CuvsMemoryNew + + if err := <-c.errCh; err != nil { + return nil, err + } + + return &c, nil +} + +// Enables pool memory +func (m *CuvsPoolMemory) start() { + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + for command 
:= range m.ch { + var err error + switch command { + case CuvsMemoryNew: + err = CheckCuvs(CuvsError(C.cuvsRMMPoolMemoryResourceEnable( + C.int(m.initial_pool_size_percent), + C.int(m.max_pool_size_percent), + C._Bool(m.managed)))) + m.errCh <- err + + case CuvsMemoryRelease: + err = CheckCuvs(CuvsError(C.cuvsRMMMemoryResourceReset())) + m.errCh <- err + } + } + }() +} + +// Disables pool memory +func (m *CuvsPoolMemory) Close() error { + m.ch <- CuvsMemoryRelease + err := <-m.errCh + close(m.ch) + close(m.errCh) + return err +} + +func Example() error { + mem, err := NewCuvsPoolMemory(60, 100, false) + if err != nil { + return err + } + + err = mem.Close() + if err != nil { + return err + } + + return nil +} diff --git a/go/memory_resource_test.go b/go/memory_resource_test.go new file mode 100644 index 000000000..e44c20485 --- /dev/null +++ b/go/memory_resource_test.go @@ -0,0 +1,19 @@ +package cuvs_test + +import ( + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestMemoryResource(t *testing.T) { + mem, err := cuvs.NewCuvsPoolMemory(60, 100, false) + if err != nil { + t.Fatal("Failed to create memory resource:", err) + } + + err = mem.Close() + if err != nil { + t.Fatal("Failed to close memory resource:", err) + } +} diff --git a/go/resources.go b/go/resources.go new file mode 100644 index 000000000..562aa7b7f --- /dev/null +++ b/go/resources.go @@ -0,0 +1,57 @@ +package cuvs + +// #include +import "C" + +type cuvsResource C.cuvsResources_t + +// Resources are objects that are shared between function calls, +// and includes things like CUDA streams, cuBLAS handles and other +// resources that are expensive to create. 
+type Resource struct { + Resource C.cuvsResources_t +} + +// Returns a new Resource object +func NewResource(stream C.cudaStream_t) (Resource, error) { + res := C.cuvsResources_t(0) + err := CheckCuvs(CuvsError(C.cuvsResourcesCreate(&res))) + if err != nil { + return Resource{}, err + } + + if stream != nil { + err := CheckCuvs(CuvsError(C.cuvsStreamSet(res, stream))) + if err != nil { + return Resource{}, err + } + } + + return Resource{Resource: res}, nil +} + +// Syncs the current cuda stream +func (r Resource) Sync() error { + return CheckCuvs(CuvsError(C.cuvsStreamSync(r.Resource))) +} + +// Gets the current cuda stream +func (r Resource) GetCudaStream() (C.cudaStream_t, error) { + var stream C.cudaStream_t + + err := CheckCuvs(CuvsError(C.cuvsStreamGet(r.Resource, &stream))) + if err != nil { + return C.cudaStream_t(nil), err + } + + return stream, nil +} + +func (r Resource) Close() error { + err := CheckCuvs(CuvsError(C.cuvsResourcesDestroy(r.Resource))) + if err != nil { + return err + } + + return nil +} From 52dd92ccec83924b2045d645c4b48cdb59c741ea Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 6 Feb 2025 13:10:58 -0800 Subject: [PATCH 07/12] [Feat] Add Support for Index `merge` in CAGRA (#618) Authors: - rhdong (https://github.com/rhdong) Approvers: - James Lamb (https://github.com/jameslamb) - Corey J. 
Nolet (https://github.com/cjnolet) - Ishan Chattopadhyaya (https://github.com/chatman) URL: https://github.com/rapidsai/cuvs/pull/618 --- cpp/CMakeLists.txt | 4 + cpp/include/cuvs/neighbors/cagra.hpp | 189 +++++++++++++++++ cpp/src/neighbors/cagra.cuh | 9 + cpp/src/neighbors/cagra_merge_float.cu | 35 +++ cpp/src/neighbors/cagra_merge_half.cu | 35 +++ cpp/src/neighbors/cagra_merge_int8.cu | 35 +++ cpp/src/neighbors/cagra_merge_uint8.cu | 35 +++ .../neighbors/detail/cagra/cagra_merge.cuh | 139 ++++++++++++ cpp/tests/neighbors/ann_cagra.cuh | 199 ++++++++++++++++++ .../ann_cagra/test_float_uint32_t.cu | 6 + .../neighbors/ann_cagra/test_half_uint32_t.cu | 6 + .../ann_cagra/test_int8_t_uint32_t.cu | 5 + .../ann_cagra/test_uint8_t_uint32_t.cu | 5 + python/libcuvs/pyproject.toml | 2 +- 14 files changed, 703 insertions(+), 1 deletion(-) create mode 100644 cpp/src/neighbors/cagra_merge_float.cu create mode 100644 cpp/src/neighbors/cagra_merge_half.cu create mode 100644 cpp/src/neighbors/cagra_merge_int8.cu create mode 100644 cpp/src/neighbors/cagra_merge_uint8.cu create mode 100644 cpp/src/neighbors/detail/cagra/cagra_merge.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b324c6786..6cd87cbad 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -396,6 +396,10 @@ if(BUILD_SHARED_LIBS) src/neighbors/cagra_serialize_half.cu src/neighbors/cagra_serialize_int8.cu src/neighbors/cagra_serialize_uint8.cu + src/neighbors/cagra_merge_float.cu + src/neighbors/cagra_merge_half.cu + src/neighbors/cagra_merge_int8.cu + src/neighbors/cagra_merge_uint8.cu src/neighbors/iface/iface_cagra_float_uint32_t.cu src/neighbors/iface/iface_cagra_half_uint32_t.cu src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index d8edf742b..1d0acbe35 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -264,6 +264,51 @@ struct extend_params { * 0. 
*/ uint32_t max_chunk_size = 0; }; +/** + * @} + */ + +/** + * @defgroup cagra_cpp_merge_params CAGRA index merge parameters + * @{ + */ + +/** + * @brief Determines the strategy for merging CAGRA graphs. + * + * @note Currently, only the PHYSICAL strategy is supported. + */ +enum MergeStrategy { + /** + * @brief Physical merge: Builds a new CAGRA graph from the union of dataset points + * in existing CAGRA graphs. + * + * This is expensive to build but does not impact search latency or quality. + * Preferred for many smaller CAGRA graphs. + * + * @note Currently, this is the only supported strategy. + */ + PHYSICAL +}; + +/** + * @brief Parameters for merging CAGRA indexes. + */ +struct merge_params { + merge_params() = default; + + /** + * @brief Constructs merge parameters with given index parameters. + * @param params Parameters for creating the output index. + */ + explicit merge_params(const cagra::index_params& params) : output_index_params(params) {} + + /// Parameters for creating the output index. + cagra::index_params output_index_params; + + /// Strategy for merging. Defaults to `MergeStrategy::PHYSICAL`. + MergeStrategy strategy = MergeStrategy::PHYSICAL; +}; /** * @} @@ -1794,6 +1839,150 @@ void serialize_to_hnswlib( std::optional> dataset = std::nullopt); +/** + * @defgroup cagra_cpp_index_merge CAGRA index merge functions + * @{ + */ + +/** @brief Merge multiple CAGRA indices into a single index. + * + * This function merges multiple CAGRA indices into one, combining both the datasets and graph + * structures. + * + * @note: When device memory is sufficient, the dataset attached to the returned index is allocated + * in device memory by default; otherwise, host memory is used automatically.
+ * + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * auto dataset0 = raft::make_host_matrix(handle, size0, dim); + * auto dataset1 = raft::make_host_matrix(handle, size1, dim); + * + * auto index0 = cagra::build(res, index_params, dataset0); + * auto index1 = cagra::build(res, index_params, dataset1); + * + * std::vector*> indices{&index0, &index1}; + * cagra::merge_params params{index_params}; + * + * auto merged_index = cagra::merge(res, params, indices); + * @endcode + * + * @param[in] res RAFT resources used for the merge operation. + * @param[in] params Parameters that control the merging process. + * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: + * - Have attached datasets with the same dimension. + * + * @return A new CAGRA index containing the merged indices, graph, and dataset. + */ +auto merge(raft::resources const& res, + const cuvs::neighbors::cagra::merge_params& params, + std::vector*>& indices) + -> cuvs::neighbors::cagra::index; + +/** @brief Merge multiple CAGRA indices into a single index. + * + * This function merges multiple CAGRA indices into one, combining both the datasets and graph + * structures. + * + * @note: When device memory is sufficient, the dataset attached to the returned index is allocated + * in device memory by default; otherwise, host memory is used automatically. + * + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * auto dataset0 = raft::make_host_matrix(handle, size0, dim); + * auto dataset1 = raft::make_host_matrix(handle, size1, dim); + * + * auto index0 = cagra::build(res, index_params, dataset0); + * auto index1 = cagra::build(res, index_params, dataset1); + * + * std::vector*> indices{&index0, &index1}; + * cagra::merge_params params{index_params}; + * + * auto merged_index = cagra::merge(res, params, indices); + * @endcode + * + * @param[in] res RAFT resources used for the merge operation. 
+ * @param[in] params Parameters that control the merging process. + * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: + * - Have attached datasets with the same dimension. + * + * @return A new CAGRA index containing the merged indices, graph, and dataset. + */ +auto merge(raft::resources const& res, + const cuvs::neighbors::cagra::merge_params& params, + std::vector*>& indices) + -> cuvs::neighbors::cagra::index; + +/** @brief Merge multiple CAGRA indices into a single index. + * + * This function merges multiple CAGRA indices into one, combining both the datasets and graph + * structures. + * + * @note: When device memory is sufficient, the dataset attached to the returned index is allocated + * in device memory by default; otherwise, host memory is used automatically. + * + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * auto dataset0 = raft::make_host_matrix(handle, size0, dim); + * auto dataset1 = raft::make_host_matrix(handle, size1, dim); + * + * auto index0 = cagra::build(res, index_params, dataset0); + * auto index1 = cagra::build(res, index_params, dataset1); + * + * std::vector*> indices{&index0, &index1}; + * cagra::merge_params params{index_params}; + * + * auto merged_index = cagra::merge(res, params, indices); + * @endcode + * + * @param[in] res RAFT resources used for the merge operation. + * @param[in] params Parameters that control the merging process. + * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: + * - Have attached datasets with the same dimension. + * + * @return A new CAGRA index containing the merged indices, graph, and dataset. + */ +auto merge(raft::resources const& res, + const cuvs::neighbors::cagra::merge_params& params, + std::vector*>& indices) + -> cuvs::neighbors::cagra::index; + +/** @brief Merge multiple CAGRA indices into a single index. 
+ * + * This function merges multiple CAGRA indices into one, combining both the datasets and graph + * structures. + * + * @note: When device memory is sufficient, the dataset attached to the returned index is allocated + * in device memory by default; otherwise, host memory is used automatically. + * + * Usage example: + * @code{.cpp} + * using namespace raft::neighbors; + * auto dataset0 = raft::make_host_matrix(handle, size0, dim); + * auto dataset1 = raft::make_host_matrix(handle, size1, dim); + * + * auto index0 = cagra::build(res, index_params, dataset0); + * auto index1 = cagra::build(res, index_params, dataset1); + * + * std::vector*> indices{&index0, &index1}; + * cagra::merge_params params{index_params}; + * + * auto merged_index = cagra::merge(res, params, indices); + * @endcode + * + * @param[in] res RAFT resources used for the merge operation. + * @param[in] params Parameters that control the merging process. + * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must: + * - Have attached datasets with the same dimension. + * + * @return A new CAGRA index containing the merged indices, graph, and dataset. 
+ */ +auto merge(raft::resources const& res, + const cuvs::neighbors::cagra::merge_params& params, + std::vector*>& indices) + -> cuvs::neighbors::cagra::index; /** * @} */ diff --git a/cpp/src/neighbors/cagra.cuh b/cpp/src/neighbors/cagra.cuh index f294c9b44..305788f7c 100644 --- a/cpp/src/neighbors/cagra.cuh +++ b/cpp/src/neighbors/cagra.cuh @@ -18,6 +18,7 @@ #include "detail/cagra/add_nodes.cuh" #include "detail/cagra/cagra_build.cuh" +#include "detail/cagra/cagra_merge.cuh" #include "detail/cagra/cagra_search.cuh" #include "detail/cagra/graph_core.cuh" @@ -380,6 +381,14 @@ void extend( cagra::extend_core(handle, additional_dataset, index, params, ndv, ngv); } +template +index merge(raft::resources const& handle, + const cagra::merge_params& params, + std::vector*>& indices) +{ + return cagra::detail::merge(handle, params, indices); +} + /** @} */ // end group cagra } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_merge_float.cu b/cpp/src/neighbors/cagra_merge_float.cu new file mode 100644 index 000000000..951c0c5fe --- /dev/null +++ b/cpp/src/neighbors/cagra_merge_float.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cagra.cuh" +#include + +namespace cuvs::neighbors::cagra { + +#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ + auto merge(raft::resources const& handle, \ + const cuvs::neighbors::cagra::merge_params& params, \ + std::vector*>& indices) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::merge(handle, params, indices); \ + } + +RAFT_INST_CAGRA_MERGE(float, uint32_t); + +#undef RAFT_INST_CAGRA_MERGE + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_merge_half.cu b/cpp/src/neighbors/cagra_merge_half.cu new file mode 100644 index 000000000..704a00f74 --- /dev/null +++ b/cpp/src/neighbors/cagra_merge_half.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cagra.cuh" +#include + +namespace cuvs::neighbors::cagra { + +#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ + auto merge(raft::resources const& handle, \ + const cuvs::neighbors::cagra::merge_params& params, \ + std::vector*>& indices) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::merge(handle, params, indices); \ + } + +RAFT_INST_CAGRA_MERGE(half, uint32_t); + +#undef RAFT_INST_CAGRA_MERGE + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_merge_int8.cu b/cpp/src/neighbors/cagra_merge_int8.cu new file mode 100644 index 000000000..a7e903562 --- /dev/null +++ b/cpp/src/neighbors/cagra_merge_int8.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cagra.cuh" +#include + +namespace cuvs::neighbors::cagra { + +#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ + auto merge(raft::resources const& handle, \ + const cuvs::neighbors::cagra::merge_params& params, \ + std::vector*>& indices) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::merge(handle, params, indices); \ + } + +RAFT_INST_CAGRA_MERGE(int8_t, uint32_t); + +#undef RAFT_INST_CAGRA_MERGE + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/cagra_merge_uint8.cu b/cpp/src/neighbors/cagra_merge_uint8.cu new file mode 100644 index 000000000..a4fc7149c --- /dev/null +++ b/cpp/src/neighbors/cagra_merge_uint8.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cagra.cuh" +#include + +namespace cuvs::neighbors::cagra { + +#define RAFT_INST_CAGRA_MERGE(T, IdxT) \ + auto merge(raft::resources const& handle, \ + const cuvs::neighbors::cagra::merge_params& params, \ + std::vector*>& indices) \ + ->cuvs::neighbors::cagra::index \ + { \ + return cuvs::neighbors::cagra::merge(handle, params, indices); \ + } + +RAFT_INST_CAGRA_MERGE(uint8_t, uint32_t); + +#undef RAFT_INST_CAGRA_MERGE + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh new file mode 100644 index 000000000..bc29cb206 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cuvs::neighbors::cagra::detail { + +template +index merge(raft::resources const& handle, + const cagra::merge_params& params, + std::vector*>& indices) +{ + std::size_t dim = 0; + std::size_t new_dataset_size = 0; + int64_t stride = -1; + + for (auto index : indices) { + RAFT_EXPECTS(index != nullptr, + "Null pointer detected in 'indices'. 
Ensure all elements are valid before usage."); + using ds_idx_type = decltype(index->data().n_rows()); + if (auto* strided_dset = dynamic_cast*>(&index->data()); + strided_dset != nullptr) { + if (dim == 0) { + dim = index->dim(); + stride = strided_dset->stride(); + } else { + RAFT_EXPECTS(dim == index->dim(), "Dimension of datasets in indices must be equal."); + } + new_dataset_size += index->size(); + } else if (dynamic_cast*>(&index->data()) != + nullptr) { + RAFT_FAIL( + "cagra::merge only supports an index to which the dataset is attached. Please check if the " + "index was built with index_param.attach_dataset_on_build = true, or if a dataset was " + "attached after the build."); + } else { + RAFT_FAIL("cagra::merge only supports an uncompressed dataset index"); + } + } + + IdxT offset = 0; + + auto merge_dataset = [&](T* dst) { + for (auto index : indices) { + using ds_idx_type = decltype(index->data().n_rows()); + auto* strided_dset = dynamic_cast*>(&index->data()); + + RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst + offset * dim, + sizeof(T) * dim, + strided_dset->view().data_handle(), + sizeof(T) * stride, + sizeof(T) * dim, + strided_dset->n_rows(), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(handle))); + + offset += IdxT(index->data().n_rows()); + } + }; + + cagra::index_params output_index_params = params.output_index_params; + + try { + auto updated_dataset = raft::make_device_matrix( + handle, std::int64_t(new_dataset_size), std::int64_t(dim)); + + merge_dataset(updated_dataset.data_handle()); + + auto merged_index = + cagra::build(handle, output_index_params, raft::make_const_mdspan(updated_dataset.view())); + if (!merged_index.data().is_owning() && output_index_params.attach_dataset_on_build) { + using matrix_t = decltype(updated_dataset); + using layout_t = typename matrix_t::layout_type; + using container_policy_t = typename matrix_t::container_policy_type; + using owning_t = owning_dataset; + auto out_layout = 
raft::make_strided_layout(updated_dataset.view().extents(), + std::array{stride, 1}); + merged_index.update_dataset(handle, owning_t{std::move(updated_dataset), out_layout}); + } + RAFT_LOG_DEBUG("cagra merge: using device memory for merged dataset"); + return merged_index; + + } catch (std::bad_alloc& e) { + RAFT_LOG_DEBUG("cagra::merge: using host memory for merged dataset"); + + auto updated_dataset = + raft::make_host_matrix(std::int64_t(new_dataset_size), std::int64_t(dim)); + + merge_dataset(updated_dataset.data_handle()); + + auto merged_index = + cagra::build(handle, output_index_params, raft::make_const_mdspan(updated_dataset.view())); + if (!merged_index.data().is_owning() && output_index_params.attach_dataset_on_build) { + using matrix_t = decltype(updated_dataset); + using layout_t = typename matrix_t::layout_type; + using container_policy_t = typename matrix_t::container_policy_type; + using owning_t = owning_dataset; + auto out_layout = raft::make_strided_layout(updated_dataset.view().extents(), + std::array{stride, 1}); + merged_index.update_dataset(handle, owning_t{std::move(updated_dataset), out_layout}); + } + return merged_index; + } +} + +} // namespace cuvs::neighbors::cagra::detail diff --git a/cpp/tests/neighbors/ann_cagra.cuh b/cpp/tests/neighbors/ann_cagra.cuh index aedb11543..1e695f9a8 100644 --- a/cpp/tests/neighbors/ann_cagra.cuh +++ b/cpp/tests/neighbors/ann_cagra.cuh @@ -922,6 +922,205 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { rmm::device_uvector search_queries; }; +template +class AnnCagraIndexMergeTest : public ::testing::TestWithParam { + public: + AnnCagraIndexMergeTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + protected: + void testCagra() + { + // TODO (tarang-jain): remove when NN Descent index building support InnerProduct. 
Reference + // issue: https://github.com/rapidsai/raft/issues/2276 + if (ps.metric == InnerProduct && ps.build_algo == graph_build_algo::NN_DESCENT) GTEST_SKIP(); + if (ps.compression != std::nullopt) GTEST_SKIP(); + // IVF_PQ and NN_DESCENT graph builds do not support BitwiseHamming + if (ps.metric == cuvs::distance::DistanceType::BitwiseHamming && + ((!std::is_same_v) || + (ps.build_algo != graph_build_algo::ITERATIVE_CAGRA_SEARCH))) + GTEST_SKIP(); + // If the dataset dimension is small and the dataset size is large, there can be a lot of + // dataset vectors that have the same distance to the query, especially in the binary Hamming + // distance, making it impossible to make a top-k ground truth. + if (ps.metric == cuvs::distance::DistanceType::BitwiseHamming && + (ps.k * ps.dim * 8 / 5 /*(=magic number)*/ < ps.n_rows)) + GTEST_SKIP(); + + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + + cuvs::neighbors::naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.n_queries, + ps.n_rows, + ps.dim, + ps.k, + ps.metric); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + rmm::device_uvector distances_dev(queries_size, stream_); + rmm::device_uvector indices_dev(queries_size, stream_); + + { + cagra::index_params index_params; + index_params.metric = ps.metric; // Note: currently only the cagra::index_params metric is + // not used for knn_graph building.
+ + switch (ps.build_algo) { + case graph_build_algo::IVF_PQ: + index_params.graph_build_params = + graph_build_params::ivf_pq_params(raft::matrix_extent(ps.n_rows, ps.dim)); + if (ps.ivf_pq_search_refine_ratio) { + std::get( + index_params.graph_build_params) + .refinement_rate = *ps.ivf_pq_search_refine_ratio; + } + break; + case graph_build_algo::NN_DESCENT: { + index_params.graph_build_params = + graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); + break; + } + case graph_build_algo::ITERATIVE_CAGRA_SEARCH: { + index_params.graph_build_params = graph_build_params::iterative_search_params(); + break; + } + case graph_build_algo::AUTO: + // do nothing + break; + }; + + const double splite_ratio = 0.55; + const std::size_t database0_size = ps.n_rows * splite_ratio; + const std::size_t database1_size = ps.n_rows - database0_size; + + auto database0_view = raft::make_device_matrix_view( + (const DataT*)database.data(), database0_size, ps.dim); + + auto database1_view = raft::make_device_matrix_view( + (const DataT*)database.data() + database0_view.size(), database1_size, ps.dim); + + cagra::index index0(handle_); + cagra::index index1(handle_); + if (ps.host_dataset) { + { + std::optional> database_host{std::nullopt}; + database_host = raft::make_host_matrix(database0_size, ps.dim); + raft::copy(database_host->data_handle(), + database0_view.data_handle(), + database0_view.size(), + stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host->data_handle(), database0_size, ps.dim); + index0 = cagra::build(handle_, index_params, database_host_view); + } + { + std::optional> database_host{std::nullopt}; + database_host = raft::make_host_matrix(database1_size, ps.dim); + raft::copy(database_host->data_handle(), + database1_view.data_handle(), + database1_view.size(), + stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host->data_handle(), database1_size, ps.dim); 
+ index1 = cagra::build(handle_, index_params, database_host_view); + } + } else { + index0 = cagra::build(handle_, index_params, database0_view); + index1 = cagra::build(handle_, index_params, database1_view); + }; + std::vector*> indices{&index0, &index1}; + cagra::merge_params merge_params{index_params}; + auto index = cagra::merge(handle_, merge_params, indices); + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; + + cagra::search( + handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); + raft::update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); + raft::update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.006, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + search_queries.resize(ps.n_queries * ps.dim, stream_); + raft::random::RngState r(1234ULL); + InitDataset(handle_, database.data(), ps.n_rows, ps.dim, ps.metric, r); + InitDataset(handle_, search_queries.data(), ps.n_queries, ps.dim, ps.metric, r); + raft::resource::sync_stream(handle_); + } + + void 
TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnCagraInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + inline std::vector generate_inputs() { // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries diff --git a/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu index 640657ccb..16c0d3d2a 100644 --- a/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu +++ b/cpp/tests/neighbors/ann_cagra/test_float_uint32_t.cu @@ -29,6 +29,9 @@ TEST_P(AnnCagraAddNodesTestF_U32, AnnCagraAddNodes) { this->testCagra(); } typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; TEST_P(AnnCagraFilterTestF_U32, AnnCagra) { this->testCagra(); } +typedef AnnCagraIndexMergeTest AnnCagraIndexMergeTestF_U32; +TEST_P(AnnCagraIndexMergeTestF_U32, AnnCagraIndexMerge) { this->testCagra(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, AnnCagraAddNodesTestF_U32, @@ -36,5 +39,8 @@ INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestF_U32, ::testing::ValuesIn(inputs_filtering)); +INSTANTIATE_TEST_CASE_P(AnnCagraIndexMergeTest, + AnnCagraIndexMergeTestF_U32, + ::testing::ValuesIn(inputs)); } // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu index f03de69d2..de682cee4 100644 --- a/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu +++ b/cpp/tests/neighbors/ann_cagra/test_half_uint32_t.cu @@ -23,6 +23,12 @@ namespace cuvs::neighbors::cagra { typedef AnnCagraTest AnnCagraTestF16_U32; TEST_P(AnnCagraTestF16_U32, AnnCagra) { this->testCagra(); } +typedef 
AnnCagraIndexMergeTest AnnCagraIndexMergeTestF16_U32; +TEST_P(AnnCagraIndexMergeTestF16_U32, AnnCagraIndexMerge) { this->testCagra(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF16_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraIndexMergeTest, + AnnCagraIndexMergeTestF16_U32, + ::testing::ValuesIn(inputs)); } // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu index 32f6289fe..28be13c5b 100644 --- a/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ b/cpp/tests/neighbors/ann_cagra/test_int8_t_uint32_t.cu @@ -26,6 +26,8 @@ typedef AnnCagraAddNodesTest AnnCagraAddNodes TEST_P(AnnCagraAddNodesTestI8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; TEST_P(AnnCagraFilterTestI8_U32, AnnCagra) { this->testCagra(); } +typedef AnnCagraIndexMergeTest AnnCagraIndexMergeTestI8_U32; +TEST_P(AnnCagraIndexMergeTestI8_U32, AnnCagra) { this->testCagra(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, @@ -34,5 +36,8 @@ INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestI8_U32, ::testing::ValuesIn(inputs_filtering)); +INSTANTIATE_TEST_CASE_P(AnnCagraIndexMergeTest, + AnnCagraIndexMergeTestI8_U32, + ::testing::ValuesIn(inputs)); } // namespace cuvs::neighbors::cagra diff --git a/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu index 53f804be6..dd9ffd1ae 100644 --- a/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ b/cpp/tests/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -26,6 +26,8 @@ typedef AnnCagraAddNodesTest AnnCagraAddNode TEST_P(AnnCagraAddNodesTestU8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; TEST_P(AnnCagraFilterTestU8_U32, 
AnnCagra) { this->testCagra(); } +typedef AnnCagraIndexMergeTest AnnCagraIndexMergeTestU8_U32; +TEST_P(AnnCagraIndexMergeTestU8_U32, AnnCagra) { this->testCagra(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, @@ -34,5 +36,8 @@ INSTANTIATE_TEST_CASE_P(AnnCagraAddNodesTest, INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestU8_U32, ::testing::ValuesIn(inputs_filtering)); +INSTANTIATE_TEST_CASE_P(AnnCagraIndexMergeTest, + AnnCagraIndexMergeTestU8_U32, + ::testing::ValuesIn(inputs)); } // namespace cuvs::neighbors::cagra diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index 28443b782..3fd69b989 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -105,4 +105,4 @@ select = [ ] # detect when package size grows significantly -max_allowed_size_compressed = '1.1G' +max_allowed_size_compressed = '1.2G' From 4b289a045483cceee0f33796f65aeaf4747579b2 Mon Sep 17 00:00:00 2001 From: Severin Dicks <37635888+Intron7@users.noreply.github.com> Date: Thu, 6 Feb 2025 22:39:34 +0100 Subject: [PATCH 08/12] add docs for nn_descent (#668) This PR adds docs for cuvs nn_descent Authors: - Severin Dicks (https://github.com/Intron7) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/668 --- docs/source/python_api/neighbors.rst | 1 + .../source/python_api/neighbors_nn_decent.rst | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 docs/source/python_api/neighbors_nn_decent.rst diff --git a/docs/source/python_api/neighbors.rst b/docs/source/python_api/neighbors.rst index cd4f2609c..8e2bad9f1 100644 --- a/docs/source/python_api/neighbors.rst +++ b/docs/source/python_api/neighbors.rst @@ -14,3 +14,4 @@ Nearest Neighbors neighbors_hnsw.rst neighbors_ivf_flat.rst neighbors_ivf_pq.rst + neighbors_nn_decent.rst diff --git a/docs/source/python_api/neighbors_nn_decent.rst b/docs/source/python_api/neighbors_nn_decent.rst new file mode 100644 index 000000000..df872d7e9 --- /dev/null +++ b/docs/source/python_api/neighbors_nn_decent.rst @@ -0,0 +1,24 @@ +NN-Descent +====== + +.. role:: py(code) + :language: python + :class: highlight + +Index build parameters +###################### + +.. autoclass:: cuvs.neighbors.nn_descent.IndexParams + :members: + + +Index +##### + +.. autoclass:: cuvs.neighbors.nn_descent.Index + :members: + +Index build +########### + +.. autofunction:: cuvs.neighbors.nn_descent.build From f15c1ea93eb63b99a8f885d1e627f94ab7a74196 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Fri, 7 Feb 2025 15:24:29 +0100 Subject: [PATCH 09/12] Fix ann-bench dataset blob integer overflow leading to incorrect data copy beyond 4B elems (#671) ann-bench keeps data dimensions as `uint32_t`. We use `std::fread` to copy the data from a file to the host memory and pass `n_rows * n_cols` there, which is cast to size_t only after the multiplication. This leads to integer overflow for the datasets larger than 4B elements and a partial data copy. This PR fixes the bug by casting the dimensions before the multiplication.
The bug only affects the benchmark cases where the data is requested in the host memory not backed by a file. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/cuvs/pull/671 --- cpp/bench/ann/src/common/blob.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/bench/ann/src/common/blob.hpp b/cpp/bench/ann/src/common/blob.hpp index 81310ae0b..3a51372cf 100644 --- a/cpp/bench/ann/src/common/blob.hpp +++ b/cpp/bench/ann/src/common/blob.hpp @@ -453,7 +453,8 @@ struct blob_mmap { size_t size = data_end - data_start; mmap_owner owner{size, flags}; std::fseek(file_.descriptor().value(), data_start, SEEK_SET); - size_t n_elems = file_.rows_limit() * file_.n_cols(); + auto n_elems = + static_cast(file_.rows_limit()) * static_cast(file_.n_cols()); if (std::fread(owner.data(), sizeof(T), n_elems, file_.descriptor().value()) != n_elems) { throw std::runtime_error{"cuvs::bench::blob_mmap() fread " + file_.path() + " failed"}; } From 904051ee89af61235496c7bed444bb1a2f535941 Mon Sep 17 00:00:00 2001 From: Tarang Jain <40517122+tarang-jain@users.noreply.github.com> Date: Fri, 7 Feb 2025 20:22:38 +0530 Subject: [PATCH 10/12] Add deep-100M to datasets.yaml for cuvs-bench (#670) Authors: - Tarang Jain (https://github.com/tarang-jain) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/670 --- .../cuvs_bench/cuvs_bench/config/datasets/datasets.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml index 188d24d20..d64e232b7 100644 --- a/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml +++ b/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml @@ -125,3 +125,11 @@ query_file: wiki_all_88M/queries.fbin groundtruth_neighbors_file: wiki_all_88M/groundtruth.88M.neighbors.ibin distance: euclidean + +- name: deep-100M + dims: 96 + subset_size: 100000000 + base_file: deep-100M/base.1B.fbin + query_file: deep-100M/query.public.10K.fbin + groundtruth_neighbors_file: deep-100M/groundtruth.public.10K.ibin + distance: euclidean From ab597ecaae9335f9edd791574f68b40aef66e734 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 7 Feb 2025 09:39:55 -0800 Subject: [PATCH 11/12] Add filtering to python for ivf_flat (#664) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/664 --- cpp/include/cuvs/neighbors/ivf_flat.h | 7 +- cpp/src/neighbors/ivf_flat_c.cpp | 36 +++++-- cpp/tests/neighbors/run_ivf_flat_c.c | 6 +- examples/c/src/ivf_flat_c_example.c | 11 +- go/ivf_flat/ivf_flat.go | 6 +- .../cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pxd | 4 +- .../cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx | 12 ++- python/cuvs/cuvs/tests/ann_utils.py | 92 +++++++++++++++- python/cuvs/cuvs/tests/test_cagra.py | 100 ++---------------- python/cuvs/cuvs/tests/test_ivf_flat.py | 11 +- rust/cuvs/src/ivf_flat/index.rs | 6 ++ 11 files changed, 182 insertions(+), 109 deletions(-) diff --git a/cpp/include/cuvs/neighbors/ivf_flat.h b/cpp/include/cuvs/neighbors/ivf_flat.h index 5c6162041..c46a9b2cd 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.h +++ b/cpp/include/cuvs/neighbors/ivf_flat.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -267,13 +268,17 @@ cuvsError_t cuvsIvfFlatBuild(cuvsResources_t res, * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries + * @param[in] filter cuvsFilter input filter that can be used + to filter queries and neighbors based on the given bitset. 
*/ cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatSearchParams_t search_params, cuvsIvfFlatIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, - DLManagedTensor* distances); + DLManagedTensor* distances, + cuvsFilter filter); + /** * @} */ diff --git a/cpp/src/neighbors/ivf_flat_c.cpp b/cpp/src/neighbors/ivf_flat_c.cpp index 2acc6b678..b38f1808d 100644 --- a/cpp/src/neighbors/ivf_flat_c.cpp +++ b/cpp/src/neighbors/ivf_flat_c.cpp @@ -67,7 +67,8 @@ void _search(cuvsResources_t res, cuvsIvfFlatIndex index, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) + DLManagedTensor* distances_tensor, + cuvsFilter filter) { auto res_ptr = reinterpret_cast(res); auto index_ptr = reinterpret_cast*>(index.addr); @@ -82,8 +83,27 @@ void _search(cuvsResources_t res, auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); auto distances_mds = cuvs::core::from_dlpack(distances_tensor); - cuvs::neighbors::ivf_flat::search( - *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); + if (filter.type == NO_FILTER) { + cuvs::neighbors::ivf_flat::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); + } else if (filter.type == BITSET) { + using filter_mdspan_type = raft::device_vector_view; + auto removed_indices_tensor = reinterpret_cast(filter.addr); + auto removed_indices = cuvs::core::from_dlpack(removed_indices_tensor); + cuvs::core::bitset_view removed_indices_bitset(removed_indices, + index_ptr->size()); + auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset); + cuvs::neighbors::ivf_flat::search(*res_ptr, + search_params, + *index_ptr, + queries_mds, + neighbors_mds, + distances_mds, + bitset_filter_obj); + + } else { + RAFT_FAIL("Unsupported filter type: BITMAP"); + } } template @@ -179,7 +199,9 @@ extern "C" cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, cuvsIvfFlatIndex_t index_c_ptr, 
DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) + DLManagedTensor* distances_tensor, + cuvsFilter filter) + { return cuvs::core::translate_exceptions([=] { auto queries = queries_tensor->dl_tensor; @@ -203,13 +225,13 @@ extern "C" cuvsError_t cuvsIvfFlatSearch(cuvsResources_t res, if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { _search( - res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); } else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) { _search( - res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); } else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) { _search( - res, *params, index, queries_tensor, neighbors_tensor, distances_tensor); + res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter); } else { RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", queries.dtype.code, diff --git a/cpp/tests/neighbors/run_ivf_flat_c.c b/cpp/tests/neighbors/run_ivf_flat_c.c index 8cd79c91f..d58f11bf3 100644 --- a/cpp/tests/neighbors/run_ivf_flat_c.c +++ b/cpp/tests/neighbors/run_ivf_flat_c.c @@ -91,12 +91,16 @@ void run_ivf_flat(int64_t n_rows, distances_tensor.dl_tensor.shape = distances_shape; distances_tensor.dl_tensor.strides = NULL; + cuvsFilter filter; + filter.type = NO_FILTER; + filter.addr = (uintptr_t)NULL; + // search index cuvsIvfFlatSearchParams_t search_params; cuvsIvfFlatSearchParamsCreate(&search_params); search_params->n_probes = n_probes; cuvsIvfFlatSearch( - res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor, filter); // de-allocate index and res 
cuvsIvfFlatSearchParamsDestroy(search_params); diff --git a/examples/c/src/ivf_flat_c_example.c b/examples/c/src/ivf_flat_c_example.c index 2121ca35e..510d624f3 100644 --- a/examples/c/src/ivf_flat_c_example.c +++ b/examples/c/src/ivf_flat_c_example.c @@ -67,8 +67,12 @@ void ivf_flat_build_search_simple(cuvsResources_t *res, DLManagedTensor * datase search_params->n_probes = 50; // Search the `index` built using `ivfFlatBuild` + cuvsFilter filter; + filter.type = NO_FILTER; + filter.addr = (uintptr_t)NULL; + cuvsError_t search_status = cuvsIvfFlatSearch(*res, search_params, index, - queries_tensor, &neighbors_tensor, &distances_tensor); + queries_tensor, &neighbors_tensor, &distances_tensor, filter); if (build_status != CUVS_SUCCESS) { printf("%s.\n", cuvsGetLastErrorText()); } @@ -165,8 +169,11 @@ void ivf_flat_build_extend_search(cuvsResources_t *res, DLManagedTensor * trains search_params->n_probes = 10; // Search the `index` built using `ivfFlatBuild` + cuvsFilter filter; + filter.type = NO_FILTER; + filter.addr = (uintptr_t)NULL; cuvsError_t search_status = cuvsIvfFlatSearch(*res, search_params, index, - queries_tensor, &neighbors_tensor, &distances_tensor); + queries_tensor, &neighbors_tensor, &distances_tensor, filter); if (search_status != CUVS_SUCCESS) { printf("%s.\n", cuvsGetLastErrorText()); exit(-1); diff --git a/go/ivf_flat/ivf_flat.go b/go/ivf_flat/ivf_flat.go index 3330eb95e..61e17172f 100644 --- a/go/ivf_flat/ivf_flat.go +++ b/go/ivf_flat/ivf_flat.go @@ -67,6 +67,10 @@ func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *Iv if !index.trained { return errors.New("index needs to be built before calling search") } + prefilter := C.cuvsFilter{ + addr: 0, + _type: C.NO_FILTER, + } - return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), 
(*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), prefilter))) } diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pxd b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pxd index 96bc557e6..f2bd6a9b1 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pxd +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pxd @@ -21,6 +21,7 @@ from libcpp cimport bool from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor from cuvs.distance_type cimport cuvsDistanceType +from cuvs.neighbors.filters.filters cimport cuvsFilter cdef extern from "cuvs/neighbors/ivf_flat.h" nogil: @@ -71,7 +72,8 @@ cdef extern from "cuvs/neighbors/ivf_flat.h" nogil: cuvsIvfFlatIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, - DLManagedTensor* distances) except + + DLManagedTensor* distances, + cuvsFilter filter) except + cuvsError_t cuvsIvfFlatSerialize(cuvsResources_t res, const char * filename, diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index 7a169e1a0..437499c1e 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -34,6 +34,7 @@ from pylibraft.common.interruptible import cuda_interruptible from cuvs.distance import DISTANCE_TYPES from cuvs.neighbors.common import _check_input_array +from cuvs.neighbors.filters import no_filter from libc.stdint cimport ( int8_t, @@ -274,7 +275,8 @@ def search(SearchParams search_params, k, neighbors=None, distances=None, - resources=None): + resources=None, + filter=None): """ Find the k nearest neighbors for each query. 
@@ -293,6 +295,8 @@ def search(SearchParams search_params, distances : Optional CUDA array interface compliant matrix shape (n_queries, k) If supplied, the distances to the neighbors will be written here in-place. (default None) + filter: Optional cuvs.neighbors.cuvsFilter can be used to filter + neighbors based on a given bitset. (default None) {resources_docstring} Examples @@ -339,6 +343,9 @@ def search(SearchParams search_params, _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) + if filter is None: + filter = no_filter() + cdef cuvsIvfFlatSearchParams* params = search_params.params cdef cuvsError_t search_status cdef cydlpack.DLManagedTensor* queries_dlpack = \ @@ -356,7 +363,8 @@ def search(SearchParams search_params, index.index, queries_dlpack, neighbors_dlpack, - distances_dlpack + distances_dlpack, + filter.prefilter )) return (distances, neighbors) diff --git a/python/cuvs/cuvs/tests/ann_utils.py b/python/cuvs/cuvs/tests/ann_utils.py index 60db7f327..b8f5d0bb0 100644 --- a/python/cuvs/cuvs/tests/ann_utils.py +++ b/python/cuvs/cuvs/tests/ann_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,10 @@ # limitations under the License. 
import numpy as np +from pylibraft.common import device_ndarray +from sklearn.neighbors import NearestNeighbors + +from cuvs.neighbors import filters def generate_data(shape, dtype): @@ -33,3 +37,89 @@ def calc_recall(ann_idx, true_nn_idx): n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size recall = n / ann_idx.size return recall + + +def create_sparse_bitset(n_size, sparsity): + bits_per_uint32 = 32 + num_bits = n_size + num_uint32s = (num_bits + bits_per_uint32 - 1) // bits_per_uint32 + num_ones = int(num_bits * sparsity) + + array = np.zeros(num_uint32s, dtype=np.uint32) + indices = np.random.choice(num_bits, num_ones, replace=False) + + for index in indices: + i = index // bits_per_uint32 + bit_position = index % bits_per_uint32 + array[i] |= 1 << bit_position + + return array + + +def run_filtered_search_test( + search_module, + sparsity, + n_rows=10000, + n_cols=10, + n_queries=10, + k=10, +): + dataset = generate_data((n_rows, n_cols), np.float32) + queries = generate_data((n_queries, n_cols), np.float32) + + bitset = create_sparse_bitset(n_rows, sparsity) + + dataset_device = device_ndarray(dataset) + queries_device = device_ndarray(queries) + bitset_device = device_ndarray(bitset) + + build_params = search_module.IndexParams() + index = search_module.build(build_params, dataset_device) + + filter_ = filters.from_bitset(bitset_device) + + search_params = search_module.SearchParams() + ret_distances, ret_indices = search_module.search( + search_params, + index, + queries_device, + k, + filter=filter_, + ) + + # Convert bitset to bool array for validation + bitset_as_uint8 = bitset.view(np.uint8) + bool_filter = np.unpackbits(bitset_as_uint8) + bool_filter = bool_filter.reshape(-1, 4, 8) + bool_filter = np.flip(bool_filter, axis=2) + bool_filter = bool_filter.reshape(-1)[:n_rows] + bool_filter = np.logical_not(bool_filter) # Flip so True means filtered + + # Get filtered dataset for reference calculation + non_filtered_mask = ~bool_filter + 
filtered_dataset = dataset[non_filtered_mask] + + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric="euclidean" + ) + nn_skl.fit(filtered_dataset) + skl_idx = nn_skl.kneighbors(queries, return_distance=False) + + actual_indices = ret_indices.copy_to_host() + + filtered_idx_map = ( + np.cumsum(~bool_filter) - 1 + ) # -1 because cumsum starts at 1 + + # Map ANN indices to filtered space + mapped_actual_indices = np.take( + filtered_idx_map, actual_indices, mode="clip" + ) + + filtered_indices = np.where(bool_filter)[0] + for i in range(n_queries): + assert not np.intersect1d(filtered_indices, actual_indices[i]).size + + recall = calc_recall(mapped_actual_indices, skl_idx) + + assert recall > 0.7 diff --git a/python/cuvs/cuvs/tests/test_cagra.py b/python/cuvs/cuvs/tests/test_cagra.py index 831ef11f7..f3de488da 100644 --- a/python/cuvs/cuvs/tests/test_cagra.py +++ b/python/cuvs/cuvs/tests/test_cagra.py @@ -19,8 +19,12 @@ from sklearn.neighbors import NearestNeighbors from sklearn.preprocessing import normalize -from cuvs.neighbors import cagra, filters -from cuvs.tests.ann_utils import calc_recall, generate_data +from cuvs.neighbors import cagra +from cuvs.tests.ann_utils import ( + calc_recall, + generate_data, + run_filtered_search_test, +) def run_cagra_build_search_test( @@ -139,97 +143,9 @@ def test_cagra_dataset_dtype_host_device( ) -def create_sparse_bitset(n_size, sparsity): - bits_per_uint32 = 32 - num_bits = n_size - num_uint32s = (num_bits + bits_per_uint32 - 1) // bits_per_uint32 - num_ones = int(num_bits * sparsity) - - array = np.zeros(num_uint32s, dtype=np.uint32) - indices = np.random.choice(num_bits, num_ones, replace=False) - - for index in indices: - i = index // bits_per_uint32 - bit_position = index % bits_per_uint32 - array[i] |= 1 << bit_position - - return array - - @pytest.mark.parametrize("sparsity", [0.2, 0.5, 0.7, 1.0]) -def test_filtered_cagra( - sparsity, - n_rows=10000, - n_cols=10, - n_queries=10, - k=10, -): - 
dataset = generate_data((n_rows, n_cols), np.float32) - queries = generate_data((n_queries, n_cols), np.float32) - - bitset = create_sparse_bitset(n_rows, sparsity) - - dataset_device = device_ndarray(dataset) - queries_device = device_ndarray(queries) - bitset_device = device_ndarray(bitset) - - build_params = cagra.IndexParams() - index = cagra.build(build_params, dataset_device) - - filter_ = filters.from_bitset(bitset_device) - - out_idx = np.zeros((n_queries, k), dtype=np.uint32) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - out_idx_device = device_ndarray(out_idx) - out_dist_device = device_ndarray(out_dist) - - search_params = cagra.SearchParams() - ret_distances, ret_indices = cagra.search( - search_params, - index, - queries_device, - k, - neighbors=out_idx_device, - distances=out_dist_device, - filter=filter_, - ) - - # Convert bitset to bool array for validation - bitset_as_uint8 = bitset.view(np.uint8) - bool_filter = np.unpackbits(bitset_as_uint8) - bool_filter = bool_filter.reshape(-1, 4, 8) - bool_filter = np.flip(bool_filter, axis=2) - bool_filter = bool_filter.reshape(-1)[:n_rows] - bool_filter = np.logical_not(bool_filter) # Flip so True means filtered - - # Get filtered dataset for reference calculation - non_filtered_mask = ~bool_filter - filtered_dataset = dataset[non_filtered_mask] - - nn_skl = NearestNeighbors( - n_neighbors=k, algorithm="brute", metric="euclidean" - ) - nn_skl.fit(filtered_dataset) - skl_idx = nn_skl.kneighbors(queries, return_distance=False) - - actual_indices = out_idx_device.copy_to_host() - - filtered_idx_map = ( - np.cumsum(~bool_filter) - 1 - ) # -1 because cumsum starts at 1 - - # Map CAGRA indices to filtered space - mapped_actual_indices = np.take( - filtered_idx_map, actual_indices, mode="clip" - ) - - filtered_indices = np.where(bool_filter)[0] - for i in range(n_queries): - assert not np.intersect1d(filtered_indices, actual_indices[i]).size - - recall = calc_recall(mapped_actual_indices, skl_idx) - - 
assert recall > 0.7 +def test_filtered_cagra(sparsity): + run_filtered_search_test(cagra, sparsity) @pytest.mark.parametrize( diff --git a/python/cuvs/cuvs/tests/test_ivf_flat.py b/python/cuvs/cuvs/tests/test_ivf_flat.py index c3ec0252a..6b89041fa 100644 --- a/python/cuvs/cuvs/tests/test_ivf_flat.py +++ b/python/cuvs/cuvs/tests/test_ivf_flat.py @@ -20,7 +20,11 @@ from sklearn.preprocessing import normalize from cuvs.neighbors import ivf_flat -from cuvs.tests.ann_utils import calc_recall, generate_data +from cuvs.tests.ann_utils import ( + calc_recall, + generate_data, + run_filtered_search_test, +) def run_ivf_flat_build_search_test( @@ -129,3 +133,8 @@ def test_extend(dtype): dtype=dtype, add_data_on_build=False, ) + + +@pytest.mark.parametrize("sparsity", [0.5, 0.7, 1.0]) +def test_filtered_ivf_flat(sparsity): + run_filtered_search_test(ivf_flat, sparsity) diff --git a/rust/cuvs/src/ivf_flat/index.rs b/rust/cuvs/src/ivf_flat/index.rs index b1462a0e6..f66904142 100644 --- a/rust/cuvs/src/ivf_flat/index.rs +++ b/rust/cuvs/src/ivf_flat/index.rs @@ -78,6 +78,11 @@ impl Index { distances: &ManagedTensor, ) -> Result<()> { unsafe { + let prefilter = ffi::cuvsFilter { + addr: 0, + type_: ffi::cuvsFilterType::NO_FILTER, + }; + check_cuvs(ffi::cuvsIvfFlatSearch( res.0, params.0, @@ -85,6 +90,7 @@ impl Index { queries.as_ptr(), neighbors.as_ptr(), distances.as_ptr(), + prefilter, )) } } From d760d856fd12b4734b6e284099b3c416ee8d2c2a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 7 Feb 2025 13:16:55 -0600 Subject: [PATCH 12/12] Run cuvs-bench pytests and end-to-end tests in CI (#574) PR does the following: - [x] Modifies CI to run pytest and e2e test of cuvs-bench - [x] We need to test the additional time needed to run the tests. They should be fast, but if they are not, then we can add an additional job to run them in parallel. 
- [x] Adds synthetic test-data generation so the CI jobs don't depend on downloading datasets, and users can easily run tests locally. - [ ] A few improvements still need to be made to the docs, YAML, and other things to make it easier for users. - [x] Checks in some additional pytests that hadn't been checked in before. Authors: - Dante Gama Dessavre (https://github.com/dantegd) - Corey J. Nolet (https://github.com/cjnolet) - Micka (https://github.com/lowener) Approvers: - James Lamb (https://github.com/jameslamb) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/574 --- ci/test_python.sh | 17 +- .../bench_ann_cuda-118_arch-aarch64.yaml | 4 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-128_arch-aarch64.yaml | 4 +- .../bench_ann_cuda-128_arch-x86_64.yaml | 4 +- conda/recipes/cuvs-bench-cpu/meta.yaml | 3 +- conda/recipes/cuvs-bench/meta.yaml | 3 +- dependencies.yaml | 9 +- python/cuvs_bench/.coveragerc | 3 + .../cuvs_bench/config/algos/cuvs_cagra.yaml | 8 + .../config/algos/cuvs_cagra_hnswlib.yaml | 9 + .../config/algos/cuvs_ivf_flat.yaml | 7 + .../cuvs_bench/config/algos/cuvs_ivf_pq.yaml | 12 + .../config/algos/cuvs_mg_cagra.yaml | 8 + .../config/algos/cuvs_mg_ivf_flat.yaml | 7 + .../config/algos/cuvs_mg_ivf_pq.yaml | 12 + .../config/algos/faiss_cpu_ivf_flat.yaml | 8 + .../config/algos/faiss_cpu_ivf_pq.yaml | 8 + .../config/algos/faiss_gpu_cagra.yaml | 8 + .../config/algos/faiss_gpu_ivf_flat.yaml | 7 + .../config/algos/faiss_gpu_ivf_pq.yaml | 12 + .../cuvs_bench/config/algos/hnswlib.yaml | 6 + .../config/datasets/bigann-100M.yaml | 295 ------ .../cuvs_bench/config/datasets/datasets.yaml | 7 + .../cuvs_bench/config/datasets/deep-100M.yaml | 896 ------------------ .../cuvs_bench/config/datasets/deep-1B.yaml | 26 - .../config/datasets/deep-image-96-inner.yaml | 712 -------------- .../datasets/fashion-mnist-784-euclidean.yaml | 778 --------------- .../config/datasets/gist-960-euclidean.yaml | 777 ---------------
.../config/datasets/glove-100-angular.yaml | 777 --------------- .../config/datasets/glove-100-inner.yaml | 777 --------------- .../config/datasets/glove-50-angular.yaml | 777 --------------- .../config/datasets/glove-50-inner.yaml | 777 --------------- .../config/datasets/lastfm-65-angular.yaml | 777 --------------- .../config/datasets/mnist-784-euclidean.yaml | 778 --------------- .../config/datasets/nytimes-256-angular.yaml | 778 --------------- .../config/datasets/nytimes-256-inner.yaml | 778 --------------- .../config/datasets/sift-128-euclidean.yaml | 562 ----------- .../config/datasets/wiki_all_10M.yaml | 357 ------- .../config/datasets/wiki_all_1M.yaml | 371 -------- .../config/datasets/wiki_all_88M.yaml | 357 ------- .../cuvs_bench/get_dataset/__main__.py | 170 +++- python/cuvs_bench/cuvs_bench/plot/__main__.py | 5 +- .../cuvs_bench/cuvs_bench/run/data_export.py | 6 +- python/cuvs_bench/cuvs_bench/run/run.py | 10 +- .../cuvs_bench/cuvs_bench/tests/test_cli.py | 422 +++++++++ .../cuvs_bench/cuvs_bench/tests/test_run.py | 162 +++- python/cuvs_bench/pyproject.toml | 2 + 48 files changed, 869 insertions(+), 11428 deletions(-) create mode 100644 python/cuvs_bench/.coveragerc delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml delete mode 100644 
python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml delete mode 100644 python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml create mode 100644 python/cuvs_bench/cuvs_bench/tests/test_cli.py diff --git a/ci/test_python.sh b/ci/test_python.sh index e8749fe79..6ba36bc5a 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. 
set -euo pipefail @@ -34,7 +34,8 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ "libcuvs=${RAPIDS_VERSION}" \ - "cuvs=${RAPIDS_VERSION}" + "cuvs=${RAPIDS_VERSION}" \ + "cuvs-bench=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi @@ -54,5 +55,17 @@ pytest \ --cov-report=term \ tests +rapids-logger "pytest cuvs-bench" +popd +pushd python/cuvs_bench/cuvs_bench +pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ + --cov-config=../.coveragerc \ + --cov=cuvs \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-bench-coverage.xml" \ + --cov-report=term \ + tests + rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 2e2ad8446..e3da7c376 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -37,7 +37,7 @@ dependencies: - libcusparse=11.7.5.86 - libcuvs==25.2.*,>=0.0.0a0 - librmm==25.2.*,>=0.0.0a0 -- matplotlib +- matplotlib-base - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 @@ -47,6 +47,8 @@ dependencies: - pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 +- requests +- scikit-learn - setuptools - sysroot_linux-aarch64==2.28 - wheel diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 90243415c..1f8d61c2d 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -37,7 +37,7 @@ dependencies: - libcusparse=11.7.5.86 - libcuvs==25.2.*,>=0.0.0a0 - librmm==25.2.*,>=0.0.0a0 -- matplotlib +- matplotlib-base - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 @@ -47,6 +47,8 @@ dependencies: - pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 +- requests +- scikit-learn 
- setuptools - sysroot_linux-64==2.28 - wheel diff --git a/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml index 43d3ca821..330871a12 100644 --- a/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml @@ -34,7 +34,7 @@ dependencies: - libcusparse-dev - libcuvs==25.2.*,>=0.0.0a0 - librmm==25.2.*,>=0.0.0a0 -- matplotlib +- matplotlib-base - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 @@ -43,6 +43,8 @@ dependencies: - pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 +- requests +- scikit-learn - setuptools - sysroot_linux-aarch64==2.28 - wheel diff --git a/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml index 54588287a..8216bcd48 100644 --- a/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml @@ -34,7 +34,7 @@ dependencies: - libcusparse-dev - libcuvs==25.2.*,>=0.0.0a0 - librmm==25.2.*,>=0.0.0a0 -- matplotlib +- matplotlib-base - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 @@ -43,6 +43,8 @@ dependencies: - pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 +- requests +- scikit-learn - setuptools - sysroot_linux-64==2.28 - wheel diff --git a/conda/recipes/cuvs-bench-cpu/meta.yaml b/conda/recipes/cuvs-bench-cpu/meta.yaml index 016df56be..ddc13d568 100644 --- a/conda/recipes/cuvs-bench-cpu/meta.yaml +++ b/conda/recipes/cuvs-bench-cpu/meta.yaml @@ -58,11 +58,12 @@ requirements: - click - glog {{ glog_version }} - h5py {{ h5py_version }} - - matplotlib + - matplotlib-base - numpy >=1.23,<3.0a0 - pandas - pyyaml - python + - requests about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index 33b1745ec..1b151a16b 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ 
b/conda/recipes/cuvs-bench/meta.yaml @@ -93,12 +93,13 @@ requirements: - glog {{ glog_version }} - cuvs {{ version }} - h5py {{ h5py_version }} - - matplotlib + - matplotlib-base - pandas - pyyaml # rmm is needed to determine if package is gpu-enabled - pylibraft ={{ minor_version }} - python + - requests - rmm ={{ minor_version }} about: home: https://rapids.ai/ diff --git a/dependencies.yaml b/dependencies.yaml index cfa63250d..7109f4de5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -555,9 +555,16 @@ dependencies: packages: - click - cuvs==25.2.*,>=0.0.0a0 - - matplotlib - pandas - pyyaml + - requests + - scikit-learn + - output_types: [conda] + packages: + - matplotlib-base + - output_types: [requirements, pyproject] + packages: + - matplotlib depends_on_libcuvs: common: - output_types: conda diff --git a/python/cuvs_bench/.coveragerc b/python/cuvs_bench/.coveragerc new file mode 100644 index 000000000..2ee96a94d --- /dev/null +++ b/python/cuvs_bench/.coveragerc @@ -0,0 +1,3 @@ +# Configuration file for Python coverage tests +[run] +source = cuvs_bench diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml index edacb25b5..240d2ac8e 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra.yaml @@ -11,3 +11,11 @@ groups: search: itopk: [32, 64, 128, 256, 512] search_width: [1, 2, 4, 8, 16, 32, 64] + test: + build: + graph_degree: [32] + intermediate_graph_degree: [32] + graph_build_algo: ["NN_DESCENT"] + search: + itopk: [32] + search_width: [1, 2] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml index 630dc94ff..063502290 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_cagra_hnswlib.yaml @@ -12,3 +12,12 @@ groups: 
ef_construction: [64, 128, 256, 512] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] + test: + build: + graph_degree: [32] + intermediate_graph_degree: [32] + graph_build_algo: ["NN_DESCENT"] + hierarchy: ["none", "cpu"] + ef_construction: [64] + search: + ef: [10,20] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_flat.yaml index 22afc79ab..28e69c0ab 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_flat.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_flat.yaml @@ -7,3 +7,10 @@ groups: niter: [20, 25] search: nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] + test: + build: + nlist: [1024] + ratio: [1] + niter: [20] + search: + nprobe: [1, 5] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml index d68e7973a..154ae4b14 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_pq.yaml @@ -39,3 +39,15 @@ groups: internalDistanceDtype: ["float"] smemLutDtype: ["float", "fp8", "half"] refine_ratio: [1] + test: + build: + nlist: [1024] + pq_dim: [64] + pq_bits: [8] + ratio: [10] + niter: [25] + search: + nprobe: [1] + internalDistanceDtype: ["float"] + smemLutDtype: ["float", "fp8", "half"] + refine_ratio: [1] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_cagra.yaml index d730e33fb..195755b32 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_cagra.yaml @@ -11,3 +11,11 @@ groups: search: itopk: [32, 64, 128, 256, 512] search_width: [1, 2, 4, 8, 16, 32, 64] + test: + build: + graph_degree: [32] + intermediate_graph_degree: [32] + graph_build_algo: ["NN_DESCENT"] + search: + itopk: [32] + search_width: [1, 2] diff --git 
a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_flat.yaml index 317e615e5..68373b90f 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_flat.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_flat.yaml @@ -7,3 +7,10 @@ groups: niter: [20, 25] search: nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] + test: + build: + nlist: [1024] + ratio: [1] + niter: [20] + search: + nprobe: [1, 5] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_pq.yaml index 7213968f4..511018c37 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_mg_ivf_pq.yaml @@ -39,3 +39,15 @@ groups: internalDistanceDtype: ["float"] smemLutDtype: ["float", "fp8", "half"] refine_ratio: [1] + test: + build: + nlist: [1024] + pq_dim: [64] + pq_bits: [8] + ratio: [10] + niter: [25] + search: + nprobe: [1] + internalDistanceDtype: ["float"] + smemLutDtype: ["float", "fp8", "half"] + refine_ratio: [1] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml index aa7409dce..6e11df8d7 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_flat.yaml @@ -8,3 +8,11 @@ groups: search: nprobe: [1, 5, 10, 50, 100, 200] refine_ratio: [1] + test: + build: + nlist: [2048] + ratio: [10] + useFloat16: [False] + search: + nprobe: [1, 5] + refine_ratio: [1] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_pq.yaml index a531ec829..10b944070 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_ivf_pq.yaml @@ -16,3 +16,11 @@ groups: 
bitsPerCode: [8, 6, 5, 4] search: nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] + test: + build: + nlist: [1024] + M: [48] + ratio: [10] + bitsPerCode: [8] + search: + nprobe: [1, 5] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml index 578885096..3ec60c7b8 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml @@ -11,3 +11,11 @@ groups: search: itopk: [32, 64, 128, 256, 512] search_width: [1, 2, 4, 8, 16, 32, 64] + test: + build: + graph_degree: [32] + intermediate_graph_degree: [32] + graph_build_algo: ["NN_DESCENT"] + search: + itopk: [32] + search_width: [1, 2] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml index 20329e60d..148121ec3 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_flat.yaml @@ -28,3 +28,10 @@ groups: use_cuvs: [True] search: nprobe: [10, 20, 30, 50, 100, 200, 500, 1000] + test: + build: + nlist: [1024] + ratio: [4] + use_cuvs: [False] + search: + nprobe: [1, 5] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml index d6cfe0569..158246a03 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml @@ -74,3 +74,15 @@ groups: search: nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] refine_ratio: [1, 2, 4] + test: + build: + nlist: [1024] + M: [96] + ratio: [4] + usePrecomputed: [True] + useFloat16: [True] + use_cuvs: [False] + bitsPerCode: [8] + search: + nprobe: [10] + refine_ratio: [1, 2] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml 
b/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml index 93d8cff2d..cf2862289 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/hnswlib.yaml @@ -8,3 +8,9 @@ groups: efConstruction: [64, 128, 256, 512] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] + test: + build: + M: [12] + efConstruction: [64] + search: + ef: [10, 20] diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml deleted file mode 100644 index 69f75d1f7..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/bigann-100M.yaml +++ /dev/null @@ -1,295 +0,0 @@ -dataset: - base_file: bigann-1B/base.1B.u8bin - distance: euclidean - groundtruth_neighbors_file: bigann-100M/groundtruth.neighbors.ibin - name: bigann-100M - query_file: bigann-1B/query.public.10K.u8bin - subset_size: 100000000 -index: -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 5000 - pq_dim: 64 - ratio: 10 - file: bigann-100M/raft_ivf_pq/dimpq64-cluster5K - name: raft_ivf_pq.dimpq64-cluster5K - search_params: - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - 
smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 10000 - pq_dim: 64 - ratio: 10 - file: bigann-100M/raft_ivf_pq/dimpq64-cluster5K - name: raft_ivf_pq.dimpq64-cluster10K - search_params: - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - 
nprobe: 1000 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - smemLutDtype: half -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: bigann-100M/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: bigann-100M/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: bigann-100M/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: bigann-100M/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 100000 - ratio: 5 - file: bigann-100M/raft_ivf_flat/nlist100K - name: raft_ivf_flat.nlist100K - search_params: - - max_batch: 10000 - max_k: 10 - nprobe: 20 - - max_batch: 10000 - max_k: 10 - nprobe: 30 - - max_batch: 10000 - max_k: 10 - nprobe: 40 - - max_batch: 10000 - max_k: 10 - nprobe: 50 - - max_batch: 10000 - 
max_k: 10 - nprobe: 100 - - max_batch: 10000 - max_k: 10 - nprobe: 200 - - max_batch: 10000 - max_k: 10 - nprobe: 500 - - max_batch: 10000 - max_k: 10 - nprobe: 1000 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: bigann-100M/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: bigann-100M/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 -search_basic_param: - batch_size: 10000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml index d64e232b7..d86b92ea6 100644 --- a/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml +++ b/python/cuvs_bench/cuvs_bench/config/datasets/datasets.yaml @@ -105,6 +105,13 @@ groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin distance: euclidean +- name: test-data + dims: 32 + base_file: test-data/ann_benchmarks_like.base.fbin + query_file: test-data/ann_benchmarks_like.query.fbin + groundtruth_neighbors_file: test-data/ann_benchmarks_like.groundtruth.neighbors.ibin + distance: euclidean + - name: wiki_all_1M dims: 768 base_file: wiki_all_1M/base.1M.fbin diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml deleted file mode 100644 index 90b1f9721..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/deep-100M.yaml +++ /dev/null @@ -1,896 +0,0 @@ -dataset: - base_file: deep-100M/base.1B.fbin - distance: euclidean - groundtruth_neighbors_file: deep-100M/groundtruth.neighbors.ibin - name: deep-100M - query_file: deep-100M/query.public.10K.fbin - subset_size: 100000000 -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: deep-100M/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 
60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: deep-100M/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: deep-100M/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: deep-100M/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 50000 - file: deep-100M/faiss_gpu_ivf_flat/nlist50K - name: faiss_gpu_ivf_flat.nlist50K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 100000 - file: deep-100M/faiss_gpu_ivf_flat/nlist100K - name: faiss_gpu_ivf_flat.nlist100K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 200000 - file: deep-100M/faiss_gpu_ivf_flat/nlist200K - name: faiss_gpu_ivf_flat.nlist200K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_pq - build_param: - M: 48 - nlist: 16384 - file: deep-100M/faiss_gpu_ivf_pq/M48-nlist16K - name: faiss_gpu_ivf_pq.M48-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 
-- algo: faiss_gpu_ivf_pq - build_param: - M: 48 - nlist: 50000 - file: deep-100M/faiss_gpu_ivf_pq/M48-nlist50K - name: faiss_gpu_ivf_pq.M48-nlist50K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_pq - build_param: - M: 48 - nlist: 100000 - file: deep-100M/faiss_gpu_ivf_pq/M48-nlist100K - name: faiss_gpu_ivf_pq.M48-nlist100K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_pq - build_param: - M: 48 - nlist: 200000 - file: deep-100M/faiss_gpu_ivf_pq/M48-nlist200K - name: faiss_gpu_ivf_pq.M48-nlist200K - search_params: - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 50000 - ratio: 5 - file: deep-100M/raft_ivf_flat/nlist50K - name: raft_ivf_flat.nlist50K - search_params: - - max_batch: 10000 - max_k: 10 - nprobe: 20 - - max_batch: 10000 - max_k: 10 - nprobe: 30 - - max_batch: 10000 - max_k: 10 - nprobe: 40 - - max_batch: 10000 - max_k: 10 - nprobe: 50 - - max_batch: 10000 - max_k: 10 - nprobe: 100 - - max_batch: 10000 - max_k: 10 - nprobe: 200 - - max_batch: 10000 - max_k: 10 - nprobe: 500 - - max_batch: 10000 - max_k: 10 - nprobe: 1000 -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 100000 - ratio: 5 - file: deep-100M/raft_ivf_flat/nlist100K - name: raft_ivf_flat.nlist100K - search_params: - - max_batch: 10000 - max_k: 10 - nprobe: 20 - - max_batch: 10000 - max_k: 10 - nprobe: 30 - - max_batch: 10000 - max_k: 10 - nprobe: 40 - - max_batch: 10000 - max_k: 10 - nprobe: 50 - - max_batch: 10000 - max_k: 10 - nprobe: 100 - - max_batch: 10000 - max_k: 10 - nprobe: 200 - - max_batch: 10000 - max_k: 10 - nprobe: 500 - - max_batch: 10000 - max_k: 10 - nprobe: 1000 -- algo: raft_ivf_flat - build_param: 
- niter: 25 - nlist: 200000 - ratio: 5 - file: deep-100M/raft_ivf_flat/nlist200K - name: raft_ivf_flat.nlist200K - search_params: - - max_batch: 10000 - max_k: 10 - nprobe: 20 - - max_batch: 10000 - max_k: 10 - nprobe: 30 - - max_batch: 10000 - max_k: 10 - nprobe: 40 - - max_batch: 10000 - max_k: 10 - nprobe: 50 - - max_batch: 10000 - max_k: 10 - nprobe: 100 - - max_batch: 10000 - max_k: 10 - nprobe: 200 - - max_batch: 10000 - max_k: 10 - nprobe: 500 - - max_batch: 10000 - max_k: 10 - nprobe: 1000 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 50000 - pq_bits: 5 - pq_dim: 96 - ratio: 10 - file: deep-100M/raft_ivf_pq/d96b5n50K - name: raft_ivf_pq.d96b5n50K - search_params: - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 2 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 200 - refine_ratio: 2 - smemLutDtype: half - - 
internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 2000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 5000 - refine_ratio: 2 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 40 - 
refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 1000 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 2000 - refine_ratio: 2 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 5000 - refine_ratio: 2 - smemLutDtype: fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 50000 - pq_bits: 5 - pq_dim: 64 - ratio: 10 - file: deep-100M/raft_ivf_pq/d64b5n50K - name: raft_ivf_pq.d64b5n50K - search_params: - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 4 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 200 - 
refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 20 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1000 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 2000 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 5000 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 2000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 5000 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: fp8 - - 
internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 1000 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 2000 - refine_ratio: 4 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 5000 - refine_ratio: 4 - smemLutDtype: fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: float - search_result_file: result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_cagra - build_param: - graph_degree: 32 - intermediate_graph_degree: 48 - file: deep-100M/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - algo: single_cta - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: single_cta - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: single_cta - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: single_cta - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: single_cta - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: single_cta - itopk: 128 - max_iterations: 16 - search_width: 8 - - algo: single_cta - itopk: 128 - max_iterations: 64 - search_width: 2 
- - algo: single_cta - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: single_cta - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: single_cta - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: single_cta - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: single_cta - itopk: 512 - max_iterations: 64 - search_width: 8 -- algo: raft_cagra - build_param: - graph_degree: 32 - intermediate_graph_degree: 48 - file: deep-100M/raft_cagra/dim32 - name: raft_cagra.dim32.multi_cta - search_params: - - algo: multi_cta - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: multi_cta - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: multi_cta - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: multi_cta - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: multi_cta - itopk: 128 - max_iterations: 16 - search_width: 8 - - algo: multi_cta - itopk: 128 - max_iterations: 64 - search_width: 2 - - algo: multi_cta - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: multi_cta - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: multi_cta - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: multi_cta - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: multi_cta - itopk: 512 - max_iterations: 64 - search_width: 8 -- algo: raft_cagra - build_param: - graph_degree: 32 - intermediate_graph_degree: 48 - file: deep-100M/raft_cagra/dim32 - name: raft_cagra.dim32.multi_kernel - search_params: - - algo: multi_kernel - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: multi_kernel - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: multi_kernel - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: multi_kernel - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: multi_kernel - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: multi_kernel - itopk: 128 - max_iterations: 16 - 
search_width: 8 - - algo: multi_kernel - itopk: 128 - max_iterations: 64 - search_width: 2 - - algo: multi_kernel - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: multi_kernel - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: multi_kernel - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: multi_kernel - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: multi_kernel - itopk: 512 - max_iterations: 64 - search_width: 8 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: deep-100M/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - max_iterations: 0 - search_width: 1 - - itopk: 32 - max_iterations: 32 - search_width: 1 - - itopk: 64 - max_iterations: 16 - search_width: 4 - - itopk: 64 - max_iterations: 64 - search_width: 1 - - itopk: 96 - max_iterations: 48 - search_width: 2 - - itopk: 128 - max_iterations: 16 - search_width: 8 - - itopk: 128 - max_iterations: 64 - search_width: 2 - - itopk: 192 - max_iterations: 24 - search_width: 8 - - itopk: 192 - max_iterations: 96 - search_width: 2 - - itopk: 256 - max_iterations: 32 - search_width: 8 - - itopk: 384 - max_iterations: 48 - search_width: 8 - - itopk: 512 - max_iterations: 64 - search_width: 8 -search_basic_param: - batch_size: 10000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml deleted file mode 100644 index 061677e62..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/deep-1B.yaml +++ /dev/null @@ -1,26 +0,0 @@ -dataset: - base_file: deep-1B/base.1B.fbin - distance: inner_product - groundtruth_neighbors_file: deep-1B/groundtruth.neighbors.ibin - name: deep-1B - query_file: deep-1B/query.public.10K.fbin -index: -- algo: faiss_gpu_ivf_pq - build_param: - M: 48 - nlist: 50000 - file: deep-1B/faiss_gpu_ivf_pq/M48-nlist50K - name: faiss_gpu_ivf_pq.M48-nlist50K - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - 
nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 -search_basic_param: - batch_size: 10000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml deleted file mode 100644 index 2ec584eb5..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/deep-image-96-inner.yaml +++ /dev/null @@ -1,712 +0,0 @@ -dataset: - base_file: deep-image-96-inner/base.fbin - distance: euclidean - groundtruth_neighbors_file: deep-image-96-inner/groundtruth.neighbors.ibin - name: deep-image-96-inner - query_file: deep-image-96-inner/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/deep-image-96-inner/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/deep-image-96-inner/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/deep-image-96-inner/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/deep-image-96-inner/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/deep-image-96-inner/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/deep-image-96-inner/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/deep-image-96-inner/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: 
result/deep-image-96-inner/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/deep-image-96-inner/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/deep-image-96-inner/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - 
nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - 
nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8 - name: 
faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/deep-image-96-inner/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/deep-image-96-inner/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1024 - smemLutDtype: half - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024 -- algo: 
raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 5 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: float - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: half - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float 
- nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: half - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - 
search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: half - nprobe: 1024 - smemLutDtype: float - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - 
niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1024 - smemLutDtype: float - search_result_file: result/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/deep-image-96-inner/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/deep-image-96-inner/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/deep-image-96-inner/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/deep-image-96-inner/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/deep-image-96-inner/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/deep-image-96-inner/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/deep-image-96-inner/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/deep-image-96-inner/raft_cagra/dim64 
-search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml deleted file mode 100644 index e92d3badc..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/fashion-mnist-784-euclidean.yaml +++ /dev/null @@ -1,778 +0,0 @@ -dataset: - base_file: fashion-mnist-784-euclidean/base.fbin - distance: euclidean - groundtruth_neighbors_file: fashion-mnist-784-euclidean/groundtruth.neighbors.ibin - name: fashion-mnist-784-euclidean - query_file: fashion-mnist-784-euclidean/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/fashion-mnist-784-euclidean/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/fashion-mnist-784-euclidean/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/fashion-mnist-784-euclidean/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/fashion-mnist-784-euclidean/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/fashion-mnist-784-euclidean/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/fashion-mnist-784-euclidean/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/fashion-mnist-784-euclidean/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 
600 - - ef: 800 - search_result_file: result/fashion-mnist-784-euclidean/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/fashion-mnist-784-euclidean/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/fashion-mnist-784-euclidean/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384 
- name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: 
faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - 
nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/fashion-mnist-784-euclidean/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/fashion-mnist-784-euclidean/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: 
half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 
200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: 
index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - 
internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - 
build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/fashion-mnist-784-euclidean/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/fashion-mnist-784-euclidean/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/fashion-mnist-784-euclidean/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/fashion-mnist-784-euclidean/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml deleted file mode 100644 index d72dff74d..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/gist-960-euclidean.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: gist-960-euclidean/base.fbin - distance: euclidean - name: gist-960-euclidean - query_file: gist-960-euclidean/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/gist-960-euclidean/hnswlib/M12 - name: hnswlib.M12 
- search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/gist-960-euclidean/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/gist-960-euclidean/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/gist-960-euclidean/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/gist-960-euclidean/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/gist-960-euclidean/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/gist-960-euclidean/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/gist-960-euclidean/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/gist-960-euclidean/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/gist-960-euclidean/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - 
nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 
- - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 
100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - 
nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/gist-960-euclidean/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/gist-960-euclidean/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 
500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - 
search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - 
internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - 
internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/gist-960-euclidean/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/gist-960-euclidean/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/gist-960-euclidean/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/gist-960-euclidean/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/gist-960-euclidean/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/gist-960-euclidean/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/gist-960-euclidean/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/gist-960-euclidean/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml 
b/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml deleted file mode 100644 index dab60ed4c..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-angular.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: glove-100-angular/base.fbin - distance: euclidean - name: glove-100-angular - query_file: glove-100-angular/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-angular/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-angular/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-angular/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-angular/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-angular/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-angular/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-angular/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-angular/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/glove-100-angular/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/glove-100-angular/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: 
index/glove-100-angular/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/glove-100-angular/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/glove-100-angular/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/glove-100-angular/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/glove-100-angular/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024 - name: 
faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16 - name: 
faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: 
index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/glove-100-angular/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/glove-100-angular/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - 
internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - 
smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: 
raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: 
half - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/glove-100-angular/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-angular/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/glove-100-angular/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-angular/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/glove-100-angular/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-100-angular/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 
64 - file: index/glove-100-angular/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-100-angular/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml deleted file mode 100644 index 972703735..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/glove-100-inner.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: glove-100-inner/base.fbin - distance: euclidean - name: glove-100-inner - query_file: glove-100-inner/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-inner/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-inner/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-inner/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-inner/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-inner/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-100-inner/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/glove-100-inner/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: 
result/glove-100-inner/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/glove-100-inner/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/glove-100-inner/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: glove-100-inner/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: glove-100-inner/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: glove-100-inner/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: glove-100-inner/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/glove-100-inner/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: 
result/glove-100-inner/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/glove-100-inner/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/glove-100-inner/faiss_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: glove-100-inner/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-inner/faiss_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: glove-100-inner/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/glove-100-inner/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-inner/raft_gpu_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - 
search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - 
smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: 
index/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 
- nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/glove-100-inner/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-100-inner/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/glove-100-inner/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-100-inner/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/glove-100-inner/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-100-inner/raft_cagra/dim32 -- 
algo: raft_cagra - build_param: - graph_degree: 64 - file: index/glove-100-inner/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-100-inner/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml deleted file mode 100644 index d68d1700c..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-angular.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: glove-50-angular/base.fbin - distance: euclidean - name: glove-50-angular - query_file: glove-50-angular/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-angular/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-angular/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-angular/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-angular/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-angular/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-angular/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-angular/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - 
ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-angular/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/glove-50-angular/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/glove-50-angular/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/glove-50-angular/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/glove-50-angular/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/glove-50-angular/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/glove-50-angular/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/glove-50-angular/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - 
nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - 
nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - 
- nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/glove-50-angular/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/glove-50-angular/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - 
nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - 
k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: 
result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: float - - 
internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/glove-50-angular/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-angular/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/glove-50-angular/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-angular/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - 
file: index/glove-50-angular/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-50-angular/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/glove-50-angular/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-50-angular/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml deleted file mode 100644 index 379c79796..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/glove-50-inner.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: glove-50-inner/base.fbin - distance: euclidean - name: glove-50-inner - query_file: glove-50-inner/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-inner/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-inner/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-inner/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-inner/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/glove-50-inner/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-inner/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - 
numThreads: 32 - file: index/glove-50-inner/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/glove-50-inner/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/glove-50-inner/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/glove-50-inner/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/glove-50-inner/faiss_ivf_flat/nlist1024 - name: faiss_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/glove-50-inner/faiss_ivf_flat/nlist2048 - name: faiss_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/glove-50-inner/faiss_ivf_flat/nlist4096 - name: faiss_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/glove-50-inner/faiss_ivf_flat/nlist8192 - name: faiss_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/glove-50-inner/faiss_ivf_flat/nlist16384 - name: 
faiss_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-inner/faiss_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/glove-50-inner/faiss_ivf_pq/M64-nlist1024 - name: faiss_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/glove-50-inner/faiss_ivf_pq/M64-nlist1024.noprecomp - name: faiss_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/glove-50-inner/faiss_ivf_sq/nlist1024-fp16 - name: faiss_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/glove-50-inner/faiss_ivf_sq/nlist2048-fp16 - name: faiss_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/glove-50-inner/faiss_ivf_sq/nlist4096-fp16 - name: faiss_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 
- - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/glove-50-inner/faiss_ivf_sq/nlist8192-fp16 - name: faiss_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/glove-50-inner/faiss_ivf_sq/nlist16384-fp16 - name: faiss_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/glove-50-inner/faiss_ivf_sq/nlist1024-int8 - name: faiss_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/glove-50-inner/faiss_ivf_sq/nlist2048-int8 - name: faiss_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/glove-50-inner/faiss_ivf_sq/nlist4096-int8 - name: faiss_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - 
nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/glove-50-inner/faiss_ivf_sq/nlist8192-int8 - name: faiss_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/glove-50-inner/faiss_ivf_sq/nlist16384-int8 - name: faiss_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-inner/faiss_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/glove-50-inner/faiss_flat/flat - name: faiss_flat - search_params: - - {} - search_result_file: result/glove-50-inner/faiss_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float - name: 
raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - 
internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: half - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - 
file: index/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: fp8 - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 
10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - nprobe: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - nprobe: 1024 - smemLutDtype: float - search_result_file: result/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/glove-50-inner/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/glove-50-inner/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/glove-50-inner/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/glove-50-inner/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/glove-50-inner/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-50-inner/raft_cagra/dim32 -- algo: 
raft_cagra - build_param: - graph_degree: 64 - file: index/glove-50-inner/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/glove-50-inner/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml deleted file mode 100644 index f28dc5850..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/lastfm-65-angular.yaml +++ /dev/null @@ -1,777 +0,0 @@ -dataset: - base_file: lastfm-65-angular/base.fbin - distance: euclidean - name: lastfm-65-angular - query_file: lastfm-65-angular/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/lastfm-65-angular/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/lastfm-65-angular/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/lastfm-65-angular/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/lastfm-65-angular/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/lastfm-65-angular/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/lastfm-65-angular/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/lastfm-65-angular/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 
- - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/lastfm-65-angular/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/lastfm-65-angular/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/lastfm-65-angular/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - 
- nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 
1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - 
search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/lastfm-65-angular/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/lastfm-65-angular/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: 
result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: 
index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - 
smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float - 
name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/lastfm-65-angular/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/lastfm-65-angular/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/lastfm-65-angular/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - 
nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/lastfm-65-angular/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/lastfm-65-angular/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/lastfm-65-angular/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/lastfm-65-angular/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/lastfm-65-angular/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml deleted file mode 100644 index 1ffdd2124..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/mnist-784-euclidean.yaml +++ /dev/null @@ -1,778 +0,0 @@ -dataset: - base_file: mnist-784-euclidean/base.fbin - distance: euclidean - groundtruth_neighbors_file: mnist-784-euclidean/groundtruth.neighbors.ibin - name: mnist-784-euclidean - query_file: mnist-784-euclidean/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/mnist-784-euclidean/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/mnist-784-euclidean/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/mnist-784-euclidean/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/mnist-784-euclidean/hnswlib/M16 
-- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/mnist-784-euclidean/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/mnist-784-euclidean/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/mnist-784-euclidean/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/mnist-784-euclidean/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/mnist-784-euclidean/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/mnist-784-euclidean/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - 
nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/mnist-784-euclidean/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/mnist-784-euclidean/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: 
- niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - 
search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - 
internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - 
internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - 
internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/mnist-784-euclidean/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/mnist-784-euclidean/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/mnist-784-euclidean/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/mnist-784-euclidean/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/mnist-784-euclidean/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/mnist-784-euclidean/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/mnist-784-euclidean/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/mnist-784-euclidean/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml deleted file mode 100644 index e9dcd8317..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-angular.yaml +++ /dev/null @@ -1,778 +0,0 @@ -dataset: - base_file: nytimes-256-angular/base.fbin - distance: euclidean - groundtruth_neighbors_file: nytimes-256-angular/groundtruth.neighbors.ibin - name: 
nytimes-256-angular - query_file: nytimes-256-angular/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-angular/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-angular/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-angular/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-angular/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-angular/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-angular/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-angular/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-angular/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/nytimes-256-angular/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/nytimes-256-angular/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: 
faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: 
result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - 
nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/nytimes-256-angular/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} - search_result_file: result/nytimes-256-angular/faiss_gpu_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - 
internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - 
internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - 
internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half 
- k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/nytimes-256-angular/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-angular/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/nytimes-256-angular/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-angular/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/nytimes-256-angular/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/nytimes-256-angular/raft_cagra/dim32 -- algo: 
raft_cagra - build_param: - graph_degree: 64 - file: index/nytimes-256-angular/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/nytimes-256-angular/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml deleted file mode 100644 index fa0094e11..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/nytimes-256-inner.yaml +++ /dev/null @@ -1,778 +0,0 @@ -dataset: - base_file: nytimes-256-inner/base.fbin - distance: euclidean - groundtruth_neighbors_file: nytimes-256-inner/groundtruth.neighbors.ibin - name: nytimes-256-inner - query_file: nytimes-256-inner/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-inner/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-inner/hnswlib/M12 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-inner/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-inner/hnswlib/M16 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-inner/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-inner/hnswlib/M24 -- algo: hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: index/nytimes-256-inner/hnswlib/M36 - name: hnswlib.M36 - 
search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 - search_result_file: result/nytimes-256-inner/hnswlib/M36 -- algo: raft_bfknn - build_param: {} - file: index/nytimes-256-inner/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 - search_result_file: result/nytimes-256-inner/raft_bfknn/bfknn -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: index/nytimes-256-inner/faiss_ivf_flat/nlist1024 - name: faiss_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_flat/nlist1024 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: index/nytimes-256-inner/faiss_ivf_flat/nlist2048 - name: faiss_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_flat/nlist2048 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: index/nytimes-256-inner/faiss_ivf_flat/nlist4096 - name: faiss_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_flat/nlist4096 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: index/nytimes-256-inner/faiss_ivf_flat/nlist8192 - name: faiss_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_flat/nlist8192 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: index/nytimes-256-inner/faiss_ivf_flat/nlist16384 - name: faiss_ivf_flat.nlist16384 - search_params: - - 
nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-inner/faiss_ivf_flat/nlist16384 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024 - name: faiss_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024.noprecomp - name: faiss_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16 - name: faiss_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16 - name: faiss_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16 - name: faiss_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - 
- nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16 - name: faiss_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16 - name: faiss_ivf_sq.nlist16384-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8 - name: faiss_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8 - name: faiss_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8 - name: faiss_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - 
nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8 - name: faiss_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: index/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8 - name: faiss_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - search_result_file: result/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8 -- algo: faiss_gpu_flat - build_param: {} - file: index/nytimes-256-inner/faiss_flat/flat - name: faiss_flat - search_params: - - {} - search_result_file: result/nytimes-256-inner/faiss_flat/flat -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024 - name: raft_ivf_pq.dimpq128-cluster1024 - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: 
index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float - name: raft_ivf_pq.dimpq128-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 5 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half - name: raft_ivf_pq.dimpq128-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: half - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq128-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: 
float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq64-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 64 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half - name: raft_ivf_pq.dimpq64-cluster1024-float-half - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: half - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - 
smemLutDtype: half - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 32 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq32-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 16 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 - name: raft_ivf_pq.dimpq16-cluster1024-float-fp8 - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: fp8 - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8 -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 128 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float - name: raft_ivf_pq.dimpq128-cluster1024-half-float - search_params: - - internalDistanceDtype: half - k: 10 - numProbes: 10 - smemLutDtype: float - - 
internalDistanceDtype: half - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: half - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1024 - pq_dim: 512 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float - name: raft_ivf_pq.dimpq512-cluster1024-float-float - search_params: - - internalDistanceDtype: float - k: 10 - numProbes: 10 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 50 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 100 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 200 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 500 - smemLutDtype: float - - internalDistanceDtype: float - k: 10 - numProbes: 1024 - smemLutDtype: float - search_result_file: result/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: index/nytimes-256-inner/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - search_result_file: result/nytimes-256-inner/raft_ivf_flat/nlist1024 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: index/nytimes-256-inner/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 - 
search_result_file: result/nytimes-256-inner/raft_ivf_flat/nlist16384 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: index/nytimes-256-inner/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/nytimes-256-inner/raft_cagra/dim32 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: index/nytimes-256-inner/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 - search_result_file: result/nytimes-256-inner/raft_cagra/dim64 -search_basic_param: - batch_size: 5000 - k: 10 - run_count: 3 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml deleted file mode 100644 index 2588d8a36..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/sift-128-euclidean.yaml +++ /dev/null @@ -1,562 +0,0 @@ -dataset: - base_file: sift-128-euclidean/base.fbin - distance: euclidean - groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin - name: sift-128-euclidean - query_file: sift-128-euclidean/query.fbin -index: -- algo: hnswlib - build_param: - M: 12 - efConstruction: 500 - numThreads: 32 - file: sift-128-euclidean/hnswlib/M12 - name: hnswlib.M12 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 16 - efConstruction: 500 - numThreads: 32 - file: sift-128-euclidean/hnswlib/M16 - name: hnswlib.M16 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: hnswlib - build_param: - M: 24 - efConstruction: 500 - numThreads: 32 - file: sift-128-euclidean/hnswlib/M24 - name: hnswlib.M24 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: 
hnswlib - build_param: - M: 36 - efConstruction: 500 - numThreads: 32 - file: sift-128-euclidean/hnswlib/M36 - name: hnswlib.M36 - search_params: - - ef: 10 - - ef: 20 - - ef: 40 - - ef: 60 - - ef: 80 - - ef: 120 - - ef: 200 - - ef: 400 - - ef: 600 - - ef: 800 -- algo: raft_bfknn - build_param: {} - file: sift-128-euclidean/raft_bfknn/bfknn - name: raft_bfknn - search_params: - - probe: 1 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 1024 - file: sift-128-euclidean/faiss_gpu_ivf_flat/nlist1024 - name: faiss_gpu_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 2048 - file: sift-128-euclidean/faiss_gpu_ivf_flat/nlist2048 - name: faiss_gpu_ivf_flat.nlist2048 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 4096 - file: sift-128-euclidean/faiss_gpu_ivf_flat/nlist4096 - name: faiss_gpu_ivf_flat.nlist4096 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 8192 - file: sift-128-euclidean/faiss_gpu_ivf_flat/nlist8192 - name: faiss_gpu_ivf_flat.nlist8192 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_flat - build_param: - nlist: 16384 - file: sift-128-euclidean/faiss_gpu_ivf_flat/nlist16384 - name: faiss_gpu_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: true - file: 
sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024 - name: faiss_gpu_ivf_pq.M64-nlist1024 - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 1024 - useFloat16: true - usePrecomputed: false - file: sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp - name: faiss_gpu_ivf_pq.M64-nlist1024.noprecomp - search_params: - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: fp16 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16 - name: faiss_gpu_ivf_sq.nlist1024-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: fp16 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16 - name: faiss_gpu_ivf_sq.nlist2048-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: fp16 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16 - name: faiss_gpu_ivf_sq.nlist4096-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: fp16 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16 - name: faiss_gpu_ivf_sq.nlist8192-fp16 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: fp16 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16 - name: faiss_gpu_ivf_sq.nlist16384-fp16 - 
search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 1024 - quantizer_type: int8 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-int8 - name: faiss_gpu_ivf_sq.nlist1024-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 2048 - quantizer_type: int8 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-int8 - name: faiss_gpu_ivf_sq.nlist2048-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 4096 - quantizer_type: int8 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-int8 - name: faiss_gpu_ivf_sq.nlist4096-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 8192 - quantizer_type: int8 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-int8 - name: faiss_gpu_ivf_sq.nlist8192-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: faiss_gpu_ivf_sq - build_param: - nlist: 16384 - quantizer_type: int8 - file: sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-int8 - name: faiss_gpu_ivf_sq.nlist16384-int8 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 -- algo: faiss_gpu_flat - build_param: {} - file: sift-128-euclidean/faiss_gpu_flat/flat - name: faiss_gpu_flat - search_params: - - {} -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1000 - pq_bits: 8 - pq_dim: 64 - ratio: 1 - 
file: sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K - name: raft_ivf_pq.dimpq64-bitpq8-cluster1K - search_params: - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 25 - nlist: 1000 - pq_bits: 6 - pq_dim: 128 - ratio: 1 - file: sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K - name: raft_ivf_pq.dimpq128-bitpq6-cluster1K - search_params: - - internalDistanceDtype: float 
- nprobe: 20 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: float - - internalDistanceDtype: float - nprobe: 20 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 30 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 40 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 50 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 100 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 200 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 500 - smemLutDtype: fp8 - - internalDistanceDtype: float - nprobe: 1000 - smemLutDtype: fp8 - - internalDistanceDtype: half - nprobe: 20 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 1000 - smemLutDtype: half -- algo: raft_ivf_flat - build_param: - niter: 25 - nlist: 1024 - ratio: 1 - file: sift-128-euclidean/raft_ivf_flat/nlist1024 - name: raft_ivf_flat.nlist1024 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 -- algo: raft_ivf_flat - build_param: - niter: 20 - nlist: 16384 - ratio: 2 - file: 
sift-128-euclidean/raft_ivf_flat/nlist16384 - name: raft_ivf_flat.nlist16384 - search_params: - - nprobe: 1 - - nprobe: 5 - - nprobe: 10 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 - - nprobe: 1000 - - nprobe: 2000 -- algo: raft_cagra - build_param: - graph_degree: 32 - file: sift-128-euclidean/raft_cagra/dim32 - name: raft_cagra.dim32 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 -- algo: raft_cagra - build_param: - graph_degree: 64 - file: sift-128-euclidean/raft_cagra/dim64 - name: raft_cagra.dim64 - search_params: - - itopk: 32 - - itopk: 64 - - itopk: 128 -search_basic_param: - batch_size: 5000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml deleted file mode 100644 index 090247be9..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_10M.yaml +++ /dev/null @@ -1,357 +0,0 @@ -dataset: - base_file: wiki_all_10M/base.88M.fbin - distance: euclidean - groundtruth_neighbors_file: wiki_all_10M/groundtruth.88M.neighbors.ibin - name: wiki_all_10M - query_file: wiki_all_10M/queries.fbin -index: -- algo: hnswlib - build_param: - M: 16 - efConstruction: 50 - numThreads: 56 - file: wiki_all_10M/hnswlib/M16.ef50 - name: hnswlib.M16.ef50 - search_params: - - ef: 10 - numThreads: 56 - - ef: 20 - numThreads: 56 - - ef: 40 - numThreads: 56 - - ef: 60 - numThreads: 56 - - ef: 80 - numThreads: 56 - - ef: 120 - numThreads: 56 - - ef: 200 - numThreads: 56 - - ef: 400 - numThreads: 56 - - ef: 600 - numThreads: 56 - - ef: 800 - numThreads: 56 -- algo: faiss_gpu_ivf_pq - build_param: - M: 32 - nlist: 16384 - ratio: 2 - file: wiki_all_10M/faiss_ivf_pq/M32-nlist16K_ratio2 - name: faiss_ivf_pq.M32-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 16384 - ratio: 2 - file: 
wiki_all_10M/faiss_ivf_pq/M64-nlist16K_ratio2 - name: faiss_ivf_pq.M64-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 128 - ratio: 10 - file: wiki_all_10M/raft_ivf_pq/d128-nlist16K - name: raft_ivf_pq.d128-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 1 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 64 - ratio: 10 - file: wiki_all_10M/raft_ivf_pq/d64-nlist16K - name: raft_ivf_pq.d64-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 32 - ratio: 10 - file: wiki_all_10M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32-nlist16K - search_params: - - 
internalDistanceDtype: half - nprobe: 20 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 32 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 32 - ratio: 10 - file: wiki_all_10M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32X-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - 
internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_cagra - build_param: - graph_degree: 32 - intermediate_graph_degree: 48 - file: wiki_all_10M/raft_cagra/dim32.ibin - name: raft_cagra.dim32.multi_cta - search_params: - - algo: multi_cta - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 36 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 40 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 44 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 48 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 16 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 24 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 26 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 2 - - algo: multi_cta - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: multi_cta - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: multi_cta - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: multi_cta - itopk: 128 - max_iterations: 16 - search_width: 8 - - algo: multi_cta - itopk: 128 - max_iterations: 64 - search_width: 2 - - algo: multi_cta - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: multi_cta - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: multi_cta - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: multi_cta - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: multi_cta - itopk: 512 - 
max_iterations: 64 - search_width: 8 -search_basic_param: - batch_size: 10000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml deleted file mode 100644 index bc5abaca2..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_1M.yaml +++ /dev/null @@ -1,371 +0,0 @@ -dataset: - base_file: wiki_all_1M/base.1M.fbin - distance: euclidean - groundtruth_neighbors_file: wiki_all_1M/groundtruth.1M.neighbors.ibin - name: wiki_all_1M - query_file: wiki_all_1M/queries.fbin - subset_size: 1000000 -index: -- algo: hnswlib - build_param: - M: 16 - efConstruction: 50 - numThreads: 56 - file: wiki_all_1M/hnswlib/M16.ef50 - name: hnswlib.M16.ef50 - search_params: - - ef: 10 - numThreads: 56 - - ef: 20 - numThreads: 56 - - ef: 40 - numThreads: 56 - - ef: 60 - numThreads: 56 - - ef: 80 - numThreads: 56 - - ef: 120 - numThreads: 56 - - ef: 200 - numThreads: 56 - - ef: 400 - numThreads: 56 - - ef: 600 - numThreads: 56 - - ef: 800 - numThreads: 56 -- algo: faiss_gpu_ivf_pq - build_param: - M: 32 - nlist: 16384 - ratio: 2 - file: wiki_all_1M/faiss_ivf_pq/M32-nlist16K_ratio2 - name: faiss_ivf_pq.M32-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 16384 - ratio: 2 - file: wiki_all_1M/faiss_ivf_pq/M64-nlist16K_ratio2 - name: faiss_ivf_pq.M64-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 128 - ratio: 10 - file: wiki_all_1M/raft_ivf_pq/d128-nlist16K - name: raft_ivf_pq.d128-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 
1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 1 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 64 - ratio: 10 - file: wiki_all_1M/raft_ivf_pq/d64-nlist16K - name: raft_ivf_pq.d64-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 32 - ratio: 10 - file: wiki_all_1M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - 
refine_ratio: 32 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 32 - ratio: 10 - file: wiki_all_1M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32X-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_cagra - build_param: - graph_build_algo: NN_DESCENT - graph_degree: 32 - intermediate_graph_degree: 48 - ivf_pq_build_niter: 10 - 
ivf_pq_build_nlist: 16384 - ivf_pq_build_pq_bits: 8 - ivf_pq_build_pq_dim: 32 - ivf_pq_build_ratio: 10 - ivf_pq_search_internalDistanceDtype: half - ivf_pq_search_nprobe: 30 - ivf_pq_search_refine_ratio: 8 - ivf_pq_search_smemLutDtype: half - nn_descent_intermediate_graph_degree: 72 - nn_descent_max_iterations: 10 - nn_descent_termination_threshold: 0.001 - file: wiki_all_1M/raft_cagra/dim32.ibin - name: raft_cagra.dim32.multi_cta - search_params: - - algo: multi_cta - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 36 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 40 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 44 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 48 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 16 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 24 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 26 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 2 - - algo: multi_cta - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: multi_cta - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: multi_cta - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: multi_cta - itopk: 128 - max_iterations: 16 - search_width: 8 - - algo: multi_cta - itopk: 128 - max_iterations: 64 - search_width: 2 - - algo: multi_cta - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: multi_cta - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: multi_cta - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: multi_cta - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: multi_cta - itopk: 512 - max_iterations: 64 - search_width: 8 -search_basic_param: - batch_size: 10000 - k: 10 diff --git 
a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml b/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml deleted file mode 100644 index a06d6de54..000000000 --- a/python/cuvs_bench/cuvs_bench/config/datasets/wiki_all_88M.yaml +++ /dev/null @@ -1,357 +0,0 @@ -dataset: - base_file: wiki_all_88M/base.88M.fbin - distance: euclidean - groundtruth_neighbors_file: wiki_all_88M/groundtruth.88M.neighbors.ibin - name: wiki_all_88M - query_file: wiki_all_88M/queries.fbin -index: -- algo: hnswlib - build_param: - M: 16 - efConstruction: 50 - numThreads: 56 - file: wiki_all_88M/hnswlib/M16.ef50 - name: hnswlib.M16.ef50 - search_params: - - ef: 10 - numThreads: 56 - - ef: 20 - numThreads: 56 - - ef: 40 - numThreads: 56 - - ef: 60 - numThreads: 56 - - ef: 80 - numThreads: 56 - - ef: 120 - numThreads: 56 - - ef: 200 - numThreads: 56 - - ef: 400 - numThreads: 56 - - ef: 600 - numThreads: 56 - - ef: 800 - numThreads: 56 -- algo: faiss_gpu_ivf_pq - build_param: - M: 32 - nlist: 16384 - ratio: 2 - file: wiki_all_88M/faiss_ivf_pq/M32-nlist16K_ratio2 - name: faiss_ivf_pq.M32-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: faiss_gpu_ivf_pq - build_param: - M: 64 - nlist: 16384 - ratio: 2 - file: wiki_all_88M/faiss_ivf_pq/M64-nlist16K_ratio2 - name: faiss_ivf_pq.M64-nlist16K - search_params: - - nprobe: 10 - - nprobe: 20 - - nprobe: 30 - - nprobe: 40 - - nprobe: 50 - - nprobe: 100 - - nprobe: 200 - - nprobe: 500 -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 128 - ratio: 10 - file: wiki_all_88M/raft_ivf_pq/d128-nlist16K - name: raft_ivf_pq.d128-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 1 - smemLutDtype: half 
- - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 1 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 1 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 64 - ratio: 10 - file: wiki_all_88M/raft_ivf_pq/d64-nlist16K - name: raft_ivf_pq.d64-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - pq_dim: 32 - ratio: 10 - file: wiki_all_88M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 32 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 32 - smemLutDtype: half -- algo: raft_ivf_pq - build_param: - niter: 10 - nlist: 16384 - pq_bits: 8 - 
pq_dim: 32 - ratio: 10 - file: wiki_all_88M/raft_ivf_pq/d32-nlist16K - name: raft_ivf_pq.d32X-nlist16K - search_params: - - internalDistanceDtype: half - nprobe: 20 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 16 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 8 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 30 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 40 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 50 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 100 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 200 - refine_ratio: 4 - smemLutDtype: half - - internalDistanceDtype: half - nprobe: 500 - refine_ratio: 4 - smemLutDtype: half -- algo: raft_cagra - build_param: - graph_degree: 32 - intermediate_graph_degree: 48 - file: wiki_all_88M/raft_cagra/dim32.ibin - name: raft_cagra.dim32.multi_cta - search_params: - - algo: multi_cta - itopk: 32 - max_iterations: 0 - search_width: 1 - - algo: 
multi_cta - itopk: 32 - max_iterations: 32 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 36 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 40 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 44 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 48 - search_width: 1 - - algo: multi_cta - itopk: 32 - max_iterations: 16 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 24 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 26 - search_width: 2 - - algo: multi_cta - itopk: 32 - max_iterations: 32 - search_width: 2 - - algo: multi_cta - itopk: 64 - max_iterations: 16 - search_width: 4 - - algo: multi_cta - itopk: 64 - max_iterations: 64 - search_width: 1 - - algo: multi_cta - itopk: 96 - max_iterations: 48 - search_width: 2 - - algo: multi_cta - itopk: 128 - max_iterations: 16 - search_width: 8 - - algo: multi_cta - itopk: 128 - max_iterations: 64 - search_width: 2 - - algo: multi_cta - itopk: 192 - max_iterations: 24 - search_width: 8 - - algo: multi_cta - itopk: 192 - max_iterations: 96 - search_width: 2 - - algo: multi_cta - itopk: 256 - max_iterations: 32 - search_width: 8 - - algo: multi_cta - itopk: 384 - max_iterations: 48 - search_width: 8 - - algo: multi_cta - itopk: 512 - max_iterations: 64 - search_width: 8 -search_basic_param: - batch_size: 10000 - k: 10 diff --git a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py index a6b154ef2..d0c8023f4 100644 --- a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py +++ b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,11 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import argparse import os import subprocess -import sys -from urllib.request import urlretrieve + +import click +import h5py +import numpy as np +import requests +from scipy.spatial.distance import cdist +from sklearn.datasets import make_blobs def get_dataset_path(name, ann_bench_data_path): @@ -29,7 +33,12 @@ def get_dataset_path(name, ann_bench_data_path): def download_dataset(url, path): if not os.path.exists(path): print(f"downloading {url} -> {path}...") - urlretrieve(url, path) + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) def convert_hdf5_to_fbin(path, normalize): @@ -80,35 +89,140 @@ def download(name, normalize, ann_bench_data_path): raise -def main(): - call_path = os.getcwd() - if "RAPIDS_DATASET_ROOT_DIR" in os.environ: - default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") - else: - default_dataset_path = os.path.join(call_path, "datasets/") - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter +def generate_ann_benchmark_like_data( + output_file="ann_benchmarks_like.hdf5", + n_train=1000, + n_test=100, + d=32, + centers=3, + k=100, + metric="euclidean", + dataset_path="test-data/", +): + """ + Generate a synthetic dataset in HDF5 format with a structure + similar to ann-benchmarks datasets. By default, ground truth + is computed for the top-100 nearest neighbors. 
+ """ + + train_data, _ = make_blobs( + n_samples=n_train, n_features=d, centers=centers, random_state=42 ) - parser.add_argument( - "--dataset", help="dataset to download", default="glove-100-angular" + + test_data, _ = make_blobs( + n_samples=n_test, n_features=d, centers=centers, random_state=84 ) - parser.add_argument( - "--dataset-path", - help="path to download dataset", - default=default_dataset_path, + + test_data = test_data.astype(np.float32) + train_data = train_data.astype(np.float32) + + dist_matrix = cdist(test_data, train_data, metric=metric) + + actual_k = min(k, n_train) + neighbors = np.argsort(dist_matrix, axis=1)[:, :actual_k].astype(np.int32) + distances = np.take_along_axis(dist_matrix, neighbors, axis=1).astype( + np.float32 ) - parser.add_argument( - "--normalize", - help="normalize cosine distance to inner product", - action="store_true", + + full_path = os.path.join(dataset_path, "test-data") + os.makedirs(full_path, exist_ok=True) + full_path = os.path.join(full_path, output_file) + + with h5py.File(full_path, "w") as f: + # Datasets + f.create_dataset("train", data=train_data) + f.create_dataset("test", data=test_data) + f.create_dataset("neighbors", data=neighbors) + f.create_dataset("distances", data=distances) + + f.attrs["distance"] = metric + + convert_hdf5_to_fbin(full_path, normalize=True) + + print(f"Created {full_path} with:") + print(f" - train shape = {train_data.shape}") + print(f" - test shape = {test_data.shape}") + print(f" - neighbors shape = {neighbors.shape}") + print(f" - distances shape = {distances.shape}") + print(f" - metric = {metric}") + print(f" - neighbors per test sample = {actual_k}") + + +def get_default_dataset_path(): + return os.getenv( + "RAPIDS_DATASET_ROOT_DIR", os.path.join(os.getcwd(), "datasets") ) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - download(args.dataset, args.normalize, args.dataset_path) +@click.command() +@click.option( + "--dataset", + 
default="glove-100-angular", + help="Dataset to download.", +) +@click.option( + "--test-data-n-train", + default=10000, + help="Number of training examples for the test data.", +) +@click.option( + "--test-data-n-test", + default=1000, + help="Number of test examples for the test data.", +) +@click.option( + "--test-data-dims", + default=32, + help="Dimensionality for the test data.", +) +@click.option( + "--test-data-k", + default=100, + help="K value for the test data.", +) +@click.option( + "--test-data-output-file", + default="ann_benchmarks_like.hdf5", + help="Output file name for the test data.", +) +@click.option( + "--dataset-path", + default=None, + help="Path to download the dataset. If not provided, defaults to " + "the value of RAPIDS_DATASET_ROOT_DIR or '/datasets'.", +) +@click.option( + "--normalize", + is_flag=True, + help="Normalize cosine distance to inner product.", +) +def main( + dataset, + test_data_n_train, + test_data_n_test, + test_data_dims, + test_data_k, + test_data_output_file, + dataset_path, + normalize, +): + # Compute default dataset_path if not provided. + if dataset_path is None: + dataset_path = get_default_dataset_path() + + if dataset == "test-data": + generate_ann_benchmark_like_data( + output_file=test_data_output_file, + n_train=test_data_n_train, + n_test=test_data_n_test, + d=test_data_dims, + centers=3, + k=test_data_k, + metric="euclidean", + dataset_path=dataset_path, + ) + else: + download(dataset, normalize, dataset_path) if __name__ == "__main__": diff --git a/python/cuvs_bench/cuvs_bench/plot/__main__.py b/python/cuvs_bench/cuvs_bench/plot/__main__.py index 7d5e3ae83..02d598c9f 100644 --- a/python/cuvs_bench/cuvs_bench/plot/__main__.py +++ b/python/cuvs_bench/cuvs_bench/plot/__main__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -497,8 +497,9 @@ def load_all_results( ) @click.option( "--mode", + "-m", type=click.Choice(["throughput", "latency"], case_sensitive=False), - default="throughput", + default="latency", help="Search mode whose Pareto frontier is used on the Y-axis.", ) @click.option( diff --git a/python/cuvs_bench/cuvs_bench/run/data_export.py b/python/cuvs_bench/cuvs_bench/run/data_export.py index 3b03995d4..d42e87246 100644 --- a/python/cuvs_bench/cuvs_bench/run/data_export.py +++ b/python/cuvs_bench/cuvs_bench/run/data_export.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -110,7 +110,8 @@ def clean_algo_name(algo_name): Cleaned algorithm name. """ - return algo_name[0] if "base" in algo_name[1] else "_".join(algo_name) + name = algo_name[0] if "base" in algo_name[1] else "_".join(algo_name) + return name.removesuffix(".json") def write_csv(file, algo_name, df, extra_columns=None, skip_cols=None): @@ -138,6 +139,7 @@ def write_csv(file, algo_name, df, extra_columns=None, skip_cols=None): "time": df["real_time"], } ) + # Add extra columns if provided if extra_columns: for col in extra_columns: diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py index a16f01b94..bec776679 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -245,6 +245,7 @@ def prepare_executables( configurations. 
""" executables_to_run = {} + for algo, algo_conf in algos_conf.items(): validate_algorithm(algos_yaml, algo, gpu_present) for group, group_conf in algo_conf["groups"].items(): @@ -345,8 +346,9 @@ def get_build_path(executable: str) -> Optional[str]: devcontainer_path = "/home/coder/cuvs/cpp/build/latest/bench/ann" if os.path.exists(devcontainer_path): - print(f"-- Detected devcontainer artifacts in {devcontainer_path}.") - return devcontainer_path + devc_executable = os.path.join(devcontainer_path, executable) + print(f"-- Detected devcontainer artifact {devc_executable}.") + return devc_executable build_path = os.getenv("CUVS_HOME") if build_path: @@ -354,7 +356,7 @@ def get_build_path(executable: str) -> Optional[str]: build_path, "cpp", "build", "release", executable ) if os.path.exists(build_path): - print(f"-- Using RAFT bench from repository in {build_path}.") + print(f"-- Using cuVS bench from repository in {build_path}.") return build_path conda_path = os.getenv("CONDA_PREFIX") diff --git a/python/cuvs_bench/cuvs_bench/tests/test_cli.py b/python/cuvs_bench/cuvs_bench/tests/test_cli.py new file mode 100644 index 000000000..78685d7bd --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/tests/test_cli.py @@ -0,0 +1,422 @@ +# +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from pathlib import Path + +import pandas as pd +import pytest +from click.testing import CliRunner +from cuvs_bench.get_dataset.__main__ import main + + +@pytest.fixture(scope="session") +def temp_datasets_dir(tmp_path_factory): + return tmp_path_factory.mktemp("datasets") + + +def test_get_dataset_creates_expected_files(temp_datasets_dir: Path): + runner = CliRunner() + dataset_path_arg = str(temp_datasets_dir) + + # Invoke the CLI command as if calling: + # python -m cuvs_bench.get_dataset --dataset test-data \ + # --dataset-path + result = runner.invoke( + main, ["--dataset", "test-data", "--dataset-path", dataset_path_arg] + ) + + assert result.exit_code == 0, f"CLI call failed: {result.output}" + + expected_files = [ + "test-data/ann_benchmarks_like.groundtruth.distances.fbin", + "test-data/ann_benchmarks_like.base.fbin", + "test-data/ann_benchmarks_like.groundtruth.neighbors.ibin", + "test-data/ann_benchmarks_like.query.fbin", + "test-data/ann_benchmarks_like.hdf5", + ] + + # Verify that each expected file exists in the datasets directory. + for filename in expected_files: + file_path = temp_datasets_dir / filename + assert ( + file_path.exists() + ), f"Expected file {filename} was not generated." + + +def test_run_command_creates_results(temp_datasets_dir: Path): + """ + This test simulates running the command: + + python -m cuvs_bench.run --dataset test-data --dataset-path datasets/ \ + --algorithms faiss_gpu_ivf_flat,faiss_gpu_ivf_sq,cuvs_ivf_flat,\ + cuvs_cagra,ggnn,cuvs_cagra_hnswlib, \ + --batch-size 100 -k 10 --groups test -m latency --force + + It then verifies that the set of expected result files + (both under result/build and result/search) + are created under datasets/test-data/ and are not empty. 
+ """ + + dataset_path_arg = str(temp_datasets_dir) + + from cuvs_bench.run.__main__ import main as run_main + + runner = CliRunner() + run_args = [ + "--dataset", + "test-data", + "--dataset-path", + dataset_path_arg, + "--algorithms", + "faiss_gpu_ivf_flat,faiss_gpu_ivf_sq,cuvs_ivf_flat,cuvs_cagra,ggnn,cuvs_cagra_hnswlib,", # noqa: E501 + "--batch-size", + "100", + "-k", + "10", + "--groups", + "test", + "-m", + "latency", + "--force", + ] + result = runner.invoke(run_main, run_args) + assert ( + result.exit_code == 0 + ), f"Run command failed with output:\n{result.output}" + + common_build_header = [ + "algo_name", + "index_name", + "time", + "threads", + "cpu_time", + "GPU", + ] + + common_search_header = [ + "algo_name", + "index_name", + "recall", + "throughput", + "latency", + "threads", + "cpu_time", + "GPU", + ] + + # --- Verify that the expected result files exist and are not empty --- + expected_files = { + # Build files: + "test-data/result/build/cuvs_ivf_flat,test.csv": { + "header": common_build_header + + [ + "niter", + "nlist", + "ratio", + ], + "rows": 1, + }, + "test-data/result/build/cuvs_cagra_hnswlib,test.csv": { + "header": common_build_header + + [ + "ef_construction", + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 2, + }, + "test-data/result/build/faiss_gpu_ivf_flat,test.csv": { + "header": common_build_header + + [ + "nlist", + "ratio", + "use_cuvs", + ], + "rows": 1, + }, + "test-data/result/build/cuvs_cagra,test.csv": { + "header": common_build_header + + [ + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 1, + }, + # Search files: + "test-data/result/search/cuvs_cagra_hnswlib,test,k10,bs100,raw.csv": { + "header": common_search_header + + [ + "ef", + "end_to_end", + "k", + "n_queries", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "ef_construction", + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 4, + }, + 
"test-data/result/search/cuvs_cagra,test,k10,bs100,latency.csv": { + "header": common_search_header + + [ + "end_to_end", + "itopk", + "k", + "n_queries", + "search_width", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 2, + }, + "test-data/result/search/cuvs_cagra,test,k10,bs100,throughput.csv": { + "header": common_search_header + + [ + "end_to_end", + "itopk", + "k", + "n_queries", + "search_width", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 2, + }, + "test-data/result/search/cuvs_cagra,test,k10,bs100,raw.csv": { + "header": common_search_header + + [ + "end_to_end", + "itopk", + "k", + "n_queries", + "search_width", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "graph_degree", + "intermediate_graph_degree", + "label", + ], + "rows": 2, + }, + "test-data/result/search/cuvs_ivf_flat,test,k10,bs100,latency.csv": { + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "niter", + "nlist", + "ratio", + ], + "rows": 2, + }, + "test-data/result/search/cuvs_ivf_flat,test,k10,bs100,raw.csv": { + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "niter", + "nlist", + "ratio", + ], + "rows": 2, + }, + "test-data/result/search/cuvs_ivf_flat,test,k10,bs100,throughput.csv": { # noqa: E501 + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "niter", + "nlist", + "ratio", + ], + "rows": 2, + }, + 
"test-data/result/search/faiss_gpu_ivf_flat,test,k10,bs100,latency.csv": { # noqa: E501 + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "nlist", + "ratio", + "use_cuvs", + ], + "rows": 2, + }, + "test-data/result/search/faiss_gpu_ivf_flat,test,k10,bs100,raw.csv": { + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "nlist", + "ratio", + "use_cuvs", + ], + "rows": 2, + }, + "test-data/result/search/faiss_gpu_ivf_flat,test,k10,bs100,throughput.csv": { # noqa: E501 + "header": common_search_header + + [ + "end_to_end", + "k", + "n_queries", + "nprobe", + "total_queries", + "build time", + "build threads", + "build cpu_time", + "build GPU", + "nlist", + "ratio", + "use_cuvs", + ], + "rows": 2, + }, + } + + for rel_path, expectations in expected_files.items(): + file_path = temp_datasets_dir / rel_path + assert file_path.exists(), f"Expected file {file_path} does not exist." + assert ( + file_path.stat().st_size > 0 + ), f"Expected file {file_path} is empty." 
+ + df = pd.read_csv(file_path) + + actual_header = list(df.columns) + actual_rows = len(df) + + # breakpoint() + assert ( + actual_header == expectations["header"] + ), f"Wrong header produced in file f{rel_path}" + assert actual_rows == expectations["rows"] + + +def test_plot_command_creates_png_files(temp_datasets_dir: Path): + """ + This test simulates running the command: + + python -m cuvs_bench.plot --dataset test-data --dataset-path datasets/ \ + --algorithms faiss_gpu_ivf_flat,faiss_gpu_ivf_sq, \ + cuvs_ivf_flat,cuvs_cagra,ggnn,cuvs_cagra_hnswlib \ + --batch-size 100 -k 10 --groups test -m latency + + and then verifies that the following files are produced in the + working directory: + - search-test-data-k10-batch_size100.png + - build-test-data-k10-batch_size100.png + + It also checks that these files are not empty. + """ + + dataset_path_arg = str(temp_datasets_dir) + + from cuvs_bench.plot.__main__ import main as plot_main + + runner = CliRunner() + args = [ + "--dataset", + "test-data", + "--dataset-path", + dataset_path_arg, + "--output-filepath", + dataset_path_arg, + "--algorithms", + "faiss_gpu_ivf_flat,faiss_gpu_ivf_sq,cuvs_ivf_flat,cuvs_cagra,ggnn,cuvs_cagra_hnswlib", # noqa: E501 + "--batch-size", + "100", + "-k", + "10", + "--groups", + "test", + "-m", + "latency", + ] + result = runner.invoke(plot_main, args) + assert ( + result.exit_code == 0 + ), f"Plot command failed with output:\n{result.output}" + + # Expected output file names. + expected_files = [ + "search-test-data-k10-batch_size100.png", + "build-test-data-k10-batch_size100.png", + ] + + for filename in expected_files: + file_path = temp_datasets_dir / filename + assert file_path.exists(), f"Expected file {filename} does not exist." + assert ( + file_path.stat().st_size > 0 + ), f"Expected file {filename} is empty." 
diff --git a/python/cuvs_bench/cuvs_bench/tests/test_run.py b/python/cuvs_bench/cuvs_bench/tests/test_run.py index 7b7a481a0..c24dee1d2 100644 --- a/python/cuvs_bench/cuvs_bench/tests/test_run.py +++ b/python/cuvs_bench/cuvs_bench/tests/test_run.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,10 +16,11 @@ import itertools +import os from unittest.mock import MagicMock, mock_open, patch import pytest -from benchmark import ( +from cuvs_bench.run.run import ( find_executable, gather_algorithm_configs, get_dataset_configuration, @@ -60,15 +61,25 @@ def test_prepare_conf_file(): "search_basic_param": {"k": 10, "batch_size": 128}, } assert result == expected_result - result_no_subset = prepare_conf_file(dataset_conf, None, 10, 128) - assert result_no_subset["dataset"].get("subset_size") is None def test_gather_algorithm_configs(tmpdir): - scripts_path = tmpdir.mkdir("scripts") - algos_path = scripts_path.mkdir("algos") - algos_path.join("algo1.yaml").write("key: value") - algos_path.join("algo2.yaml").write("key: value") + scripts_base_path = tmpdir.mkdir("scripts_base") + + scripts_path = os.path.join(scripts_base_path, "scripts") + os.mkdir(scripts_path) + + algos_path = os.path.join(scripts_base_path, "config") + os.mkdir(algos_path) + algos_path = os.path.join(algos_path, "algos") + os.mkdir(algos_path) + + with open(os.path.join(algos_path, "algo1.yaml"), "w") as f: + f.write("key: value") + + with open(os.path.join(algos_path, "algo2.yaml"), "w") as f: + f.write("key: value") + result = gather_algorithm_configs(str(scripts_path), None) assert len(result) == 2 @@ -80,7 +91,7 @@ def test_gather_algorithm_configs(tmpdir): custom_conf_file = custom_conf_dir.join("custom_algo_file.yaml") custom_conf_file.write("key: value") result = 
gather_algorithm_configs(str(scripts_path), str(custom_conf_file)) - assert len(result) == 4 + assert len(result) == 3 def test_load_algorithms_conf(): @@ -91,23 +102,24 @@ def test_load_algorithms_conf(): group1: {} """ with patch("builtins.open", mock_open(read_data=yaml_content)): - result = load_algorithms_conf(algos_conf_fs, None, None) + result = load_algorithms_conf(algos_conf_fs, None, None, None) assert "algo1" in result with patch("builtins.open", mock_open(read_data=yaml_content)): - result = load_algorithms_conf(algos_conf_fs, ["algo1"], None) + result = load_algorithms_conf(algos_conf_fs, ["algo1"], None, None) assert "algo1" in result - result = load_algorithms_conf(algos_conf_fs, ["algo2"], None) + result = load_algorithms_conf(algos_conf_fs, ["algo2"], None, None) assert "algo1" not in result @patch( - "benchmark.find_executable", + "cuvs_bench.run.run.find_executable", return_value=("executable", "path", "filename"), ) -@patch("benchmark.validate_algorithm", return_value=True) +@patch("cuvs_bench.run.run.validate_algorithm", return_value=True) @patch( - "benchmark.prepare_indexes", return_value=[{"index_key": "index_value"}] + "cuvs_bench.run.run.prepare_indexes", + return_value=[{"index_key": "index_value"}], ) def test_prepare_executables( mock_prepare_indexes, mock_validate_algorithm, mock_find_executable @@ -130,25 +142,102 @@ def test_prepare_executables( count, batch_size, ) - assert "executable" in result - assert len(result["executable"]["index"]) == 1 + assert ("executable", "path", "filename") in result.keys() + assert len(result[("executable", "path", "filename")]["index"]) == 1 -def test_prepare_indexes(): - group_conf = {"build": {"param1": [1, 2]}, "search": {"param2": [3, 4]}} +@patch("cuvs_bench.run.run.validate_constraints", return_value=True) +@patch( + "cuvs_bench.run.run.validate_search_params", + return_value=[{"sparam": "dummy_value"}], +) +def test_prepare_indexes_valid( + mock_validate_search_params, 
mock_validate_constraints +): + group_conf = { + "build": { + "param1": [1, 2], + "param2": [3, 4], + }, + "search": {"sparam1": [True, False]}, + } + algo = "algo1" + group = "base" conf_file = {"dataset": {"dims": 128}} - result = prepare_indexes( + algos_conf = {} + dataset_path = "/tmp/dataset" + dataset = "data1" + count = 10 + batch_size = 32 + + indexes = prepare_indexes( group_conf, - "algo", - "group", + algo, + group, conf_file, - "dataset_path", - "dataset", - 10, - 128, + algos_conf, + dataset_path, + dataset, + count, + batch_size, ) - assert len(result) == 2 - assert "param1" in result[0]["build_param"] + + # There are 2 build parameters with 2 values each, so we expect 4 indexes. + assert len(indexes) == 4 + + for index in indexes: + assert index["algo"] == algo + + expected_filename = ( + index["name"] + if len(index["name"]) < 128 + else str(hash(index["name"])) + ) + expected_file = os.path.join( + dataset_path, dataset, "index", expected_filename + ) + assert index["file"] == expected_file + # Verify that our dummy search parameters were set. + assert index["search_params"] == [{"sparam": "dummy_value"}] + + +@patch("cuvs_bench.run.run.validate_constraints", return_value=False) +@patch( + "cuvs_bench.run.run.validate_search_params", + return_value=[{"sparam": "dummy_value"}], +) +def test_prepare_indexes_invalid( + mock_validate_search_params, mock_validate_constraints +): + group_conf = { + "build": { + "param1": [1, 2], + }, + "search": {}, + } + algo = "algo1" + group = "base" + conf_file = {"dataset": {"dims": 128}} + algos_conf = {} + dataset_path = "/tmp/dataset" + dataset = "data1" + count = 10 + batch_size = 32 + + indexes = prepare_indexes( + group_conf, + algo, + group, + conf_file, + algos_conf, + dataset_path, + dataset, + count, + batch_size, + ) + + # Since constraints fail, no indexes should be created. 
+ assert indexes == [] def test_validate_search_params(): @@ -159,8 +248,10 @@ def test_validate_search_params(): result = validate_search_params( all_search_params, search_param_names, + {}, "algo", group_conf, + {"algo": {"constraints": []}}, conf_file, 10, 128, @@ -175,7 +266,7 @@ def test_rmm_present(): assert rmm_present() is False -@patch("benchmark.get_build_path", return_value="build_path") +@patch("cuvs_bench.run.run.get_build_path", return_value="build_path") def test_find_executable(mock_get_build_path): algos_conf = {"algo1": {"executable": "executable1"}} result = find_executable(algos_conf, "algo1", "group1", 10, 128) @@ -200,28 +291,33 @@ def test_validate_algorithm(): assert result is False -@patch("benchmark.import_module") +@patch("cuvs_bench.run.run.import_module") def test_validate_constraints(mock_import_module): + # Create a mock validator and have import_module return it. mock_validator = MagicMock() mock_import_module.return_value = mock_validator + + # Test case 1: The constraint function returns True. mock_validator.constraint_func.return_value = True algos_conf = { "algo1": {"constraints": {"build": "module.constraint_func"}} } result = validate_constraints( - algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + algos_conf, "algo1", "build", {"param1": "value1"}, {}, 128, None, None ) assert result is True + # Test case 2: No constraints are specified; should return True. algos_conf = {"algo1": {"constraints": {}}} result = validate_constraints( - algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + algos_conf, "algo1", "build", {"param1": "value1"}, {}, 128, None, None ) assert result is True + # Test case 3: The constraint function returns False. 
mock_validator.constraint_func.return_value = False algos_conf["algo1"]["constraints"]["build"] = "module.constraint_func" result = validate_constraints( - algos_conf, "algo1", "build", {"param1": "value1"}, 128, None, None + algos_conf, "algo1", "build", {"param1": "value1"}, {}, 128, None, None ) assert result is False diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 75e5406d4..420571963 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -23,6 +23,8 @@ dependencies = [ "matplotlib", "pandas", "pyyaml", + "requests", + "scikit-learn", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers",