From 3bbef9233165f7030bee47a1adcc1bf823fbeb01 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 12 Feb 2025 18:30:54 +0100 Subject: [PATCH] fix style --- docs/source/cuvs_bench/index.rst | 6 +++--- python/cuvs_bench/cuvs_bench/run/__main__.py | 2 +- python/cuvs_bench/cuvs_bench/run/run.py | 7 ++++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index 40c319f0d..f01649ce5 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -223,7 +223,7 @@ The usage of `python -m cuvs_bench.split_groundtruth` is: Testing on new datasets ----------------------- -To run benchmark on a dataset, it is required have a descriptor that defines the file names and a few other properties of that datase. +To run a benchmark on a dataset, it is required to have a descriptor that defines the file names and a few other properties of that dataset. Descriptors for several popular datasets are already available in `datasets.yaml ``. Let's consider how to test on a new dataset. First we create a descriptor `mydataset.yaml` @@ -237,7 +237,7 @@ Let's consider how to test on a new dataset. First we create a descriptor `mydat groundtruth_neighbors_file: mydata-1M/groundtruth.neighbors.ibin distance: euclidean -Here `name` can be chosen arbitrarily. We pass `name` as the `--dataset` argument for the benchmark. The file names are relative to the path given by `--dataset-path` argument. +Here `name` can be chosen arbitrarily. We pass `name` as the `--dataset` argument for the benchmark. The file names are relative to the path given by the `--dataset-path` argument. The `subset_size`` field is optional. This argument defines how many vectors to use from the dataset file, the first `subset_size` vectors will be used. This way you can define benchmarks on multiple subsets of the same dataset without duplicating the dataset vectors. 
Note that the ground truth vectors have to be generated for each subset separately. @@ -245,7 +245,7 @@ Note that the ground truth vectors have to be generated for each subset separate To run the benchmark on the newly defined `mydata-1M` dataset, you can use the following command line: .. code-black: bash - python -m cuvs_bench.run --dataset mydata-1M --dataset-path=/path/to/data/folder --dataset-configuration=mydataset.yaml --algorithms=cuvs_cagra + python -m cuvs_bench.run --dataset mydata-1M --dataset-path=/path/to/data/folder --dataset-configuration=mydataset.yaml --algorithms=cuvs_cagra Running with Docker containers ------------------------------ diff --git a/python/cuvs_bench/cuvs_bench/run/__main__.py b/python/cuvs_bench/cuvs_bench/run/__main__.py index 223bfbef3..46e4d9f4f 100644 --- a/python/cuvs_bench/cuvs_bench/run/__main__.py +++ b/python/cuvs_bench/cuvs_bench/run/__main__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuvs_bench/cuvs_bench/run/run.py b/python/cuvs_bench/cuvs_bench/run/run.py index b6b37221a..3f9d34ca4 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -298,7 +298,12 @@ def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool: def find_executable( - algos_conf: dict, algo: str, group: str, k: int, batch_size: int, executable_dir: str + algos_conf: dict, + algo: str, + group: str, + k: int, + batch_size: int, + executable_dir: str, ) -> Tuple[str, str, Tuple[str, str]]: """ Find the executable for the given algorithm and group.