
Commit fda1a7e

Merge branch 'branch-24.02' into improve_parallelism_of_refine_host

cjnolet authored Jan 3, 2024
2 parents b0a4a55 + 6d3572b

Showing 8 changed files with 152 additions and 85 deletions.
cpp/include/raft/core/cublas_macros.hpp (4 additions, 4 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@
 namespace raft {
 
 /**
- * @ingroup error_handling
+ * @addtogroup error_handling
  * @{
  */
@@ -76,7 +76,7 @@ inline const char* cublas_error_to_string(cublasStatus_t err)
 #undef _CUBLAS_ERR_TO_STR
 
 /**
- * @ingroup assertion
+ * @addtogroup assertion
  * @{
  */
@@ -135,4 +135,4 @@
 #define CUBLAS_CHECK_NO_THROW(call) RAFT_CUBLAS_TRY_NO_THROW(call)
 #endif
 
-#endif
\ No newline at end of file
+#endif
cpp/include/raft/linalg/add.cuh (9 additions, 3 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,7 +29,11 @@ namespace raft {
 namespace linalg {
 
 /**
- * @ingroup arithmetic
+ * @defgroup arithmetic Arithmetic functions
+ * @{
+ */
+
+/**
  * @brief Elementwise scalar add operation on the input buffer
  *
  * @tparam InT input data-type. Also the data-type upon which the math ops
@@ -87,6 +91,8 @@ void addDevScalar(
   detail::addDevScalar(outDev, inDev, singleScalarDev, len, stream);
 }
 
+/** @} */ // end of group add
+
 /**
  * @defgroup add_dense Addition Arithmetic
  * @{
@@ -220,4 +226,4 @@ void add_scalar(raft::resources const& handle,
 }; // end namespace linalg
 }; // end namespace raft
 
-#endif
\ No newline at end of file
+#endif
cpp/include/raft/neighbors/detail/refine_device.cuh (1 addition, 1 deletion)

@@ -51,7 +51,7 @@ void refine_device(raft::resources const& handle,
   uint32_t k = static_cast<uint32_t>(indices.extent(1));
 
   RAFT_EXPECTS(k <= raft::matrix::detail::select::warpsort::kMaxCapacity,
-               "k must be lest than topk::kMaxCapacity (%d).",
+               "k must be less than topk::kMaxCapacity (%d).",
                raft::matrix::detail::select::warpsort::kMaxCapacity);
 
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
cpp/include/raft/neighbors/nn_descent_types.hpp (2 additions, 2 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@
 namespace raft::neighbors::experimental::nn_descent {
 /**
- * @ingroup nn_descent
+ * @ingroup nn-descent
  * @{
  */
docs/source/raft_ann_benchmarks.md (7 additions, 6 deletions)

@@ -171,8 +171,8 @@ options:
 `algo-groups`: this parameter is helpful to append any specific algorithm+group combination to run the benchmark for in addition to all the arguments from `algorithms` and `groups`. It is of the format `<algorithm>.<group>`, or for example, `raft_cagra.large`
 
-For every algorithm run by this script, it outputs an index build statistics JSON file in `<dataset-path/<dataset>/result/build/<algo_{group}-{k}-{batch_size}.json>`
-and an index search statistics JSON file in `<dataset-path/<dataset>/result/search/<algo_{group}-{k}-{batch_size}.json>`. NOTE: The filenams will not have "_{group}" if `group = "base"`.
+For every algorithm run by this script, it outputs an index build statistics JSON file in `<dataset-path/<dataset>/result/build/<{algo},{group}.json>`
+and an index search statistics JSON file in `<dataset-path/<dataset>/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`.
 
 `dataset-path` :
 1. data is read from `<dataset-path>/<dataset>`
@@ -198,8 +198,8 @@ options:
   --dataset-path DATASET_PATH
                         path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR})
 ```
-Build statistics CSV file is stored in `<dataset-path/<dataset>/result/build/<algo_group.csv>`
-and index search statistics CSV file in `<dataset-path/<dataset>/result/search/<algo_group-k{k}-batch_size{batch_size}_{suffix}.csv>`, where suffix has three values:
+Build statistics CSV file is stored in `<dataset-path/<dataset>/result/build/<{algo},{group}.csv>`
+and index search statistics CSV file in `<dataset-path/<dataset>/result/search/<{algo},{group},k{k},bs{batch_size},{suffix}.csv>`, where suffix has three values:
 1. `raw`: All search results are exported
 2. `throughput`: Pareto frontier of throughput results is exported
 3. `latency`: Pareto frontier of latency results is exported
@@ -212,8 +212,8 @@ CSV files `<dataset-path/<dataset>/result/search/*.csv`.
 The usage of this script is:
 ```bash
 usage: [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS]
-[-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--mode {throughput,latency}] [--time-unit {s,ms,us}]
-[--raw]
+[-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--x-start X_START] [--mode {throughput,latency}]
+[--time-unit {s,ms,us}] [--raw]
 
 options:
   -h, --help show this help message and exit
@@ -237,6 +237,7 @@ options:
   --x-scale X_SCALE     Scale to use when drawing the X-axis. Typically linear, logit or a2 (default: linear)
   --y-scale {linear,log,symlog,logit}
                         Scale to use when drawing the Y-axis (default: linear)
+  --x-start X_START     Recall values to start the x-axis from (default: 0.8)
   --mode {throughput,latency}
                         search mode whose Pareto frontier is used on the y-axis (default: throughput)
   --time-unit {s,ms,us}
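For concreteness, here is a minimal sketch of the comma-separated naming scheme the documentation hunks above describe. It is illustrative only — the helper functions are hypothetical and not part of the benchmark scripts — and it assumes, per the NOTE, that the ",{group}" segment is dropped when the group is "base":

```python
# Hypothetical helpers illustrating the documented filename layout.

def build_filename(algo: str, group: str) -> str:
    # Build stats: <{algo},{group}.json>; ",{group}" omitted for group "base".
    return f"{algo}.json" if group == "base" else f"{algo},{group}.json"

def search_filename(algo: str, group: str, k: int, batch_size: int) -> str:
    # Search stats: <{algo},{group},k{k},bs{batch_size}.json>.
    prefix = algo if group == "base" else f"{algo},{group}"
    return f"{prefix},k{k},bs{batch_size}.json"

print(build_filename("raft_cagra", "large"))              # raft_cagra,large.json
print(search_filename("raft_cagra", "large", 10, 10000))  # raft_cagra,large,k10,bs10000.json
print(search_filename("raft_cagra", "base", 10, 10000))   # raft_cagra,k10,bs10000.json
```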
python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py (21 additions, 13 deletions)

@@ -74,7 +74,9 @@ def read_file(dataset, dataset_path, method):
         try:
             data = json.load(f)
             df = pd.DataFrame(data["benchmarks"])
-            yield os.path.join(dir, file), file.split("-")[0], df
+            filename_split = file.split(",")
+            algo_name = (filename_split[0], filename_split[1])
+            yield os.path.join(dir, file), algo_name, df
         except Exception as e:
             print(
                 "An error occurred processing file %s (%s). "
@@ -85,7 +87,10 @@ def read_file(dataset, dataset_path, method):
 def convert_json_to_csv_build(dataset, dataset_path):
     for file, algo_name, df in read_file(dataset, dataset_path, "build"):
         try:
-            algo_name = algo_name.replace("_base", "")
+            if "base" in algo_name[1]:
+                algo_name = algo_name[0]
+            else:
+                algo_name = "_".join(algo_name)
             df["name"] = df["name"].str.split("/").str[0]
             write = pd.DataFrame(
                 {
@@ -97,12 +102,7 @@ def convert_json_to_csv_build(dataset, dataset_path):
             for name in df:
                 if name not in skip_build_cols:
                     write[name] = df[name]
-            filepath = os.path.normpath(file).split(os.sep)
-            filename = filepath[-1].split("-")[0] + ".csv"
-            write.to_csv(
-                os.path.join(f"{os.sep}".join(filepath[:-1]), filename),
-                index=False,
-            )
+            write.to_csv(file.replace(".json", ".csv"), index=False)
         except Exception as e:
             print(
                 "An error occurred processing file %s (%s). Skipping..."
@@ -140,9 +140,17 @@ def convert_json_to_csv_search(dataset, dataset_path):
     for file, algo_name, df in read_file(dataset, dataset_path, "search"):
         try:
             build_file = os.path.join(
-                dataset_path, dataset, "result", "build", f"{algo_name}.csv"
+                dataset_path,
+                dataset,
+                "result",
+                "build",
+                f"{','.join(algo_name)}.csv",
             )
-            algo_name = algo_name.replace("_base", "")
+            print(build_file)
+            if "base" in algo_name[1]:
+                algo_name = algo_name[0]
+            else:
+                algo_name = "_".join(algo_name)
             df["name"] = df["name"].str.split("/").str[0]
             try:
                 write = pd.DataFrame(
@@ -201,13 +209,13 @@ def convert_json_to_csv_search(dataset, dataset_path):
                     "appended in the Search CSV"
                 )
 
-            write.to_csv(file.replace(".json", "_raw.csv"), index=False)
+            write.to_csv(file.replace(".json", ",raw.csv"), index=False)
             throughput = get_frontier(write, "throughput")
             throughput.to_csv(
-                file.replace(".json", "_throughput.csv"), index=False
+                file.replace(".json", ",throughput.csv"), index=False
             )
             latency = get_frontier(write, "latency")
-            latency.to_csv(file.replace(".json", "_latency.csv"), index=False)
+            latency.to_csv(file.replace(".json", ",latency.csv"), index=False)
         except Exception as e:
             print(
                 "An error occurred processing file %s (%s). Skipping..."
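To see how the renamed result files round-trip through the exporter, here is a short standalone sketch (not part of the diff; the filename is hypothetical) mirroring the parsing in read_file and the display-name logic in the two convert_json_to_csv_* functions:

```python
# A result filename produced under the new comma-separated scheme.
file = "raft_cagra,large,k10,bs10000.json"

# read_file: recover (algo, group) from the first two comma-separated fields.
filename_split = file.split(",")
algo_name = (filename_split[0], filename_split[1])  # ("raft_cagra", "large")

# Display name: fold the group into the algorithm name unless it is "base".
if "base" in algo_name[1]:
    display = algo_name[0]         # "raft_cagra"
else:
    display = "_".join(algo_name)  # "raft_cagra_large"

# Matching build CSV name, as composed in convert_json_to_csv_search.
build_csv = f"{','.join(algo_name)}.csv"  # "raft_cagra,large.csv"
print(display, build_csv)
```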