Use -1 as default value for build time, indicating that index has been loaded from file.
harsha-simhadri · Oct 26, 2023 · d7bdeb3 · d7bdeb3
1 parent 43b8d71
commit d7bdeb3
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 10 deletions.
diff --git a/benchmark/plotting/metrics.py b/benchmark/plotting/metrics.py
@@ -95,7 +95,7 @@ def index_size(attrs):
 
 
 def build_time(attrs):
-    return attrs.get("build_time", 1e6)
+    return attrs.get("build_time", -1)
 
 
 def dist_computations(nq, attrs):
@@ -144,11 +144,11 @@ def mean_latency(attrs):
         "function": lambda true_nn, run_nn, metrics, run_attrs: index_size(run_attrs),  # noqa
         "worst": float("inf")
     },
-    "queriessize": {
-        "description": "Index size (kB)/Queries per second (s)",
-        "function": lambda true_nn, run_nn, metrics, run_attrs: index_size(run_attrs) / queries_per_second(len(true_nn[0]), run_attrs), # noqa
-        "worst": float("inf")
-    },
+    # "queriessize": {
+    #     "description": "Index size (kB)/Queries per second (s)",
+    #     "function": lambda true_nn, run_nn, metrics, run_attrs: index_size(run_attrs) / queries_per_second(len(true_nn[0]), run_attrs), # noqa
+    #     "worst": float("inf")
+    # },
     "wspq": {
         "description": "Watt seconds per query (watt*s/query)",
         "function": lambda true_nn, run_nn, metrics, run_attrs: watt_seconds_per_query(true_nn, run_attrs),  

diff --git a/benchmark/runner.py b/benchmark/runner.py
@@ -44,6 +44,7 @@ def run(definition, dataset, count, run_count, rebuild,
 
     distance = ds.distance()
     search_type = ds.search_type()
+    build_time = -1 # default value used to indicate that the index was loaded from file
     print(f"Running {definition.algorithm} on {dataset}")
 
     custom_runner = RUNNERS.get(neurips23track, BaseRunner)
@@ -104,8 +105,8 @@ def run(definition, dataset, count, run_count, rebuild,
                 else:
                     descriptor, results = custom_runner.run_task(
                         algo, ds, distance, count, run_count, search_type, private_query)
-                # A bit unclear how to set this correctly if we usually load from file
-                #descriptor["build_time"] = build_time
+
+                descriptor["build_time"] = build_time
                 descriptor["index_size"] = index_size
                 descriptor["algo"] = definition.algorithm
                 descriptor["dataset"] = dataset

diff --git a/data_export.py b/data_export.py
@@ -103,12 +103,13 @@ def cleaned_run_metric(run_metrics):
                     run_metrics = compute_metrics_all_runs(dataset, dataset_name, results, args.recompute, \
                         args.sensors, args.search_times, args.private_query, \
                         neurips23track=track, runbook_path=runbook_path)
-                    dfs.append(pd.DataFrame(cleaned_run_metric(run_metrics)))
             else:
                 results = load_all_results(dataset_name, neurips23track=track)
                 run_metrics = compute_metrics_all_runs(dataset, dataset_name, results, args.recompute, \
                         args.sensors, args.search_times, args.private_query, neurips23track=track)
-                dfs.append(pd.DataFrame(cleaned_run_metric(run_metrics)))
+            results = cleaned_run_metric(run_metrics)
+            if len(results) > 0:
+                dfs.append(pd.DataFrame(results))
     dfs = [e for e in dfs if len(e) > 0]
     if len(dfs) > 0:
         data = pd.concat(dfs)