From 235a9c71e7054ea49e47576f0d8bc99fadf6b314 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Thu, 23 Jan 2025 10:37:59 -0500 Subject: [PATCH 01/18] hnsw endpoint added Example usage: curl -X GET 'http://localhost:8080/api/v1.0/indexes/msmarco-v1-passage.bge-base-en-v1.5.hnsw/search?query=How%20does%20the%20process%20of%20digestion%20and%20metabolism%20of%20carbohydrates%20start&hits=10&efSearch=128&encoder=BgeBaseEn15&queryGenerator=VectorQueryGenerator' --- .../io/anserini/server/ControllerV1_0.java | 7 +- .../io/anserini/server/SearchService.java | 109 ++++++++++++++++-- .../io/anserini/server/ControllerTest.java | 4 +- 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 76e255609c..8055d36d18 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -39,7 +39,10 @@ public class ControllerV1_0 { public Map searchIndex(@PathVariable(value = "index", required = false) String index, @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, - @RequestParam(value = "qid", defaultValue = "") String qid) { + @RequestParam(value = "qid", defaultValue = "") String qid, + @RequestParam(value = "efSearch", required = false) Integer efSearch, + @RequestParam(value = "encoder", required = false) String encoder, + @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { if (index == null) { index = DEFAULT_INDEX; @@ -50,7 +53,7 @@ public Map searchIndex(@PathVariable(value = "index", required = } SearchService searchService = new SearchService(index); - List> candidates = searchService.search(query, hits); + List> candidates = searchService.search(query, hits, efSearch, encoder, queryGenerator); Map queryMap = new LinkedHashMap<>(); queryMap.put("query", new LinkedHashMap<>(Map.of("qid", qid, "text", query))); diff 
--git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 44771b4294..38b7698969 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -19,7 +19,9 @@ import io.anserini.index.Constants; import io.anserini.search.ScoredDoc; import io.anserini.search.SimpleSearcher; +import io.anserini.search.HnswDenseSearcher; import io.anserini.util.PrebuiltIndexHandler; +import io.anserini.index.IndexInfo; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -29,22 +31,27 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.stream.Collectors; public class SearchService { private final String indexDir; + private final String prebuiltIndex; private final float k1 = 0.9f; private final float b = 0.4f; private final ObjectMapper mapper = new ObjectMapper(); + private final boolean isHnswIndex; + + private static final int DEFAULT_EF_SEARCH = 100; + private static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; public SearchService(String prebuiltIndex) { + this.prebuiltIndex = prebuiltIndex; PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); handler.initialize(); try { handler.download(); indexDir = handler.decompressIndex(); + isHnswIndex = prebuiltIndex.contains(".hnsw"); } catch (Exception e) { throw new RuntimeException(e); } @@ -52,8 +59,53 @@ public SearchService(String prebuiltIndex) { public List> search(String query, int hits) { try { - SimpleSearcher searcher = new SimpleSearcher(indexDir); - searcher.set_bm25(k1, b); + if (!isHnswIndex) { + SimpleSearcher searcher = new SimpleSearcher(indexDir); + searcher.set_bm25(k1, b); + ScoredDoc[] results = searcher.search(query, hits); + List> candidates = new ArrayList<>(); + for (ScoredDoc r : results) { + Map candidate = new LinkedHashMap<>(); 
+ candidate.put("docid", r.docid); + candidate.put("score", r.score); + String raw = r.lucene_document.get(Constants.RAW); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } + candidates.add(candidate); + } + searcher.close(); + return candidates; + } else { + return searchHnsw(query, hits, null, null, null); + } + } catch (Exception e) { + e.printStackTrace(); + return List.of(); + } + } + + public List> search(String query, int hits, + Integer efSearch, String encoder, String queryGenerator) { + if (!isHnswIndex) { + // Ignore HNSW parameters for BM25 indexes + return search(query, hits); + } + return searchHnsw(query, hits, efSearch, encoder, queryGenerator); + } + + private List> searchHnsw(String query, int hits, + Integer efSearch, String encoder, String queryGenerator) { + try { + HnswDenseSearcher.Args args = createHnswArgs(efSearch, encoder, queryGenerator); + HnswDenseSearcher searcher = new HnswDenseSearcher(args); ScoredDoc[] results = searcher.search(query, hits); List> candidates = new ArrayList<>(); for (ScoredDoc r : results) { @@ -81,10 +133,53 @@ public List> search(String query, int hits) { } } + private HnswDenseSearcher.Args createHnswArgs(Integer efSearch, String encoder, String queryGenerator) { + HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); + args.index = indexDir; + args.efSearch = efSearch != null ? efSearch : DEFAULT_EF_SEARCH; + args.queryGenerator = queryGenerator != null ? 
queryGenerator : DEFAULT_QUERY_GENERATOR; + + // Attempt to get encoder from IndexInfo, or use provided encoder + if (encoder != null) { + args.encoder = encoder; + } else if (IndexInfo.contains(prebuiltIndex)) { + IndexInfo info = IndexInfo.get(prebuiltIndex); + args.encoder = info.model.substring(0, info.model.indexOf(" w/ HNSW")); + } + return args; + } + public Map getDocument(String docid) { try { - SimpleSearcher searcher = new SimpleSearcher(indexDir); - String raw = searcher.doc(docid).get(Constants.RAW); + if (!isHnswIndex) { + SimpleSearcher searcher = new SimpleSearcher(indexDir); + String raw = searcher.doc(docid).get(Constants.RAW); + Map candidate = new LinkedHashMap<>(); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } + searcher.close(); + return candidate; + } else { + return getHnswDocument(docid); + } + } catch (Exception e) { + e.printStackTrace(); + return Map.of(); + } + } + + private Map getHnswDocument(String docid) { + try { + HnswDenseSearcher searcher = new HnswDenseSearcher(createHnswArgs(null, null, null)); + String raw = searcher.search(docid, 1)[0].lucene_document.get(Constants.RAW); Map candidate = new LinkedHashMap<>(); if (raw != null) { JsonNode rootNode = mapper.readTree(raw); @@ -103,5 +198,5 @@ public Map getDocument(String docid) { return Map.of(); } } - + } \ No newline at end of file diff --git a/src/test/java/io/anserini/server/ControllerTest.java b/src/test/java/io/anserini/server/ControllerTest.java index 21c2be59da..0c40908fef 100644 --- a/src/test/java/io/anserini/server/ControllerTest.java +++ b/src/test/java/io/anserini/server/ControllerTest.java @@ -33,7 +33,7 @@ public class ControllerTest { public void testSearch() throws Exception { ControllerV1_0 controller = new ControllerV1_0(); - Map 
results = controller.searchIndex(null, "Albert Einstein", 10, ""); + Map results = controller.searchIndex(null, "Albert Einstein", 10, "", null, null, null); assertNotNull(results); assertTrue(results.get("candidates") instanceof List); @@ -48,7 +48,7 @@ public void testIndexNotFound() throws Exception { ControllerV1_0 controller = new ControllerV1_0(); assertThrows(RuntimeException.class, () -> { - Map results = controller.searchIndex("nonexistent-index", "Albert Einstein", 10, ""); + Map results = controller.searchIndex("nonexistent-index", "Albert Einstein", 10, "", null, null, null); }); } From f77f1bc0f245af9964aead67acb40d0405ba5dac Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Thu, 23 Jan 2025 12:17:22 -0500 Subject: [PATCH 02/18] use primitive refactor method --- .../io/anserini/server/ControllerV1_0.java | 2 +- .../io/anserini/server/SearchService.java | 45 +++++++++---------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 8055d36d18..2509e5f03f 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -40,7 +40,7 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, - @RequestParam(value = "efSearch", required = false) Integer efSearch, + @RequestParam(value = "efSearch", defaultValue = "100") int efSearch, @RequestParam(value = "encoder", required = false) String encoder, @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 38b7698969..70cbd9a88a 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ 
b/src/main/java/io/anserini/server/SearchService.java @@ -84,7 +84,7 @@ public List> search(String query, int hits) { searcher.close(); return candidates; } else { - return searchHnsw(query, hits, null, null, null); + return searchHnsw(query, hits, 0, null, null); } } catch (Exception e) { e.printStackTrace(); @@ -93,7 +93,7 @@ public List> search(String query, int hits) { } public List> search(String query, int hits, - Integer efSearch, String encoder, String queryGenerator) { + int efSearch, String encoder, String queryGenerator) { if (!isHnswIndex) { // Ignore HNSW parameters for BM25 indexes return search(query, hits); @@ -102,7 +102,7 @@ public List> search(String query, int hits, } private List> searchHnsw(String query, int hits, - Integer efSearch, String encoder, String queryGenerator) { + int efSearch, String encoder, String queryGenerator) { try { HnswDenseSearcher.Args args = createHnswArgs(efSearch, encoder, queryGenerator); HnswDenseSearcher searcher = new HnswDenseSearcher(args); @@ -133,10 +133,10 @@ private List> searchHnsw(String query, int hits, } } - private HnswDenseSearcher.Args createHnswArgs(Integer efSearch, String encoder, String queryGenerator) { + private HnswDenseSearcher.Args createHnswArgs(int efSearch, String encoder, String queryGenerator) { HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; - args.efSearch = efSearch != null ? efSearch : DEFAULT_EF_SEARCH; + args.efSearch = efSearch; args.queryGenerator = queryGenerator != null ? 
queryGenerator : DEFAULT_QUERY_GENERATOR; // Attempt to get encoder from IndexInfo, or use provided encoder @@ -151,25 +151,24 @@ private HnswDenseSearcher.Args createHnswArgs(Integer efSearch, String encoder, public Map getDocument(String docid) { try { - if (!isHnswIndex) { - SimpleSearcher searcher = new SimpleSearcher(indexDir); - String raw = searcher.doc(docid).get(Constants.RAW); - Map candidate = new LinkedHashMap<>(); - if (raw != null) { - JsonNode rootNode = mapper.readTree(raw); - Map content = mapper.convertValue(rootNode, Map.class); - content.remove("docid"); - content.remove("id"); - content.remove("_id"); - candidate.put("doc", content); - } else { - candidate.put("doc", null); - } - searcher.close(); - return candidate; - } else { + if (isHnswIndex) { return getHnswDocument(docid); } + SimpleSearcher searcher = new SimpleSearcher(indexDir); + String raw = searcher.doc(docid).get(Constants.RAW); + Map candidate = new LinkedHashMap<>(); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } + searcher.close(); + return candidate; } catch (Exception e) { e.printStackTrace(); return Map.of(); @@ -178,7 +177,7 @@ public Map getDocument(String docid) { private Map getHnswDocument(String docid) { try { - HnswDenseSearcher searcher = new HnswDenseSearcher(createHnswArgs(null, null, null)); + HnswDenseSearcher searcher = new HnswDenseSearcher(createHnswArgs(DEFAULT_EF_SEARCH, null, null)); String raw = searcher.search(docid, 1)[0].lucene_document.get(Constants.RAW); Map candidate = new LinkedHashMap<>(); if (raw != null) { From 1ed836e44129a241b239d1c086b2f123766ca7ba Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Thu, 23 Jan 2025 13:29:43 -0500 Subject: [PATCH 03/18] checks --- .../io/anserini/server/ControllerV1_0.java | 19 
+++++++++++++++++-- .../io/anserini/server/SearchService.java | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 2509e5f03f..6ad63f2a14 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -28,11 +28,20 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RestController; +import org.springframework.http.HttpStatus; +import org.springframework.web.bind.annotation.ResponseStatus; +import org.springframework.web.bind.annotation.ExceptionHandler; @RestController @RequestMapping(path = "/api/v1.0") public class ControllerV1_0 { + @ResponseStatus(HttpStatus.BAD_REQUEST) + @ExceptionHandler(IllegalArgumentException.class) + public Map handleIllegalArgumentException(IllegalArgumentException ex) { + return Map.of("error", ex.getMessage()); + } + private static final String DEFAULT_INDEX = "msmarco-v1-passage"; @RequestMapping(method = RequestMethod.GET, path = {"/indexes/{index}/search", "/search"}) @@ -41,8 +50,8 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, @RequestParam(value = "efSearch", defaultValue = "100") int efSearch, - @RequestParam(value = "encoder", required = false) String encoder, - @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { + @RequestParam(value = "encoder", required = true) String encoder, + @RequestParam(value = "queryGenerator", required = true) String queryGenerator) { if (index == null) { index = DEFAULT_INDEX; @@ -52,6 +61,12 @@ public Map searchIndex(@PathVariable(value = "index", required = throw new IllegalArgumentException("Index " + index + " 
not found!"); } + if (index.contains(".hnsw")) { + if (encoder == null || queryGenerator == null) { + throw new IllegalArgumentException("HNSW indexes require both 'encoder' and 'queryGenerator' parameters"); + } + } + SearchService searchService = new SearchService(index); List> candidates = searchService.search(query, hits, efSearch, encoder, queryGenerator); diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 70cbd9a88a..ddf4462577 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -84,7 +84,7 @@ public List> search(String query, int hits) { searcher.close(); return candidates; } else { - return searchHnsw(query, hits, 0, null, null); + return searchHnsw(query, hits, DEFAULT_EF_SEARCH, null, null); } } catch (Exception e) { e.printStackTrace(); From 37597ff91da54acb92b053016d6f611a973878ef Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Thu, 23 Jan 2025 13:42:55 -0500 Subject: [PATCH 04/18] don't require parms --- src/main/java/io/anserini/server/ControllerV1_0.java | 10 ++++------ src/main/java/io/anserini/server/SearchService.java | 11 ++--------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 6ad63f2a14..0bfd190268 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -50,8 +50,8 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, @RequestParam(value = "efSearch", defaultValue = "100") int efSearch, - @RequestParam(value = "encoder", required = true) String encoder, - @RequestParam(value = "queryGenerator", required = true) String queryGenerator) { + @RequestParam(value = 
"encoder", required = false) String encoder, + @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { if (index == null) { index = DEFAULT_INDEX; @@ -61,10 +61,8 @@ public Map searchIndex(@PathVariable(value = "index", required = throw new IllegalArgumentException("Index " + index + " not found!"); } - if (index.contains(".hnsw")) { - if (encoder == null || queryGenerator == null) { - throw new IllegalArgumentException("HNSW indexes require both 'encoder' and 'queryGenerator' parameters"); - } + if (index.contains(".hnsw") && (encoder == null || queryGenerator == null)) { + throw new IllegalArgumentException("HNSW indexes require both 'encoder' and 'queryGenerator' parameters"); } SearchService searchService = new SearchService(index); diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index ddf4462577..44e327da45 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -137,15 +137,8 @@ private HnswDenseSearcher.Args createHnswArgs(int efSearch, String encoder, Stri HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; args.efSearch = efSearch; - args.queryGenerator = queryGenerator != null ? 
queryGenerator : DEFAULT_QUERY_GENERATOR; - - // Attempt to get encoder from IndexInfo, or use provided encoder - if (encoder != null) { - args.encoder = encoder; - } else if (IndexInfo.contains(prebuiltIndex)) { - IndexInfo info = IndexInfo.get(prebuiltIndex); - args.encoder = info.model.substring(0, info.model.indexOf(" w/ HNSW")); - } + args.encoder = encoder; + args.queryGenerator = queryGenerator; return args; } From 7e0e1db67f17526aa2aac309031b467d707329f9 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Thu, 23 Jan 2025 15:31:58 -0500 Subject: [PATCH 05/18] Add settings toggle, remove doc return attempt for HNSW, add parameter override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove redundant document content handling from HNSW searches (decided not to return doc contents, since we would have to ‘load twice’ in a sense). - Simplify HNSW search to only return docids and scores (above) - Add GET/POST /indexes/{index}/settings endpoints for managing search parameters - Add parameter override storage with a fallback chain (request → override → default) - Need to finish tagging IndexInfo --- .../java/io/anserini/index/IndexInfo.java | 154 ++++++++++++++--- .../io/anserini/server/ControllerV1_0.java | 117 ++++++++++--- .../io/anserini/server/SearchService.java | 161 ++++++++++-------- 3 files changed, 312 insertions(+), 120 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 0b2506fc47..803e288022 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -25,7 +25,14 @@ public enum IndexInfo { "BM25", new String[] { "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz" }, +<<<<<<< Updated upstream "cfe14d543c6a27f4d742fb2d0099b8e0"), +======= + "cfe14d543c6a27f4d742fb2d0099b8e0", + IndexType.bm25, + "", + ""), +>>>>>>> Stashed changes // 
MS MARCO V1 MSMARCO_V1_PASSAGE("msmarco-v1-passage", @@ -36,7 +43,10 @@ public enum IndexInfo { "BM25", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" }, - "678876e8c99a89933d553609a0fd8793"), + "678876e8c99a89933d553609a0fd8793", + IndexType.bm25, + "", + ""), MSMARCO_V1_PASSAGE_SPLADE_PP_ED("msmarco-v1-passage.splade-pp-ed", "Lucene impact index of the MS MARCO V1 passage corpus encoded by SPLADE++ CoCondenser-EnsembleDistil.", @@ -46,7 +56,10 @@ public enum IndexInfo { "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.splade-pp-ed.20230524.a59610.tar.gz" }, - "2c008fc36131e27966a72292932358e6"), + "2c008fc36131e27966a72292932358e6", + IndexType.flat, + "SpladePpEd", + "VectorQueryGenerator"), MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW("msmarco-v1-passage.cosdpr-distil.hnsw", "Lucene HNSW index of the MS MARCO V1 passage corpus encoded by cos-DPR Distil.", @@ -56,7 +69,14 @@ public enum IndexInfo { "cosDPR-distil w/ HNSW fp32", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, +<<<<<<< Updated upstream "df4c60fa1f3804fa409499824d12d035"), +======= + "df4c60fa1f3804fa409499824d12d035", + IndexType.hnsw, + "CosDprDistil", + "VectorQueryGenerator"), +>>>>>>> Stashed changes MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW_INT8("msmarco-v1-passage.cosdpr-distil.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by cos-DPR Distil.", @@ -66,7 +86,10 @@ public enum IndexInfo { "cosDPR-distil w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, - "119124ad358bb81e6a203b04d1b99a9c"), + "119124ad358bb81e6a203b04d1b99a9c", + IndexType.hnsw, + "CosDprDistil", + "VectorQueryGenerator"), 
MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW("msmarco-v1-passage.bge-base-en-v1.5.hnsw", "Lucene HNSW index of the MS MARCO V1 passage corpus encoded by BGE-base-en-v1.5.", @@ -76,7 +99,10 @@ public enum IndexInfo { "bge-base-en-v1.5 w/ HNSW fp32", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, - "00a577f689d90f95e6c5611438b0af3d"), + "00a577f689d90f95e6c5611438b0af3d", + IndexType.hnsw, + "BgeBaseEn15", + "VectorQueryGenerator"), MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW_INT8("msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by BGE-base-en-v1.5.", @@ -86,7 +112,10 @@ public enum IndexInfo { "bge-base-en-v1.5 w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, - "7830712459cf124c96fd058bb0a405b7"), + "7830712459cf124c96fd058bb0a405b7", + IndexType.hnsw, + "BgeBaseEn15", + "VectorQueryGenerator"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw", "Lucene HNSW index of the MS MARCO V1 passage corpus encoded by Cohere embed-english-v3.0.", @@ -96,7 +125,10 @@ public enum IndexInfo { "cohere-embed-english-v3.0 w/ HNSW fp32", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, - "c7294ca988ae1b812d427362ffca1ee2"), + "c7294ca988ae1b812d427362ffca1ee2", + IndexType.hnsw, + "CohereEmbedEnglish30", + "VectorQueryGenerator"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW_INT8("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by Cohere embed-english-v3.0.", @@ -106,7 +138,10 @@ public enum IndexInfo { "cohere-embed-english-v3.0 w/ HNSW fp32", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, - "dbaca578cc8495f504cdd0a7187f4c36"), + "dbaca578cc8495f504cdd0a7187f4c36", + IndexType.hnsw, + "CohereEmbedEnglish30", + "VectorQueryGenerator"), // MS MARCO V2 MSMARCO_V2_PASSAGE("msmarco-v2-passage", @@ -117,7 +152,10 @@ public enum IndexInfo { "BM25", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage.20220808.4d6d2a.tar.gz" }, - "eacd8556dd416ccad517b5e7dc97bceb"), + "eacd8556dd416ccad517b5e7dc97bceb", + IndexType.bm25, + "", + ""), MSMARCO_V2_DOC("msmarco-v2-doc", "Lucene index of the MS MARCO V2 document corpus.", @@ -127,7 +165,10 @@ public enum IndexInfo { "BM25", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc.20220808.4d6d2a.tar.gz" }, - "0599bd6ed5ee28390b279eb398ef0267"), + "0599bd6ed5ee28390b279eb398ef0267", + IndexType.flat, + "", + ""), MSMARCO_V2_DOC_SEGMENTED("msmarco-v2-doc-segmented", "Lucene index of the MS MARCO V2 segmented document corpus.", @@ -137,7 +178,10 @@ public enum IndexInfo { "BM25", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.tar.gz" }, - "8a5f444fa5a63cc5d4ddc3e6dd15faa0"), + "8a5f444fa5a63cc5d4ddc3e6dd15faa0", + IndexType.bm25, + "", + ""), MSMARCO_V21_DOC("msmarco-v2.1-doc", "Lucene index of the MS MARCO V2.1 document corpus.", @@ -147,7 +191,10 @@ public enum IndexInfo { "BM25", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc.20240418.4f9675.tar.gz" }, - "cecd55856c34afa82f1a499705c9df02"), + "cecd55856c34afa82f1a499705c9df02", + IndexType.bm25, + "", + ""), MSMARCO_V21_DOC_SEGMENTED("msmarco-v2.1-doc-segmented", "Lucene index of the MS MARCO V2.1 segmented document corpus.", @@ -157,7 +204,10 @@ public enum IndexInfo { "BM25", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc-segmented.20240418.4f9675.tar.gz" }, - "6ec4cd595c9fe1ad91b43eabb39a637c"), + "6ec4cd595c9fe1ad91b43eabb39a637c", + IndexType.bm25, + "", + ""), MSMARCO_V21_DOC_SEGMENTED_SHARD00_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard00) encoded by Snowflake's arctic-embed-l model.", @@ -167,7 +217,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.20250114.4884f5.tar.gz" }, - "aab3f8e9aa0563bd0f875584784a0845"), + "aab3f8e9aa0563bd0f875584784a0845", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD01_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard01) encoded by Snowflake's arctic-embed-l model.", @@ -177,7 +230,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.20250114.4884f5.tar.gz" }, - "34ea30fe72c2bc1795ae83e71b191547"), + "34ea30fe72c2bc1795ae83e71b191547", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD02_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard02) encoded by Snowflake's arctic-embed-l model.", @@ -187,7 +243,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.20250114.4884f5.tar.gz" }, - "b6271d6db65119977491675f74f466d5"), + "b6271d6db65119977491675f74f466d5", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD03_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard03) encoded by Snowflake's arctic-embed-l model.", @@ -197,7 +256,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.20250114.4884f5.tar.gz" }, - "a9cd644eb6037f67d2e9c06a8f60928d"), + "a9cd644eb6037f67d2e9c06a8f60928d", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD04_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard04) encoded by Snowflake's arctic-embed-l model.", @@ -207,7 +269,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.20250114.4884f5.tar.gz" }, - "07b7e451e0525d01c1f1f2b1c42b1bd5"), + "07b7e451e0525d01c1f1f2b1c42b1bd5", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD05_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard05) encoded by Snowflake's arctic-embed-l model.", @@ -217,7 +282,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.20250114.4884f5.tar.gz" }, - "2573dce175788981be2f266ebb33c96d"), + "2573dce175788981be2f266ebb33c96d", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD06_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard06) encoded by Snowflake's arctic-embed-l model.", @@ -227,7 +295,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.20250114.4884f5.tar.gz" }, - "a644aea445a8b78cc9e99d2ce111ff11"), + "a644aea445a8b78cc9e99d2ce111ff11", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD07_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard07) encoded by Snowflake's arctic-embed-l model.", @@ -237,7 +308,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.20250114.4884f5.tar.gz" }, - "402d37deccb44b5fc105049889e8aaea"), + "402d37deccb44b5fc105049889e8aaea", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD08_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard08) encoded by Snowflake's arctic-embed-l model.", @@ -247,7 +321,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.20250114.4884f5.tar.gz" }, - "89ebcd027f7297b26a1edc8ae5726527"), + "89ebcd027f7297b26a1edc8ae5726527", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), MSMARCO_V21_DOC_SEGMENTED_SHARD09_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard09) encoded by Snowflake's arctic-embed-l model.", @@ -257,7 +334,10 @@ public enum IndexInfo { "Snowflake's arctic-embed-l w/ HNSW int8", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.20250114.4884f5.tar.gz" }, - "5e580bb7eb9ee2bb6bfa492b3430c17d"), + "5e580bb7eb9ee2bb6bfa492b3430c17d", + IndexType.hnsw, + "ArcticEmbedL", + "VectorQueryGenerator"), // BEIR: flat BEIR_V1_0_0_TREC_COVID_FLAT("beir-v1.0.0-trec-covid.flat", @@ -267,7 +347,9 @@ public enum IndexInfo { "BEIR: trec-covid", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz" }, - "1aaf107b0787aa349deac92cb67d4230"), + "1aaf107b0787aa349deac92cb67d4230", + IndexType.flat, + BEIR_V1_0_0_BIOASQ_FLAT("beir-v1.0.0-bioasq.flat", "Lucene inverted 'flat' index of BEIR collection 'bioasq'.", @@ -1567,7 +1649,7 @@ public enum IndexInfo { "BEIR: scifact", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-scifact.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "556abd7e9fcffbf06057ce3111cf4cc5"); + "556abd7e9fcffbf06057ce3111cf4cc5",); public final String indexName; public final String description; @@ -1577,9 +1659,17 @@ public enum IndexInfo { public final String model; public final String[] urls; public final String md5; + public final IndexType indexType; + public enum IndexType { + 
flat, + hnsw, + bm25 + } + public final String encoder; + public final String queryGenerator; IndexInfo(String indexName, String description, String filename, String readme, String corpus, String model, - String[] urls, String md5) { + String[] urls, String md5, IndexType indexType, String encoder, String queryGenerator) { this.indexName = indexName; this.description = description; this.filename = filename; @@ -1588,6 +1678,9 @@ public enum IndexInfo { this.model = model; this.urls = urls; this.md5 = md5; + this.indexType = indexType; + this.encoder = encoder; + this.queryGenerator = queryGenerator; } public static boolean contains(String indexName) { @@ -1608,4 +1701,15 @@ public static IndexInfo get(String indexName) { throw new IllegalArgumentException("Index name " + indexName + " not found!"); } + public String getDefaultEncoder() { + return encoder; + } + + public String getDefaultQueryGenerator() { + return queryGenerator; + } + + public static final int DEFAULT_EF_SEARCH = 100; + public static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; + public static final String DEFAULT_ENCODER = "BgeBaseEn15"; } diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 0bfd190268..799f15db42 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -22,6 +22,8 @@ import java.util.List; import java.util.LinkedHashMap; import java.util.Map; +import java.util.HashMap; +import java.util.concurrent.ConcurrentHashMap; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestParam; @@ -44,28 +46,26 @@ public Map handleIllegalArgumentException(IllegalArgumentExcepti private static final String DEFAULT_INDEX = "msmarco-v1-passage"; - @RequestMapping(method = RequestMethod.GET, path = {"/indexes/{index}/search", "/search"}) - public Map searchIndex(@PathVariable(value 
= "index", required = false) String index, + private final Map services = new ConcurrentHashMap<>(); + + private SearchService getOrCreateSearchService(String index) { + return services.computeIfAbsent(index, k -> new SearchService(k)); + } + + @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/search") + public Map searchIndex(@PathVariable("index") String index, @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, - @RequestParam(value = "efSearch", defaultValue = "100") int efSearch, + @RequestParam(value = "efSearch", required = false) Integer efSearch, @RequestParam(value = "encoder", required = false) String encoder, @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { - if (index == null) { - index = DEFAULT_INDEX; - } - if (!IndexInfo.contains(index)) { throw new IllegalArgumentException("Index " + index + " not found!"); } - if (index.contains(".hnsw") && (encoder == null || queryGenerator == null)) { - throw new IllegalArgumentException("HNSW indexes require both 'encoder' and 'queryGenerator' parameters"); - } - - SearchService searchService = new SearchService(index); + SearchService searchService = getOrCreateSearchService(index); List> candidates = searchService.search(query, hits, efSearch, encoder, queryGenerator); Map queryMap = new LinkedHashMap<>(); @@ -86,7 +86,7 @@ public Map getIndexStatus(@PathVariable("index") String index) { if (!IndexInfo.contains(index)) { throw new IllegalArgumentException("Index name " + index + " not found!"); } - + PrebuiltIndexHandler handler = new PrebuiltIndexHandler(index); handler.initialize(); return Map.of("cached", handler.checkIndexFileExist()); @@ -98,17 +98,90 @@ public Map> listIndexes() { Map> indexList = new LinkedHashMap<>(); for (IndexInfo index : indexes) { indexList.put(index.indexName, Map.of( - "indexName", index.indexName, - "description", 
index.description, - "filename", index.filename, - "corpus", index.corpus, - "model", index.model, - "urls", index.urls, - "md5", index.md5, - "cached", getIndexStatus(index.indexName).get("cached") - )); + "indexName", index.indexName, + "description", index.description, + "filename", index.filename, + "corpus", index.corpus, + "model", index.model, + "urls", index.urls, + "md5", index.md5, + "cached", getIndexStatus(index.indexName).get("cached"))); } return indexList; } + @RequestMapping(method = RequestMethod.POST, path = "/indexes/{index}/settings") + public Map updateIndexSettings( + @PathVariable("index") String index, + @RequestParam(value = "efSearch", required = false) String efSearch, + @RequestParam(value = "encoder", required = false) String encoder, + @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { + + if (!IndexInfo.contains(index)) { + throw new IllegalArgumentException("Index " + index + " not found!"); + } + + SearchService service = getOrCreateSearchService(index); + Map errors = new HashMap<>(); + + // Simple parameter handling + if (efSearch != null) { + try { + service.setEfSearchOverride(efSearch); + } catch (IllegalArgumentException e) { + errors.put("efSearch", e.getMessage()); + } + } + + if (encoder != null) { + try { + service.setEncoderOverride(encoder); + } catch (IllegalArgumentException e) { + errors.put("encoder", e.getMessage()); + } + } + + if (queryGenerator != null) { + try { + service.setQueryGeneratorOverride(queryGenerator); + } catch (IllegalArgumentException e) { + errors.put("queryGenerator", e.getMessage()); + } + } + + if (!errors.isEmpty()) { + return Map.of("status", "error", "errors", errors); + } + return Map.of("status", "success"); + } + + @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/settings") + public Map getIndexSettings(@PathVariable("index") String index) { + if (!IndexInfo.contains(index)) { + throw new IllegalArgumentException("Index " + index + " 
not found!"); + } + + SearchService service = getOrCreateSearchService(index); + + // Simple direct mapping of current values + Map settings = new HashMap<>(); + + Integer efSearch = service.getEfSearchOverride(); + if (efSearch != null) { + settings.put("efSearch", efSearch); + } + + String encoder = service.getEncoderOverride(); + if (encoder != null) { + settings.put("encoder", encoder); + } + + String queryGenerator = service.getQueryGeneratorOverride(); + if (queryGenerator != null) { + settings.put("queryGenerator", queryGenerator); + } + + return settings; + } + } \ No newline at end of file diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 44e327da45..41f4515bef 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -31,6 +31,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; public class SearchService { @@ -40,9 +41,7 @@ public class SearchService { private final float b = 0.4f; private final ObjectMapper mapper = new ObjectMapper(); private final boolean isHnswIndex; - - private static final int DEFAULT_EF_SEARCH = 100; - private static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; + private final Map indexOverrides = new ConcurrentHashMap<>(); public SearchService(String prebuiltIndex) { this.prebuiltIndex = prebuiltIndex; @@ -58,8 +57,23 @@ public SearchService(String prebuiltIndex) { } public List> search(String query, int hits) { + return search(query, hits, null, null, null); + } + + public List> search(String query, int hits, + Integer efSearch, String encoder, String queryGenerator) { try { + System.out.println("=== Search Parameters ==="); + System.out.println("Query: " + query); + System.out.println("Hits: " + hits); + System.out.println("EF Search: " + efSearch); + System.out.println("Encoder: " + encoder); 
+ System.out.println("Query Generator: " + queryGenerator); + System.out.println("Is HNSW Index: " + isHnswIndex); + System.out.println("Index Dir: " + indexDir); + if (!isHnswIndex) { + // Regular search with document contents SimpleSearcher searcher = new SimpleSearcher(indexDir); searcher.set_bm25(k1, b); ScoredDoc[] results = searcher.search(query, hits); @@ -84,69 +98,54 @@ public List> search(String query, int hits) { searcher.close(); return candidates; } else { - return searchHnsw(query, hits, DEFAULT_EF_SEARCH, null, null); - } - } catch (Exception e) { - e.printStackTrace(); - return List.of(); - } - } + // HNSW search - only return docids and scores + HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); + args.index = indexDir; - public List> search(String query, int hits, - int efSearch, String encoder, String queryGenerator) { - if (!isHnswIndex) { - // Ignore HNSW parameters for BM25 indexes - return search(query, hits); - } - return searchHnsw(query, hits, efSearch, encoder, queryGenerator); - } + args.efSearch = efSearch != null ? efSearch: getEfSearchOverride() != null ? 
getEfSearchOverride() : IndexInfo.DEFAULT_EF_SEARCH; - private List> searchHnsw(String query, int hits, - int efSearch, String encoder, String queryGenerator) { - try { - HnswDenseSearcher.Args args = createHnswArgs(efSearch, encoder, queryGenerator); - HnswDenseSearcher searcher = new HnswDenseSearcher(args); - ScoredDoc[] results = searcher.search(query, hits); - List> candidates = new ArrayList<>(); - for (ScoredDoc r : results) { - Map candidate = new LinkedHashMap<>(); - candidate.put("docid", r.docid); - candidate.put("score", r.score); - String raw = r.lucene_document.get(Constants.RAW); - if (raw != null) { - JsonNode rootNode = mapper.readTree(raw); - Map content = mapper.convertValue(rootNode, Map.class); - content.remove("docid"); - content.remove("id"); - content.remove("_id"); - candidate.put("doc", content); - } else { - candidate.put("doc", null); + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + args.encoder = encoder != null ? encoder: getEncoderOverride() != null ? getEncoderOverride(): indexInfo.getDefaultEncoder(); + + args.queryGenerator = queryGenerator != null ? queryGenerator: getQueryGeneratorOverride() != null ? getQueryGeneratorOverride(): indexInfo.getDefaultQueryGenerator(); + + System.out.println("=== HNSW Args ==="); + System.out.println("Index: " + args.index); + System.out.println("EF Search: " + args.efSearch); + System.out.println("Encoder: " + args.encoder); + System.out.println("Query Generator: " + args.queryGenerator); + + HnswDenseSearcher searcher = new HnswDenseSearcher<>(args); + System.out.println("Created HNSW searcher"); + + ScoredDoc[] results = searcher.search(query, hits); + System.out.println("Search completed, results: " + (results != null ? 
results.length : "null")); + + List> candidates = new ArrayList<>(); + if (results != null) { + for (ScoredDoc r : results) { + candidates.add(Map.of("docid", r.docid,"score", r.score)); + } } - candidates.add(candidate); + + searcher.close(); + return candidates; } - searcher.close(); - return candidates; } catch (Exception e) { + System.out.println("=== Search Error ==="); + System.out.println("Error type: " + e.getClass().getName()); + System.out.println("Error message: " + e.getMessage()); e.printStackTrace(); return List.of(); } } - private HnswDenseSearcher.Args createHnswArgs(int efSearch, String encoder, String queryGenerator) { - HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); - args.index = indexDir; - args.efSearch = efSearch; - args.encoder = encoder; - args.queryGenerator = queryGenerator; - return args; - } - public Map getDocument(String docid) { + if (isHnswIndex) { + throw new UnsupportedOperationException("Document retrieval not supported for HNSW indexes"); + } + try { - if (isHnswIndex) { - return getHnswDocument(docid); - } SimpleSearcher searcher = new SimpleSearcher(indexDir); String raw = searcher.doc(docid).get(Constants.RAW); Map candidate = new LinkedHashMap<>(); @@ -168,27 +167,43 @@ public Map getDocument(String docid) { } } - private Map getHnswDocument(String docid) { + // Simple getters with type casting + public Integer getEfSearchOverride() { + return (Integer) indexOverrides.get("efSearch"); + } + + public String getEncoderOverride() { + return (String) indexOverrides.get("encoder"); + } + + public String getQueryGeneratorOverride() { + return (String) indexOverrides.get("queryGenerator"); + } + + // Simple setters with basic validation + public void setEfSearchOverride(String value) { try { - HnswDenseSearcher searcher = new HnswDenseSearcher(createHnswArgs(DEFAULT_EF_SEARCH, null, null)); - String raw = searcher.search(docid, 1)[0].lucene_document.get(Constants.RAW); - Map candidate = new LinkedHashMap<>(); - if (raw 
!= null) { - JsonNode rootNode = mapper.readTree(raw); - Map content = mapper.convertValue(rootNode, Map.class); - content.remove("docid"); - content.remove("id"); - content.remove("_id"); - candidate.put("doc", content); - } else { - candidate.put("doc", null); + int efSearch = Integer.parseInt(value); + if (efSearch <= 0) { + throw new IllegalArgumentException("efSearch must be positive"); } - searcher.close(); - return candidate; - } catch (Exception e) { - e.printStackTrace(); - return Map.of(); + indexOverrides.put("efSearch", efSearch); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid efSearch value: " + value); + } + } + + public void setEncoderOverride(String value) { + if (value == null || value.trim().isEmpty()) { + throw new IllegalArgumentException("encoder cannot be empty"); + } + indexOverrides.put("encoder", value); + } + + public void setQueryGeneratorOverride(String value) { + if (value == null || value.trim().isEmpty()) { + throw new IllegalArgumentException("queryGenerator cannot be empty"); } + indexOverrides.put("queryGenerator", value); } - } \ No newline at end of file From 4cbf066245c39c387aaa267b48c17f3a6f88c127 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Mon, 27 Jan 2025 21:23:32 -0500 Subject: [PATCH 06/18] use indexInfo settings --- .../java/io/anserini/server/SearchService.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 41f4515bef..7f9c4f09e6 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -99,15 +99,22 @@ public List> search(String query, int hits, return candidates; } else { // HNSW search - only return docids and scores + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; - args.efSearch 
= efSearch != null ? efSearch: getEfSearchOverride() != null ? getEfSearchOverride() : IndexInfo.DEFAULT_EF_SEARCH; + // Parameter precedence: explicit param > override > index default + args.efSearch = efSearch != null ? efSearch + : getEfSearchOverride() != null ? getEfSearchOverride() + : indexInfo.DEFAULT_EF_SEARCH; - IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); - args.encoder = encoder != null ? encoder: getEncoderOverride() != null ? getEncoderOverride(): indexInfo.getDefaultEncoder(); + args.encoder = encoder != null ? encoder + : getEncoderOverride() != null ? getEncoderOverride() + : indexInfo.getDefaultEncoder(); - args.queryGenerator = queryGenerator != null ? queryGenerator: getQueryGeneratorOverride() != null ? getQueryGeneratorOverride(): indexInfo.getDefaultQueryGenerator(); + args.queryGenerator = queryGenerator != null ? queryGenerator + : getQueryGeneratorOverride() != null ? getQueryGeneratorOverride() + : indexInfo.getDefaultQueryGenerator(); System.out.println("=== HNSW Args ==="); System.out.println("Index: " + args.index); From 75ba6e08680305c5b03ea32a321350b3c2777e05 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Tue, 28 Jan 2025 23:56:30 -0500 Subject: [PATCH 07/18] indexinfo --- .../java/io/anserini/index/IndexInfo.java | 1008 +++++++++++++---- 1 file changed, 805 insertions(+), 203 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 803e288022..f0ef1d4cd9 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -25,14 +25,11 @@ public enum IndexInfo { "BM25", new String[] { "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz" }, -<<<<<<< Updated upstream - "cfe14d543c6a27f4d742fb2d0099b8e0"), -======= "cfe14d543c6a27f4d742fb2d0099b8e0", - IndexType.bm25, + IndexType.BM25, + "", "", ""), ->>>>>>> Stashed changes // MS MARCO V1 
MSMARCO_V1_PASSAGE("msmarco-v1-passage", @@ -44,7 +41,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" }, "678876e8c99a89933d553609a0fd8793", - IndexType.bm25, + IndexType.BM25, + "", "", ""), @@ -57,9 +55,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.splade-pp-ed.20230524.a59610.tar.gz" }, "2c008fc36131e27966a72292932358e6", - IndexType.flat, - "SpladePpEd", - "VectorQueryGenerator"), + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW("msmarco-v1-passage.cosdpr-distil.hnsw", "Lucene HNSW index of the MS MARCO V1 passage corpus encoded by cos-DPR Distil.", @@ -69,14 +68,11 @@ public enum IndexInfo { "cosDPR-distil w/ HNSW fp32", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, -<<<<<<< Updated upstream - "df4c60fa1f3804fa409499824d12d035"), -======= "df4c60fa1f3804fa409499824d12d035", - IndexType.hnsw, + IndexType.DENSE_HNSW, "CosDprDistil", - "VectorQueryGenerator"), ->>>>>>> Stashed changes + "VectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW_INT8("msmarco-v1-passage.cosdpr-distil.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by cos-DPR Distil.", @@ -87,9 +83,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, "119124ad358bb81e6a203b04d1b99a9c", - IndexType.hnsw, + IndexType.DENSE_HNSW, "CosDprDistil", - "VectorQueryGenerator"), + "VectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW("msmarco-v1-passage.bge-base-en-v1.5.hnsw", "Lucene HNSW 
index of the MS MARCO V1 passage corpus encoded by BGE-base-en-v1.5.", @@ -100,9 +97,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, "00a577f689d90f95e6c5611438b0af3d", - IndexType.hnsw, + IndexType.DENSE_HNSW, "BgeBaseEn15", - "VectorQueryGenerator"), + "VectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW_INT8("msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by BGE-base-en-v1.5.", @@ -113,9 +111,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, "7830712459cf124c96fd058bb0a405b7", - IndexType.hnsw, + IndexType.DENSE_HNSW, "BgeBaseEn15", - "VectorQueryGenerator"), + "VectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw", "Lucene HNSW index of the MS MARCO V1 passage corpus encoded by Cohere embed-english-v3.0.", @@ -126,9 +125,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, "c7294ca988ae1b812d427362ffca1ee2", - IndexType.hnsw, - "CohereEmbedEnglish30", - "VectorQueryGenerator"), + IndexType.DENSE_HNSW, + "CohereEmbedEnglishV30", + "VectorQueryGenerator", + "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW_INT8("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V1 passage corpus encoded by Cohere embed-english-v3.0.", @@ -139,9 +139,10 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, "dbaca578cc8495f504cdd0a7187f4c36", - IndexType.hnsw, + IndexType.DENSE_HNSW, "CohereEmbedEnglish30", - "VectorQueryGenerator"), + "CohereEmbedEnglishV30", + "msmarco-v1-passage"), // MS MARCO V2 MSMARCO_V2_PASSAGE("msmarco-v2-passage", @@ -153,7 +154,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage.20220808.4d6d2a.tar.gz" }, "eacd8556dd416ccad517b5e7dc97bceb", - IndexType.bm25, + IndexType.BM25, + "", "", ""), @@ -166,7 +168,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc.20220808.4d6d2a.tar.gz" }, "0599bd6ed5ee28390b279eb398ef0267", - IndexType.flat, + IndexType.DENSE_FLAT, + "", "", ""), @@ -179,7 +182,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.tar.gz" }, "8a5f444fa5a63cc5d4ddc3e6dd15faa0", - IndexType.bm25, + IndexType.BM25, + "", "", ""), @@ -192,9 +196,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc.20240418.4f9675.tar.gz" }, "cecd55856c34afa82f1a499705c9df02", - IndexType.bm25, + IndexType.BM25, "", - ""), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc"), MSMARCO_V21_DOC_SEGMENTED("msmarco-v2.1-doc-segmented", "Lucene index of the MS MARCO V2.1 segmented document corpus.", @@ -205,9 +210,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc-segmented.20240418.4f9675.tar.gz" }, "6ec4cd595c9fe1ad91b43eabb39a637c", - IndexType.bm25, + IndexType.BM25, "", - ""), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), 
MSMARCO_V21_DOC_SEGMENTED_SHARD00_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard00) encoded by Snowflake's arctic-embed-l model.", @@ -218,9 +224,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.20250114.4884f5.tar.gz" }, "aab3f8e9aa0563bd0f875584784a0845", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD01_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard01) encoded by Snowflake's arctic-embed-l model.", @@ -231,9 +238,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.20250114.4884f5.tar.gz" }, "34ea30fe72c2bc1795ae83e71b191547", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD02_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard02) encoded by Snowflake's arctic-embed-l model.", @@ -244,9 +252,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.20250114.4884f5.tar.gz" }, "b6271d6db65119977491675f74f466d5", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), 
MSMARCO_V21_DOC_SEGMENTED_SHARD03_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard03) encoded by Snowflake's arctic-embed-l model.", @@ -257,9 +266,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.20250114.4884f5.tar.gz" }, "a9cd644eb6037f67d2e9c06a8f60928d", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD04_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard04) encoded by Snowflake's arctic-embed-l model.", @@ -270,9 +280,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.20250114.4884f5.tar.gz" }, "07b7e451e0525d01c1f1f2b1c42b1bd5", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD05_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard05) encoded by Snowflake's arctic-embed-l model.", @@ -283,9 +294,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.20250114.4884f5.tar.gz" }, "2573dce175788981be2f266ebb33c96d", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), 
MSMARCO_V21_DOC_SEGMENTED_SHARD06_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard06) encoded by Snowflake's arctic-embed-l model.", @@ -296,9 +308,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.20250114.4884f5.tar.gz" }, "a644aea445a8b78cc9e99d2ce111ff11", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD07_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard07) encoded by Snowflake's arctic-embed-l model.", @@ -309,9 +322,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.20250114.4884f5.tar.gz" }, "402d37deccb44b5fc105049889e8aaea", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD08_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard08) encoded by Snowflake's arctic-embed-l model.", @@ -322,9 +336,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.20250114.4884f5.tar.gz" }, "89ebcd027f7297b26a1edc8ae5726527", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), 
MSMARCO_V21_DOC_SEGMENTED_SHARD09_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.hnsw-int8", "Lucene quantized (int8) HNSW index of the MS MARCO V2.1 segmented document corpus (shard09) encoded by Snowflake's arctic-embed-l model.", @@ -335,9 +350,10 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.20250114.4884f5.tar.gz" }, "5e580bb7eb9ee2bb6bfa492b3430c17d", - IndexType.hnsw, + IndexType.DENSE_HNSW, "ArcticEmbedL", - "VectorQueryGenerator"), + "JsonInvertedDenseVectorQueryGenerator", + "msmarco-v2.1-doc-segmented"), // BEIR: flat BEIR_V1_0_0_TREC_COVID_FLAT("beir-v1.0.0-trec-covid.flat", @@ -348,8 +364,10 @@ public enum IndexInfo { "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz" }, "1aaf107b0787aa349deac92cb67d4230", - IndexType.flat, - + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_FLAT("beir-v1.0.0-bioasq.flat", "Lucene inverted 'flat' index of BEIR collection 'bioasq'.", @@ -358,7 +376,11 @@ public enum IndexInfo { "BEIR: bioasq", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz" }, - "12728b3629817d352322f18b0cb6199b"), + "12728b3629817d352322f18b0cb6199b", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_FLAT("beir-v1.0.0-nfcorpus.flat", "Lucene inverted 'flat' index of BEIR collection 'nfcorpus'.", @@ -367,7 +389,11 @@ public enum IndexInfo { "BEIR: nfcorpus", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz" }, - "eb7a6f1bb15071c2940bc50752d86626"), + 
"eb7a6f1bb15071c2940bc50752d86626", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_FLAT("beir-v1.0.0-nq.flat", "Lucene inverted 'flat' index of BEIR collection 'nq'.", @@ -376,7 +402,11 @@ public enum IndexInfo { "BEIR: nq", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.flat.20221116.505594.tar.gz" }, - "0ba1ef0412d8a0fb56b4a04ecb13ef0b"), + "0ba1ef0412d8a0fb56b4a04ecb13ef0b", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_FLAT("beir-v1.0.0-hotpotqa.flat", "Lucene inverted 'flat' index of BEIR collection 'hotpotqa'.", @@ -385,7 +415,11 @@ public enum IndexInfo { "BEIR: hotpotqa", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz" }, - "3f41d640a8ebbcad4f598140750c24f8"), + "3f41d640a8ebbcad4f598140750c24f8", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_FLAT("beir-v1.0.0-fiqa.flat", "Lucene inverted 'flat' index of BEIR collection 'fiqa'.", @@ -394,7 +428,11 @@ public enum IndexInfo { "BEIR: fiqa", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz" }, - "d98ee6ebfc234657ecbd04226e8a7849"), + "d98ee6ebfc234657ecbd04226e8a7849", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_FLAT("beir-v1.0.0-signal1m.flat", "Lucene inverted 'flat' index of BEIR collection 'signal1m'.", @@ -403,7 +441,11 @@ public enum IndexInfo { "BEIR: signal1m", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz" }, - "93d901916b473351fbc04fdf12c5ba4f"), + 
"93d901916b473351fbc04fdf12c5ba4f", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_FLAT("beir-v1.0.0-trec-news.flat", "Lucene inverted 'flat' index of BEIR collection 'trec-news'.", @@ -412,7 +454,11 @@ public enum IndexInfo { "BEIR: trec-news", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz" }, - "22e7752c3d0122c28013b33e5e2134ae"), + "22e7752c3d0122c28013b33e5e2134ae", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_FLAT("beir-v1.0.0-robust04.flat", "Lucene inverted 'flat' index of BEIR collection 'robust04'.", @@ -421,7 +467,11 @@ public enum IndexInfo { "BEIR: robust04", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz" }, - "d508fc770002a99a5dc3da3d0fa001b7"), + "d508fc770002a99a5dc3da3d0fa001b7", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_FLAT("beir-v1.0.0-arguana.flat", "Lucene inverted 'flat' index of BEIR collection 'arguana'.", @@ -430,7 +480,11 @@ public enum IndexInfo { "BEIR: arguana", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz" }, - "db59ef0cb74e9cfeac0ac735827381df"), + "db59ef0cb74e9cfeac0ac735827381df", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_FLAT("beir-v1.0.0-webis-touche2020.flat", "Lucene inverted 'flat' index of BEIR collection 'webis-touche2020'.", @@ -439,7 +493,11 @@ public enum IndexInfo { "BEIR: webis-touche2020", "BM25 'flat'", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz" }, - "f6419ddfd53c0bf1d76ea132b1c0c352"), + "f6419ddfd53c0bf1d76ea132b1c0c352", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_FLAT("beir-v1.0.0-cqadupstack-android.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-android'.", @@ -448,7 +506,11 @@ public enum IndexInfo { "BEIR: cqadupstack-android", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz" }, - "443e413b49c39de43a6cece96a7513c0"), + "443e413b49c39de43a6cece96a7513c0", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_FLAT("beir-v1.0.0-cqadupstack-english.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-english'.", @@ -457,7 +519,11 @@ public enum IndexInfo { "BEIR: cqadupstack-english", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz" }, - "f7db543f5bb56fa98c3c14224c6b96f2"), + "f7db543f5bb56fa98c3c14224c6b96f2", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_FLAT("beir-v1.0.0-cqadupstack-gaming.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-gaming'.", @@ -466,7 +532,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gaming", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz" }, - "775169fd863d3e91076e1905799456ea"), + "775169fd863d3e91076e1905799456ea", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", 
+ "beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_FLAT("beir-v1.0.0-cqadupstack-gis.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-gis'.", @@ -475,7 +545,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gis", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz" }, - "4c5be1c7026a61ca7866b4f28cac91fe"), + "4c5be1c7026a61ca7866b4f28cac91fe", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_FLAT("beir-v1.0.0-cqadupstack-mathematica.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-mathematica'.", @@ -484,7 +558,11 @@ public enum IndexInfo { "BEIR: cqadupstack-mathematica", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz" }, - "43e2b33db7ecadc041165005aa5d4b6f"), + "43e2b33db7ecadc041165005aa5d4b6f", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_FLAT("beir-v1.0.0-cqadupstack-physics.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-physics'.", @@ -493,7 +571,11 @@ public enum IndexInfo { "BEIR: cqadupstack-physics", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz" }, - "765b8013595962e01600f4f851e8f16d"), + "765b8013595962e01600f4f851e8f16d", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_FLAT("beir-v1.0.0-cqadupstack-programmers.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-programmers'.", @@ -502,7 +584,11 @@ public enum IndexInfo { "BEIR: 
cqadupstack-programmers", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz" }, - "aa4fc9f29a0436a6e0942656274ceaf5"), + "aa4fc9f29a0436a6e0942656274ceaf5", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_FLAT("beir-v1.0.0-cqadupstack-stats.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-stats'.", @@ -511,7 +597,11 @@ public enum IndexInfo { "BEIR: cqadupstack-stats", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz" }, - "d56538f56d982ce09961d4b680bd4dc5"), + "d56538f56d982ce09961d4b680bd4dc5", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_FLAT("beir-v1.0.0-cqadupstack-tex.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-tex'.", @@ -520,7 +610,11 @@ public enum IndexInfo { "BEIR: cqadupstack-tex", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz" }, - "36825b8428aa34fdaad7e420e120c101"), + "36825b8428aa34fdaad7e420e120c101", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_FLAT("beir-v1.0.0-cqadupstack-unix.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-unix'.", @@ -529,7 +623,11 @@ public enum IndexInfo { "BEIR: cqadupstack-unix", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz" }, - "961e386016c7eb7afa2bc26feb96902c"), + "961e386016c7eb7afa2bc26feb96902c", + IndexType.DENSE_FLAT, + "BM25", + 
"InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_FLAT("beir-v1.0.0-cqadupstack-webmasters.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-webmasters'.", @@ -538,7 +636,11 @@ public enum IndexInfo { "BEIR: cqadupstack-webmasters", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz" }, - "f31625436dc6efc24b9c2ae1b0f2364e"), + "f31625436dc6efc24b9c2ae1b0f2364e", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_FLAT("beir-v1.0.0-cqadupstack-wordpress.flat", "Lucene inverted 'flat' index of BEIR collection 'cqadupstack-wordpress'.", @@ -547,7 +649,11 @@ public enum IndexInfo { "BEIR: cqadupstack-wordpress", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz" }, - "5a0035fbb6ccabd20fe0eed742dce0d0"), + "5a0035fbb6ccabd20fe0eed742dce0d0", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_FLAT("beir-v1.0.0-quora.flat", "Lucene inverted 'flat' index of BEIR collection 'quora'.", @@ -556,7 +662,11 @@ public enum IndexInfo { "BEIR: quora", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.flat.20221116.505594.tar.gz" }, - "48c95c2da43e24cc603695d3e6bfd779"), + "48c95c2da43e24cc603695d3e6bfd779", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_FLAT("beir-v1.0.0-dbpedia-entity.flat", "Lucene inverted 'flat' index of BEIR collection 'dbpedia-entity'.", @@ -565,7 +675,11 @@ public enum IndexInfo { "BEIR: dbpedia-entity", "BM25 'flat'", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz" }, - "8ac66272fde08ff10491dc0ec52f17e2"), + "8ac66272fde08ff10491dc0ec52f17e2", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_FLAT("beir-v1.0.0-scidocs.flat", "Lucene inverted 'flat' index of BEIR collection 'scidocs'.", @@ -574,7 +688,11 @@ public enum IndexInfo { "BEIR: scidocs", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz" }, - "9555ecc5da399a73956d9302a98420fc"), + "9555ecc5da399a73956d9302a98420fc", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_FLAT("beir-v1.0.0-fever.flat", "Lucene inverted 'flat' index of BEIR collection 'fever'.", @@ -583,7 +701,11 @@ public enum IndexInfo { "BEIR: fever", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.flat.20221116.505594.tar.gz" }, - "30b5a338f9f16669ed3dae3bae4e7b32"), + "30b5a338f9f16669ed3dae3bae4e7b32", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fever"), BEIR_V1_0_0_CLIMATE_FEVER_FLAT("beir-v1.0.0-climate-fever.flat", "Lucene inverted 'flat' index of BEIR collection 'climate-fever'.", @@ -592,7 +714,11 @@ public enum IndexInfo { "BEIR: climate-fever", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz" }, - "6e7101f4a5c241ba263bb6a826049826"), + "6e7101f4a5c241ba263bb6a826049826", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_FLAT("beir-v1.0.0-scifact.flat", "Lucene inverted 'flat' index of BEIR collection 'scifact'.", @@ -601,7 +727,11 @@ public enum 
IndexInfo { "BEIR: scifact", "BM25 'flat'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz" }, - "59777038fe0539e600658591e322ea57"), + "59777038fe0539e600658591e322ea57", + IndexType.DENSE_FLAT, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scifact"), // BEIR: multifield BEIR_V1_0_0_TREC_COVID_MULTIFIELD("beir-v1.0.0-trec-covid.multifield", @@ -611,7 +741,11 @@ public enum IndexInfo { "BEIR: trec-covid", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz" }, - "0439617a927a33727c7b592bd436d8d6"), + "0439617a927a33727c7b592bd436d8d6", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_MULTIFIELD("beir-v1.0.0-bioasq.multifield", "Lucene inverted 'multifield' index of BEIR collection 'bioasq'.", @@ -620,7 +754,11 @@ public enum IndexInfo { "BEIR: bioasq", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz" }, - "b2f4fed18b04414193f8368b6891e19c"), + "b2f4fed18b04414193f8368b6891e19c", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_MULTIFIELD("beir-v1.0.0-nfcorpus.multifield", "Lucene inverted 'multifield' index of BEIR collection 'nfcorpus'.", @@ -629,7 +767,11 @@ public enum IndexInfo { "BEIR: nfcorpus", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz" }, - "85cdcceaf06c482ab6a60c34c06c0448"), + "85cdcceaf06c482ab6a60c34c06c0448", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_MULTIFIELD("beir-v1.0.0-nq.multifield",
"Lucene inverted 'multifield' index of BEIR collection 'nq'.", @@ -638,7 +780,11 @@ public enum IndexInfo { "BEIR: nq", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz" }, - "73b3e3c49c2d79a2851c1ba85f8fbbdf"), + "73b3e3c49c2d79a2851c1ba85f8fbbdf", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_MULTIFIELD("beir-v1.0.0-hotpotqa.multifield", "Lucene inverted 'multifield' index of BEIR collection 'hotpotqa'.", @@ -647,7 +793,11 @@ public enum IndexInfo { "BEIR: hotpotqa", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz" }, - "1d9f75122d4b50cb33cccaa125640a38"), + "1d9f75122d4b50cb33cccaa125640a38", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_MULTIFIELD("beir-v1.0.0-fiqa.multifield", "Lucene inverted 'multifield' index of BEIR collection 'fiqa'.", @@ -656,7 +806,11 @@ public enum IndexInfo { "BEIR: fiqa", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz" }, - "1c9330baf3d9004ae46778d4d9e039f6"), + "1c9330baf3d9004ae46778d4d9e039f6", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_MULTIFIELD("beir-v1.0.0-signal1m.multifield", "Lucene inverted 'multifield' index of BEIR collection 'signal1m'.", @@ -665,7 +819,11 @@ public enum IndexInfo { "BEIR: signal1m", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz" }, - "0735de4f103330975d206285ea85aaf5"), + "0735de4f103330975d206285ea85aaf5", + IndexType.BM25_MULTIFIELDS, + 
"BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_MULTIFIELD("beir-v1.0.0-trec-news.multifield", "Lucene inverted 'multifield' index of BEIR collection 'trec-news'.", @@ -674,7 +832,11 @@ public enum IndexInfo { "BEIR: trec-news", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz" }, - "a7b5bd79d22d3631dffcad2ffa8afd0a"), + "a7b5bd79d22d3631dffcad2ffa8afd0a", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_MULTIFIELD("beir-v1.0.0-robust04.multifield", "Lucene inverted 'multifield' index of BEIR collection 'robust04'.", @@ -683,7 +845,11 @@ public enum IndexInfo { "BEIR: robust04", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz" }, - "49db6bf123b6224d0e0973a16ff9c243"), + "49db6bf123b6224d0e0973a16ff9c243", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_MULTIFIELD("beir-v1.0.0-arguana.multifield", "Lucene inverted 'multifield' index of BEIR collection 'arguana'.", @@ -692,7 +858,11 @@ public enum IndexInfo { "BEIR: arguana", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz" }, - "895b0d78a1cc40222aaebcff10b6b929"), + "895b0d78a1cc40222aaebcff10b6b929", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_MULTIFIELD("beir-v1.0.0-webis-touche2020.multifield", "Lucene inverted 'multifield' index of BEIR collection 'webis-touche2020'.", @@ -701,7 +871,11 @@ public enum IndexInfo { "BEIR: webis-touche2020", "BM25 'multifield'", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz" }, - "390552c8b93dc95bf2f58808d1c8a37d"), + "390552c8b93dc95bf2f58808d1c8a37d", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_MULTIFIELD("beir-v1.0.0-cqadupstack-android.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-android'.", @@ -710,7 +884,11 @@ public enum IndexInfo { "BEIR: cqadupstack-android", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz" }, - "299fc8b542dabc241320db571b8f8ff0"), + "299fc8b542dabc241320db571b8f8ff0", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_MULTIFIELD("beir-v1.0.0-cqadupstack-english.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-english'.", @@ -719,7 +897,11 @@ public enum IndexInfo { "BEIR: cqadupstack-english", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz" }, - "5bb26ad0ba9184592b5ed935e65b5f17"), + "5bb26ad0ba9184592b5ed935e65b5f17", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_MULTIFIELD("beir-v1.0.0-cqadupstack-gaming.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-gaming'.", @@ -728,7 +910,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gaming", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz" }, - 
"90d1ae9a1862b8b96871b9b94cc46b4e"), + "90d1ae9a1862b8b96871b9b94cc46b4e", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_MULTIFIELD("beir-v1.0.0-cqadupstack-gis.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-gis'.", @@ -737,7 +923,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gis", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz" }, - "62869b2b6cf569424fed659adf1e5ea7"), + "62869b2b6cf569424fed659adf1e5ea7", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_MULTIFIELD("beir-v1.0.0-cqadupstack-mathematica.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-mathematica'.", @@ -746,7 +936,11 @@ public enum IndexInfo { "BEIR: cqadupstack-mathematica", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz" }, - "a78c9d2e29a4b727fbeb38e825629df5"), + "a78c9d2e29a4b727fbeb38e825629df5", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_MULTIFIELD("beir-v1.0.0-cqadupstack-physics.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-physics'.", @@ -755,7 +949,11 @@ public enum IndexInfo { "BEIR: cqadupstack-physics", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz" }, - "d6e60e2665c1b6f2bac021dc6c767393"), + "d6e60e2665c1b6f2bac021dc6c767393", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + 
"beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_MULTIFIELD("beir-v1.0.0-cqadupstack-programmers.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-programmers'.", @@ -764,7 +962,11 @@ public enum IndexInfo { "BEIR: cqadupstack-programmers", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz" }, - "77b54cd7613b555d80998b9744eef85c"), + "77b54cd7613b555d80998b9744eef85c", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_MULTIFIELD("beir-v1.0.0-cqadupstack-stats.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-stats'.", @@ -773,7 +975,11 @@ public enum IndexInfo { "BEIR: cqadupstack-stats", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz" }, - "8469917c70c767ea398ec2b93aaf04ca"), + "8469917c70c767ea398ec2b93aaf04ca", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_MULTIFIELD("beir-v1.0.0-cqadupstack-tex.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-tex'.", @@ -782,7 +988,11 @@ public enum IndexInfo { "BEIR: cqadupstack-tex", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz" }, - "4d0b0efb2579e0fd73b9156921580a00"), + "4d0b0efb2579e0fd73b9156921580a00", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_MULTIFIELD("beir-v1.0.0-cqadupstack-unix.multifield", "Lucene inverted 'multifield' index of BEIR collection 
'cqadupstack-unix'.", @@ -791,7 +1001,11 @@ public enum IndexInfo { "BEIR: cqadupstack-unix", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz" }, - "33e2510bb1414ca106766ae787e28670"), + "33e2510bb1414ca106766ae787e28670", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_MULTIFIELD("beir-v1.0.0-cqadupstack-webmasters.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-webmasters'.", @@ -800,7 +1014,11 @@ public enum IndexInfo { "BEIR: cqadupstack-webmasters", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz" }, - "cb16d3da34b6705747ec07ce89913457"), + "cb16d3da34b6705747ec07ce89913457", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_MULTIFIELD("beir-v1.0.0-cqadupstack-wordpress.multifield", "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-wordpress'.", @@ -809,7 +1027,11 @@ public enum IndexInfo { "BEIR: cqadupstack-wordpress", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" }, - "f619c003e2d0cf84794cc672e18e0437"), + "f619c003e2d0cf84794cc672e18e0437", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_MULTIFIELD("beir-v1.0.0-quora.multifield", "Lucene inverted 'multifield' index of BEIR collection 'quora'.", @@ -818,7 +1040,11 @@ public enum IndexInfo { "BEIR: quora", "BM25 'multifield'", new String[] {
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz" }, - "9248de265c88afc105231659d8c8be09"), + "9248de265c88afc105231659d8c8be09", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_MULTIFIELD("beir-v1.0.0-dbpedia-entity.multifield", "Lucene inverted 'multifield' index of BEIR collection 'dbpedia-entity'.", @@ -827,7 +1053,11 @@ public enum IndexInfo { "BEIR: dbpedia-entity", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz" }, - "b7f0ae30f045188a608cc87553cade37"), + "b7f0ae30f045188a608cc87553cade37", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_MULTIFIELD("beir-v1.0.0-scidocs.multifield", "Lucene inverted 'multifield' index of BEIR collection 'scidocs'.", @@ -836,7 +1066,11 @@ public enum IndexInfo { "BEIR: scidocs", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz" }, - "6409f5ec569530fc3240590dab59bc4c"), + "6409f5ec569530fc3240590dab59bc4c", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_MULTIFIELD("beir-v1.0.0-fever.multifield", "Lucene inverted 'multifield' index of BEIR collection 'fever'.", @@ -845,7 +1079,11 @@ public enum IndexInfo { "BEIR: fever", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz" }, - "841908da91e7e5eaa0d122faf1a486d8"), + "841908da91e7e5eaa0d122faf1a486d8", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fever"),
BEIR_V1_0_0_CLIMATE_FEVER_MULTIFIELD("beir-v1.0.0-climate-fever.multifield", "Lucene inverted 'multifield' index of BEIR collection 'climate-fever'.", @@ -854,7 +1092,11 @@ public enum IndexInfo { "BEIR: climate-fever", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz" }, - "2901ac443ca4f0df424a35d068905829"), + "2901ac443ca4f0df424a35d068905829", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_MULTIFIELD("beir-v1.0.0-scifact.multifield", "Lucene inverted 'multifield' index of BEIR collection 'scifact'.", @@ -863,7 +1105,11 @@ public enum IndexInfo { "BEIR: scifact", "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz" }, - "b40b26f44f68ab9aa4b573aafea27e2e"), + "b40b26f44f68ab9aa4b573aafea27e2e", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scifact"), // BEIR: SPLADE++ ED BEIR_V1_0_0_TREC_COVID_SPLADE_PP_ED("beir-v1.0.0-trec-covid.splade-pp-ed", @@ -873,7 +1119,11 @@ public enum IndexInfo { "BEIR: trec-covid", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "e808ff9d4a1f45de9f0bc292900302b4"), + "e808ff9d4a1f45de9f0bc292900302b4", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_SPLADE_PP_ED("beir-v1.0.0-bioasq.splade-pp-ed", "Lucene impact index of BEIR collection 'bioasq' encoded by SPLADE++ EnsembleDistil", @@ -882,7 +1132,11 @@ public enum IndexInfo { "BEIR: bioasq", "SPLADE++ EnsembleDistil", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "fc661b2c2fa59e24f37c6dfa6de8e682"), + "fc661b2c2fa59e24f37c6dfa6de8e682", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_SPLADE_PP_ED("beir-v1.0.0-nfcorpus.splade-pp-ed", "Lucene impact index of BEIR collection 'nfcorpus' encoded by SPLADE++ EnsembleDistil", @@ -891,7 +1145,11 @@ public enum IndexInfo { "BEIR: nfcorpus", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "7d6e66cca9d2db8bb7caa3bdf330cdd8"), + "7d6e66cca9d2db8bb7caa3bdf330cdd8", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_SPLADE_PP_ED("beir-v1.0.0-nq.splade-pp-ed", "Lucene impact index of BEIR collection 'nq' encoded by SPLADE++ EnsembleDistil", @@ -900,7 +1158,11 @@ public enum IndexInfo { "BEIR: nq", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "a785d6636df60c861829507c3d806ee6"), + "a785d6636df60c861829507c3d806ee6", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_SPLADE_PP_ED("beir-v1.0.0-hotpotqa.splade-pp-ed", "Lucene impact index of BEIR collection 'hotpotqa' encoded by SPLADE++ EnsembleDistil", @@ -909,7 +1171,11 @@ public enum IndexInfo { "BEIR: hotpotqa", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "b280ed3f7b12034c0cc4b302f92801b9"), + "b280ed3f7b12034c0cc4b302f92801b9", + IndexType.SPLADE_PP_ED, + 
"SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_SPLADE_PP_ED("beir-v1.0.0-fiqa.splade-pp-ed", "Lucene impact index of BEIR collection 'fiqa' encoded by SPLADE++ EnsembleDistil", @@ -918,7 +1184,11 @@ public enum IndexInfo { "BEIR: fiqa", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "ea53103c695c0da6cea5b1c8353371b0"), + "ea53103c695c0da6cea5b1c8353371b0", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_SPLADE_PP_ED("beir-v1.0.0-signal1m.splade-pp-ed", "Lucene impact index of BEIR collection 'signal1m' encoded by SPLADE++ EnsembleDistil", @@ -927,7 +1197,11 @@ public enum IndexInfo { "BEIR: signal1m", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "0b46d71c97eabe9ca424f3ab9b2ddc64"), + "0b46d71c97eabe9ca424f3ab9b2ddc64", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_SPLADE_PP_ED("beir-v1.0.0-trec-news.splade-pp-ed", "Lucene impact index of BEIR collection 'trec-news' encoded by SPLADE++ EnsembleDistil", @@ -936,7 +1210,11 @@ public enum IndexInfo { "BEIR: trec-news", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "ef4fb032b632b80355db46549f08a026"), + "ef4fb032b632b80355db46549f08a026", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_SPLADE_PP_ED("beir-v1.0.0-robust04.splade-pp-ed", "Lucene impact index of BEIR collection 'robust04' 
encoded by SPLADE++ EnsembleDistil", @@ -945,7 +1223,11 @@ public enum IndexInfo { "BEIR: robust04", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "c1a6fd094bb9e34e69e10040d9b0ad2a"), + "c1a6fd094bb9e34e69e10040d9b0ad2a", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_SPLADE_PP_ED("beir-v1.0.0-arguana.splade-pp-ed", "Lucene impact index of BEIR collection 'arguana' encoded by SPLADE++ EnsembleDistil", @@ -954,7 +1236,11 @@ public enum IndexInfo { "BEIR: arguana", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "c2725b375ca53ff031ee8b4ba8501eb6"), + "c2725b375ca53ff031ee8b4ba8501eb6", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_SPLADE_PP_ED("beir-v1.0.0-webis-touche2020.splade-pp-ed", "Lucene impact index of BEIR collection 'webis-touche2020' encoded by SPLADE++ EnsembleDistil", @@ -963,7 +1249,11 @@ public enum IndexInfo { "BEIR: webis-touche2020", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "1abec77feeb741edfb3c9b7565b42964"), + "1abec77feeb741edfb3c9b7565b42964", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-android.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-android' encoded by SPLADE++ EnsembleDistil", @@ -972,7 +1262,11 @@ public enum IndexInfo { "BEIR: cqadupstack-android", "SPLADE++ 
EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "0b6b36417df9095e9ed32e4127bdd2fd"), + "0b6b36417df9095e9ed32e4127bdd2fd", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-english.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-english' encoded by SPLADE++ EnsembleDistil", @@ -981,7 +1275,11 @@ public enum IndexInfo { "BEIR: cqadupstack-english", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "f2a5f68523117638f957bcc353c956c1"), + "f2a5f68523117638f957bcc353c956c1", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-gaming.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-gaming' encoded by SPLADE++ EnsembleDistil", @@ -990,7 +1288,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gaming", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "804851ed2ca5c38464f28263fb664615"), + "804851ed2ca5c38464f28263fb664615", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-gis.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-gis' encoded by SPLADE++ EnsembleDistil", @@ -999,7 +1301,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gis", "SPLADE++ EnsembleDistil", new 
String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "ee53ba7f26e678f39c3db8997785169a"), + "ee53ba7f26e678f39c3db8997785169a", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-mathematica' encoded by SPLADE++ EnsembleDistil", @@ -1008,7 +1314,11 @@ public enum IndexInfo { "BEIR: cqadupstack-mathematica", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "c3dd33ddfd364a0665450691963f9036"), + "c3dd33ddfd364a0665450691963f9036", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-physics.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-physics' encoded by SPLADE++ EnsembleDistil", @@ -1017,7 +1327,11 @@ public enum IndexInfo { "BEIR: cqadupstack-physics", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "155a130b556072ec0b84788417361228"), + "155a130b556072ec0b84788417361228", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-programmers.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-programmers' encoded by SPLADE++ EnsembleDistil", @@ -1026,7 +1340,11 @@ public enum IndexInfo { "BEIR: cqadupstack-programmers", 
"SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "f0923dd88b7d4f050d54ff6f6efcc7f5"), + "f0923dd88b7d4f050d54ff6f6efcc7f5", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-stats.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-stats' encoded by SPLADE++ EnsembleDistil", @@ -1035,7 +1353,11 @@ public enum IndexInfo { "BEIR: cqadupstack-stats", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "78e62040ed6d44e232e9381e96a56cc7"), + "78e62040ed6d44e232e9381e96a56cc7", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-tex.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-tex' encoded by SPLADE++ EnsembleDistil", @@ -1044,7 +1366,11 @@ public enum IndexInfo { "BEIR: cqadupstack-tex", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "402088c62cbffeba3d710fec408226ed"), + "402088c62cbffeba3d710fec408226ed", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-unix.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-unix' encoded by SPLADE++ EnsembleDistil", @@ -1053,7 +1379,11 @@ public enum IndexInfo { "BEIR: cqadupstack-unix", "SPLADE++ EnsembleDistil", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "66e884e446ff183e07973c65ccf32625"), + "66e884e446ff183e07973c65ccf32625", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-webmasters' encoded by SPLADE++ EnsembleDistil", @@ -1062,7 +1392,11 @@ public enum IndexInfo { "BEIR: cqadupstack-webmasters", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "17be129cbe65b4e4e64a181f95a56972"), + "17be129cbe65b4e4e64a181f95a56972", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed", "Lucene impact index of BEIR collection 'cqadupstack-wordpress' encoded by SPLADE++ EnsembleDistil", @@ -1071,7 +1405,11 @@ public enum IndexInfo { "BEIR: cqadupstack-wordpress", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "f20bacfe92f21bc75360a9978278e690"), + "f20bacfe92f21bc75360a9978278e690", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_SPLADE_PP_ED("beir-v1.0.0-quora.splade-pp-ed", "Lucene impact index of BEIR collection 'quora' encoded by SPLADE++ EnsembleDistil", @@ -1080,7 +1418,11 @@ public enum IndexInfo { "BEIR: quora", "SPLADE++ EnsembleDistil", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "ce6dbaacf3b7b0e8282020565d324ea5"), + "ce6dbaacf3b7b0e8282020565d324ea5", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_SPLADE_PP_ED("beir-v1.0.0-dbpedia-entity.splade-pp-ed", "Lucene impact index of BEIR collection 'dbpedia-entity' encoded by SPLADE++ EnsembleDistil", @@ -1089,7 +1431,11 @@ public enum IndexInfo { "BEIR: dbpedia-entity", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "fc9ac8329b6e2c054290791e68e0a0e4"), + "fc9ac8329b6e2c054290791e68e0a0e4", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_SPLADE_PP_ED("beir-v1.0.0-scidocs.splade-pp-ed", "Lucene impact index of BEIR collection 'scidocs' encoded by SPLADE++ EnsembleDistil", @@ -1098,7 +1444,11 @@ public enum IndexInfo { "BEIR: scidocs", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "3285b17da7cd88d2e6e62a3bfc465039"), + "3285b17da7cd88d2e6e62a3bfc465039", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_SPLADE_PP_ED("beir-v1.0.0-fever.splade-pp-ed", "Lucene impact index of BEIR collection 'fever' encoded by SPLADE++ EnsembleDistil", @@ -1107,7 +1457,11 @@ public enum IndexInfo { "BEIR: fever", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "22e67800879422840f20c7d0008795a9"), + 
"22e67800879422840f20c7d0008795a9", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-fever"), BEIR_V1_0_0_CLIMATE_FEVER_SPLADE_PP_ED("beir-v1.0.0-climate-fever.splade-pp-ed", "Lucene impact index of BEIR collection 'climate-fever' encoded by SPLADE++ EnsembleDistil", @@ -1116,7 +1470,11 @@ public enum IndexInfo { "BEIR: climate-fever", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "bd5f3c804874ca18f99590037873a1bc"), + "bd5f3c804874ca18f99590037873a1bc", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_SPLADE_PP_ED("beir-v1.0.0-scifact.splade-pp-ed", "Lucene impact index of BEIR collection 'scifact' encoded by SPLADE++ EnsembleDistil", @@ -1125,7 +1483,11 @@ public enum IndexInfo { "BEIR: scifact", "SPLADE++ EnsembleDistil", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.splade-pp-ed.20231124.a66f86f.tar.gz" }, - "3abe52209fcd04f411da438a37254e3a"), + "3abe52209fcd04f411da438a37254e3a", + IndexType.SPLADE_PP_ED, + "SpladePlusPlusEnsembleDistil", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-scifact.flat"), // BEIR: BGE BEIR_V1_0_0_TREC_COVID_BGE_BASE_EN_15_HNSW("beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw", @@ -1135,7 +1497,11 @@ public enum IndexInfo { "BEIR: trec-covid", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-trec-covid.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "2c8cba8525f8ec6920dbb4f0b4a2e0a6"), + "2c8cba8525f8ec6920dbb4f0b4a2e0a6", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_BIOASQ_BGE_BASE_EN_15_HNSW("beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 
'bioasq' encoded by BGE-base-en-v1.5.", @@ -1144,7 +1510,11 @@ public enum IndexInfo { "BEIR: bioasq", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-bioasq.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "2f4cde27ef5ec3be1193e06854fdaae6"), + "2f4cde27ef5ec3be1193e06854fdaae6", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_NFCORPUS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'nfcorpus' encoded by BGE-base-en-v1.5.", @@ -1153,7 +1523,11 @@ public enum IndexInfo { "BEIR: nfcorpus", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "d0aa34bf35b59466e7064c424dd82e2c"), + "d0aa34bf35b59466e7064c424dd82e2c", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_NQ_BGE_BASE_EN_15_HNSW("beir-v1.0.0-nq.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'nq' encoded by BGE-base-en-v1.5.", @@ -1162,7 +1536,11 @@ public enum IndexInfo { "BEIR: nq", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-nq.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "b0bbd85821c734125ffbc0f7ea8f75ae"), + "b0bbd85821c734125ffbc0f7ea8f75ae", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_HOTPOTQA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'hotpotqa' encoded by BGE-base-en-v1.5.", @@ -1171,7 +1549,11 @@ public enum IndexInfo { "BEIR: hotpotqa", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "83129157f2138a2240b69f8f5404e579"), + "83129157f2138a2240b69f8f5404e579", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + 
"VectorQueryGenerator", + ""), BEIR_V1_0_0_FIQA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'fiqa' encoded by BGE-base-en-v1.5.", @@ -1180,7 +1562,11 @@ public enum IndexInfo { "BEIR: fiqa", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-fiqa.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "f2e3191b9d047b88b4692ec3ac87acd0"), + "f2e3191b9d047b88b4692ec3ac87acd0", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SIGNAL1M_BGE_BASE_EN_15_HNSW("beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'signal1m' encoded by BGE-base-en-v1.5.", @@ -1189,7 +1575,11 @@ public enum IndexInfo { "BEIR: signal1m", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-signal1m.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "86a5dc12806c5e2f5f1e7cf646ef9004"), + "86a5dc12806c5e2f5f1e7cf646ef9004", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_TREC_NEWS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'trec-news' encoded by BGE-base-en-v1.5.", @@ -1198,7 +1588,11 @@ public enum IndexInfo { "BEIR: trec-news", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-trec-news.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "fcb8fae8c46c76931bde0ad51ecb86f8"), + "fcb8fae8c46c76931bde0ad51ecb86f8", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_ROBUST04_BGE_BASE_EN_15_HNSW("beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'robust04' encoded by BGE-base-en-v1.5.", @@ -1207,7 +1601,11 @@ public enum IndexInfo { "BEIR: robust04", "bge-base-en-v1.5", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-robust04.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "1b975602bf6b87e0a5815a254eb6e945"), + "1b975602bf6b87e0a5815a254eb6e945", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_ARGUANA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'arguana' encoded by BGE-base-en-v1.5.", @@ -1216,7 +1614,11 @@ public enum IndexInfo { "BEIR: arguana", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-arguana.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "468129157636526a3e96bc9427d62808"), + "468129157636526a3e96bc9427d62808", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_WEBIS_TOUCHE2020_BGE_BASE_EN_15_HNSW("beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'webis-touche2020' encoded by BGE-base-en-v1.5.", @@ -1225,7 +1627,11 @@ public enum IndexInfo { "BEIR: webis-touche2020", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "4639db80366f755bb552ce4c736c4aea"), + "4639db80366f755bb552ce4c736c4aea", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_ANDROID_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-android' encoded by BGE-base-en-v1.5.", @@ -1234,7 +1640,11 @@ public enum IndexInfo { "BEIR: cqadupstack-android", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "f7e1f2e737756a84b0273794dcb1038f"), + "f7e1f2e737756a84b0273794dcb1038f", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + 
"VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-english' encoded by BGE-base-en-v1.5.", @@ -1243,7 +1653,11 @@ public enum IndexInfo { "BEIR: cqadupstack-english", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "fcdb3fc633b2ca027111536ba422aaed"), + "fcdb3fc633b2ca027111536ba422aaed", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-gaming' encoded by BGE-base-en-v1.5.", @@ -1252,7 +1666,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gaming", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "d59b216b3df6eb1b724e2f20ceb14407"), + "d59b216b3df6eb1b724e2f20ceb14407", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-gis' encoded by BGE-base-en-v1.5.", @@ -1261,7 +1679,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gis", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "1dd42a28e388b30f42ede02565d445ca"), + "1dd42a28e388b30f42ede02565d445ca", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-mathematica' 
encoded by BGE-base-en-v1.5.", @@ -1270,7 +1692,11 @@ public enum IndexInfo { "BEIR: cqadupstack-mathematica", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "cda37cb1893409c67908cf3aab1467fe"), + "cda37cb1893409c67908cf3aab1467fe", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-physics' encoded by BGE-base-en-v1.5.", @@ -1279,7 +1705,11 @@ public enum IndexInfo { "BEIR: cqadupstack-physics", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "82f71e086930c7d8c5fe423173b9bc2e"), + "82f71e086930c7d8c5fe423173b9bc2e", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-programmers' encoded by BGE-base-en-v1.5.", @@ -1288,7 +1718,11 @@ public enum IndexInfo { "BEIR: cqadupstack-programmers", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "a7a8e17dcef7b40fde2492436aab1458"), + "a7a8e17dcef7b40fde2492436aab1458", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_STATS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-stats' encoded by BGE-base-en-v1.5.", @@ -1297,7 +1731,11 @@ public enum IndexInfo { "BEIR: cqadupstack-stats", "bge-base-en-v1.5", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "7a304fa64332256976bed5049392605b"), + "7a304fa64332256976bed5049392605b", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_TEX_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-tex' encoded by BGE-base-en-v1.5.", @@ -1306,7 +1744,11 @@ public enum IndexInfo { "BEIR: cqadupstack-tex", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "bc5b41b294528611982615c0fcb7ebc7"), + "bc5b41b294528611982615c0fcb7ebc7", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_UNIX_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-unix' encoded by BGE-base-en-v1.5.", @@ -1315,7 +1757,11 @@ public enum IndexInfo { "BEIR: cqadupstack-unix", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "e42e7b6f46239211f9e9a3ed521d30eb"), + "e42e7b6f46239211f9e9a3ed521d30eb", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-webmasters' encoded by BGE-base-en-v1.5.", @@ -1324,7 +1770,11 @@ public enum IndexInfo { "BEIR: cqadupstack-webmasters", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "21987ab658ba062397095226eb62aaf1"), + 
"21987ab658ba062397095226eb62aaf1", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-wordpress' encoded by BGE-base-en-v1.5.", @@ -1333,7 +1783,11 @@ public enum IndexInfo { "BEIR: cqadupstack-wordpress", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "4e80be8087e8f282c42c2b57e377bb65"), + "4e80be8087e8f282c42c2b57e377bb65", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_QUORA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-quora.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'quora' encoded by BGE-base-en-v1.5.", @@ -1342,7 +1796,11 @@ public enum IndexInfo { "BEIR: quora", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-quora.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "064d785db557b011649d5f8b07237eb4"), + "064d785db557b011649d5f8b07237eb4", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_DBPEDIA_ENTITY_BGE_BASE_EN_15_HNSW("beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'dbpedia-entity' encoded by BGE-base-en-v1.5.", @@ -1351,7 +1809,11 @@ public enum IndexInfo { "BEIR: dbpedia-entity", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "323d47f84a54894ba5e6ca215999a533"), + "323d47f84a54894ba5e6ca215999a533", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SCIDOCS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'scidocs' encoded by BGE-base-en-v1.5.", 
@@ -1360,7 +1822,11 @@ public enum IndexInfo { "BEIR: scidocs", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-scidocs.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "50668564faa9723160b1dba37afbf6d9"), + "50668564faa9723160b1dba37afbf6d9", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_FEVER_BGE_BASE_EN_15_HNSW("beir-v1.0.0-fever.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'fever' encoded by BGE-base-en-v1.5.", @@ -1369,7 +1835,11 @@ public enum IndexInfo { "BEIR: fever", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-fever.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "33f67e73786a41b454bf88ac2a7c21c7"), + "33f67e73786a41b454bf88ac2a7c21c7", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CLIMATE_FEVER_BGE_BASE_EN_15_HNSW("beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'climate-fever' encoded by BGE-base-en-v1.5.", @@ -1378,7 +1848,11 @@ public enum IndexInfo { "BEIR: climate-fever", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-climate-fever.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "412337f9f8182e8ec6417bc3cd48288f"), + "412337f9f8182e8ec6417bc3cd48288f", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SCIFACT_BGE_BASE_EN_15_HNSW("beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'scifact' encoded by BGE-base-en-v1.5.", @@ -1387,7 +1861,11 @@ public enum IndexInfo { "BEIR: scifact", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-scifact.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "6de5a41a301575933fa9932f9ecb404d"), + "6de5a41a301575933fa9932f9ecb404d", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + 
"VectorQueryGenerator", + ""), // BEIR: BGE (flat) BEIR_V1_0_0_TREC_COVID_BGE_BASE_EN_15_FLAT("beir-v1.0.0-trec-covid.bge-base-en-v1.5.flat", @@ -1397,7 +1875,11 @@ public enum IndexInfo { "BEIR: trec-covid", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-trec-covid.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "516748bfd1923a999a56160e93b8daae"), + "516748bfd1923a999a56160e93b8daae", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_BIOASQ_BGE_BASE_EN_15_FLAT("beir-v1.0.0-bioasq.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'bioasq' encoded by BGE-base-en-v1.5.", @@ -1406,7 +1888,11 @@ public enum IndexInfo { "BEIR: bioasq", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-bioasq.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "b470cc88cdf338a7325f14eb05bf784d"), + "b470cc88cdf338a7325f14eb05bf784d", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_NFCORPUS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'nfcorpus' encoded by BGE-base-en-v1.5.", @@ -1415,7 +1901,11 @@ public enum IndexInfo { "BEIR: nfcorpus", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "9c1d92c88faccc72d0e869439cd28ad5"), + "9c1d92c88faccc72d0e869439cd28ad5", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_NQ_BGE_BASE_EN_15_FLAT("beir-v1.0.0-nq.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'nq' encoded by BGE-base-en-v1.5.", @@ -1424,7 +1914,11 @@ public enum IndexInfo { "BEIR: nq", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-nq.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - 
"ad668f12f998052ec22b91f808e301e6"), + "ad668f12f998052ec22b91f808e301e6", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_HOTPOTQA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'hotpotqa' encoded by BGE-base-en-v1.5.", @@ -1433,7 +1927,11 @@ public enum IndexInfo { "BEIR: hotpotqa", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "43422329006eea8648ac2928589a0512"), + "43422329006eea8648ac2928589a0512", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_FIQA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-fiqa.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'fiqa' encoded by BGE-base-en-v1.5.", @@ -1442,7 +1940,11 @@ public enum IndexInfo { "BEIR: fiqa", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-fiqa.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "1b727263a0195430dbc20a3cc412f819"), + "1b727263a0195430dbc20a3cc412f819", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SIGNAL1M_BGE_BASE_EN_15_FLAT("beir-v1.0.0-signal1m.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'signal1m' encoded by BGE-base-en-v1.5.", @@ -1451,7 +1953,11 @@ public enum IndexInfo { "BEIR: signal1m", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-signal1m.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "7e3967b5e0326a3e4063fde12ccfd9d0"), + "7e3967b5e0326a3e4063fde12ccfd9d0", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_TREC_NEWS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-trec-news.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'trec-news' encoded by BGE-base-en-v1.5.", @@ -1460,7 +1966,11 @@ public enum IndexInfo { "BEIR: 
trec-news", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-trec-news.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "3da92ed6d976cd02333dd9078c0220ae"), + "3da92ed6d976cd02333dd9078c0220ae", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_ROBUST04_BGE_BASE_EN_15_FLAT("beir-v1.0.0-robust04.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'robust04' encoded by BGE-base-en-v1.5.", @@ -1469,7 +1979,11 @@ public enum IndexInfo { "BEIR: robust04", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-robust04.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "7750b4abbc60fe821c5948a81296f1d0"), + "7750b4abbc60fe821c5948a81296f1d0", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_ARGUANA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-arguana.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'arguana' encoded by BGE-base-en-v1.5.", @@ -1478,7 +1992,11 @@ public enum IndexInfo { "BEIR: arguana", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "ac0f12b71080c92ab752983a0684686e"), + "ac0f12b71080c92ab752983a0684686e", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_WEBIS_TOUCHE2020_BGE_BASE_EN_15_FLAT("beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'webis-touche2020' encoded by BGE-base-en-v1.5.", @@ -1487,7 +2005,11 @@ public enum IndexInfo { "BEIR: webis-touche2020", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "d9572d89c85eccbc781d552699fa2e92"), + "d9572d89c85eccbc781d552699fa2e92", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + 
"VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_ANDROID_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-android' encoded by BGE-base-en-v1.5.", @@ -1496,7 +2018,11 @@ public enum IndexInfo { "BEIR: cqadupstack-android", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "354f39e9e4cf11eb2f2b99409f672995"), + "354f39e9e4cf11eb2f2b99409f672995", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-english' encoded by BGE-base-en-v1.5.", @@ -1505,7 +2031,11 @@ public enum IndexInfo { "BEIR: cqadupstack-english", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "36f72965dcaf9e2dee697152bb38b6d9"), + "36f72965dcaf9e2dee697152bb38b6d9", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-gaming' encoded by BGE-base-en-v1.5.", @@ -1514,7 +2044,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gaming", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "36c47d9387134e88321fa1d4e8f1503e"), + "36c47d9387134e88321fa1d4e8f1503e", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-gis' encoded 
by BGE-base-en-v1.5.", @@ -1523,7 +2057,11 @@ public enum IndexInfo { "BEIR: cqadupstack-gis", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "74e85b1e2847e13343e3b90b4a28a96e"), + "74e85b1e2847e13343e3b90b4a28a96e", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-mathematica' encoded by BGE-base-en-v1.5.", @@ -1532,7 +2070,11 @@ public enum IndexInfo { "BEIR: cqadupstack-mathematica", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "e720e0a7351574161570a77908094e73"), + "e720e0a7351574161570a77908094e73", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-physics' encoded by BGE-base-en-v1.5.", @@ -1541,7 +2083,11 @@ public enum IndexInfo { "BEIR: cqadupstack-physics", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "026a07c3c331fe7be2a8441b124c9f4f"), + "026a07c3c331fe7be2a8441b124c9f4f", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-programmers' encoded by BGE-base-en-v1.5.", @@ -1550,7 +2096,11 @@ public enum IndexInfo { "BEIR: cqadupstack-programmers", "bge-base-en-v1.5", new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "4cb595ae05660973d0b381f1791f0c50"), + "4cb595ae05660973d0b381f1791f0c50", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_STATS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-stats' encoded by BGE-base-en-v1.5.", @@ -1559,7 +2109,11 @@ public enum IndexInfo { "BEIR: cqadupstack-stats", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "7f0cc4b9036c3d92f82ac86beeb1767e"), + "7f0cc4b9036c3d92f82ac86beeb1767e", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_TEX_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-tex' encoded by BGE-base-en-v1.5.", @@ -1568,7 +2122,11 @@ public enum IndexInfo { "BEIR: cqadupstack-tex", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "7770dfcb197d6a37492c634e5f17beb5"), + "7770dfcb197d6a37492c634e5f17beb5", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_UNIX_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-unix' encoded by BGE-base-en-v1.5.", @@ -1577,7 +2135,11 @@ public enum IndexInfo { "BEIR: cqadupstack-unix", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "52f04d06f1d6d8ecdb2b0496ebd99ab8"), + "52f04d06f1d6d8ecdb2b0496ebd99ab8", + 
IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-webmasters' encoded by BGE-base-en-v1.5.", @@ -1586,7 +2148,11 @@ public enum IndexInfo { "BEIR: cqadupstack-webmasters", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "7bac4a98b9d3dc95f979bac8beedd648"), + "7bac4a98b9d3dc95f979bac8beedd648", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-wordpress' encoded by BGE-base-en-v1.5.", @@ -1595,7 +2161,11 @@ public enum IndexInfo { "BEIR: cqadupstack-wordpress", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "5a9802e2fc1eeb06a83723172f19b709"), + "5a9802e2fc1eeb06a83723172f19b709", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_QUORA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-quora.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'quora' encoded by BGE-base-en-v1.5.", @@ -1604,7 +2174,11 @@ public enum IndexInfo { "BEIR: quora", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-quora.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "21dfce18ce9e4351af900c343556c9e2"), + "21dfce18ce9e4351af900c343556c9e2", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_DBPEDIA_ENTITY_BGE_BASE_EN_15_FLAT("beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'dbpedia-entity' encoded 
by BGE-base-en-v1.5.", @@ -1613,7 +2187,11 @@ public enum IndexInfo { "BEIR: dbpedia-entity", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "4158d064d8bb61ed361cea98e6187248"), + "4158d064d8bb61ed361cea98e6187248", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SCIDOCS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-scidocs.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'scidocs' encoded by BGE-base-en-v1.5.", @@ -1622,7 +2200,11 @@ public enum IndexInfo { "BEIR: scidocs", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-scidocs.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "db316cf58c1f8e44aa0c62d7dcd71ec1"), + "db316cf58c1f8e44aa0c62d7dcd71ec1", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_FEVER_BGE_BASE_EN_15_FLAT("beir-v1.0.0-fever.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'fever' encoded by BGE-base-en-v1.5.", @@ -1631,7 +2213,11 @@ public enum IndexInfo { "BEIR: fever", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-fever.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "e53c73a00737cf069360dc66fdc193f8"), + "e53c73a00737cf069360dc66fdc193f8", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_CLIMATE_FEVER_BGE_BASE_EN_15_FLAT("beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'climate-fever' encoded by BGE-base-en-v1.5.", @@ -1640,7 +2226,11 @@ public enum IndexInfo { "BEIR: climate-fever", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-climate-fever.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "bdf55c67f0abba5060fead09ef972d29"), + "bdf55c67f0abba5060fead09ef972d29", + 
IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), BEIR_V1_0_0_SCIFACT_BGE_BASE_EN_15_FLAT("beir-v1.0.0-scifact.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'scifact' encoded by BGE-base-en-v1.5.", @@ -1649,7 +2239,11 @@ public enum IndexInfo { "BEIR: scifact", "bge-base-en-v1.5", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-scifact.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "556abd7e9fcffbf06057ce3111cf4cc5",); + "556abd7e9fcffbf06057ce3111cf4cc5", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""); public final String indexName; public final String description; @@ -1661,15 +2255,18 @@ public enum IndexInfo { public final String md5; public final IndexType indexType; public enum IndexType { - flat, - hnsw, - bm25 + DENSE_FLAT, + DENSE_HNSW, + BM25, + BM25_MULTIFIELDS, + SPLADE_PP_ED, } public final String encoder; public final String queryGenerator; + public final String invertedIndex; IndexInfo(String indexName, String description, String filename, String readme, String corpus, String model, - String[] urls, String md5, IndexType indexType, String encoder, String queryGenerator) { + String[] urls, String md5, IndexType indexType, String encoder, String queryGenerator, String invertedIndex) { this.indexName = indexName; this.description = description; this.filename = filename; @@ -1681,6 +2278,7 @@ public enum IndexType { this.indexType = indexType; this.encoder = encoder; this.queryGenerator = queryGenerator; + this.invertedIndex = invertedIndex; } public static boolean contains(String indexName) { @@ -1709,6 +2307,10 @@ public String getDefaultQueryGenerator() { return queryGenerator; } + public String getDefaultInvertedIndex() { + return invertedIndex; + } + public static final int DEFAULT_EF_SEARCH = 100; public static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; public static final String DEFAULT_ENCODER = "BgeBaseEn15"; From 
529eb430cd62e3c75e3223a99261e91453b344f5 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 00:28:40 -0500 Subject: [PATCH 08/18] search: Improve HNSW search implementation and IndexInfo organization - Simplify IndexInfo by removing redundant getter methods - Remove debug logging from search implementation - Clean up parameter handling in HNSW search - Remove redundant index entries indexinfo indexinfo 2 indexinfo 3 --- .../java/io/anserini/index/IndexInfo.java | 57 +------------------ .../io/anserini/server/ControllerV1_0.java | 45 +++++---------- .../io/anserini/server/SearchService.java | 48 +++------------- 3 files changed, 25 insertions(+), 125 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index f0ef1d4cd9..486a88363b 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -1020,19 +1020,6 @@ public enum IndexInfo { "InvertedDenseVectorQueryGenerator", "beir-v1.0.0-cqadupstack-webmasters"), - BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_MULTIFIELD("beir-v1.0.0-cqadupstack-wordpress.multifield", - "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-wordpress'.", - "lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz", - "", - "BEIR: cqadupstack-wordpress", - "BM25 'multifield'", - new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" }, - "f619c003e2d0cf84794cc672e18e0437", - IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", - "beir-v1.0.0-cqadupstack-wordpress"), - BEIR_V1_0_0_QUORA_MULTIFIELD("beir-v1.0.0-quora.multifield", "Lucene inverted 'multifield' index of BEIR collection 'quora'.", "lucene-inverted.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz", @@ -1698,19 +1685,6 @@ public enum IndexInfo { "VectorQueryGenerator", ""), - 
BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw", - "Lucene HNSW index of BEIR collection 'cqadupstack-physics' encoded by BGE-base-en-v1.5.", - "lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz", - "lucene-hnsw.beir-v1.0.0.bge-base-en-v1.5.20240223.43c9ec.README.md", - "BEIR: cqadupstack-physics", - "bge-base-en-v1.5", - new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, - "82f71e086930c7d8c5fe423173b9bc2e", - IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", - ""), - BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-programmers' encoded by BGE-base-en-v1.5.", "lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240223.43c9ec.tar.gz", @@ -2037,19 +2011,6 @@ public enum IndexInfo { "VectorQueryGenerator", ""), - BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat", - "Lucene flat index of BEIR collection 'cqadupstack-gaming' encoded by BGE-base-en-v1.5.", - "lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz", - "lucene-flat.beir-v1.0.0.bge-base-en-v1.5.20240618.6cf601.README.md", - "BEIR: cqadupstack-gaming", - "bge-base-en-v1.5", - new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, - "36c47d9387134e88321fa1d4e8f1503e", - IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", - ""), - BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-gis' encoded by BGE-base-en-v1.5.", 
"lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240618.6cf601.tar.gz", @@ -2291,26 +2252,14 @@ public static boolean contains(String indexName) { } public static IndexInfo get(String indexName) { - for (IndexInfo indexInfo : IndexInfo.values()) { - if (indexInfo.indexName.equals(indexName)) { - return indexInfo; + for (IndexInfo index : values()) { + if (index.indexName.equals(indexName)) { + return index; } } throw new IllegalArgumentException("Index name " + indexName + " not found!"); } - public String getDefaultEncoder() { - return encoder; - } - - public String getDefaultQueryGenerator() { - return queryGenerator; - } - - public String getDefaultInvertedIndex() { - return invertedIndex; - } - public static final int DEFAULT_EF_SEARCH = 100; public static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; public static final String DEFAULT_ENCODER = "BgeBaseEn15"; diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 799f15db42..3f848c3dd8 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -52,8 +52,8 @@ private SearchService getOrCreateSearchService(String index) { return services.computeIfAbsent(index, k -> new SearchService(k)); } - @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/search") - public Map searchIndex(@PathVariable("index") String index, + @RequestMapping(method = RequestMethod.GET, path = {"/indexes/{index}/search", "/search"}) + public Map searchIndex(@PathVariable(value = "index", required = false) String index, @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, @@ -98,14 +98,14 @@ public Map> listIndexes() { Map> indexList = new LinkedHashMap<>(); for (IndexInfo index : indexes) { indexList.put(index.indexName, Map.of( - "indexName", 
index.indexName, - "description", index.description, - "filename", index.filename, - "corpus", index.corpus, - "model", index.model, - "urls", index.urls, - "md5", index.md5, - "cached", getIndexStatus(index.indexName).get("cached"))); + "indexName", index.indexName, + "description", index.description, + "filename", index.filename, + "corpus", index.corpus, + "model", index.model, + "urls", index.urls, + "md5", index.md5, + "cached", getIndexStatus(index.indexName).get("cached"))); } return indexList; } @@ -122,36 +122,17 @@ public Map updateIndexSettings( } SearchService service = getOrCreateSearchService(index); - Map errors = new HashMap<>(); - // Simple parameter handling if (efSearch != null) { - try { - service.setEfSearchOverride(efSearch); - } catch (IllegalArgumentException e) { - errors.put("efSearch", e.getMessage()); - } + service.setEfSearchOverride(efSearch); } - if (encoder != null) { - try { - service.setEncoderOverride(encoder); - } catch (IllegalArgumentException e) { - errors.put("encoder", e.getMessage()); - } + service.setEncoderOverride(encoder); } - if (queryGenerator != null) { - try { - service.setQueryGeneratorOverride(queryGenerator); - } catch (IllegalArgumentException e) { - errors.put("queryGenerator", e.getMessage()); - } + service.setQueryGeneratorOverride(queryGenerator); } - if (!errors.isEmpty()) { - return Map.of("status", "error", "errors", errors); - } return Map.of("status", "success"); } diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 7f9c4f09e6..0bf66dc257 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -31,6 +31,8 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; import java.util.concurrent.ConcurrentHashMap; public class SearchService { @@ -50,7 +52,8 @@ public 
SearchService(String prebuiltIndex) { try { handler.download(); indexDir = handler.decompressIndex(); - isHnswIndex = prebuiltIndex.contains(".hnsw"); + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + isHnswIndex = indexInfo.indexType == IndexInfo.IndexType.DENSE_HNSW; } catch (Exception e) { throw new RuntimeException(e); } @@ -63,17 +66,7 @@ public List> search(String query, int hits) { public List> search(String query, int hits, Integer efSearch, String encoder, String queryGenerator) { try { - System.out.println("=== Search Parameters ==="); - System.out.println("Query: " + query); - System.out.println("Hits: " + hits); - System.out.println("EF Search: " + efSearch); - System.out.println("Encoder: " + encoder); - System.out.println("Query Generator: " + queryGenerator); - System.out.println("Is HNSW Index: " + isHnswIndex); - System.out.println("Index Dir: " + indexDir); - if (!isHnswIndex) { - // Regular search with document contents SimpleSearcher searcher = new SimpleSearcher(indexDir); searcher.set_bm25(k1, b); ScoredDoc[] results = searcher.search(query, hits); @@ -98,50 +91,29 @@ public List> search(String query, int hits, searcher.close(); return candidates; } else { - // HNSW search - only return docids and scores IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; - - // Parameter precedence: explicit param > override > index default args.efSearch = efSearch != null ? efSearch : getEfSearchOverride() != null ? getEfSearchOverride() - : indexInfo.DEFAULT_EF_SEARCH; - + : IndexInfo.DEFAULT_EF_SEARCH; args.encoder = encoder != null ? encoder : getEncoderOverride() != null ? getEncoderOverride() - : indexInfo.getDefaultEncoder(); - + : indexInfo.encoder; args.queryGenerator = queryGenerator != null ? queryGenerator : getQueryGeneratorOverride() != null ? 
getQueryGeneratorOverride() - : indexInfo.getDefaultQueryGenerator(); - - System.out.println("=== HNSW Args ==="); - System.out.println("Index: " + args.index); - System.out.println("EF Search: " + args.efSearch); - System.out.println("Encoder: " + args.encoder); - System.out.println("Query Generator: " + args.queryGenerator); + : indexInfo.queryGenerator; HnswDenseSearcher searcher = new HnswDenseSearcher<>(args); - System.out.println("Created HNSW searcher"); - ScoredDoc[] results = searcher.search(query, hits); - System.out.println("Search completed, results: " + (results != null ? results.length : "null")); - List> candidates = new ArrayList<>(); - if (results != null) { - for (ScoredDoc r : results) { - candidates.add(Map.of("docid", r.docid,"score", r.score)); - } + for (ScoredDoc r : results) { + candidates.add(Map.of("docid", r.docid, "score", r.score)); } - searcher.close(); return candidates; } } catch (Exception e) { - System.out.println("=== Search Error ==="); - System.out.println("Error type: " + e.getClass().getName()); - System.out.println("Error message: " + e.getMessage()); e.printStackTrace(); return List.of(); } @@ -174,7 +146,6 @@ public Map getDocument(String docid) { } } - // Simple getters with type casting public Integer getEfSearchOverride() { return (Integer) indexOverrides.get("efSearch"); } @@ -187,7 +158,6 @@ public String getQueryGeneratorOverride() { return (String) indexOverrides.get("queryGenerator"); } - // Simple setters with basic validation public void setEfSearchOverride(String value) { try { int efSearch = Integer.parseInt(value); From 7279255da36b580eb1abac37528701c1a1e0a214 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 01:51:29 -0500 Subject: [PATCH 09/18] fix and simplify: Improve resource handling and type safety - Add try-with-resources for proper resource cleanup - Fix generic type specification in HnswDenseSearcher - Remove unnecessary document retrieval restriction for HNSW --- 
.../io/anserini/server/ControllerV1_0.java | 2 -- .../java/io/anserini/server/SearchService.java | 18 +++++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 3f848c3dd8..29f0899305 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -44,8 +44,6 @@ public Map handleIllegalArgumentException(IllegalArgumentExcepti return Map.of("error", ex.getMessage()); } - private static final String DEFAULT_INDEX = "msmarco-v1-passage"; - private final Map services = new ConcurrentHashMap<>(); private SearchService getOrCreateSearchService(String index) { diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 0bf66dc257..add6e24a17 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -95,16 +95,16 @@ public List> search(String query, int hits, HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; args.efSearch = efSearch != null ? efSearch - : getEfSearchOverride() != null ? getEfSearchOverride() - : IndexInfo.DEFAULT_EF_SEARCH; + : getEfSearchOverride() != null ? getEfSearchOverride() + : IndexInfo.DEFAULT_EF_SEARCH; args.encoder = encoder != null ? encoder - : getEncoderOverride() != null ? getEncoderOverride() - : indexInfo.encoder; + : getEncoderOverride() != null ? getEncoderOverride() + : indexInfo.encoder; args.queryGenerator = queryGenerator != null ? queryGenerator - : getQueryGeneratorOverride() != null ? getQueryGeneratorOverride() - : indexInfo.queryGenerator; + : getQueryGeneratorOverride() != null ? 
getQueryGeneratorOverride() + : indexInfo.queryGenerator; - HnswDenseSearcher searcher = new HnswDenseSearcher<>(args); + HnswDenseSearcher searcher = new HnswDenseSearcher(args); ScoredDoc[] results = searcher.search(query, hits); List> candidates = new ArrayList<>(); for (ScoredDoc r : results) { @@ -120,10 +120,6 @@ public List> search(String query, int hits, } public Map getDocument(String docid) { - if (isHnswIndex) { - throw new UnsupportedOperationException("Document retrieval not supported for HNSW indexes"); - } - try { SimpleSearcher searcher = new SimpleSearcher(indexDir); String raw = searcher.doc(docid).get(Constants.RAW); From b5450081430c35e81cc4fbcfacfd13f52397d588 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 02:11:23 -0500 Subject: [PATCH 10/18] refactors: Improve code organization and error handling - Extract initialization logic into separate method - Add parameter validation methods - Improve resource handling with try-with-resources - Remove unnecessary imports and cleanup code structure --- .../io/anserini/server/SearchService.java | 130 +++++++++++------- 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index add6e24a17..e1b6f6f847 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -26,13 +26,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.stream.Collectors; import java.util.concurrent.ConcurrentHashMap; public class SearchService { @@ -45,17 +42,48 @@ public class SearchService { private final boolean isHnswIndex; private final Map indexOverrides = new ConcurrentHashMap<>(); - public 
SearchService(String prebuiltIndex) { - this.prebuiltIndex = prebuiltIndex; - PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); - handler.initialize(); + private static class IndexInitializationResult { + final String indexDir; + final boolean isHnswIndex; + final Exception error; + + IndexInitializationResult(String indexDir, boolean isHnswIndex, Exception error) { + this.indexDir = indexDir; + this.isHnswIndex = isHnswIndex; + this.error = error; + } + } + + private IndexInitializationResult initializeIndex(String prebuiltIndex) { try { + PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); + handler.initialize(); handler.download(); - indexDir = handler.decompressIndex(); + String indexDir = handler.decompressIndex(); IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); - isHnswIndex = indexInfo.indexType == IndexInfo.IndexType.DENSE_HNSW; + boolean isHnsw = indexInfo.indexType == IndexInfo.IndexType.DENSE_HNSW; + return new IndexInitializationResult(indexDir, isHnsw, null); } catch (Exception e) { - throw new RuntimeException(e); + return new IndexInitializationResult(null, false, e); + } + } + + private void validateSearchParameters(String query, int hits) { + if (query == null || query.trim().isEmpty()) { + throw new IllegalArgumentException("Query cannot be empty"); + } + if (hits <= 0) { + throw new IllegalArgumentException("Number of hits must be positive"); + } + } + + public SearchService(String prebuiltIndex) { + this.prebuiltIndex = prebuiltIndex; + IndexInitializationResult result = initializeIndex(prebuiltIndex); + this.indexDir = result.indexDir; + this.isHnswIndex = result.isHnswIndex; + if (result.error != null) { + throw new RuntimeException(result.error); } } @@ -65,63 +93,62 @@ public List> search(String query, int hits) { public List> search(String query, int hits, Integer efSearch, String encoder, String queryGenerator) { + validateSearchParameters(query, hits); + try { if (!isHnswIndex) { - 
SimpleSearcher searcher = new SimpleSearcher(indexDir); - searcher.set_bm25(k1, b); - ScoredDoc[] results = searcher.search(query, hits); - List> candidates = new ArrayList<>(); - for (ScoredDoc r : results) { - Map candidate = new LinkedHashMap<>(); - candidate.put("docid", r.docid); - candidate.put("score", r.score); - String raw = r.lucene_document.get(Constants.RAW); - if (raw != null) { - JsonNode rootNode = mapper.readTree(raw); - Map content = mapper.convertValue(rootNode, Map.class); - content.remove("docid"); - content.remove("id"); - content.remove("_id"); - candidate.put("doc", content); - } else { - candidate.put("doc", null); + try (SimpleSearcher searcher = new SimpleSearcher(indexDir)) { + searcher.set_bm25(k1, b); + ScoredDoc[] results = searcher.search(query, hits); + List> candidates = new ArrayList<>(); + for (ScoredDoc r : results) { + Map candidate = new LinkedHashMap<>(); + candidate.put("docid", r.docid); + candidate.put("score", r.score); + String raw = r.lucene_document.get(Constants.RAW); + if (raw != null) { + JsonNode rootNode = mapper.readTree(raw); + Map content = mapper.convertValue(rootNode, Map.class); + content.remove("docid"); + content.remove("id"); + content.remove("_id"); + candidate.put("doc", content); + } else { + candidate.put("doc", null); + } + candidates.add(candidate); } - candidates.add(candidate); + return candidates; } - searcher.close(); - return candidates; } else { IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); HnswDenseSearcher.Args args = new HnswDenseSearcher.Args(); args.index = indexDir; - args.efSearch = efSearch != null ? efSearch - : getEfSearchOverride() != null ? getEfSearchOverride() - : IndexInfo.DEFAULT_EF_SEARCH; - args.encoder = encoder != null ? encoder - : getEncoderOverride() != null ? getEncoderOverride() - : indexInfo.encoder; - args.queryGenerator = queryGenerator != null ? queryGenerator - : getQueryGeneratorOverride() != null ? 
getQueryGeneratorOverride() - : indexInfo.queryGenerator; - - HnswDenseSearcher searcher = new HnswDenseSearcher(args); - ScoredDoc[] results = searcher.search(query, hits); - List> candidates = new ArrayList<>(); - for (ScoredDoc r : results) { - candidates.add(Map.of("docid", r.docid, "score", r.score)); + args.efSearch = efSearch != null ? efSearch + : getEfSearchOverride() != null ? getEfSearchOverride() + : IndexInfo.DEFAULT_EF_SEARCH; + args.encoder = encoder != null ? encoder + : getEncoderOverride() != null ? getEncoderOverride() + : indexInfo.encoder; + args.queryGenerator = queryGenerator != null ? queryGenerator + : getQueryGeneratorOverride() != null ? getQueryGeneratorOverride() + : indexInfo.queryGenerator; + try (HnswDenseSearcher searcher = new HnswDenseSearcher(args)) { + ScoredDoc[] results = searcher.search(query, hits); + List> candidates = new ArrayList<>(); + for (ScoredDoc r : results) { + candidates.add(Map.of("docid", r.docid, "score", r.score)); + } + return candidates; } - searcher.close(); - return candidates; } } catch (Exception e) { - e.printStackTrace(); return List.of(); } } public Map getDocument(String docid) { - try { - SimpleSearcher searcher = new SimpleSearcher(indexDir); + try (SimpleSearcher searcher = new SimpleSearcher(indexDir)) { String raw = searcher.doc(docid).get(Constants.RAW); Map candidate = new LinkedHashMap<>(); if (raw != null) { @@ -134,7 +161,6 @@ public Map getDocument(String docid) { } else { candidate.put("doc", null); } - searcher.close(); return candidate; } catch (Exception e) { e.printStackTrace(); From c71251fe4f5a4dbbb565c076d4a930f42ba07339 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 02:15:08 -0500 Subject: [PATCH 11/18] re-order: Improve code readability and organization - Reorder methods for better logical grouping - Fix indentation and formatting issues - Move field declarations to top of class --- .../io/anserini/server/ControllerV1_0.java | 29 ++++---- 
.../io/anserini/server/SearchService.java | 70 +++++++++---------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 29f0899305..e4fd4082a8 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -37,6 +37,7 @@ @RestController @RequestMapping(path = "/api/v1.0") public class ControllerV1_0 { + private final Map services = new ConcurrentHashMap<>(); @ResponseStatus(HttpStatus.BAD_REQUEST) @ExceptionHandler(IllegalArgumentException.class) @@ -44,13 +45,7 @@ public Map handleIllegalArgumentException(IllegalArgumentExcepti return Map.of("error", ex.getMessage()); } - private final Map services = new ConcurrentHashMap<>(); - - private SearchService getOrCreateSearchService(String index) { - return services.computeIfAbsent(index, k -> new SearchService(k)); - } - - @RequestMapping(method = RequestMethod.GET, path = {"/indexes/{index}/search", "/search"}) + @RequestMapping(method = RequestMethod.GET, path = { "/indexes/{index}/search", "/search" }) public Map searchIndex(@PathVariable(value = "index", required = false) String index, @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, @@ -96,14 +91,14 @@ public Map> listIndexes() { Map> indexList = new LinkedHashMap<>(); for (IndexInfo index : indexes) { indexList.put(index.indexName, Map.of( - "indexName", index.indexName, - "description", index.description, - "filename", index.filename, - "corpus", index.corpus, - "model", index.model, - "urls", index.urls, - "md5", index.md5, - "cached", getIndexStatus(index.indexName).get("cached"))); + "indexName", index.indexName, + "description", index.description, + "filename", index.filename, + "corpus", index.corpus, + "model", index.model, + "urls", index.urls, + "md5", index.md5, + "cached", 
getIndexStatus(index.indexName).get("cached"))); } return indexList; } @@ -142,7 +137,6 @@ public Map getIndexSettings(@PathVariable("index") String index) SearchService service = getOrCreateSearchService(index); - // Simple direct mapping of current values Map settings = new HashMap<>(); Integer efSearch = service.getEfSearchOverride(); @@ -163,4 +157,7 @@ public Map getIndexSettings(@PathVariable("index") String index) return settings; } + private SearchService getOrCreateSearchService(String index) { + return services.computeIfAbsent(index, k -> new SearchService(k)); + } } \ No newline at end of file diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index e1b6f6f847..db89295617 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -42,41 +42,6 @@ public class SearchService { private final boolean isHnswIndex; private final Map indexOverrides = new ConcurrentHashMap<>(); - private static class IndexInitializationResult { - final String indexDir; - final boolean isHnswIndex; - final Exception error; - - IndexInitializationResult(String indexDir, boolean isHnswIndex, Exception error) { - this.indexDir = indexDir; - this.isHnswIndex = isHnswIndex; - this.error = error; - } - } - - private IndexInitializationResult initializeIndex(String prebuiltIndex) { - try { - PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); - handler.initialize(); - handler.download(); - String indexDir = handler.decompressIndex(); - IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); - boolean isHnsw = indexInfo.indexType == IndexInfo.IndexType.DENSE_HNSW; - return new IndexInitializationResult(indexDir, isHnsw, null); - } catch (Exception e) { - return new IndexInitializationResult(null, false, e); - } - } - - private void validateSearchParameters(String query, int hits) { - if (query == null || query.trim().isEmpty()) { - throw 
new IllegalArgumentException("Query cannot be empty"); - } - if (hits <= 0) { - throw new IllegalArgumentException("Number of hits must be positive"); - } - } - public SearchService(String prebuiltIndex) { this.prebuiltIndex = prebuiltIndex; IndexInitializationResult result = initializeIndex(prebuiltIndex); @@ -205,4 +170,39 @@ public void setQueryGeneratorOverride(String value) { } indexOverrides.put("queryGenerator", value); } + + private void validateSearchParameters(String query, int hits) { + if (query == null || query.trim().isEmpty()) { + throw new IllegalArgumentException("Query cannot be empty"); + } + if (hits <= 0) { + throw new IllegalArgumentException("Number of hits must be positive"); + } + } + + private IndexInitializationResult initializeIndex(String prebuiltIndex) { + try { + PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); + handler.initialize(); + handler.download(); + String indexDir = handler.decompressIndex(); + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + boolean isHnsw = indexInfo.indexType == IndexInfo.IndexType.DENSE_HNSW; + return new IndexInitializationResult(indexDir, isHnsw, null); + } catch (Exception e) { + return new IndexInitializationResult(null, false, e); + } + } + + private static class IndexInitializationResult { + final String indexDir; + final boolean isHnswIndex; + final Exception error; + + IndexInitializationResult(String indexDir, boolean isHnswIndex, Exception error) { + this.indexDir = indexDir; + this.isHnswIndex = isHnswIndex; + this.error = error; + } + } } \ No newline at end of file From b8de5d6e90eae84c51eac84a1851bebbea07faf3 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 02:42:29 -0500 Subject: [PATCH 12/18] Remove searching without index and improve error handling BREAKING CHANGES: - Remove ability to search without specifying an index - Change error handling from RuntimeException to IllegalArgumentException for consistency with Anserini patterns Other changes: 
- Add validation for encoder and queryGenerator settings - Remove default constants for query generator and encoder - Update tests to reflect new error handling and required index parameter --- .../java/io/anserini/index/IndexInfo.java | 2 -- .../io/anserini/server/ControllerV1_0.java | 6 ++++- .../io/anserini/server/SearchService.java | 27 ++++++++++++++++++- .../io/anserini/index/PrebuiltIndexTest.java | 2 +- .../io/anserini/server/ControllerTest.java | 8 ++++-- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 486a88363b..6112354018 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -2261,6 +2261,4 @@ public static IndexInfo get(String indexName) { } public static final int DEFAULT_EF_SEARCH = 100; - public static final String DEFAULT_QUERY_GENERATOR = "VectorQueryGenerator"; - public static final String DEFAULT_ENCODER = "BgeBaseEn15"; } diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index e4fd4082a8..23ae27aca8 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -46,7 +46,7 @@ public Map handleIllegalArgumentException(IllegalArgumentExcepti } @RequestMapping(method = RequestMethod.GET, path = { "/indexes/{index}/search", "/search" }) - public Map searchIndex(@PathVariable(value = "index", required = false) String index, + public Map searchIndex(@PathVariable(value = "index", required = true) String index, @RequestParam("query") String query, @RequestParam(value = "hits", defaultValue = "10") int hits, @RequestParam(value = "qid", defaultValue = "") String qid, @@ -54,6 +54,10 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestParam(value = "encoder", required = false) String encoder, @RequestParam(value = 
"queryGenerator", required = false) String queryGenerator) { + if (index == null) { + throw new IllegalArgumentException("Index parameter is required"); + } + if (!IndexInfo.contains(index)) { throw new IllegalArgumentException("Index " + index + " not found!"); } diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index db89295617..0fe41f0007 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -59,6 +59,7 @@ public List> search(String query, int hits) { public List> search(String query, int hits, Integer efSearch, String encoder, String queryGenerator) { validateSearchParameters(query, hits); + validateSettings(efSearch, encoder, queryGenerator); try { if (!isHnswIndex) { @@ -159,7 +160,11 @@ public void setEfSearchOverride(String value) { public void setEncoderOverride(String value) { if (value == null || value.trim().isEmpty()) { - throw new IllegalArgumentException("encoder cannot be empty"); + throw new IllegalArgumentException("Encoder cannot be empty"); + } + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + if (!value.equals(indexInfo.encoder)) { + throw new IllegalArgumentException("Unsupported encoder: " + value + " for index " + prebuiltIndex); } indexOverrides.put("encoder", value); } @@ -168,6 +173,10 @@ public void setQueryGeneratorOverride(String value) { if (value == null || value.trim().isEmpty()) { throw new IllegalArgumentException("queryGenerator cannot be empty"); } + IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + if (!value.equals(indexInfo.queryGenerator)) { + throw new IllegalArgumentException("Unsupported queryGenerator: " + value + " for index " + prebuiltIndex); + } indexOverrides.put("queryGenerator", value); } @@ -180,6 +189,22 @@ private void validateSearchParameters(String query, int hits) { } } + private void validateSettings(Integer efSearch, String encoder, String queryGenerator) { 
+ IndexInfo indexInfo = IndexInfo.get(prebuiltIndex); + + if (efSearch != null && !isHnswIndex) { + throw new IllegalArgumentException("efSearch parameter is only supported for HNSW indexes"); + } + + if (encoder != null && !encoder.equals(indexInfo.encoder)) { + throw new IllegalArgumentException("Unsupported encoder: " + encoder + " for index " + prebuiltIndex); + } + + if (queryGenerator != null && !queryGenerator.equals(indexInfo.queryGenerator)) { + throw new IllegalArgumentException("Unsupported queryGenerator: " + queryGenerator + " for index " + prebuiltIndex); + } + } + private IndexInitializationResult initializeIndex(String prebuiltIndex) { try { PrebuiltIndexHandler handler = new PrebuiltIndexHandler(prebuiltIndex); diff --git a/src/test/java/io/anserini/index/PrebuiltIndexTest.java b/src/test/java/io/anserini/index/PrebuiltIndexTest.java index a89c7c906b..ec3b2c594b 100644 --- a/src/test/java/io/anserini/index/PrebuiltIndexTest.java +++ b/src/test/java/io/anserini/index/PrebuiltIndexTest.java @@ -60,6 +60,6 @@ public void testUrls() { // test number of prebuilt-indexes @Test public void testNumPrebuiltIndexes() { - assertEquals(169, IndexInfo.values().length); + assertEquals(166, IndexInfo.values().length); } } diff --git a/src/test/java/io/anserini/server/ControllerTest.java b/src/test/java/io/anserini/server/ControllerTest.java index 0c40908fef..3cf0538089 100644 --- a/src/test/java/io/anserini/server/ControllerTest.java +++ b/src/test/java/io/anserini/server/ControllerTest.java @@ -33,7 +33,7 @@ public class ControllerTest { public void testSearch() throws Exception { ControllerV1_0 controller = new ControllerV1_0(); - Map results = controller.searchIndex(null, "Albert Einstein", 10, "", null, null, null); + Map results = controller.searchIndex("msmarco-v1-passage", "Albert Einstein", 10, "", null, null, null); assertNotNull(results); assertTrue(results.get("candidates") instanceof List); @@ -41,13 +41,17 @@ public void testSearch() throws Exception 
{ List> candidates = (List>) results.get("candidates"); assertEquals(10, candidates.size()); assertEquals("3553430", candidates.get(0).get("docid")); + + assertThrows(IllegalArgumentException.class, () -> { + controller.searchIndex(null, "Albert Einstein", 10, "", null, null, null); + }); } @Test public void testIndexNotFound() throws Exception { ControllerV1_0 controller = new ControllerV1_0(); - assertThrows(RuntimeException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { Map results = controller.searchIndex("nonexistent-index", "Albert Einstein", 10, "", null, null, null); }); } From 6bfb91bf7ce92ae2021448cd5db28f00dca02383 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 03:20:26 -0500 Subject: [PATCH 13/18] un-remove the 3 removed indexinfo entries -- accident --- .../java/io/anserini/index/IndexInfo.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 6112354018..5fb828db3c 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -1020,6 +1020,19 @@ public enum IndexInfo { "InvertedDenseVectorQueryGenerator", "beir-v1.0.0-cqadupstack-webmasters"), + BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_MULTIFIELD("beir-v1.0.0-cqadupstack-wordpress.multifield", + "Lucene inverted 'multifield' index of BEIR collection 'cqadupstack-wordpress'.", + "lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz", + "", + "BEIR: cqadupstack-wordpress", + "BM25 'multifield'", + new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" }, + "f619c003e2d0cf84794cc672e18e0437", + IndexType.BM25_MULTIFIELDS, + "BM25", + "InvertedDenseVectorQueryGenerator", + "beir-v1.0.0-cqadupstack-wordpress"), + BEIR_V1_0_0_QUORA_MULTIFIELD("beir-v1.0.0-quora.multifield", 
"Lucene inverted 'multifield' index of BEIR collection 'quora'.", "lucene-inverted.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz", @@ -1685,6 +1698,19 @@ public enum IndexInfo { "VectorQueryGenerator", ""), + BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw", + "Lucene HNSW index of BEIR collection 'cqadupstack-physics' encoded by BGE-base-en-v1.5.", + "lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz", + "lucene-hnsw.beir-v1.0.0.bge-base-en-v1.5.20240223.43c9ec.README.md", + "BEIR: cqadupstack-physics", + "bge-base-en-v1.5", + new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, + "82f71e086930c7d8c5fe423173b9bc2e", + IndexType.DENSE_HNSW, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), + BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw", "Lucene HNSW index of BEIR collection 'cqadupstack-programmers' encoded by BGE-base-en-v1.5.", "lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240223.43c9ec.tar.gz", @@ -2011,6 +2037,19 @@ public enum IndexInfo { "VectorQueryGenerator", ""), + BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat", + "Lucene flat index of BEIR collection 'cqadupstack-gaming' encoded by BGE-base-en-v1.5.", + "lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz", + "lucene-flat.beir-v1.0.0.bge-base-en-v1.5.20240618.6cf601.README.md", + "BEIR: cqadupstack-gaming", + "bge-base-en-v1.5", + new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, + "36c47d9387134e88321fa1d4e8f1503e", + IndexType.DENSE_FLAT, + "BgeBaseEn15", + "VectorQueryGenerator", + ""), + 
BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat", "Lucene flat index of BEIR collection 'cqadupstack-gis' encoded by BGE-base-en-v1.5.", "lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240618.6cf601.tar.gz", From 9e28c669e48659b4ce19070f782d8bf69a35a153 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 10:19:58 -0500 Subject: [PATCH 14/18] API Version Bump, style, and cleanup - Update API version to v1.1 to reflect breaking changes - Standardize error handling style (single line for simple checks) - Clean up code formatting and indentation - Use LinkedHashMap for index info to have new fields, as Map of reached maximum - Change IndexInfo names to use .class suffix. null values specifically in Encoder and QueryGenerator fields. Style: Inverted Index field still has empty strings - Simplify conditional assignments with single line statements --- .../java/io/anserini/index/IndexInfo.java | 676 +++++++++--------- .../io/anserini/server/ControllerV1_0.java | 78 +- 2 files changed, 372 insertions(+), 382 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 5fb828db3c..7c567133c9 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -27,8 +27,8 @@ public enum IndexInfo { "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz" }, "cfe14d543c6a27f4d742fb2d0099b8e0", IndexType.BM25, - "", - "", + null, + null, ""), // MS MARCO V1 @@ -42,8 +42,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.20221004.252b5e.tar.gz" }, "678876e8c99a89933d553609a0fd8793", IndexType.BM25, - "", - "", + null, + null, ""), MSMARCO_V1_PASSAGE_SPLADE_PP_ED("msmarco-v1-passage.splade-pp-ed", @@ -56,8 +56,8 @@ public enum IndexInfo { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v1-passage.splade-pp-ed.20230524.a59610.tar.gz" }, "2c008fc36131e27966a72292932358e6", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW("msmarco-v1-passage.cosdpr-distil.hnsw", @@ -70,8 +70,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, "df4c60fa1f3804fa409499824d12d035", IndexType.DENSE_HNSW, - "CosDprDistil", - "VectorQueryGenerator", + "CosDprDistilEncoder.class", + "VectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COS_DPR_DISTIL_HNSW_INT8("msmarco-v1-passage.cosdpr-distil.hnsw-int8", @@ -84,8 +84,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cosdpr-distil.20240108.825148.tar.gz" }, "119124ad358bb81e6a203b04d1b99a9c", IndexType.DENSE_HNSW, - "CosDprDistil", - "VectorQueryGenerator", + "CosDprDistilEncoder.class", + "VectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW("msmarco-v1-passage.bge-base-en-v1.5.hnsw", @@ -98,8 +98,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, "00a577f689d90f95e6c5611438b0af3d", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_BGE_BASE_EN_15_HNSW_INT8("msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8", @@ -112,8 +112,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5.20240117.53514b.tar.gz" }, "7830712459cf124c96fd058bb0a405b7", 
IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw", @@ -127,7 +127,7 @@ public enum IndexInfo { "c7294ca988ae1b812d427362ffca1ee2", IndexType.DENSE_HNSW, "CohereEmbedEnglishV30", - "VectorQueryGenerator", + "VectorQueryGenerator.class", "msmarco-v1-passage"), MSMARCO_V1_PASSAGE_COHERE_EMBED_ENGLISH_30_HNSW_INT8("msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8", @@ -140,8 +140,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, "dbaca578cc8495f504cdd0a7187f4c36", IndexType.DENSE_HNSW, - "CohereEmbedEnglish30", "CohereEmbedEnglishV30", + "VectorQueryGenerator.class", "msmarco-v1-passage"), // MS MARCO V2 @@ -155,8 +155,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage.20220808.4d6d2a.tar.gz" }, "eacd8556dd416ccad517b5e7dc97bceb", IndexType.BM25, - "", - "", + null, + null, ""), MSMARCO_V2_DOC("msmarco-v2-doc", @@ -168,9 +168,9 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc.20220808.4d6d2a.tar.gz" }, "0599bd6ed5ee28390b279eb398ef0267", - IndexType.DENSE_FLAT, - "", - "", + IndexType.BM25, + null, + null, ""), MSMARCO_V2_DOC_SEGMENTED("msmarco-v2-doc-segmented", @@ -183,8 +183,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.tar.gz" }, "8a5f444fa5a63cc5d4ddc3e6dd15faa0", IndexType.BM25, - "", - "", + null, + null, ""), MSMARCO_V21_DOC("msmarco-v2.1-doc", @@ -197,8 +197,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc.20240418.4f9675.tar.gz" }, "cecd55856c34afa82f1a499705c9df02", 
IndexType.BM25, - "", - "JsonInvertedDenseVectorQueryGenerator", + null, + null, "msmarco-v2.1-doc"), MSMARCO_V21_DOC_SEGMENTED("msmarco-v2.1-doc-segmented", @@ -211,8 +211,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.msmarco-v2.1-doc-segmented.20240418.4f9675.tar.gz" }, "6ec4cd595c9fe1ad91b43eabb39a637c", IndexType.BM25, - "", - "JsonInvertedDenseVectorQueryGenerator", + null, + null, "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD00_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.hnsw-int8", @@ -225,8 +225,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard00.arctic-embed-l.20250114.4884f5.tar.gz" }, "aab3f8e9aa0563bd0f875584784a0845", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD01_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.hnsw-int8", @@ -239,8 +239,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard01.arctic-embed-l.20250114.4884f5.tar.gz" }, "34ea30fe72c2bc1795ae83e71b191547", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD02_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.hnsw-int8", @@ -253,8 +253,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard02.arctic-embed-l.20250114.4884f5.tar.gz" }, "b6271d6db65119977491675f74f466d5", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + 
"ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD03_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.hnsw-int8", @@ -267,8 +267,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard03.arctic-embed-l.20250114.4884f5.tar.gz" }, "a9cd644eb6037f67d2e9c06a8f60928d", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD04_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.hnsw-int8", @@ -281,8 +281,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard04.arctic-embed-l.20250114.4884f5.tar.gz" }, "07b7e451e0525d01c1f1f2b1c42b1bd5", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD05_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.hnsw-int8", @@ -295,8 +295,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard05.arctic-embed-l.20250114.4884f5.tar.gz" }, "2573dce175788981be2f266ebb33c96d", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD06_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.hnsw-int8", @@ -309,8 +309,8 @@ public enum IndexInfo { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard06.arctic-embed-l.20250114.4884f5.tar.gz" }, "a644aea445a8b78cc9e99d2ce111ff11", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD07_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.hnsw-int8", @@ -323,8 +323,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard07.arctic-embed-l.20250114.4884f5.tar.gz" }, "402d37deccb44b5fc105049889e8aaea", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD08_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.hnsw-int8", @@ -337,8 +337,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard08.arctic-embed-l.20250114.4884f5.tar.gz" }, "89ebcd027f7297b26a1edc8ae5726527", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class", "msmarco-v2.1-doc-segmented"), MSMARCO_V21_DOC_SEGMENTED_SHARD09_ARCTIC_EMBED_L_HNSW_INT8("msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.hnsw-int8", @@ -351,8 +351,8 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v2.1-doc-segmented-shard09.arctic-embed-l.20250114.4884f5.tar.gz" }, "5e580bb7eb9ee2bb6bfa492b3430c17d", IndexType.DENSE_HNSW, - "ArcticEmbedL", - "JsonInvertedDenseVectorQueryGenerator", + "ArcticEmbedLEncoder.class", + "JsonInvertedDenseVectorQueryGenerator.class",
"msmarco-v2.1-doc-segmented"), // BEIR: flat @@ -365,8 +365,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz" }, "1aaf107b0787aa349deac92cb67d4230", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_FLAT("beir-v1.0.0-bioasq.flat", @@ -378,8 +378,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz" }, "12728b3629817d352322f18b0cb6199b", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_FLAT("beir-v1.0.0-nfcorpus.flat", @@ -391,8 +391,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz" }, "eb7a6f1bb15071c2940bc50752d86626", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_FLAT("beir-v1.0.0-nq.flat", @@ -404,8 +404,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.flat.20221116.505594.tar.gz" }, "0ba1ef0412d8a0fb56b4a04ecb13ef0b", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_FLAT("beir-v1.0.0-hotpotqa.flat", @@ -417,8 +417,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz" }, "3f41d640a8ebbcad4f598140750c24f8", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + 
"InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_FLAT("beir-v1.0.0-fiqa.flat", @@ -430,8 +430,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz" }, "d98ee6ebfc234657ecbd04226e8a7849", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_FLAT("beir-v1.0.0-signal1m.flat", @@ -443,8 +443,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz" }, "93d901916b473351fbc04fdf12c5ba4f", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_FLAT("beir-v1.0.0-trec-news.flat", @@ -456,8 +456,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz" }, "22e7752c3d0122c28013b33e5e2134ae", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_FLAT("beir-v1.0.0-robust04.flat", @@ -469,8 +469,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz" }, "d508fc770002a99a5dc3da3d0fa001b7", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_FLAT("beir-v1.0.0-arguana.flat", @@ -482,8 +482,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz" }, 
"db59ef0cb74e9cfeac0ac735827381df", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_FLAT("beir-v1.0.0-webis-touche2020.flat", @@ -495,8 +495,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz" }, "f6419ddfd53c0bf1d76ea132b1c0c352", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_FLAT("beir-v1.0.0-cqadupstack-android.flat", @@ -508,8 +508,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz" }, "443e413b49c39de43a6cece96a7513c0", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_FLAT("beir-v1.0.0-cqadupstack-english.flat", @@ -521,8 +521,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz" }, "f7db543f5bb56fa98c3c14224c6b96f2", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_FLAT("beir-v1.0.0-cqadupstack-gaming.flat", @@ -534,8 +534,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz" }, "775169fd863d3e91076e1905799456ea", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", 
"beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_FLAT("beir-v1.0.0-cqadupstack-gis.flat", @@ -547,8 +547,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz" }, "4c5be1c7026a61ca7866b4f28cac91fe", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_FLAT("beir-v1.0.0-cqadupstack-mathematica.flat", @@ -560,8 +560,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz" }, "43e2b33db7ecadc041165005aa5d4b6f", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_FLAT("beir-v1.0.0-cqadupstack-physics.flat", @@ -573,8 +573,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz" }, "765b8013595962e01600f4f851e8f16d", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_FLAT("beir-v1.0.0-cqadupstack-programmers.flat", @@ -586,8 +586,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz" }, "aa4fc9f29a0436a6e0942656274ceaf5", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_FLAT("beir-v1.0.0-cqadupstack-stats.flat", @@ -599,8 
+599,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz" }, "d56538f56d982ce09961d4b680bd4dc5", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_FLAT("beir-v1.0.0-cqadupstack-tex.flat", @@ -612,8 +612,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz" }, "36825b8428aa34fdaad7e420e120c101", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_FLAT("beir-v1.0.0-cqadupstack-unix.flat", @@ -625,8 +625,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz" }, "961e386016c7eb7afa2bc26feb96902c", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_FLAT("beir-v1.0.0-cqadupstack-webmasters.flat", @@ -638,8 +638,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz" }, "f31625436dc6efc24b9c2ae1b0f2364e", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_FLAT("beir-v1.0.0-cqadupstack-wordpress.flat", @@ -651,8 +651,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz" }, "5a0035fbb6ccabd20fe0eed742dce0d0", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_FLAT("beir-v1.0.0-quora.flat", @@ -664,8 +664,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.flat.20221116.505594.tar.gz" }, "48c95c2da43e24cc603695d3e6bfd779", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_FLAT("beir-v1.0.0-dbpedia-entity.flat", @@ -677,8 +677,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz" }, "8ac66272fde08ff10491dc0ec52f17e2", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_FLAT("beir-v1.0.0-scidocs.flat", @@ -690,8 +690,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz" }, "9555ecc5da399a73956d9302a98420fc", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_FLAT("beir-v1.0.0-fever.flat", @@ -703,8 +703,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.flat.20221116.505594.tar.gz" }, "30b5a338f9f16669ed3dae3bae4e7b32", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", 
"beir-v1.0.0-fever"), BEIR_V1_0_0_CLIMATE_FEVER_FLAT("beir-v1.0.0-climate-fever.flat", @@ -716,8 +716,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz" }, "6e7101f4a5c241ba263bb6a826049826", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_FLAT("beir-v1.0.0-scifact.flat", @@ -729,8 +729,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz" }, "59777038fe0539e600658591e322ea57", IndexType.DENSE_FLAT, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scifact"), // BEIR: multifield @@ -743,8 +743,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz" }, "0439617a927a33727c7b592bd436d8d6", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_MULTIFIELD("beir-v1.0.0-bioasq.multifield", @@ -756,8 +756,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz" }, "b2f4fed18b04414193f8368b6891e19c", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_MULTIFIELD("beir-v1.0.0-nfcorpus.multifield", @@ -769,8 +769,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz" }, 
"85cdcceaf06c482ab6a60c34c06c0448", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_MULTIFIELD("beir-v1.0.0-nq.multifield", @@ -782,8 +782,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz" }, "73b3e3c49c2d79a2851c1ba85f8fbbdf", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_MULTIFIELD("beir-v1.0.0-hotpotqa.multifield", @@ -795,8 +795,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz" }, "1d9f75122d4b50cb33cccaa125640a38", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_MULTIFIELD("beir-v1.0.0-fiqa.multifield", @@ -808,8 +808,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz" }, "1c9330baf3d9004ae46778d4d9e039f6", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_MULTIFIELD("beir-v1.0.0-signal1m.multifield", @@ -821,8 +821,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz" }, "0735de4f103330975d206285ea85aaf5", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_MULTIFIELD("beir-v1.0.0-trec-news.multifield", 
@@ -834,8 +834,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz" }, "a7b5bd79d22d3631dffcad2ffa8afd0a", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_MULTIFIELD("beir-v1.0.0-robust04.multifield", @@ -847,8 +847,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz" }, "49db6bf123b6224d0e0973a16ff9c243", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_MULTIFIELD("beir-v1.0.0-arguana.multifield", @@ -860,8 +860,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz" }, "895b0d78a1cc40222aaebcff10b6b929", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_MULTIFIELD("beir-v1.0.0-webis-touche2020.multifield", @@ -873,8 +873,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz" }, "390552c8b93dc95bf2f58808d1c8a37d", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_MULTIFIELD("beir-v1.0.0-cqadupstack-android.multifield", @@ -886,8 +886,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz" }, "299fc8b542dabc241320db571b8f8ff0", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_MULTIFIELD("beir-v1.0.0-cqadupstack-english.multifield", @@ -899,8 +899,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz" }, "5bb26ad0ba9184592b5ed935e65b5f17", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_MULTIFIELD("beir-v1.0.0-cqadupstack-gaming.multifield", @@ -912,8 +912,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz" }, "90d1ae9a1862b8b96871b9b94cc46b4e", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_MULTIFIELD("beir-v1.0.0-cqadupstack-gis.multifield", @@ -925,8 +925,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz" }, "62869b2b6cf569424fed659adf1e5ea7", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_MULTIFIELD("beir-v1.0.0-cqadupstack-mathematica.multifield", @@ -938,8 +938,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz" }, "a78c9d2e29a4b727fbeb38e825629df5", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_MULTIFIELD("beir-v1.0.0-cqadupstack-physics.multifield", @@ -951,8 +951,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz" }, "d6e60e2665c1b6f2bac021dc6c767393", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_MULTIFIELD("beir-v1.0.0-cqadupstack-programmers.multifield", @@ -964,8 +964,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz" }, "77b54cd7613b555d80998b9744eef85c", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_MULTIFIELD("beir-v1.0.0-cqadupstack-stats.multifield", @@ -977,8 +977,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz" }, "8469917c70c767ea398ec2b93aaf04ca", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_MULTIFIELD("beir-v1.0.0-cqadupstack-tex.multifield", @@ -990,8 +990,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz" }, "4d0b0efb2579e0fd73b9156921580a00", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_MULTIFIELD("beir-v1.0.0-cqadupstack-unix.multifield", @@ -1003,8 +1003,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz" }, "33e2510bb1414ca106766ae787e28670", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_MULTIFIELD("beir-v1.0.0-cqadupstack-webmasters.multifield", @@ -1016,8 +1016,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz" }, "cb16d3da34b6705747ec07ce89913457", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_MULTIFIELD("beir-v1.0.0-cqadupstack-wordpress.multifield", @@ -1029,8 +1029,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" }, "f619c003e2d0cf84794cc672e18e0437", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_MULTIFIELD("beir-v1.0.0-quora.multifield", @@ -1042,8 +1042,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz" }, "9248de265c88afc105231659d8c8be09", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_MULTIFIELD("beir-v1.0.0-dbpedia-entity.multifield", @@ -1055,8 +1055,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz" }, "b7f0ae30f045188a608cc87553cade37", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_MULTIFIELD("beir-v1.0.0-scidocs.multifield", @@ -1067,9 +1067,9 @@ public enum IndexInfo { "BM25 'multifield'", new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz" }, "6409f5ec569530fc3240590dab59bc4c", - IndexType.SPLADE_PP_ED, - "BM25", - "InvertedDenseVectorQueryGenerator", + IndexType.BM25_MULTIFIELDS, + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_MULTIFIELD("beir-v1.0.0-fever.multifield", @@ -1081,8 +1081,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz" }, "841908da91e7e5eaa0d122faf1a486d8", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-fever"), BEIR_V1_0_0_CLIMATE_FEVER_MULTIFIELD("beir-v1.0.0-climate-fever.multifield", @@ -1094,8 +1094,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz" }, 
"2901ac443ca4f0df424a35d068905829", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_MULTIFIELD("beir-v1.0.0-scifact.multifield", @@ -1107,8 +1107,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz" }, "b40b26f44f68ab9aa4b573aafea27e2e", IndexType.BM25_MULTIFIELDS, - "BM25", - "InvertedDenseVectorQueryGenerator", + null, + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scifact"), // BEIR: SPLADE++ ED @@ -1121,8 +1121,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-covid.splade-pp-ed.20231124.a66f86f.tar.gz" }, "e808ff9d4a1f45de9f0bc292900302b4", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-covid"), BEIR_V1_0_0_BIOASQ_SPLADE_PP_ED("beir-v1.0.0-bioasq.splade-pp-ed", @@ -1134,8 +1134,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-bioasq.splade-pp-ed.20231124.a66f86f.tar.gz" }, "fc661b2c2fa59e24f37c6dfa6de8e682", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-bioasq"), BEIR_V1_0_0_NFCORPUS_SPLADE_PP_ED("beir-v1.0.0-nfcorpus.splade-pp-ed", @@ -1147,8 +1147,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nfcorpus.splade-pp-ed.20231124.a66f86f.tar.gz" }, "7d6e66cca9d2db8bb7caa3bdf330cdd8", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - 
"InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nfcorpus"), BEIR_V1_0_0_NQ_SPLADE_PP_ED("beir-v1.0.0-nq.splade-pp-ed", @@ -1160,8 +1160,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-nq.splade-pp-ed.20231124.a66f86f.tar.gz" }, "a785d6636df60c861829507c3d806ee6", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-nq"), BEIR_V1_0_0_HOTPOTQA_SPLADE_PP_ED("beir-v1.0.0-hotpotqa.splade-pp-ed", @@ -1173,8 +1173,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-hotpotqa.splade-pp-ed.20231124.a66f86f.tar.gz" }, "b280ed3f7b12034c0cc4b302f92801b9", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-hotpotqa"), BEIR_V1_0_0_FIQA_SPLADE_PP_ED("beir-v1.0.0-fiqa.splade-pp-ed", @@ -1186,8 +1186,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fiqa.splade-pp-ed.20231124.a66f86f.tar.gz" }, "ea53103c695c0da6cea5b1c8353371b0", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-fiqa"), BEIR_V1_0_0_SIGNAL1M_SPLADE_PP_ED("beir-v1.0.0-signal1m.splade-pp-ed", @@ -1199,8 +1199,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-signal1m.splade-pp-ed.20231124.a66f86f.tar.gz" }, "0b46d71c97eabe9ca424f3ab9b2ddc64", IndexType.SPLADE_PP_ED, - 
"SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-signal1m"), BEIR_V1_0_0_TREC_NEWS_SPLADE_PP_ED("beir-v1.0.0-trec-news.splade-pp-ed", @@ -1212,8 +1212,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-trec-news.splade-pp-ed.20231124.a66f86f.tar.gz" }, "ef4fb032b632b80355db46549f08a026", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-trec-news"), BEIR_V1_0_0_ROBUST04_SPLADE_PP_ED("beir-v1.0.0-robust04.splade-pp-ed", @@ -1225,8 +1225,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-robust04.splade-pp-ed.20231124.a66f86f.tar.gz" }, "c1a6fd094bb9e34e69e10040d9b0ad2a", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-robust04"), BEIR_V1_0_0_ARGUANA_SPLADE_PP_ED("beir-v1.0.0-arguana.splade-pp-ed", @@ -1238,8 +1238,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-arguana.splade-pp-ed.20231124.a66f86f.tar.gz" }, "c2725b375ca53ff031ee8b4ba8501eb6", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-arguana"), BEIR_V1_0_0_WEBIS_TOUCHE2020_SPLADE_PP_ED("beir-v1.0.0-webis-touche2020.splade-pp-ed", @@ -1251,8 +1251,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-webis-touche2020.splade-pp-ed.20231124.a66f86f.tar.gz" }, "1abec77feeb741edfb3c9b7565b42964", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-webis-touche2020"), BEIR_V1_0_0_CQADUPSTACK_ANDROID_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-android.splade-pp-ed", @@ -1264,8 +1264,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-android.splade-pp-ed.20231124.a66f86f.tar.gz" }, "0b6b36417df9095e9ed32e4127bdd2fd", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-android"), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-english.splade-pp-ed", @@ -1277,8 +1277,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-english.splade-pp-ed.20231124.a66f86f.tar.gz" }, "f2a5f68523117638f957bcc353c956c1", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-english"), BEIR_V1_0_0_CQADUPSTACK_GAMING_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-gaming.splade-pp-ed", @@ -1290,8 +1290,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gaming.splade-pp-ed.20231124.a66f86f.tar.gz" }, "804851ed2ca5c38464f28263fb664615", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + 
"InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-gaming"), BEIR_V1_0_0_CQADUPSTACK_GIS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-gis.splade-pp-ed", @@ -1303,8 +1303,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-gis.splade-pp-ed.20231124.a66f86f.tar.gz" }, "ee53ba7f26e678f39c3db8997785169a", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-gis"), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed", @@ -1316,8 +1316,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-mathematica.splade-pp-ed.20231124.a66f86f.tar.gz" }, "c3dd33ddfd364a0665450691963f9036", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-mathematica"), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-physics.splade-pp-ed", @@ -1329,8 +1329,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-physics.splade-pp-ed.20231124.a66f86f.tar.gz" }, "155a130b556072ec0b84788417361228", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-physics"), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-programmers.splade-pp-ed", @@ -1342,8 +1342,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-programmers.splade-pp-ed.20231124.a66f86f.tar.gz" }, "f0923dd88b7d4f050d54ff6f6efcc7f5", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-programmers"), BEIR_V1_0_0_CQADUPSTACK_STATS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-stats.splade-pp-ed", @@ -1355,8 +1355,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-stats.splade-pp-ed.20231124.a66f86f.tar.gz" }, "78e62040ed6d44e232e9381e96a56cc7", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-stats"), BEIR_V1_0_0_CQADUPSTACK_TEX_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-tex.splade-pp-ed", @@ -1368,8 +1368,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-tex.splade-pp-ed.20231124.a66f86f.tar.gz" }, "402088c62cbffeba3d710fec408226ed", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-tex"), BEIR_V1_0_0_CQADUPSTACK_UNIX_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-unix.splade-pp-ed", @@ -1381,8 +1381,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-unix.splade-pp-ed.20231124.a66f86f.tar.gz" }, "66e884e446ff183e07973c65ccf32625", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + 
"InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-unix"), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed", @@ -1394,8 +1394,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-webmasters.splade-pp-ed.20231124.a66f86f.tar.gz" }, "17be129cbe65b4e4e64a181f95a56972", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-webmasters"), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_SPLADE_PP_ED("beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed", @@ -1407,8 +1407,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-cqadupstack-wordpress.splade-pp-ed.20231124.a66f86f.tar.gz" }, "f20bacfe92f21bc75360a9978278e690", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-cqadupstack-wordpress"), BEIR_V1_0_0_QUORA_SPLADE_PP_ED("beir-v1.0.0-quora.splade-pp-ed", @@ -1420,8 +1420,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-quora.splade-pp-ed.20231124.a66f86f.tar.gz" }, "ce6dbaacf3b7b0e8282020565d324ea5", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-quora"), BEIR_V1_0_0_DBPEDIA_ENTITY_SPLADE_PP_ED("beir-v1.0.0-dbpedia-entity.splade-pp-ed", @@ -1433,8 +1433,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-dbpedia-entity.splade-pp-ed.20231124.a66f86f.tar.gz" }, 
"fc9ac8329b6e2c054290791e68e0a0e4", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-dbpedia-entity"), BEIR_V1_0_0_SCIDOCS_SPLADE_PP_ED("beir-v1.0.0-scidocs.splade-pp-ed", @@ -1446,8 +1446,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scidocs.splade-pp-ed.20231124.a66f86f.tar.gz" }, "3285b17da7cd88d2e6e62a3bfc465039", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scidocs"), BEIR_V1_0_0_FEVER_SPLADE_PP_ED("beir-v1.0.0-fever.splade-pp-ed", @@ -1459,8 +1459,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-fever.splade-pp-ed.20231124.a66f86f.tar.gz" }, "22e67800879422840f20c7d0008795a9", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-fever"), BEIR_V1_0_0_CLIMATE_FEVER_SPLADE_PP_ED("beir-v1.0.0-climate-fever.splade-pp-ed", @@ -1472,8 +1472,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-climate-fever.splade-pp-ed.20231124.a66f86f.tar.gz" }, "bd5f3c804874ca18f99590037873a1bc", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-climate-fever"), BEIR_V1_0_0_SCIFACT_SPLADE_PP_ED("beir-v1.0.0-scifact.splade-pp-ed", @@ -1485,8 +1485,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-inverted.beir-v1.0.0-scifact.splade-pp-ed.20231124.a66f86f.tar.gz" }, "3abe52209fcd04f411da438a37254e3a", IndexType.SPLADE_PP_ED, - "SpladePlusPlusEnsembleDistil", - "InvertedDenseVectorQueryGenerator", + "SpladePlusPlusEnsembleDistilEncoder.class", + "InvertedDenseVectorQueryGenerator.class", "beir-v1.0.0-scifact.flat"), // BEIR: BGE @@ -1499,8 +1499,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-trec-covid.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "2c8cba8525f8ec6920dbb4f0b4a2e0a6", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_BIOASQ_BGE_BASE_EN_15_HNSW("beir-v1.0.0-bioasq.bge-base-en-v1.5.hnsw", @@ -1512,8 +1512,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-bioasq.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "2f4cde27ef5ec3be1193e06854fdaae6", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_NFCORPUS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-nfcorpus.bge-base-en-v1.5.hnsw", @@ -1525,8 +1525,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "d0aa34bf35b59466e7064c424dd82e2c", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_NQ_BGE_BASE_EN_15_HNSW("beir-v1.0.0-nq.bge-base-en-v1.5.hnsw", @@ -1538,8 +1538,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-nq.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "b0bbd85821c734125ffbc0f7ea8f75ae", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + 
"BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_HOTPOTQA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-hotpotqa.bge-base-en-v1.5.hnsw", @@ -1551,8 +1551,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "83129157f2138a2240b69f8f5404e579", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_FIQA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-fiqa.bge-base-en-v1.5.hnsw", @@ -1564,8 +1564,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-fiqa.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "f2e3191b9d047b88b4692ec3ac87acd0", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SIGNAL1M_BGE_BASE_EN_15_HNSW("beir-v1.0.0-signal1m.bge-base-en-v1.5.hnsw", @@ -1577,8 +1577,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-signal1m.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "86a5dc12806c5e2f5f1e7cf646ef9004", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_TREC_NEWS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-trec-news.bge-base-en-v1.5.hnsw", @@ -1590,8 +1590,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-trec-news.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "fcb8fae8c46c76931bde0ad51ecb86f8", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_ROBUST04_BGE_BASE_EN_15_HNSW("beir-v1.0.0-robust04.bge-base-en-v1.5.hnsw", @@ -1603,8 +1603,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-robust04.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "1b975602bf6b87e0a5815a254eb6e945", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_ARGUANA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-arguana.bge-base-en-v1.5.hnsw", @@ -1616,8 +1616,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-arguana.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "468129157636526a3e96bc9427d62808", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_WEBIS_TOUCHE2020_BGE_BASE_EN_15_HNSW("beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.hnsw", @@ -1629,8 +1629,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "4639db80366f755bb552ce4c736c4aea", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_ANDROID_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.hnsw", @@ -1642,8 +1642,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "f7e1f2e737756a84b0273794dcb1038f", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.hnsw", @@ -1655,8 +1655,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, 
"fcdb3fc633b2ca027111536ba422aaed", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.hnsw", @@ -1668,8 +1668,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "d59b216b3df6eb1b724e2f20ceb14407", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.hnsw", @@ -1681,8 +1681,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "1dd42a28e388b30f42ede02565d445ca", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.hnsw", @@ -1694,8 +1694,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "cda37cb1893409c67908cf3aab1467fe", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.hnsw", @@ -1707,8 +1707,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "82f71e086930c7d8c5fe423173b9bc2e", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + 
"BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.hnsw", @@ -1720,8 +1720,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "a7a8e17dcef7b40fde2492436aab1458", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_STATS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.hnsw", @@ -1733,8 +1733,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "7a304fa64332256976bed5049392605b", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_TEX_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.hnsw", @@ -1746,8 +1746,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "bc5b41b294528611982615c0fcb7ebc7", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_UNIX_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.hnsw", @@ -1759,8 +1759,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "e42e7b6f46239211f9e9a3ed521d30eb", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), 
BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw", @@ -1772,8 +1772,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "21987ab658ba062397095226eb62aaf1", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.hnsw", @@ -1785,8 +1785,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "4e80be8087e8f282c42c2b57e377bb65", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_QUORA_BGE_BASE_EN_15_HNSW("beir-v1.0.0-quora.bge-base-en-v1.5.hnsw", @@ -1798,8 +1798,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-quora.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "064d785db557b011649d5f8b07237eb4", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_DBPEDIA_ENTITY_BGE_BASE_EN_15_HNSW("beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.hnsw", @@ -1811,8 +1811,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "323d47f84a54894ba5e6ca215999a533", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SCIDOCS_BGE_BASE_EN_15_HNSW("beir-v1.0.0-scidocs.bge-base-en-v1.5.hnsw", @@ -1824,8 +1824,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-scidocs.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "50668564faa9723160b1dba37afbf6d9", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_FEVER_BGE_BASE_EN_15_HNSW("beir-v1.0.0-fever.bge-base-en-v1.5.hnsw", @@ -1837,8 +1837,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-fever.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "33f67e73786a41b454bf88ac2a7c21c7", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CLIMATE_FEVER_BGE_BASE_EN_15_HNSW("beir-v1.0.0-climate-fever.bge-base-en-v1.5.hnsw", @@ -1850,8 +1850,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-climate-fever.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "412337f9f8182e8ec6417bc3cd48288f", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SCIFACT_BGE_BASE_EN_15_HNSW("beir-v1.0.0-scifact.bge-base-en-v1.5.hnsw", @@ -1863,8 +1863,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.beir-v1.0.0-scifact.bge-base-en-v1.5.20240223.43c9ec.tar.gz" }, "6de5a41a301575933fa9932f9ecb404d", IndexType.DENSE_HNSW, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), // BEIR: BGE (flat) @@ -1877,8 +1877,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-trec-covid.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "516748bfd1923a999a56160e93b8daae", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), 
BEIR_V1_0_0_BIOASQ_BGE_BASE_EN_15_FLAT("beir-v1.0.0-bioasq.bge-base-en-v1.5.flat", @@ -1890,8 +1890,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-bioasq.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "b470cc88cdf338a7325f14eb05bf784d", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_NFCORPUS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-nfcorpus.bge-base-en-v1.5.flat", @@ -1903,8 +1903,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-nfcorpus.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "9c1d92c88faccc72d0e869439cd28ad5", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_NQ_BGE_BASE_EN_15_FLAT("beir-v1.0.0-nq.bge-base-en-v1.5.flat", @@ -1916,8 +1916,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-nq.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "ad668f12f998052ec22b91f808e301e6", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_HOTPOTQA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-hotpotqa.bge-base-en-v1.5.flat", @@ -1929,8 +1929,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-hotpotqa.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "43422329006eea8648ac2928589a0512", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_FIQA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-fiqa.bge-base-en-v1.5.flat", @@ -1942,8 +1942,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-fiqa.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, 
"1b727263a0195430dbc20a3cc412f819", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SIGNAL1M_BGE_BASE_EN_15_FLAT("beir-v1.0.0-signal1m.bge-base-en-v1.5.flat", @@ -1955,8 +1955,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-signal1m.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "7e3967b5e0326a3e4063fde12ccfd9d0", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_TREC_NEWS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-trec-news.bge-base-en-v1.5.flat", @@ -1968,8 +1968,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-trec-news.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "3da92ed6d976cd02333dd9078c0220ae", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_ROBUST04_BGE_BASE_EN_15_FLAT("beir-v1.0.0-robust04.bge-base-en-v1.5.flat", @@ -1981,8 +1981,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-robust04.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "7750b4abbc60fe821c5948a81296f1d0", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_ARGUANA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-arguana.bge-base-en-v1.5.flat", @@ -1994,8 +1994,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-arguana.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "ac0f12b71080c92ab752983a0684686e", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), 
BEIR_V1_0_0_WEBIS_TOUCHE2020_BGE_BASE_EN_15_FLAT("beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.flat", @@ -2007,8 +2007,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-webis-touche2020.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "d9572d89c85eccbc781d552699fa2e92", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_ANDROID_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.flat", @@ -2020,8 +2020,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-android.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "354f39e9e4cf11eb2f2b99409f672995", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_ENGLISH_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.flat", @@ -2033,8 +2033,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-english.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "36f72965dcaf9e2dee697152bb38b6d9", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_GAMING_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.flat", @@ -2046,8 +2046,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gaming.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "36c47d9387134e88321fa1d4e8f1503e", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_GIS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.flat", @@ -2059,8 +2059,8 @@ 
public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-gis.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "74e85b1e2847e13343e3b90b4a28a96e", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.flat", @@ -2072,8 +2072,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-mathematica.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "e720e0a7351574161570a77908094e73", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_PHYSICS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.flat", @@ -2085,8 +2085,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-physics.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "026a07c3c331fe7be2a8441b124c9f4f", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.flat", @@ -2098,8 +2098,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-programmers.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "4cb595ae05660973d0b381f1791f0c50", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_STATS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.flat", @@ -2111,8 +2111,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-stats.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "7f0cc4b9036c3d92f82ac86beeb1767e", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_TEX_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.flat", @@ -2124,8 +2124,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-tex.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "7770dfcb197d6a37492c634e5f17beb5", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_UNIX_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.flat", @@ -2137,8 +2137,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-unix.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "52f04d06f1d6d8ecdb2b0496ebd99ab8", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.flat", @@ -2150,8 +2150,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "7bac4a98b9d3dc95f979bac8beedd648", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.flat", @@ -2163,8 +2163,8 @@ public enum IndexInfo { new String[] { 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-cqadupstack-wordpress.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "5a9802e2fc1eeb06a83723172f19b709", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_QUORA_BGE_BASE_EN_15_FLAT("beir-v1.0.0-quora.bge-base-en-v1.5.flat", @@ -2176,8 +2176,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-quora.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "21dfce18ce9e4351af900c343556c9e2", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_DBPEDIA_ENTITY_BGE_BASE_EN_15_FLAT("beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.flat", @@ -2189,8 +2189,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "4158d064d8bb61ed361cea98e6187248", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SCIDOCS_BGE_BASE_EN_15_FLAT("beir-v1.0.0-scidocs.bge-base-en-v1.5.flat", @@ -2202,8 +2202,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-scidocs.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "db316cf58c1f8e44aa0c62d7dcd71ec1", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_FEVER_BGE_BASE_EN_15_FLAT("beir-v1.0.0-fever.bge-base-en-v1.5.flat", @@ -2215,8 +2215,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-fever.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "e53c73a00737cf069360dc66fdc193f8", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + 
"BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_CLIMATE_FEVER_BGE_BASE_EN_15_FLAT("beir-v1.0.0-climate-fever.bge-base-en-v1.5.flat", @@ -2228,8 +2228,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-climate-fever.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "bdf55c67f0abba5060fead09ef972d29", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""), BEIR_V1_0_0_SCIFACT_BGE_BASE_EN_15_FLAT("beir-v1.0.0-scifact.bge-base-en-v1.5.flat", @@ -2241,8 +2241,8 @@ public enum IndexInfo { new String[] { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-flat.beir-v1.0.0-scifact.bge-base-en-v1.5.20240618.6cf601.tar.gz" }, "556abd7e9fcffbf06057ce3111cf4cc5", IndexType.DENSE_FLAT, - "BgeBaseEn15", - "VectorQueryGenerator", + "BgeBaseEn15.class", + "VectorQueryGenerator.class", ""); public final String indexName; diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index 23ae27aca8..c60b54223b 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -35,7 +35,7 @@ import org.springframework.web.bind.annotation.ExceptionHandler; @RestController -@RequestMapping(path = "/api/v1.0") +@RequestMapping(path = "/api/v1.1") public class ControllerV1_0 { private final Map services = new ConcurrentHashMap<>(); @@ -47,16 +47,14 @@ public Map handleIllegalArgumentException(IllegalArgumentExcepti @RequestMapping(method = RequestMethod.GET, path = { "/indexes/{index}/search", "/search" }) public Map searchIndex(@PathVariable(value = "index", required = true) String index, - @RequestParam("query") String query, - @RequestParam(value = "hits", defaultValue = "10") int hits, - @RequestParam(value = "qid", defaultValue = "") String qid, - @RequestParam(value = "efSearch", required = false) 
Integer efSearch, - @RequestParam(value = "encoder", required = false) String encoder, - @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { - - if (index == null) { - throw new IllegalArgumentException("Index parameter is required"); - } + @RequestParam("query") String query, + @RequestParam(value = "hits", defaultValue = "10") int hits, + @RequestParam(value = "qid", defaultValue = "") String qid, + @RequestParam(value = "efSearch", required = false) Integer efSearch, + @RequestParam(value = "encoder", required = false) String encoder, + @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { + + if (index == null) throw new IllegalArgumentException("Index parameter is required"); if (!IndexInfo.contains(index)) { throw new IllegalArgumentException("Index " + index + " not found!"); @@ -94,42 +92,40 @@ public Map> listIndexes() { IndexInfo[] indexes = IndexInfo.values(); Map> indexList = new LinkedHashMap<>(); for (IndexInfo index : indexes) { - indexList.put(index.indexName, Map.of( - "indexName", index.indexName, - "description", index.description, - "filename", index.filename, - "corpus", index.corpus, - "model", index.model, - "urls", index.urls, - "md5", index.md5, - "cached", getIndexStatus(index.indexName).get("cached"))); + Map indexInfo = new LinkedHashMap<>(); + indexInfo.put("indexName", index.indexName); + indexInfo.put("description", index.description); + indexInfo.put("filename", index.filename); + indexInfo.put("corpus", index.corpus); + indexInfo.put("model", index.model); + indexInfo.put("urls", index.urls); + indexInfo.put("md5", index.md5); + indexInfo.put("cached", getIndexStatus(index.indexName).get("cached")); + indexInfo.put("indexType", index.indexType); + indexInfo.put("encoder", index.encoder); + indexInfo.put("queryGenerator", index.queryGenerator); + indexInfo.put("invertedIndex", index.invertedIndex); + + indexList.put(index.indexName, indexInfo); } return indexList; } 
@RequestMapping(method = RequestMethod.POST, path = "/indexes/{index}/settings") public Map updateIndexSettings( - @PathVariable("index") String index, - @RequestParam(value = "efSearch", required = false) String efSearch, - @RequestParam(value = "encoder", required = false) String encoder, - @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { + @PathVariable("index") String index, + @RequestParam(value = "efSearch", required = false) String efSearch, + @RequestParam(value = "encoder", required = false) String encoder, + @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { if (!IndexInfo.contains(index)) { throw new IllegalArgumentException("Index " + index + " not found!"); } SearchService service = getOrCreateSearchService(index); - - if (efSearch != null) { - service.setEfSearchOverride(efSearch); - } - if (encoder != null) { - service.setEncoderOverride(encoder); - } - if (queryGenerator != null) { - service.setQueryGeneratorOverride(queryGenerator); - } - + if (efSearch != null) service.setEfSearchOverride(efSearch); + if (encoder != null) service.setEncoderOverride(encoder); + if (queryGenerator != null) service.setQueryGeneratorOverride(queryGenerator); return Map.of("status", "success"); } @@ -144,19 +140,13 @@ public Map getIndexSettings(@PathVariable("index") String index) Map settings = new HashMap<>(); Integer efSearch = service.getEfSearchOverride(); - if (efSearch != null) { - settings.put("efSearch", efSearch); - } + if (efSearch != null) settings.put("efSearch", efSearch); String encoder = service.getEncoderOverride(); - if (encoder != null) { - settings.put("encoder", encoder); - } + if (encoder != null) settings.put("encoder", encoder); String queryGenerator = service.getQueryGeneratorOverride(); - if (queryGenerator != null) { - settings.put("queryGenerator", queryGenerator); - } + if (queryGenerator != null) settings.put("queryGenerator", queryGenerator); return settings; } From 
10413acf0f7c0b8f67d0c4035cdde9d379a6b45e Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 10:32:38 -0500 Subject: [PATCH 15/18] refactor(api): improve error handling and service management - Remove redundant null check for index parameter since @PathVariable(required = true) handles it - Fix document endpoint to use cached SearchService instead of creating new instances - Add index existence check before document retrieval - Add HNSW index type validation for document retrieval - Standardize error handling format across endpoints --- src/main/java/io/anserini/server/ControllerV1_0.java | 12 +++++------- src/main/java/io/anserini/server/SearchService.java | 1 + 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/ControllerV1_0.java index c60b54223b..e99e6890c7 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/ControllerV1_0.java @@ -54,11 +54,7 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestParam(value = "encoder", required = false) String encoder, @RequestParam(value = "queryGenerator", required = false) String queryGenerator) { - if (index == null) throw new IllegalArgumentException("Index parameter is required"); - - if (!IndexInfo.contains(index)) { - throw new IllegalArgumentException("Index " + index + " not found!"); - } + if (!IndexInfo.contains(index)) throw new IllegalArgumentException("Index " + index + " not found!"); SearchService searchService = getOrCreateSearchService(index); List> candidates = searchService.search(query, hits, efSearch, encoder, queryGenerator); @@ -72,8 +68,10 @@ public Map searchIndex(@PathVariable(value = "index", required = @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/documents/{docid}") public Map getDocument(@PathVariable("index") String index, @PathVariable("docid") String docid) { - SearchService 
searchService = new SearchService(index); - return searchService.getDocument(docid); + if (!IndexInfo.contains(index)) { + throw new IllegalArgumentException("Index " + index + " not found!"); + } + return getOrCreateSearchService(index).getDocument(docid); } @RequestMapping(method = RequestMethod.GET, path = "/indexes/{index}/status") diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 0fe41f0007..9f265c6263 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -114,6 +114,7 @@ public List> search(String query, int hits, } public Map getDocument(String docid) { + if (!isHnswIndex) throw new IllegalArgumentException("getDocument is only supported for HNSW indexes"); try (SimpleSearcher searcher = new SimpleSearcher(indexDir)) { String raw = searcher.doc(docid).get(Constants.RAW); Map candidate = new LinkedHashMap<>(); From 273eb06b6165aa014238d79056e0d55ebc13f343 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 11:07:22 -0500 Subject: [PATCH 16/18] refactor(api): improve encoder/generator class handling and remove non-existent Cohere encoder --- src/main/java/io/anserini/index/IndexInfo.java | 4 ++-- .../java/io/anserini/server/SearchService.java | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 7c567133c9..53417897e3 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ b/src/main/java/io/anserini/index/IndexInfo.java @@ -126,7 +126,7 @@ public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, "c7294ca988ae1b812d427362ffca1ee2", IndexType.DENSE_HNSW, - "CohereEmbedEnglishV30", + null, "VectorQueryGenerator.class", "msmarco-v1-passage"), @@ -140,7 +140,7 @@ 
public enum IndexInfo { "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0.20240228.eacd13.tar.gz" }, "dbaca578cc8495f504cdd0a7187f4c36", IndexType.DENSE_HNSW, - "CohereEmbedEnglishV30", + null, "VectorQueryGenerator.class", "msmarco-v1-passage"), diff --git a/src/main/java/io/anserini/server/SearchService.java b/src/main/java/io/anserini/server/SearchService.java index 9f265c6263..cfca37055b 100644 --- a/src/main/java/io/anserini/server/SearchService.java +++ b/src/main/java/io/anserini/server/SearchService.java @@ -93,12 +93,12 @@ public List> search(String query, int hits, args.efSearch = efSearch != null ? efSearch : getEfSearchOverride() != null ? getEfSearchOverride() : IndexInfo.DEFAULT_EF_SEARCH; - args.encoder = encoder != null ? encoder - : getEncoderOverride() != null ? getEncoderOverride() - : indexInfo.encoder; - args.queryGenerator = queryGenerator != null ? queryGenerator - : getQueryGeneratorOverride() != null ? getQueryGeneratorOverride() - : indexInfo.queryGenerator; + args.encoder = encoder != null ? encoder.replace(".class", "") + : getEncoderOverride() != null ? getEncoderOverride().replace(".class", "") + : indexInfo.encoder != null ? indexInfo.encoder.replace(".class", "") : null; + args.queryGenerator = queryGenerator != null ? queryGenerator.replace(".class", "") + : getQueryGeneratorOverride() != null ? 
getQueryGeneratorOverride().replace(".class", "") + : indexInfo.queryGenerator.replace(".class", ""); try (HnswDenseSearcher searcher = new HnswDenseSearcher(args)) { ScoredDoc[] results = searcher.search(query, hits); List> candidates = new ArrayList<>(); @@ -167,7 +167,7 @@ public void setEncoderOverride(String value) { if (!value.equals(indexInfo.encoder)) { throw new IllegalArgumentException("Unsupported encoder: " + value + " for index " + prebuiltIndex); } - indexOverrides.put("encoder", value); + indexOverrides.put("encoder", value.replace(".class", "")); } public void setQueryGeneratorOverride(String value) { @@ -178,7 +178,7 @@ public void setQueryGeneratorOverride(String value) { if (!value.equals(indexInfo.queryGenerator)) { throw new IllegalArgumentException("Unsupported queryGenerator: " + value + " for index " + prebuiltIndex); } - indexOverrides.put("queryGenerator", value); + indexOverrides.put("queryGenerator", value.replace(".class", "")); } private void validateSearchParameters(String query, int hits) { From 3922dbe845100be0a40d60311e9c58284890c074 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 11:23:08 -0500 Subject: [PATCH 17/18] Undo change to testNumPrebuiltIndexes - 169 as it was before --- src/test/java/io/anserini/index/PrebuiltIndexTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/io/anserini/index/PrebuiltIndexTest.java b/src/test/java/io/anserini/index/PrebuiltIndexTest.java index ec3b2c594b..a89c7c906b 100644 --- a/src/test/java/io/anserini/index/PrebuiltIndexTest.java +++ b/src/test/java/io/anserini/index/PrebuiltIndexTest.java @@ -60,6 +60,6 @@ public void testUrls() { // test number of prebuilt-indexes @Test public void testNumPrebuiltIndexes() { - assertEquals(166, IndexInfo.values().length); + assertEquals(169, IndexInfo.values().length); } } From f0aaf5ab4918a90ae075762a59bb55e21af6e187 Mon Sep 17 00:00:00 2001 From: vincent-4 Date: Wed, 29 Jan 2025 12:00:45 -0500 Subject: 
[PATCH 18/18] refactor(server): rename ControllerV1_0 to Controller and add SearchService tests --- .../{ControllerV1_0.java => Controller.java} | 2 +- .../io/anserini/server/ControllerTest.java | 6 +- .../io/anserini/server/SearchServiceTest.java | 86 +++++++++++++++++++ 3 files changed, 90 insertions(+), 4 deletions(-) rename src/main/java/io/anserini/server/{ControllerV1_0.java => Controller.java} (99%) create mode 100644 src/test/java/io/anserini/server/SearchServiceTest.java diff --git a/src/main/java/io/anserini/server/ControllerV1_0.java b/src/main/java/io/anserini/server/Controller.java similarity index 99% rename from src/main/java/io/anserini/server/ControllerV1_0.java rename to src/main/java/io/anserini/server/Controller.java index e99e6890c7..40897c6c64 100644 --- a/src/main/java/io/anserini/server/ControllerV1_0.java +++ b/src/main/java/io/anserini/server/Controller.java @@ -36,7 +36,7 @@ @RestController @RequestMapping(path = "/api/v1.1") -public class ControllerV1_0 { +public class Controller { private final Map services = new ConcurrentHashMap<>(); @ResponseStatus(HttpStatus.BAD_REQUEST) diff --git a/src/test/java/io/anserini/server/ControllerTest.java b/src/test/java/io/anserini/server/ControllerTest.java index 3cf0538089..c96b1d6006 100644 --- a/src/test/java/io/anserini/server/ControllerTest.java +++ b/src/test/java/io/anserini/server/ControllerTest.java @@ -31,7 +31,7 @@ public class ControllerTest { @Test public void testSearch() throws Exception { - ControllerV1_0 controller = new ControllerV1_0(); + Controller controller = new Controller(); Map results = controller.searchIndex("msmarco-v1-passage", "Albert Einstein", 10, "", null, null, null); assertNotNull(results); @@ -49,7 +49,7 @@ public void testSearch() throws Exception { @Test public void testIndexNotFound() throws Exception { - ControllerV1_0 controller = new ControllerV1_0(); + Controller controller = new Controller(); assertThrows(IllegalArgumentException.class, () -> { Map results 
= controller.searchIndex("nonexistent-index", "Albert Einstein", 10, "", null, null, null); @@ -58,7 +58,7 @@ public void testIndexNotFound() throws Exception { @Test public void testListIndexes() throws Exception { - ControllerV1_0 controller = new ControllerV1_0(); + Controller controller = new Controller(); Map> indexes = controller.listIndexes(); assertEquals(indexes.size(), IndexInfo.values().length); } diff --git a/src/test/java/io/anserini/server/SearchServiceTest.java b/src/test/java/io/anserini/server/SearchServiceTest.java new file mode 100644 index 0000000000..b2f4666e3d --- /dev/null +++ b/src/test/java/io/anserini/server/SearchServiceTest.java @@ -0,0 +1,86 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.anserini.server; + +import java.util.List; +import java.util.Map; + +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; + +public class SearchServiceTest { + + @Test + public void testBasicSearch() throws Exception { + SearchService service = new SearchService("msmarco-v1-passage"); + List> results = service.search("Albert Einstein", 10); + assertNotNull(results); + assertTrue(results.size() <= 10); + } + + @Test + public void testInvalidSearchParameters() { + SearchService service = new SearchService("msmarco-v1-passage"); + assertThrows(IllegalArgumentException.class, () -> { + service.search("", 10); + }); + assertThrows(IllegalArgumentException.class, () -> { + service.search("query", 0); + }); + } + + @Test + public void testHnswSearch() throws Exception { + SearchService service = new SearchService("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw"); + List> results = service.search("test query", 5, 100, null, null); + assertNotNull(results); + assertTrue(results.size() <= 5); + } + + @Test + public void testSettingsOverrides() { + SearchService service = new SearchService("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw"); + service.setEfSearchOverride("200"); + assertEquals(200, (int) service.getEfSearchOverride()); + + assertThrows(IllegalArgumentException.class, () -> { + service.setEfSearchOverride("-1"); + }); + } + + @Test + public void testGetDocument() throws Exception { + SearchService service = new SearchService("beir-v1.0.0-cqadupstack-webmasters.bge-base-en-v1.5.hnsw"); + List> results = service.search("test query", 1); + assertNotNull(results); + if (!results.isEmpty()) { + String docid = (String) results.get(0).get("docid"); + Map doc = service.getDocument(docid); + assertNotNull(doc); + } + } + + @Test + public void testInvalidIndex() { + 
assertThrows(RuntimeException.class, () -> { + new SearchService("nonexistent-index"); + }); + } +} \ No newline at end of file