From b0309508db7625ee0173e5bf0595ee14df0e64fd Mon Sep 17 00:00:00 2001
From: Nora Er-Rouhly
Date: Fri, 28 Feb 2025 11:29:18 +0000
Subject: [PATCH] Feature/similarity score threshold (#127)

* remove bm25 query

* understanding queries

* move notebook

* moving notebook to notebooks folder

* setting similar score threshold

* linting

* fixing unit tests

---------

Co-authored-by: nora-errouhly
Co-authored-by: Tash Boyse <57753415+nboyse@users.noreply.github.com>
Co-authored-by: Natasha Boyse
---
 redbox-core/redbox/retriever/queries.py       | 3 ++-
 redbox-core/tests/retriever/test_retriever.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/redbox-core/redbox/retriever/queries.py b/redbox-core/redbox/retriever/queries.py
index e5d67fab..86a7378a 100644
--- a/redbox-core/redbox/retriever/queries.py
+++ b/redbox-core/redbox/retriever/queries.py
@@ -130,6 +130,7 @@ def build_document_query(
 
     return {
         "size": ai_settings.rag_k,
+        "min_score": 0.65,
         "query": {
             "bool": {
                 "must": [
@@ -138,7 +139,7 @@ def build_document_query(
                             "vector_field": {
                                 "vector": query_vector,
                                 "k": ai_settings.rag_num_candidates,
-                                "boost": ai_settings.knn_boost,
+                                # "boost": ai_settings.knn_boost,
                                 "filter": query_filter,
                             }
                         }
diff --git a/redbox-core/tests/retriever/test_retriever.py b/redbox-core/tests/retriever/test_retriever.py
index 894a7c06..802f0856 100644
--- a/redbox-core/tests/retriever/test_retriever.py
+++ b/redbox-core/tests/retriever/test_retriever.py
@@ -7,7 +7,7 @@
 
 TEST_CHAIN_PARAMETERS = (
     {
-        "rag_k": 1,
+        "rag_k": 0,
         "rag_num_candidates": 100,
         "match_boost": 1,
         "knn_boost": 2,
@@ -19,7 +19,7 @@
         "rag_gauss_scale_max": 2.0,
     },
     {
-        "rag_k": 2,
+        "rag_k": 0,
         "rag_num_candidates": 100,
         "match_boost": 1,
         "knn_boost": 2,