From 6c4b2412de07e7586d1f03dc6b42cb57a456621c Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Tue, 31 Dec 2024 10:02:37 -0500 Subject: [PATCH] Working on converting to a standalone app. --- .../eval/engine/OpenSearchEngine.java | 47 ++++++++ .../opensearch/eval/engine/SearchEngine.java | 17 +++ .../eval/runners/AbstractQuerySetRunner.java | 100 +----------------- .../runners/OpenSearchQuerySetRunner.java | 7 +- 4 files changed, 69 insertions(+), 102 deletions(-) diff --git a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java index 77da718..b7c0ea3 100644 --- a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java +++ b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java @@ -14,11 +14,16 @@ import org.apache.logging.log4j.Logger; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.FieldValue; import org.opensearch.client.opensearch._types.Refresh; import org.opensearch.client.opensearch._types.Time; import org.opensearch.client.opensearch._types.mapping.IntegerNumberProperty; import org.opensearch.client.opensearch._types.mapping.Property; import org.opensearch.client.opensearch._types.mapping.TypeMapping; +import org.opensearch.client.opensearch._types.query_dsl.BoolQuery; +import org.opensearch.client.opensearch._types.query_dsl.MatchQuery; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch._types.query_dsl.TermQuery; import org.opensearch.client.opensearch.core.BulkRequest; import org.opensearch.client.opensearch.core.BulkResponse; import org.opensearch.client.opensearch.core.IndexRequest; @@ -43,6 +48,7 @@ import 
java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -120,6 +126,47 @@ public String indexQuerySet(final QuerySet querySet) throws IOException { } + @Override + public QuerySet getQuerySet(String querySetId) throws IOException { + + final Query query = Query.of(q -> q.term(m -> m.field("_id").value(FieldValue.of(querySetId)))); + + final SearchResponse searchResponse = client.search(s -> s.index(Constants.QUERY_SETS_INDEX_NAME).query(query).size(1), QuerySet.class); + + // TODO: Handle the query set not being found. + + return searchResponse.hits().hits().get(0).source(); + + } + + @Override + public Double getJudgmentValue(final String judgmentsId, final String userQuery, final String documentId) throws Exception { + + var boolQuery = BoolQuery.of(bq -> bq + .must( + List.of( + MatchQuery.of(mq -> mq.field("judgments_id").query(FieldValue.of(judgmentsId))).toQuery(), + MatchQuery.of(mq -> mq.field("query").query(FieldValue.of(userQuery))).toQuery(), + MatchQuery.of(mq -> mq.field("document_id").query(FieldValue.of(documentId))).toQuery() + ) + ) + ); + + final Query query = Query.of(q -> q.bool(boolQuery)); + + final SearchResponse searchResponse = client.search(s -> s.index(Constants.JUDGMENTS_INDEX_NAME) + .query(query) + .from(0) + .size(1), Judgment.class); + + if(searchResponse.hits().hits().isEmpty()) { + return Double.NaN; + } else { + return searchResponse.hits().hits().get(0).source().getJudgment(); + } + + } + @Override public Collection getUbiQueries() throws IOException { diff --git a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/SearchEngine.java b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/SearchEngine.java index 611308b..b0f8191 100644 --- a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/SearchEngine.java +++ 
b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/engine/SearchEngine.java @@ -30,4 +30,21 @@ public abstract class SearchEngine { public abstract String indexQuerySet(QuerySet querySet) throws IOException; public abstract Collection getUbiQueries() throws IOException; + /** + * Gets a query set from the index. + * @param querySetId The ID of the query set to get. + * @return The query set with the given ID. + * @throws IOException Thrown if the query set cannot be retrieved. + */ + public abstract QuerySet getQuerySet(String querySetId) throws IOException; + + /** + * Get a judgment from the index. + * @param judgmentsId The ID of the judgments to find. + * @param query The user query. + * @param documentId The document ID. + * @return The value of the judgment, or NaN if the judgment cannot be found. + */ + public abstract Double getJudgmentValue(final String judgmentsId, final String query, final String documentId) throws Exception; + } diff --git a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java index 3260ae0..6fad96f 100644 --- a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java +++ b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java @@ -10,15 +10,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.client.opensearch.core.SearchRequest; -import org.opensearch.client.opensearch.core.SearchResponse; -import org.opensearch.eval.Constants; import org.opensearch.eval.engine.SearchEngine; import java.util.ArrayList; -import java.util.Collection; import java.util.List; -import java.util.Map; /** * Base class for query set runners. 
Classes that extend this class @@ -58,99 +53,6 @@ abstract QuerySetRunResult run(String querySetId, final String judgmentsId, fina */ abstract void save(QuerySetRunResult result) throws Exception; - /** - * Gets a query set from the index. - * @param querySetId The ID of the query set to get. - * @return The query set as a collection of maps of query to frequency - * @throws Exception Thrown if the query set cannot be retrieved. - */ - public final Collection> getQuerySet(final String querySetId) throws Exception { - - // Get the query set. - final SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(QueryBuilders.matchQuery("_id", querySetId)); - - // Will be at most one match. - sourceBuilder.from(0); - sourceBuilder.size(1); - - final SearchRequest searchRequest = new SearchRequest(Constants.QUERY_SETS_INDEX_NAME).source(sourceBuilder); - - // TODO: Don't use .get() - final SearchResponse searchResponse = client.search(searchRequest).get(); - - if(searchResponse.getHits().getHits().length > 0) { - - // The queries from the query set that will be run. - return (Collection>) searchResponse.getHits().getAt(0).getSourceAsMap().get("queries"); - - } else { - - LOGGER.error("Unable to get query set with ID {}", querySetId); - - // The query set was not found. - throw new RuntimeException("The query set with ID " + querySetId + " was not found."); - - } - - } - - /** - * Get a judgment from the index. - * @param judgmentsId The ID of the judgments to find. - * @param query The user query. - * @param documentId The document ID. - * @return The value of the judgment, or NaN if the judgment cannot be found. - */ - public Double getJudgmentValue(final String judgmentsId, final String query, final String documentId) throws Exception { - - // Find a judgment that matches the judgments_id, query_id, and document_id fields in the index. 
- - final BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); - boolQueryBuilder.must(QueryBuilders.termQuery("judgments_id", judgmentsId)); - boolQueryBuilder.must(QueryBuilders.termQuery("query", query)); - boolQueryBuilder.must(QueryBuilders.termQuery("document_id", documentId)); - - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(boolQueryBuilder); - - // Will be a max of 1 result since we are getting the judgments by ID. - searchSourceBuilder.from(0); - searchSourceBuilder.size(1); - - // Only include the judgment field in the response. - final String[] includeFields = new String[] {"judgment"}; - final String[] excludeFields = new String[] {}; - searchSourceBuilder.fetchSource(includeFields, excludeFields); - - final SearchRequest searchRequest = new SearchRequest(Constants.JUDGMENTS_INDEX_NAME).source(searchSourceBuilder); - - Double judgment = Double.NaN; - - final SearchResponse searchResponse = client.search(searchRequest).get(); - - if (searchResponse.getHits().getHits().length > 0) { - - final Map j = searchResponse.getHits().getAt(0).getSourceAsMap(); - - // LOGGER.debug("Judgment contains a value: {}", j.get("judgment")); - - // TODO: Why does this not exist in some cases? - if(j.containsKey("judgment")) { - judgment = (Double) j.get("judgment"); - } - - } else { - - // No judgment for this query/doc pair exists. - judgment = Double.NaN; - - } - - return judgment; - - } - /** * Gets the judgments for a query / document pairs. * @param judgmentsId The judgments collection for which the judgment to retrieve belongs. @@ -174,7 +76,7 @@ protected RelevanceScores getRelevanceScores(final String judgmentsId, final Str final String documentId = orderedDocumentIds.get(i); // Find the judgment value for this combination of query and documentId from the index. 
- final Double judgmentValue = getJudgmentValue(judgmentsId, query, documentId); + final Double judgmentValue = searchEngine.getJudgmentValue(judgmentsId, query, documentId); // If a judgment for this query/doc pair is not found, Double.NaN will be returned. if(!Double.isNaN(judgmentValue)) { diff --git a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java index c4f2380..b761359 100644 --- a/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java +++ b/opensearch-search-quality-evaluation-framework/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java @@ -16,6 +16,7 @@ import org.opensearch.eval.metrics.NdcgSearchMetric; import org.opensearch.eval.metrics.PrecisionSearchMetric; import org.opensearch.eval.metrics.SearchMetric; +import org.opensearch.eval.model.data.QuerySet; import org.opensearch.eval.utils.TimeUtils; import java.util.ArrayList; @@ -48,15 +49,15 @@ public QuerySetRunResult run(final String querySetId, final String judgmentsId, final String searchPipeline, final String idField, final String query, final int k, final double threshold) throws Exception { - final Collection> querySet = getQuerySet(querySetId); - LOGGER.info("Found {} queries in query set {}", querySet.size(), querySetId); + final QuerySet querySet = searchEngine.getQuerySet(querySetId); + LOGGER.info("Found {} queries in query set {}", querySet.getQuerySetQueries().size(), querySetId); try { // The results of each query. final List queryResults = new ArrayList<>(); - for (Map queryMap : querySet) { + for (Map queryMap : querySet.getQuerySetQueries()) { // Loop over each query in the map and run each one. for (final String userQuery : queryMap.keySet()) {