From b17024243a57c6dfd31e4ad2d8fac01203fb3b52 Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Fri, 22 Nov 2024 09:53:06 -0500 Subject: [PATCH] #44 Working on wiring up PPTSS. --- data/esci/ubi_queries_events.ndjson.bz2 | 3 - .../scripts/create-query-set-no-sampling.sh | 2 +- .../create-query-set-using-pptss-sampling.sh | 4 +- .../SearchQualityEvaluationRestHandler.java | 89 +++++-------------- .../eval/samplers/AbstractQuerySampler.java | 64 +++++++++++++ .../samplers/AbstractSamplerParameters.java | 41 +++++++++ .../eval/samplers/AllQueriesQuerySampler.java | 73 +++++++++++++++ .../AllQueriesQuerySamplerParameters.java | 17 ++++ ...oportionalToSizeAbstractQuerySampler.java} | 56 ++++++++++-- ...obabilityProportionalToSizeParameters.java | 12 +-- .../eval/samplers/QuerySampler.java | 33 ------- 11 files changed, 271 insertions(+), 123 deletions(-) delete mode 100644 data/esci/ubi_queries_events.ndjson.bz2 create mode 100644 opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java create mode 100644 opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java create mode 100644 opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java create mode 100644 opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java rename opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/{ProbabilityProportionalToSizeQuerySampler.java => ProbabilityProportionalToSizeAbstractQuerySampler.java} (56%) delete mode 100644 opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/QuerySampler.java diff --git a/data/esci/ubi_queries_events.ndjson.bz2 b/data/esci/ubi_queries_events.ndjson.bz2 deleted file mode 100644 index d728d94..0000000 --- a/data/esci/ubi_queries_events.ndjson.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6811cd6c99311f7b08a549e7783eefdc84bf3bc40e3bfe3abef65efa91548fe9 -size 36696778 diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh index c04886a..fc053d2 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh @@ -1,7 +1,7 @@ #!/bin/bash -e #QUERY_SET=`curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss" | jq .query_set | tr -d '"'` -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=none&max_queries=500" +curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=none&query_set_size=500" #echo ${QUERY_SET} diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh index 5f9f928..96c822a 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh @@ -1,11 +1,11 @@ #!/bin/bash -e #QUERY_SET=`curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss" | jq .query_set | tr -d '"'` -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss&max_queries=500" +curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss&query_set_size=500" #echo ${QUERY_SET} -#curl -s http://localhost:9200/search_quality_eval_query_sets/_search | jq +#curl -s -X GET http://localhost:9200/search_quality_eval_query_sets/_doc/${QUERY_SET} | jq # Run the query set now. #curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET}" | jq diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java index 966c8c2..8743e0b 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java @@ -8,7 +8,6 @@ */ package org.opensearch.eval; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.delete.DeleteRequest; @@ -24,15 +23,16 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel; import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModelParameters; +import org.opensearch.eval.samplers.AllQueriesQuerySampler; +import org.opensearch.eval.samplers.AllQueriesQuerySamplerParameters; +import org.opensearch.eval.samplers.ProbabilityProportionalToSizeAbstractQuerySampler; import org.opensearch.eval.samplers.ProbabilityProportionalToSizeParameters; -import org.opensearch.eval.samplers.ProbabilityProportionalToSizeQuerySampler; import org.opensearch.index.query.QueryBuilders; import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule; import org.opensearch.rest.BaseRestHandler; import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; -import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; import java.io.IOException; @@ -40,10 +40,8 @@ import java.time.temporal.ChronoUnit; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; public class SearchQualityEvaluationRestHandler extends BaseRestHandler { @@ -89,7 +87,7 @@ public List routes() { protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { // Handle managing query sets. - if(StringUtils.equalsIgnoreCase(request.path(), QUERYSET_MANAGEMENT_URL)) { + if(QUERYSET_MANAGEMENT_URL.equalsIgnoreCase(request.path())) { // Creating a new query set by sampling the UBI queries. if (request.method().equals(RestRequest.Method.POST)) { @@ -100,36 +98,19 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli final int querySetSize = Integer.parseInt(request.param("query_set_size", "1000")); // Create a query set by finding all the unique user_query terms. - if (StringUtils.equalsIgnoreCase(sampling, "none")) { + if ("none".equalsIgnoreCase(sampling)) { // If we are not sampling queries, the query sets should just be directly // indexed into OpenSearch using the `ubu_queries` index directly. try { - // Get queries from the UBI queries index. - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - searchSourceBuilder.from(0); - searchSourceBuilder.size(querySetSize); + final AllQueriesQuerySamplerParameters parameters = new AllQueriesQuerySamplerParameters(name, description, sampling, querySetSize); + final AllQueriesQuerySampler sampler = new AllQueriesQuerySampler(client, parameters); - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME); - searchRequest.source(searchSourceBuilder); + // Sample and index the queries. + final String querySetId = sampler.sample(); - final SearchResponse searchResponse = client.search(searchRequest).get(); - - // LOGGER.info("Found {} user queries from the ubi_queries index.", searchResponse.getHits().getTotalHits().toString()); - - final Set queries = new HashSet<>(); - for(final SearchHit hit : searchResponse.getHits().getHits()) { - final Map fields = hit.getSourceAsMap(); - queries.add(fields.get("user_query").toString()); - } - - // LOGGER.info("Found {} user queries from the ubi_queries index.", queries.size()); - - // Create the query set and return its ID. - final String querySetId = indexQuerySet(client, name, description, sampling, queries); return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"query_set\": \"" + querySetId + "\"}")); } catch(Exception ex) { @@ -138,19 +119,18 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli // Create a query set by using PPTSS sampling. - } else if (StringUtils.equalsIgnoreCase(sampling, "pptss")) { + } else if ("pptss".equalsIgnoreCase(sampling)) { - final ProbabilityProportionalToSizeParameters parameters = new ProbabilityProportionalToSizeParameters(querySetSize); - final ProbabilityProportionalToSizeQuerySampler sampler = new ProbabilityProportionalToSizeQuerySampler(parameters); + LOGGER.info("Creating query set using PPTSS"); - // TODO: Get all queries from the ubi_queries index. - - final Collection queries = sampler.sample(); + final ProbabilityProportionalToSizeParameters parameters = new ProbabilityProportionalToSizeParameters(name, description, sampling, querySetSize); + final ProbabilityProportionalToSizeAbstractQuerySampler sampler = new ProbabilityProportionalToSizeAbstractQuerySampler(client, parameters); try { - // Create the query set and return its ID. - final String querySetId = indexQuerySet(client, name, description, sampling, queries); + // Sample and index the queries. + final String querySetId = sampler.sample(); + return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"query_set\": \"" + querySetId + "\"}")); } catch(Exception ex) { @@ -168,7 +148,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } // Handle running query sets. - } else if(StringUtils.equalsIgnoreCase(request.path(), QUERYSET_RUN_URL)) { + } else if(QUERYSET_RUN_URL.equalsIgnoreCase(request.path())) { final String id = request.param("id"); @@ -203,7 +183,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"message\": \"Query set " + id + " run initiated.\"}")); // Handle the on-demand creation of implicit judgments. - } else if(StringUtils.equalsIgnoreCase(request.path(), IMPLICIT_JUDGMENTS_URL)) { + } else if(IMPLICIT_JUDGMENTS_URL.equalsIgnoreCase(request.path())) { if (request.method().equals(RestRequest.Method.POST)) { @@ -212,7 +192,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli final int maxRank = Integer.parseInt(request.param("max_rank", "20")); final long judgments; - if (StringUtils.equalsIgnoreCase(clickModel, "coec")) { + if ("coec".equalsIgnoreCase(clickModel)) { final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(true, maxRank); final CoecClickModel coecClickModel = new CoecClickModel(client, coecClickModelParameters); @@ -255,7 +235,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } // Handle the scheduling of creating implicit judgments. - } else if(StringUtils.equalsIgnoreCase(request.path(), SCHEDULING_URL)) { + } else if(SCHEDULING_URL.equalsIgnoreCase(request.path())) { if (request.method().equals(RestRequest.Method.POST)) { @@ -276,7 +256,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli // Read the start_time. final Instant startTime; - if (StringUtils.isEmpty(request.param("start_time"))) { + if (request.param("start_time") == null) { startTime = Instant.now(); } else { startTime = Instant.ofEpochMilli(Long.parseLong(request.param("start_time"))); @@ -284,7 +264,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli // Read the interval. final int interval; - if (StringUtils.isEmpty(request.param("interval"))) { + if (request.param("interval") == null) { // Default to every 24 hours. interval = 1440; } else { @@ -361,29 +341,4 @@ public void onFailure(Exception e) { } - /** - * Index the query set. - */ - private String indexQuerySet(final NodeClient client, final String name, final String description, final String sampling, Collection queries) throws Exception { - - final Map querySet = new HashMap<>(); - querySet.put("name", name); - querySet.put("description", description); - querySet.put("sampling", sampling); - querySet.put("queries", queries); - querySet.put("created_at", Instant.now().toEpochMilli()); - - final String querySetId = UUID.randomUUID().toString(); - - final IndexRequest indexRequest = new IndexRequest().index(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME) - .id(querySetId) - .source(querySet) - .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - - client.index(indexRequest).get(); - - return querySetId; - - } - } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java new file mode 100644 index 0000000..f31ec89 --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.samplers; + +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.client.node.NodeClient; +import org.opensearch.eval.SearchQualityEvaluationPlugin; + +import java.time.Instant; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; + +/** + * An interface for sampling UBI queries. + */ +public abstract class AbstractQuerySampler { + + /** + * Gets the name of the sampler. + * @return The name of the sampler. + */ + abstract String getName(); + + /** + * Samples the queries and inserts the query set into an index. + * @return A query set ID. + */ + abstract String sample() throws Exception; + + /** + * Index the query set. + */ + protected String indexQuerySet(final NodeClient client, final String name, final String description, final String sampling, Collection queries) throws Exception { + + final Map querySet = new HashMap<>(); + querySet.put("name", name); + querySet.put("description", description); + querySet.put("sampling", sampling); + querySet.put("queries", queries); + querySet.put("created_at", Instant.now().toEpochMilli()); + + final String querySetId = UUID.randomUUID().toString(); + + final IndexRequest indexRequest = new IndexRequest().index(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME) + .id(querySetId) + .source(querySet) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + + client.index(indexRequest).get(); + + return querySetId; + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java new file mode 100644 index 0000000..c8d731a --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.samplers; + +public class AbstractSamplerParameters { + + private final String name; + private final String description; + private final String sampling; + private final int querySetSize; + + public AbstractSamplerParameters(final String name, final String description, final String sampling, final int querySetSize) { + this.name = name; + this.description = description; + this.sampling = sampling; + this.querySetSize = querySetSize; + } + + public String getName() { + return name; + } + + public String getDescription() { + return description; + } + + public String getSampling() { + return sampling; + } + + public int getQuerySetSize() { + return querySetSize; + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java new file mode 100644 index 0000000..29fb0b7 --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java @@ -0,0 +1,73 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.samplers; + +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.node.NodeClient; +import org.opensearch.eval.SearchQualityEvaluationPlugin; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * An implementation of {@link AbstractQuerySampler} that uses all UBI queries without any sampling. + */ +public class AllQueriesQuerySampler extends AbstractQuerySampler { + + private final NodeClient client; + private final AllQueriesQuerySamplerParameters parameters; + + /** + * Creates a new sampler. + * @param client The OpenSearch {@link NodeClient client}. + */ + public AllQueriesQuerySampler(final NodeClient client, final AllQueriesQuerySamplerParameters parameters) { + this.client = client; + this.parameters = parameters; + } + + @Override + public String getName() { + return "none"; + } + + @Override + public String sample() throws Exception { + + // Get queries from the UBI queries index. + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(QueryBuilders.matchAllQuery()); + searchSourceBuilder.from(0); + searchSourceBuilder.size(parameters.getQuerySetSize()); + + final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME); + searchRequest.source(searchSourceBuilder); + + final SearchResponse searchResponse = client.search(searchRequest).get(); + + // LOGGER.info("Found {} user queries from the ubi_queries index.", searchResponse.getHits().getTotalHits().toString()); + + final Set queries = new HashSet<>(); + for(final SearchHit hit : searchResponse.getHits().getHits()) { + final Map fields = hit.getSourceAsMap(); + queries.add(fields.get("user_query").toString()); + } + + // LOGGER.info("Found {} user queries from the ubi_queries index.", queries.size()); + + return indexQuerySet(client, parameters.getName(), parameters.getDescription(), parameters.getSampling(), queries); + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java new file mode 100644 index 0000000..3149668 --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.samplers; + +public class AllQueriesQuerySamplerParameters extends AbstractSamplerParameters { + + public AllQueriesQuerySamplerParameters(final String name, final String description, final String sampling, final int querySetSize) { + super(name, description, sampling, querySetSize); + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java similarity index 56% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java rename to opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java index 1307e9f..d348dc0 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java @@ -8,7 +8,15 @@ */ package org.opensearch.eval.samplers; -import org.opensearch.eval.judgments.model.ubi.query.UbiQuery; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.node.NodeClient; +import org.opensearch.eval.SearchQualityEvaluationPlugin; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; import java.util.ArrayList; import java.util.Collection; @@ -20,19 +28,24 @@ import java.util.Set; /** - * An implementation of {@link QuerySampler} that uses PPTSS sampling. + * An implementation of {@link AbstractQuerySampler} that uses PPTSS sampling. * See https://opensourceconnections.com/blog/2022/10/13/how-to-succeed-with-explicit-relevance-evaluation-using-probability-proportional-to-size-sampling/ * for more information on PPTSS. */ -public class ProbabilityProportionalToSizeQuerySampler implements QuerySampler { +public class ProbabilityProportionalToSizeAbstractQuerySampler extends AbstractQuerySampler { + private static final Logger LOGGER = LogManager.getLogger(ProbabilityProportionalToSizeAbstractQuerySampler.class); + + private final NodeClient client; private final ProbabilityProportionalToSizeParameters parameters; /** * Creates a new PPTSS sampler. + * @param client The OpenSearch {@link NodeClient client}. * @param parameters The {@link ProbabilityProportionalToSizeParameters parameters} for the sampling. */ - public ProbabilityProportionalToSizeQuerySampler(final ProbabilityProportionalToSizeParameters parameters) { + public ProbabilityProportionalToSizeAbstractQuerySampler(final NodeClient client, final ProbabilityProportionalToSizeParameters parameters) { + this.client = client; this.parameters = parameters; } @@ -42,7 +55,29 @@ public String getName() { } @Override - public Collection sample(final Collection userQueries) { + public String sample() throws Exception { + + // TODO: Can this be changed to an aggregation? + // An aggregation is limited (?) to 10,000 which could miss some queries. + + // Get queries from the UBI queries index. + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(QueryBuilders.matchAllQuery()); + searchSourceBuilder.from(0); + // TODO: Need to get all queries. + searchSourceBuilder.size(10000); + + final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME); + searchRequest.source(searchSourceBuilder); + + final SearchResponse searchResponse = client.search(searchRequest).get(); + + final Collection userQueries = new ArrayList<>(); + + for(final SearchHit hit : searchResponse.getHits().getHits()) { + final Map fields = hit.getSourceAsMap(); + userQueries.add(fields.get("user_query").toString()); + } final Map weights = new HashMap<>(); @@ -62,8 +97,8 @@ public Collection sample(final Collection userQueries) { // Ensure all normalized weights sum to 1. final double sumOfNormalizedWeights = normalizedWeights.values().stream().reduce(0.0, Double::sum); - if(sumOfNormalizedWeights != 1.0) { - throw new RuntimeException("Summed normalized weights do not equal 1.0"); + if(!compare(1.0, sumOfNormalizedWeights)) { + throw new RuntimeException("Summed normalized weights do not equal 1.0: Actual value: " + sumOfNormalizedWeights); } final Collection querySet = new ArrayList<>(); @@ -89,14 +124,19 @@ public Collection sample(final Collection userQueries) { for(Map.Entry entry : normalizedWeights.entrySet()) { if(entry.getValue() == nearestWeight) { querySet.add(entry.getKey()); + LOGGER.info("Generated random value: {}; Closest value = {}", random, entry.getKey()); break; } } } - return querySet; + return indexQuerySet(client, parameters.getName(), parameters.getDescription(), parameters.getSampling(), querySet); + + } + public static boolean compare(double a, double b) { + return Math.abs(a - b) < 0.00001; } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java index 0001821..d5e4311 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java @@ -8,16 +8,10 @@ */ package org.opensearch.eval.samplers; -public class ProbabilityProportionalToSizeParameters { +public class ProbabilityProportionalToSizeParameters extends AbstractSamplerParameters { - private final int querySetSize; - - public ProbabilityProportionalToSizeParameters(int querySetSize) { - this.querySetSize = querySetSize; - } - - public int getQuerySetSize() { - return querySetSize; + public ProbabilityProportionalToSizeParameters(final String name, final String description, final String sampling, final int querySetSize) { + super(name, description, sampling, querySetSize); } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/QuerySampler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/QuerySampler.java deleted file mode 100644 index e943599..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/QuerySampler.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.samplers; - -import org.opensearch.eval.judgments.model.ubi.query.UbiQuery; - -import java.util.Collection; - -/** - * An interface for sampling UBI queries. - */ -public interface QuerySampler { - - /** - * Gets the name of the sampler. - * @return The name of the sampler. - */ - String getName(); - - /** - * Samples the queries. - * @param userQueries A collection of user queries from UBI queries. - * @return A collection of sampled user queries. - */ - Collection sample(Collection userQueries); - -}