diff --git a/opensearch-search-quality-evaluation-plugin/build.gradle b/opensearch-search-quality-evaluation-plugin/build.gradle index 0e0ae61..5c6c101 100644 --- a/opensearch-search-quality-evaluation-plugin/build.gradle +++ b/opensearch-search-quality-evaluation-plugin/build.gradle @@ -57,7 +57,6 @@ dependencies { compileOnly "org.apache.httpcomponents:httpcore:${versions.httpcore}" compileOnly "org.apache.httpcomponents:httpclient:${versions.httpclient}" compileOnly "commons-logging:commons-logging:${versions.commonslogging}" - implementation "org.apache.commons:commons-lang3:3.17.0" implementation "com.google.code.gson:gson:2.11.0" yamlRestTestImplementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" } diff --git a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml index 8938320..c2ab3b7 100644 --- a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml +++ b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml @@ -10,7 +10,7 @@ services: logger.level: info OPENSEARCH_INITIAL_ADMIN_PASSWORD: SuperSecretPassword_123 http.max_content_length: 500mb - OPENSEARCH_JAVA_OPTS: "-Xms8g -Xmx8g" + OPENSEARCH_JAVA_OPTS: "-Xms16g -Xmx16g" ulimits: memlock: soft: -1 diff --git a/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-3.17.0.jar.sha1 b/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-3.17.0.jar.sha1 deleted file mode 100644 index 073922f..0000000 --- a/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-3.17.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b17d2136f0460dcc0d2016ceefca8723bdf4ee70 \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-LICENSE.txt b/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-LICENSE.txt deleted file mode 100644 index 7a4a3ea..0000000 --- a/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-NOTICE.txt b/opensearch-search-quality-evaluation-plugin/licenses/commons-lang3-NOTICE.txt deleted file mode 100644 index e69de29..0000000 diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh index c353326..b35ddc8 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh @@ -3,5 +3,19 @@ echo "Deleting existing judgments index..." curl -s -X DELETE http://localhost:9200/judgments +echo "Creating judgments index..." +curl -s -X PUT http://localhost:9200/judgments -H 'Content-Type: application/json' -d' + { + "mappings": { + "properties": { + "judgments_id": { "type": "keyword" }, + "query_id": { "type": "keyword" }, + "query": { "type": "keyword" }, + "document_id": { "type": "keyword" }, + "judgment": { "type": "double" } + } + } + }' + echo "Creating judgments..." curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh b/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh index 9a4843e..477dbd9 100755 --- a/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh +++ b/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh @@ -1,21 +1,57 @@ #!/bin/bash -e QUERY_SET_ID="${1}" -JUDGMENTS_ID="669fc8aa-3fe7-418f-952b-df7354af8f37" +JUDGMENTS_ID="9183599e-46dd-49e0-9584-df816164a4c2" INDEX="ecommerce" ID_FIELD="asin" -K="10" +K="20" +THRESHOLD="1.0" # Default value curl -s -X DELETE "http://localhost:9200/search_quality_eval_query_sets_run_results" +# Keyword search curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}" \ -H "Content-Type: application/json" \ --data-binary '{ - "query": { - "match": { - "title": { - "query": "#$query##" - } + "multi_match": { + "query": "#$query##", + "fields": ["id", "title", "category", "bullets", "description", "attrs.Brand", "attrs.Color"] } - } }' + +## Neural search +#curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}&search_pipeline=neural-search-pipeline" \ +# -H "Content-Type: application/json" \ +# --data-binary '{ +# "neural": { +# "title_embedding": { +# "query_text": ""#$query##", +# "k": "50" +# } +# } +# }' + +# Hybrid search +#curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}&search_pipeline=hybrid-search-pipeline" \ +# -H "Content-Type: application/json" \ +# --data-binary '{ +# "hybrid": { +# "queries": [ +# { +# "match": { +# "title": { +# "query": "#$query##" +# } +# } +# }, +# { +# "neural": { +# "title_embedding": { +# "query_text": "#$query##", +# "k": "50" +# } +# } +# } +# ] +# } +# }' diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java index 4b850d5..1503df0 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java @@ -8,7 +8,6 @@ */ package org.opensearch.eval; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.index.IndexRequest; @@ -113,7 +112,7 @@ public void runJob(final ScheduledJobParameter jobParameter, final JobExecutionC final long startTime = System.currentTimeMillis(); final long judgments; - if(StringUtils.equalsIgnoreCase(searchQualityEvaluationJobParameter.getClickModel(), "coec")) { + if("coec".equalsIgnoreCase(searchQualityEvaluationJobParameter.getClickModel())) { LOGGER.info("Beginning implicit judgment generation using clicks-over-expected-clicks."); final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(true, searchQualityEvaluationJobParameter.getMaxRank()); diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java index 83ac507..094f6a5 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java @@ -22,7 +22,7 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel; import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModelParameters; -import org.opensearch.eval.runners.OpenSearchAbstractQuerySetRunner; +import org.opensearch.eval.runners.OpenSearchQuerySetRunner; import org.opensearch.eval.runners.QuerySetRunResult; import org.opensearch.eval.samplers.AllQueriesQuerySampler; import org.opensearch.eval.samplers.AllQueriesQuerySamplerParameters; @@ -35,13 +35,13 @@ import org.opensearch.rest.RestResponse; import java.io.IOException; +import java.nio.charset.Charset; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; public class SearchQualityEvaluationRestHandler extends BaseRestHandler { @@ -157,8 +157,10 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli final String querySetId = request.param("id"); final String judgmentsId = request.param("judgments_id"); final String index = request.param("index"); + final String searchPipeline = request.param("search_pipeline", null); final String idField = request.param("id_field", "_id"); final int k = Integer.parseInt(request.param("k", "10")); + final double threshold = Double.parseDouble(request.param("threshold", "1.0")); if(querySetId == null || querySetId.isEmpty() || judgmentsId == null || judgmentsId.isEmpty() || index == null || index.isEmpty()) { return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Missing required parameters.\"}")); @@ -173,7 +175,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } // Get the query JSON from the content. - final String query = new String(BytesReference.toBytes(request.content())); + final String query = new String(BytesReference.toBytes(request.content()), Charset.defaultCharset()); // Validate the query has a QUERY_PLACEHOLDER. if(!query.contains(QUERY_PLACEHOLDER)) { @@ -182,12 +184,12 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli try { - final OpenSearchAbstractQuerySetRunner openSearchQuerySetRunner = new OpenSearchAbstractQuerySetRunner(client); - final QuerySetRunResult querySetRunResult = openSearchQuerySetRunner.run(querySetId, judgmentsId, index, idField, query, k); + final OpenSearchQuerySetRunner openSearchQuerySetRunner = new OpenSearchQuerySetRunner(client); + final QuerySetRunResult querySetRunResult = openSearchQuerySetRunner.run(querySetId, judgmentsId, index, searchPipeline, idField, query, k, threshold); openSearchQuerySetRunner.save(querySetRunResult); } catch (Exception ex) { - LOGGER.error("Unable to run query set.", ex); + LOGGER.error("Unable to run query set. Verify query set and judgments exist.", ex); return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, ex.getMessage())); } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java index 7afac0c..1dfb18d 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java @@ -9,7 +9,6 @@ package org.opensearch.eval.judgments.clickmodel.coec; import com.google.gson.Gson; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.search.SearchRequest; @@ -24,8 +23,8 @@ import org.opensearch.eval.judgments.model.Judgment; import org.opensearch.eval.judgments.model.ubi.event.UbiEvent; import org.opensearch.eval.judgments.opensearch.OpenSearchHelper; -import org.opensearch.eval.judgments.util.MathUtils; import org.opensearch.eval.judgments.util.IncrementalUserQueryHash; +import org.opensearch.eval.judgments.util.MathUtils; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; @@ -57,6 +56,7 @@ public class CoecClickModel extends ClickModel { // UBI event names. public static final String EVENT_CLICK = "click"; + // TODO: Change "view" to "impression". public static final String EVENT_VIEW = "view"; private final CoecClickModelParameters parameters; @@ -244,6 +244,7 @@ private Map> getClickthroughRate(final int maxRank .source(searchSourceBuilder) .scroll(scroll); + // TODO Don't use .get() SearchResponse searchResponse = client.search(searchRequest).get(); String scrollId = searchResponse.getScrollId(); @@ -270,7 +271,7 @@ private Map> getClickthroughRate(final int maxRank // Get the ClickthroughRate object for the object that was interacted with. final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId())); - if (StringUtils.equalsIgnoreCase(ubiEvent.getActionName(), EVENT_CLICK)) { + if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) { //LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId()); clickthroughRate.logClick(); } else { @@ -339,10 +340,11 @@ public Map getRankAggregatedClickThrough() throws Exception { final Terms actionTerms = searchResponse.getAggregations().get("By_Action"); final Collection actionBuckets = actionTerms.getBuckets(); + for(final Terms.Bucket actionBucket : actionBuckets) { // Handle the "click" bucket. - if(StringUtils.equalsIgnoreCase(actionBucket.getKey().toString(), EVENT_CLICK)) { + if(EVENT_CLICK.equalsIgnoreCase(actionBucket.getKey().toString())) { final Terms positionTerms = actionBucket.getAggregations().get("By_Position"); final Collection positionBuckets = positionTerms.getBuckets(); @@ -354,7 +356,7 @@ public Map getRankAggregatedClickThrough() throws Exception { } // Handle the "view" bucket. - if(StringUtils.equalsIgnoreCase(actionBucket.getKey().toString(), EVENT_VIEW)) { + if(EVENT_VIEW.equalsIgnoreCase(actionBucket.getKey().toString())) { final Terms positionTerms = actionBucket.getAggregations().get("By_Position"); final Collection positionBuckets = positionTerms.getBuckets(); diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java index 542affb..7d9efa4 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java @@ -8,7 +8,6 @@ */ package org.opensearch.eval.judgments.model; -import org.apache.commons.lang3.builder.EqualsBuilder; import org.opensearch.eval.judgments.util.MathUtils; /** @@ -42,18 +41,6 @@ public ClickthroughRate(final String objectId, final int clicks, final int event this.events = events; } - @Override - public boolean equals(Object obj) { - return EqualsBuilder.reflectionEquals(this, obj); - } - - @Override - public int hashCode() { - int result = 17; - result = 29 * result + objectId.hashCode(); - return result; - } - @Override public String toString() { return "object_id: " + objectId + ", clicks: " + clicks + ", events: " + events + ", ctr: " + MathUtils.round(getClickthroughRate()); diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java index 514b5dd..abda38e 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java @@ -8,8 +8,6 @@ */ package org.opensearch.eval.judgments.model; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.eval.judgments.util.MathUtils; @@ -53,7 +51,7 @@ public Map getJudgmentAsMap() { final Map judgmentMap = new HashMap<>(); judgmentMap.put("query_id", queryId); judgmentMap.put("query", query); - judgmentMap.put("document", document); + judgmentMap.put("document_id", document); judgmentMap.put("judgment", judgment); return judgmentMap; @@ -106,21 +104,6 @@ public String toString() { return "query_id: " + queryId + ", query: " + query + ", document: " + document + ", judgment: " + MathUtils.round(judgment); } - @Override - public boolean equals(Object obj) { - return EqualsBuilder.reflectionEquals(this, obj); - } - - @Override - public int hashCode() { - return new HashCodeBuilder(17, 37). - append(queryId). - append(query). - append(document). - append(judgment). - toHashCode(); - } - /** * Gets the judgment's query ID. * @return The judgment's query ID. diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java index 52d48e7..0b7ca0b 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java @@ -9,8 +9,6 @@ package org.opensearch.eval.judgments.model.ubi.query; import com.google.gson.annotations.SerializedName; -import org.apache.commons.lang3.builder.EqualsBuilder; -import org.apache.commons.lang3.builder.HashCodeBuilder; import java.util.Map; @@ -47,20 +45,6 @@ public UbiQuery() { } - @Override - public boolean equals(Object obj) { - return EqualsBuilder.reflectionEquals(this, obj); - } - - @Override - public int hashCode() { - return new HashCodeBuilder(17, 37). - append(queryId). - append(userQuery). - append(clientId). - toHashCode(); - } - /** * Gets the timestamp for the query. * @return The timestamp for the query. diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java index 23ea955..d268933 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java @@ -328,6 +328,7 @@ public String indexJudgments(final Collection judgments) throws Except for(final Judgment judgment : judgments) { + // TODO: Add a timestamp. final Map j = judgment.getJudgmentAsMap(); j.put("judgments_id", judgmentsId); diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java index 6d59d3a..55fa60a 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java @@ -10,10 +10,18 @@ import java.util.List; +/** + * Subclass of {@link SearchMetric} that calculates Discounted Cumulative Gain @ k. + */ public class DcgSearchMetric extends SearchMetric { protected final List relevanceScores; + /** + * Creates new DCG metrics. + * @param k The k value. + * @param relevanceScores A list of relevance scores. + */ public DcgSearchMetric(final int k, final List relevanceScores) { super(k); this.relevanceScores = relevanceScores; @@ -26,12 +34,23 @@ public String getName() { @Override public double calculate() { + return calculateDcg(relevanceScores); + } + + protected double calculateDcg(final List relevanceScores) { double dcg = 0.0; - for(int i = 0; i < relevanceScores.size(); i++) { - double relevance = relevanceScores.get(i); - dcg += relevance / Math.log(i + 2); // Add 2 to avoid log(1) = 0 + for(int i = 1; i <= relevanceScores.size(); i++) { + + final double relevanceScore = relevanceScores.get(i - 1); + final double numerator = Math.pow(2, relevanceScore) - 1.0; + final double denominator = Math.log(i) / Math.log(i + 2); + + LOGGER.info("numerator = {}, denominator = {}", numerator, denominator); + dcg += (numerator / denominator); + } + return dcg; } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java index b62aed7..5476d6c 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java @@ -10,13 +10,18 @@ import java.util.List; +/** + * Subclass of {@link SearchMetric} that calculates Normalized Discounted Cumulative Gain @ k. + */ public class NdcgSearchMetric extends DcgSearchMetric { - private final List idealRelevanceScores; - - public NdcgSearchMetric(final int k, final List relevanceScores, final List idealRelevanceScores) { + /** + * Creates new NDCG metrics. + * @param k The k value. + * @param relevanceScores A list of relevancy scores. + */ + public NdcgSearchMetric(final int k, final List relevanceScores) { super(k, relevanceScores); - this.idealRelevanceScores = idealRelevanceScores; } @Override @@ -27,17 +32,11 @@ public String getName() { @Override public double calculate() { - double dcg = super.calculate(); + // Make the ideal relevance scores by sorting the relevance scores largest to smallest. + relevanceScores.sort(Double::compare); - double idcg = 0.0; - for(int i = 0; i < idealRelevanceScores.size(); i++) { - double relevance = idealRelevanceScores.get(i); - idcg += relevance / Math.log(i + 2); // Add 2 to avoid log(1) = 0 - } - - if(idcg == 0) { - return 0; - } + double dcg = super.calculate(); + double idcg = super.calculateDcg(relevanceScores); return dcg / idcg; diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java index 9ae1e0c..fc4b80c 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java @@ -10,12 +10,25 @@ import java.util.List; +/** + * Subclass of {@link SearchMetric} that calculates Precision @ k. + */ public class PrecisionSearchMetric extends SearchMetric { + private final double threshold; private final List relevanceScores; - public PrecisionSearchMetric(final int k, final List relevanceScores) { + /** + * Creates new precision metrics. + * @param k The k value. + * @param threshold The threshold for assigning binary relevancy scores to non-binary scores. + * Scores greater than or equal to this value will be assigned a relevancy score of 1 (relevant). + * Scores less than this value will be assigned a relevancy score of 0 (not relevant). + * @param relevanceScores A list of relevance scores. + */ + public PrecisionSearchMetric(final int k, final double threshold, final List relevanceScores) { super(k); + this.threshold = threshold; this.relevanceScores = relevanceScores; } @@ -27,9 +40,24 @@ public String getName() { @Override public double calculate() { - // TODO: Implement this. - return 0.0; + int numberOfRelevantItems = 0; + + for(final double relevanceScore : relevanceScores) { + if(relevanceScore >= threshold) { + numberOfRelevantItems++; + } + } + + return (double) numberOfRelevantItems / (double) k; + + } + /** + * Gets the threshold value. + * @return The threshold value. + */ + public double threshold() { + return threshold; } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java index 658c202..acd580a 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java @@ -11,22 +11,60 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * Base class for search metrics. + */ public abstract class SearchMetric { - private static final Logger LOGGER = LogManager.getLogger(SearchMetric.class); + protected static final Logger LOGGER = LogManager.getLogger(SearchMetric.class); protected int k; + /** + * Gets the name of the metric, i.e. ndcg. + * @return The name of the metric. + */ public abstract String getName(); + /** + * Calculates the metric. + * @return The value of the metric. + */ public abstract double calculate(); + private Double value = Double.NaN; + + /** + * Creates the metric. + * @param k The k value. + */ public SearchMetric(final int k) { this.k = k; } + /** + * Gets the k value. + * @return The k value. + */ public int getK() { return k; } + /** + * Gets the value of the metric. If the metric has not yet been calculated, + * the metric will first be calculated by calling calculate. This + * function should be used in cases where repeated access to the metrics value is + * needed without recalculating the metrics value. + * @return The value of the metric. + */ + public double getValue() { + + if(Double.isNaN(value)) { + this.value = calculate(); + } + + return value; + + } + } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java index db14ce2..adecfd1 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java @@ -18,11 +18,14 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.builder.SearchSourceBuilder; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.Map; /** * Base class for query set runners. Classes that extend this class - * should be specific to a search engine. See the {@link OpenSearchAbstractQuerySetRunner} for an example. + * should be specific to a search engine. See the {@link OpenSearchQuerySetRunner} for an example. */ public abstract class AbstractQuerySetRunner { @@ -39,12 +42,18 @@ public AbstractQuerySetRunner(final Client client) { * @param querySetId The ID of the query set to run. * @param judgmentsId The ID of the judgments set to use for search metric calculation. * @param index The name of the index to run the query sets against. + * @param searchPipeline The name of the search pipeline to use, or null to not use a search pipeline. * @param idField The field in the index that is used to uniquely identify a document. * @param query The query that will be used to run the query set. * @param k The k used for metrics calculation, i.e. DCG@k. + * @param threshold The cutoff for binary judgments. A judgment score greater than or equal + * to this value will be assigned a binary judgment value of 1. A judgment score + * less than this value will be assigned a binary judgment value of 0. * @return The query set {@link QuerySetRunResult results} and calculated metrics. */ - abstract QuerySetRunResult run(String querySetId, final String judgmentsId, final String index, final String idField, final String query, final int k) throws Exception; + abstract QuerySetRunResult run(String querySetId, final String judgmentsId, final String index, final String searchPipeline, + final String idField, final String query, final int k, + final double threshold) throws Exception; /** * Saves the query set results to a persistent store, which may be the search engine itself. @@ -52,22 +61,58 @@ public AbstractQuerySetRunner(final Client client) { */ abstract void save(QuerySetRunResult result) throws Exception; + /** + * Gets a query set from the index. + * @param querySetId The ID of the query set to get. + * @return The query set as a collection of maps of query to frequency + * @throws Exception Thrown if the query set cannot be retrieved. + */ + public final Collection> getQuerySet(final String querySetId) throws Exception { + + // Get the query set. + final SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(QueryBuilders.matchQuery("_id", querySetId)); + + // Will be at most one match. + sourceBuilder.from(0); + sourceBuilder.size(1); + + final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME).source(sourceBuilder); + + // TODO: Don't use .get() + final SearchResponse searchResponse = client.search(searchRequest).get(); + + if(searchResponse.getHits().getHits().length > 0) { + + // The queries from the query set that will be run. + return (Collection>) searchResponse.getHits().getAt(0).getSourceAsMap().get("queries"); + + } else { + + LOGGER.error("Unable to get query set with ID {}", querySetId); + + // The query set was not found. + throw new RuntimeException("The query set with ID " + querySetId + " was not found."); + + } + + } + /** * Get a judgment from the index. - * @param judgmentsId The judgements ID the judgment to find belongs to. + * @param judgmentsId The ID of the judgments to find. * @param query The user query. * @param documentId The document ID. * @return The value of the judgment, or NaN if the judgment cannot be found. - * @throws Exception Thrown if the indexed cannot be queried for the judgment. */ - public Double getJudgment(final String judgmentsId, final String query, final String documentId) throws Exception { + public Double getJudgmentValue(final String judgmentsId, final String query, final String documentId) throws Exception { // Find a judgment that matches the judgments_id, query_id, and document_id fields in the index. final BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); - boolQueryBuilder.must(QueryBuilders.matchQuery("judgments_id", judgmentsId)); - boolQueryBuilder.must(QueryBuilders.matchQuery("query", query)); - boolQueryBuilder.must(QueryBuilders.matchQuery("document_id", documentId)); + boolQueryBuilder.must(QueryBuilders.termQuery("judgments_id", judgmentsId)); + boolQueryBuilder.must(QueryBuilders.termQuery("query", query)); + boolQueryBuilder.must(QueryBuilders.termQuery("document_id", documentId)); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(boolQueryBuilder); @@ -76,29 +121,73 @@ public Double getJudgment(final String judgmentsId, final String query, final St searchSourceBuilder.from(0); searchSourceBuilder.size(1); - // Only include the judgment field. - String[] includeFields = new String[] {"judgment"}; - String[] excludeFields = new String[] {}; + // Only include the judgment field in the response. + final String[] includeFields = new String[] {"judgment"}; + final String[] excludeFields = new String[] {}; searchSourceBuilder.fetchSource(includeFields, excludeFields); - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.JUDGMENTS_INDEX_NAME); - searchRequest.source(searchSourceBuilder); + final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.JUDGMENTS_INDEX_NAME).source(searchSourceBuilder); + + Double judgment = Double.NaN; - // TODO: Don't use .get() final SearchResponse searchResponse = client.search(searchRequest).get(); - if(searchResponse.getHits().getHits().length == 0) { + if (searchResponse.getHits().getHits().length > 0) { - // The judgments_id is probably not valid. - return Double.NaN; + final Map j = searchResponse.getHits().getAt(0).getSourceAsMap(); + + // TODO: Why does this not exist in some cases? + if(j.containsKey("judgment")) { + judgment = (Double) j.get("judgment"); + + if(judgment > 0) { + LOGGER.info("Found a nonzero judgment! = {}, {}", judgment, query); + } + + } } else { - final Map j = searchResponse.getHits().getAt(0).getSourceAsMap(); - return Double.parseDouble(j.get("judgment").toString()); + // No judgment for this query/doc pair exists. + judgment = Double.NaN; } + return judgment; + + } + + /** + * Gets the judgments for a query / document pairs. + * @param judgmentsId The judgments collection for which the judgment to retrieve belongs. + * @param query The user query. + * @param orderedDocumentIds A list of document IDs returned for the user query. + * @param k The k used for metrics calculation, i.e. DCG@k. + * @return An ordered list of relevance scores for the query / document pairs. + * @throws Exception Thrown if a judgment cannot be retrieved. + */ + protected List getRelevanceScores(final String judgmentsId, final String query, final List orderedDocumentIds, final int k) throws Exception { + + // Ordered list of scores. + final List scores = new ArrayList<>(); + + // Go through each document up to k and get the score. + for (int i = 0; i < k && i < orderedDocumentIds.size(); i++) { + + final String documentId = orderedDocumentIds.get(i); + + // Find the judgment value for this combination of query and documentId from the index. + final Double judgmentValue = getJudgmentValue(judgmentsId, query, documentId); + + // If a judgment for this query/doc pair is not found, Double.NaN will be returned. + if(!Double.isNaN(judgmentValue)) { + scores.add(judgmentValue); + } + + } + + return scores; + } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchAbstractQuerySetRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchAbstractQuerySetRunner.java deleted file mode 100644 index b5ffd9b..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchAbstractQuerySetRunner.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.runners; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.index.IndexResponse; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.Client; -import org.opensearch.core.action.ActionListener; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.eval.judgments.model.Judgment; -import org.opensearch.eval.metrics.DcgSearchMetric; -import org.opensearch.eval.metrics.NdcgSearchMetric; -import org.opensearch.eval.metrics.PrecisionSearchMetric; -import org.opensearch.eval.metrics.SearchMetric; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.SearchHit; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.opensearch.eval.SearchQualityEvaluationRestHandler.QUERY_PLACEHOLDER; - -/** - * A {@link AbstractQuerySetRunner} for Amazon OpenSearch. - */ -public class OpenSearchAbstractQuerySetRunner extends AbstractQuerySetRunner { - - private static final Logger LOGGER = LogManager.getLogger(OpenSearchAbstractQuerySetRunner.class); - - /** - * Creates a new query set runner - * @param client An OpenSearch {@link Client}. - */ - public OpenSearchAbstractQuerySetRunner(final Client client) { - super(client); - } - - @Override - public QuerySetRunResult run(final String querySetId, final String judgmentsId, final String index, final String idField, final String query, final int k) throws Exception { - - // Get the query set. - final SearchSourceBuilder getQuerySetSearchSourceBuilder = new SearchSourceBuilder(); - getQuerySetSearchSourceBuilder.query(QueryBuilders.matchQuery("_id", querySetId)); - getQuerySetSearchSourceBuilder.from(0); - // TODO: Need to page through to make sure we get all of the queries. - getQuerySetSearchSourceBuilder.size(500); - - final SearchRequest getQuerySetSearchRequest = new SearchRequest(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME); - getQuerySetSearchRequest.source(getQuerySetSearchSourceBuilder); - - try { - - // TODO: Don't use .get() - final SearchResponse searchResponse = client.search(getQuerySetSearchRequest).get(); - - // The queries from the query set that will be run. - final Collection> queries = (Collection>) searchResponse.getHits().getAt(0).getSourceAsMap().get("queries"); - - // The results of each query. - final List queryResults = new ArrayList<>(); - - for(Map queryMap : queries) { - - // Loop over each query in the map and run each one. - for (final String userQuery : queryMap.keySet()) { - - // Replace the query placeholder with the user query. - final String q = query.replace(QUERY_PLACEHOLDER, userQuery); - - // Build the query from the one that was passed in. - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.wrapperQuery(q)); - searchSourceBuilder.from(0); - searchSourceBuilder.size(k); - - String[] includeFields = new String[] {idField}; - String[] excludeFields = new String[] {}; - searchSourceBuilder.fetchSource(includeFields, excludeFields); - - // TODO: Allow for setting this index name. - final SearchRequest searchRequest = new SearchRequest(index); - getQuerySetSearchRequest.source(searchSourceBuilder); - - client.search(searchRequest, new ActionListener<>() { - - @Override - public void onResponse(final SearchResponse searchResponse) { - - final List orderedDocumentIds = new ArrayList<>(); - - for (final SearchHit hit : searchResponse.getHits().getHits()) { - - final Map sourceAsMap = hit.getSourceAsMap(); - final String documentId = sourceAsMap.get(idField).toString(); - - orderedDocumentIds.add(documentId); - - } - - // TODO: Use getJudgment() to get the judgment for this document. - final List relevanceScores = getRelevanceScores(query, orderedDocumentIds, k); - - final SearchMetric dcgSearchMetric = new DcgSearchMetric(k, relevanceScores); - // TODO: Add these metrics in, too. - //final SearchMetric ndcgSearchmetric = new NdcgSearchMetric(k, relevanceScores, idealRelevanceScores); - //final SearchMetric precisionSearchMetric = new PrecisionSearchMetric(k, relevanceScores); - - final Collection searchMetrics = List.of(dcgSearchMetric); // ndcgSearchmetric, precisionSearchMetric); - - queryResults.add(new QueryResult(userQuery, orderedDocumentIds, k, searchMetrics)); - - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to search for query: {}", query, ex); - } - }); - - } - - } - - // TODO: Calculate the search metrics for the entire query set given the results and the judgments. - final List orderedDocumentIds = new ArrayList<>(); - final List relevanceScores = getRelevanceScores(query, orderedDocumentIds, k); - final SearchMetric dcgSearchMetric = new DcgSearchMetric(k, relevanceScores); - // TODO: Add these metrics in, too. - //final SearchMetric ndcgSearchmetric = new NdcgSearchMetric(k, relevanceScores, idealRelevanceScores); - //final SearchMetric precisionSearchMetric = new PrecisionSearchMetric(k, relevanceScores); - - final Collection searchMetrics = List.of(dcgSearchMetric); // ndcgSearchmetric, precisionSearchMetric); - - return new QuerySetRunResult(queryResults, searchMetrics); - - } catch (Exception ex) { - throw new RuntimeException("Unable to run query set.", ex); - } - - } - - @Override - public void save(final QuerySetRunResult result) throws Exception { - - // Index the results into OpenSearch. - - final Map results = new HashMap<>(); - - results.put("run_id", result.getRunId()); - results.put("query_results", result.getQueryResultsAsMap()); - - // Calculate and add each metric to the object to index. - for(final SearchMetric searchMetric : result.getSearchMetrics()) { - results.put(searchMetric.getName(), searchMetric.calculate()); - } - - final IndexRequest indexRequest = new IndexRequest(SearchQualityEvaluationPlugin.QUERY_SETS_RUN_RESULTS_INDEX_NAME) - .source(results); - - client.index(indexRequest, new ActionListener<>() { - @Override - public void onResponse(IndexResponse indexResponse) { - LOGGER.debug("Query set results indexed."); - } - - @Override - public void onFailure(Exception ex) { - throw new RuntimeException(ex); - } - }); - - } - - public List getRelevanceScores(final String query, final List orderedDocumentIds, final int k) { - - // Ordered list of scores. - final List scores = new ArrayList<>(); - - // Go through each document up to k and get the score. - for(int i = 0; i < k; i++) { - - final String documentId = orderedDocumentIds.get(i); - - // TODO: Find the judgment value for this combination of query and documentId from the index. - final double judgment = 0.1; - - scores.add(judgment); - - if(i == orderedDocumentIds.size()) { - // k is greater than the actual length of documents. - break; - } - - } - - String listOfScores = scores.stream().map(Object::toString).collect(Collectors.joining(", ")); - LOGGER.info("Got relevance scores: {}", listOfScores); - - return scores; - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java new file mode 100644 index 0000000..0e684fa --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java @@ -0,0 +1,246 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.runners; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.core.action.ActionListener; +import org.opensearch.eval.SearchQualityEvaluationPlugin; +import org.opensearch.eval.metrics.DcgSearchMetric; +import org.opensearch.eval.metrics.NdcgSearchMetric; +import org.opensearch.eval.metrics.PrecisionSearchMetric; +import org.opensearch.eval.metrics.SearchMetric; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.opensearch.eval.SearchQualityEvaluationRestHandler.QUERY_PLACEHOLDER; + +/** + * A {@link AbstractQuerySetRunner} for Amazon OpenSearch. + */ +public class OpenSearchQuerySetRunner extends AbstractQuerySetRunner { + + private static final Logger LOGGER = LogManager.getLogger(OpenSearchQuerySetRunner.class); + + /** + * Creates a new query set runner + * + * @param client An OpenSearch {@link Client}. + */ + public OpenSearchQuerySetRunner(final Client client) { + super(client); + } + + @Override + public QuerySetRunResult run(final String querySetId, final String judgmentsId, final String index, + final String searchPipeline, final String idField, final String query, + final int k, final double threshold) throws Exception { + + final Collection> querySet = getQuerySet(querySetId); + LOGGER.info("Found {} queries in query set {}", querySet.size(), querySetId); + + try { + + // The results of each query. + final List queryResults = new ArrayList<>(); + + for (Map queryMap : querySet) { + + // Loop over each query in the map and run each one. + for (final String userQuery : queryMap.keySet()) { + + // Replace the query placeholder with the user query. + final String parsedQuery = query.replace(QUERY_PLACEHOLDER, userQuery); + + // Build the query from the one that was passed in. + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(QueryBuilders.wrapperQuery(parsedQuery)); + searchSourceBuilder.from(0); + searchSourceBuilder.size(k); + + final String[] includeFields = new String[]{idField}; + final String[] excludeFields = new String[]{}; + searchSourceBuilder.fetchSource(includeFields, excludeFields); + + if(searchPipeline != null) { + searchSourceBuilder.pipeline(searchPipeline); + } + + final SearchRequest searchRequest = new SearchRequest(index); + searchRequest.source(searchSourceBuilder); + + // This is to keep OpenSearch from rejecting queries. + // TODO: Look at using the Workload Management in 2.18.0. + Thread.sleep(50); + + client.search(searchRequest, new ActionListener<>() { + + @Override + public void onResponse(final SearchResponse searchResponse) { + + final List orderedDocumentIds = new ArrayList<>(); + + for (final SearchHit hit : searchResponse.getHits().getHits()) { + + final Map sourceAsMap = hit.getSourceAsMap(); + final String documentId = sourceAsMap.get(idField).toString(); + + orderedDocumentIds.add(documentId); + + } + + try { + + final List relevanceScores = getRelevanceScores(judgmentsId, userQuery, orderedDocumentIds, k); + + // Calculate the metrics for this query. + final SearchMetric dcgSearchMetric = new DcgSearchMetric(k, relevanceScores); + final SearchMetric ndcgSearchmetric = new NdcgSearchMetric(k, relevanceScores); + final SearchMetric precisionSearchMetric = new PrecisionSearchMetric(k, threshold, relevanceScores); + + final Collection searchMetrics = List.of(dcgSearchMetric, ndcgSearchmetric, precisionSearchMetric); + + queryResults.add(new QueryResult(userQuery, orderedDocumentIds, k, searchMetrics)); + + } catch (Exception ex) { + LOGGER.error("Unable to get relevance scores.", ex); + } + + } + + @Override + public void onFailure(Exception ex) { + LOGGER.error("Unable to search using query: {}", parsedQuery, ex); + } + }); + + } + + } + + // Calculate the search metrics for the entire query set given the individual query set metrics. + // Sum up the metrics for each query per metric type. + final int querySetSize = queryResults.size(); + final Map sumOfMetrics = new HashMap<>(); + for(final QueryResult queryResult : queryResults) { + for(final SearchMetric searchMetric : queryResult.getSearchMetrics()) { + sumOfMetrics.merge(searchMetric.getName(), searchMetric.getValue(), Double::sum); + } + } + + // Now divide by the number of queries. + final Map querySetMetrics = new HashMap<>(); + for(final String metric : sumOfMetrics.keySet()) { + querySetMetrics.put(metric, sumOfMetrics.get(metric) / querySetSize); + } + + final String querySetRunId = UUID.randomUUID().toString(); + final QuerySetRunResult querySetRunResult = new QuerySetRunResult(querySetRunId, querySetId, queryResults, querySetMetrics); + + LOGGER.info("Query set run complete: {}", querySetRunId); + + return querySetRunResult; + + } catch (Exception ex) { + throw new RuntimeException("Unable to run query set.", ex); + } + + } + + @Override + public void save(final QuerySetRunResult result) throws Exception { + + // Index the query results into OpenSearch. + + final Map results = new HashMap<>(); + + results.put("run_id", result.getRunId()); + results.put("query_results", result.getQueryResultsAsMap()); + + // Add each metric to the object to index. + for (final String metric : result.getSearchMetrics().keySet()) { + results.put(metric, result.getSearchMetrics().get(metric)); + } + + final IndexRequest indexRequest = new IndexRequest(SearchQualityEvaluationPlugin.QUERY_SETS_RUN_RESULTS_INDEX_NAME) + .source(results); + + client.index(indexRequest, new ActionListener<>() { + @Override + public void onResponse(IndexResponse indexResponse) { + LOGGER.debug("Query set results indexed."); + } + + @Override + public void onFailure(Exception ex) { + throw new RuntimeException(ex); + } + }); + + // TODO: Index the metrics as expected by the dashboards. + + // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/METRICS_SCHEMA.md + // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/sample_data.ndjson + + final BulkRequest bulkRequest = new BulkRequest(); + + for(final QueryResult queryResult : result.getQueryResults()) { + + for(final SearchMetric searchMetric : queryResult.getSearchMetrics()) { + + // TODO: Make sure all of these items have values. + final Map metrics = new HashMap<>(); + metrics.put("datetime", "2024-09-01T00:00:00"); + metrics.put("search_config", "research_1"); + metrics.put("query_set_id", result.getQuerySetId()); + metrics.put("query", queryResult.getQuery()); + metrics.put("metric", searchMetric.getName()); + metrics.put("value", searchMetric.getValue()); + metrics.put("application", "sample_data"); + metrics.put("evaluation_id", result.getRunId()); + + // TODO: This is using the index name from the sample data. + bulkRequest.add(new IndexRequest("sqe_metrics_sample_data").source(metrics)); + + } + + } + + client.bulk(bulkRequest, new ActionListener<>() { + + @Override + public void onResponse(BulkResponse bulkItemResponses) { + LOGGER.info("Successfully indexed {} metrics.", bulkItemResponses.getItems().length); + } + + @Override + public void onFailure(Exception ex) { + LOGGER.error("Unable to bulk index metrics.", ex); + } + + }); + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java index 0c97e67..7bc86c2 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java @@ -15,7 +15,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.UUID; /** * The results of a query set run. @@ -23,16 +22,19 @@ public class QuerySetRunResult { private final String runId; + private final String querySetId; private final List queryResults; - private final Collection metrics; + private final Map metrics; /** * Creates a new query set run result. A random UUID is generated as the run ID. + * @param runId A unique identifier for this query set run. * @param queryResults A collection of {@link QueryResult} that contains the queries and search results. - * @param metrics The {@link SearchMetric metrics} calculated from the search results. + * @param metrics A map of metric name to value. */ - public QuerySetRunResult(final List queryResults, final Collection metrics) { - this.runId = UUID.randomUUID().toString(); + public QuerySetRunResult(final String runId, final String querySetId, final List queryResults, final Map metrics) { + this.runId = runId; + this.querySetId = querySetId; this.queryResults = queryResults; this.metrics = metrics; } @@ -46,10 +48,18 @@ public String getRunId() { } /** - * Gets the {@link SearchMetric metrics} calculated from the run. - * @return The {@link SearchMetric metrics} calculated from the run. + * Gets the query set ID. + * @return The query set ID. */ - public Collection getSearchMetrics() { + public String getQuerySetId() { + return querySetId; + } + + /** + * Gets the search metrics. + * @return The search metrics. + */ + public Map getSearchMetrics() { return metrics; }