diff --git a/data/esci/.gitattributes b/data/esci/.gitattributes index 8de9298..4060623 100644 --- a/data/esci/.gitattributes +++ b/data/esci/.gitattributes @@ -1 +1,2 @@ ubi_queries_events_1000.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text +ubi_queries_events.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text diff --git a/data/esci/index.sh b/data/esci/index.sh index 51b586c..214c745 100755 --- a/data/esci/index.sh +++ b/data/esci/index.sh @@ -1,3 +1,3 @@ #!/bin/bash -e -curl -s -X POST "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events_1000.ndjson +curl -s -X POST "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events.ndjson diff --git a/data/esci/ubi_queries_events.ndjson.bz2 b/data/esci/ubi_queries_events.ndjson.bz2 new file mode 100644 index 0000000..d728d94 --- /dev/null +++ b/data/esci/ubi_queries_events.ndjson.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6811cd6c99311f7b08a549e7783eefdc84bf3bc40e3bfe3abef65efa91548fe9 +size 36696778 diff --git a/data/esci/ubi_queries_events_1000.ndjson.bz2 b/data/esci/ubi_queries_events_1000.ndjson.bz2 deleted file mode 100644 index 5f21f01..0000000 --- a/data/esci/ubi_queries_events_1000.ndjson.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a67dbe2b866c247def62573dfe1eece797f14866760e730eba10b06c6c82770 -size 37387758 diff --git a/data/kddcup2012/README.md b/data/kddcup2012/README.md deleted file mode 100644 index e9706b5..0000000 --- a/data/kddcup2012/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# kddcup2012 - -https://www.kaggle.com/competitions/kddcup2012-track2/data?select=track2.7z - diff --git a/data/kddcup2012/kddcup2012-formatter.py b/data/kddcup2012/kddcup2012-formatter.py deleted file mode 100644 index e66ad40..0000000 --- a/data/kddcup2012/kddcup2012-formatter.py +++ /dev/null @@ -1,51 +0,0 @@ -fields = [ - 'click', - 'impression', - 'display_url', - 'ad_id', - 'advertiser_id', - 'depth', - 'position', - 'query_id', - 'keyword_id', - 'title_id', - 'description_id', - 'user_id', -] - -def parse_line(line): - values = line.strip().split('\t') - return dict(zip(fields, values)) - -def make_event(instance, action_name): - return { - "action_name": action_name, - "client_id": instance['user_id'], - "query_id": instance['query_id'], - "event_attributes": { - "object_id_field": "ean", - "object_id": instance["ad_id"], - 'description': instance["description_id"], - "position": { - "index": instance["position"], - }, - "session_id": instance["user_id"], - } - } - -def consider_display_url(display_url, sample_rate=1.0): - bucket = (hash(display_url)&1023)/1024 # warning, hash is not deterministic across python runs - return bucket < sample_rate - -with open('./track2/training.txt.1k', 'r') as f: - for line in f: - instance = parse_line(line) - if consider_display_url(instance['display_url']): - event = make_event(instance, 'view') - # note, the impressions field contains how many impressions of this ad were shown - # although one is printed, there is potentially more than one event - print(event) - - if instance['click'] == '1': - event = make_event(instance, 'click') - print(event) \ No newline at end of file diff --git a/data/sample-data/ubi_events.json b/data/sample-data/ubi_events.json deleted file mode 100644 index 79daeaf..0000000 --- a/data/sample-data/ubi_events.json +++ /dev/null @@ -1,10 +0,0 @@ -{"_index":"ubi_events","_id":"3a5928f1-5cfc-4219-b897-dafaf18c14df","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium (0884420588030)","timestamp":1726759565517,"event_attributes":{"object":{"object_id_field":"product","object_id":"2101957","description":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium","object_detail":null,"key_value":"0884420588030"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"be87c694-c1ab-4b75-a5e1-b6a793b8c318","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium (0884420588030)","timestamp":1726759565565,"event_attributes":{"object":{"object_id_field":"product","object_id":"2101957","description":"HP Pavilion dv7-1211ea Notebook Silver 43.2 cm (17\") 1440 x 900 pixels Intel® Core™2 Duo 4 GB DDR2-SDRAM 250 GB HDD NVIDIA® GeForce® 9200M GS Windows Vista Home Premium","object_detail":null,"key_value":"0884420588030"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"34e212bf-87e0-4ea9-b241-e6cc8e8d797a","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"StarTech.com DB9 RS232 Serial Null Modem Adapter - M/F (0065030815772)","timestamp":1726759565715,"event_attributes":{"object":{"object_id_field":"product","object_id":"1128895","description":"StarTech.com DB9 RS232 Serial Null Modem Adapter - M/F","object_detail":null,"key_value":"0065030815772"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"995bcf2b-d79a-4370-801b-57de3fcd63aa","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Xerox 006R90321 toner cartridge Original Black 6 pc(s) (0095205603217)","timestamp":1726759565581,"event_attributes":{"object":{"object_id_field":"product","object_id":"3920564","description":"Xerox 006R90321 toner cartridge Original Black 6 pc(s)","object_detail":null,"key_value":"0095205603217"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"a18f8755-9942-4b34-95c1-9bb0201c5090","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"HP ProCurve 420 Wireless Access Point (0882780985407)","timestamp":1726759565748,"event_attributes":{"object":{"object_id_field":"product","object_id":"1449722","description":"HP ProCurve 420 Wireless Access Point","object_detail":null,"key_value":"0882780985407"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"cc4957f0-9bad-4e39-9cdf-fd7e8e79ba00","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Integral 2GB SD Card memory card (5055288404878)","timestamp":1726759565781,"event_attributes":{"object":{"object_id_field":"product","object_id":"1625640","description":"Integral 2GB SD Card memory card","object_detail":null,"key_value":"5055288404878"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"85254e72-7a2e-4f32-bdb9-ece49c7e02d9","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Integral 2GB SD Card memory card (5055288404878)","timestamp":1726759566065,"event_attributes":{"object":{"object_id_field":"product","object_id":"1625640","description":"Integral 2GB SD Card memory card","object_detail":null,"key_value":"5055288404878"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"04d71fbe-226b-4501-b3b0-07e112f056ef","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"APC Emergency Power Off (EPO) (0731304213239)","timestamp":1726759565864,"event_attributes":{"object":{"object_id_field":"product","object_id":"636678","description":"APC Emergency Power Off (EPO)","object_detail":null,"key_value":"0731304213239"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"fccadeda-6ea5-48c0-8c07-ce21064c1003","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Brother LC51C Original Cyan (0012502615620)","timestamp":1726759565815,"event_attributes":{"object":{"object_id_field":"product","object_id":"2067378","description":"Brother LC51C Original Cyan","object_detail":null,"key_value":"0012502615620"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} -{"_index":"ubi_events","_id":"a6f5b060-aaf0-425e-a5d3-ab0fdcd0fbae","_score":1.0,"_source":{"action_name":"product_hover","client_id":"USER-eeed-43de-959d-90e6040e84f9","query_id":null,"page_id":"/","message_type":"INFO","message":"Brother LC51C Original Cyan (0012502615620)","timestamp":1726759566032,"event_attributes":{"object":{"object_id_field":"product","object_id":"2067378","description":"Brother LC51C Original Cyan","object_detail":null,"key_value":"0012502615620"},"position":null,"browser":null,"session_id":null,"page_id":null,"dwell_time":null}}} \ No newline at end of file diff --git a/data/ubi-chorus-data-generator/README.md b/data/ubi-chorus-data-generator/README.md new file mode 100644 index 0000000..0a33a95 --- /dev/null +++ b/data/ubi-chorus-data-generator/README.md @@ -0,0 +1,3 @@ +# UBI Data Generator for Chorus using the Ecommerce Index + +You should likely use the `esci` dataset instead. It's up one directory level. \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml index 1ff9c17..5e8d7cf 100644 --- a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml +++ b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml @@ -23,24 +23,24 @@ services: - "9600:9600" networks: - opensearch-net -# volumes: -# - opensearch-data1:/usr/share/opensearch/data + volumes: + - opensearch-data1:/usr/share/opensearch/data -# opensearch_sef_dashboards: -# image: opensearchproject/opensearch-dashboards:2.16.0 -# container_name: opensearch_sef_dashboards -# ports: -# - "5601:5601" -# environment: -# OPENSEARCH_HOSTS: '["http://opensearch:9200"]' -# DISABLE_SECURITY_DASHBOARDS_PLUGIN: "true" -# depends_on: -# - opensearch_sef -# networks: -# - opensearch-net + opensearch_sef_dashboards: + image: opensearchproject/opensearch-dashboards:2.18.0 + container_name: opensearch_sef_dashboards + ports: + - "5601:5601" + environment: + OPENSEARCH_HOSTS: '["http://opensearch_sef:9200"]' + DISABLE_SECURITY_DASHBOARDS_PLUGIN: "true" + depends_on: + - opensearch_sef + networks: + - opensearch-net -#volumes: -# opensearch-data1: +volumes: + opensearch-data1: networks: opensearch-net: diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java index 3e1be5e..66a5045 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java @@ -124,7 +124,7 @@ public long calculateCoec(final Map rankAggregatedClickThrough, for(int r = 0; r < maxRank; r++) { final double meanCtrAtRank = rankAggregatedClickThrough.getOrDefault(r, 0.0); - final int countOfTimesShownAtRank = openSearchHelper.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), r); + final long countOfTimesShownAtRank = openSearchHelper.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), r); // System.out.println("rank = " + r); // System.out.println("\tmeanCtrAtRank = " + meanCtrAtRank); @@ -179,7 +179,7 @@ private Map> getClickthroughRate(final int maxRank // TODO: Use maxRank in place of the hardcoded 20. // TODO: Allow for a time period and for a specific application. - /** + /* * { * "bool": { * "should": [ @@ -234,7 +234,7 @@ private Map> getClickthroughRate(final int maxRank " }"; final BoolQueryBuilder queryBuilder = new BoolQueryBuilder().must(new WrapperQueryBuilder(query)); - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(1000); + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(500); final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(10L)); final SearchRequest searchRequest = Requests @@ -243,6 +243,7 @@ private Map> getClickthroughRate(final int maxRank .scroll(scroll); SearchResponse searchResponse = client.search(searchRequest).get(); + String scrollId = searchResponse.getScrollId(); SearchHit[] searchHits = searchResponse.getHits().getHits(); @@ -254,6 +255,8 @@ private Map> getClickthroughRate(final int maxRank final UbiEvent ubiEvent = AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiEvent.class)); + //LOGGER.info("event: {}", ubiEvent.toString()); + // We need to the hash of the query_id because two users can both search // for "computer" and those searches will have different query IDs, but they are the same search. final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId()); @@ -280,7 +283,12 @@ private Map> getClickthroughRate(final int maxRank final SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); scrollRequest.scroll(scroll); + //LOGGER.info("Doing scroll to next results"); + // TODO: Getting a warning in the log that "QueryGroup _id can't be null, It should be set before accessing it. This is abnormal behaviour" + // https://github.com/opensearch-project/OpenSearch/blob/f105e4eb2ede1556b5dd3c743bea1ab9686ebccf/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java#L73 searchResponse = client.searchScroll(scrollRequest).get(); + //LOGGER.info("Scroll complete."); + scrollId = searchResponse.getScrollId(); searchHits = searchResponse.getHits().getHits(); diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java index e579823..6a07b95 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java @@ -18,6 +18,11 @@ public class EventObject { @SerializedName("object_id") private String objectId; + @Override + public String toString() { + return "[" + objectIdField + ", " + objectId + "]"; + } + public String getObjectId() { return objectId; } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java index 883e1a9..b585312 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java @@ -15,6 +15,11 @@ public class Position { @SerializedName("index") private int index; + @Override + public String toString() { + return String.valueOf(index); + } + public int getIndex() { return index; } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java index 57809cc..0d76fb4 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java @@ -29,7 +29,7 @@ public class UbiEvent { @Override public String toString() { - return actionName + ", " + clientId + ", " + queryId + ", " + eventAttributes.getObject() + ", " + eventAttributes.getPosition().getIndex(); + return actionName + ", " + clientId + ", " + queryId + ", " + eventAttributes.getObject().toString() + ", " + eventAttributes.getPosition().getIndex(); } public String getActionName() { diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java index 2f6fe49..8a1e164 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java +++ b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java @@ -9,6 +9,8 @@ package org.opensearch.eval.judgments.opensearch; import com.google.gson.Gson; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.action.bulk.BulkRequest; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.search.SearchRequest; @@ -25,9 +27,9 @@ import java.io.IOException; import java.security.AccessController; import java.security.PrivilegedAction; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -39,6 +41,8 @@ public class OpenSearchHelper { + private static final Logger LOGGER = LogManager.getLogger(OpenSearchHelper.class.getName()); + private final Client client; private final Gson gson = new Gson(); @@ -77,6 +81,8 @@ public String getUserQuery(final String queryId) throws Exception { */ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { + //LOGGER.info("Getting query from query ID {}", queryId); + final String query = "{\"match\": {\"query_id\": \"" + queryId + "\" }}"; final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); @@ -94,13 +100,101 @@ public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { // Will only be a single result. final SearchHit hit = response.getHits().getHits()[0]; + //LOGGER.info("Retrieved query from query ID {}", queryId); + return AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class)); } - public int getCountOfQueriesForUserQueryHavingResultInRankR(final String userQuery, final String objectId, final int rank) throws Exception { + private Collection getQueryIdsHavingUserQuery(final String userQuery) throws Exception { + + final String query = "{\"match\": {\"user_query\": \"" + userQuery + "\" }}"; + final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); + + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(qb); + + final String[] indexes = {INDEX_UBI_QUERIES}; + + final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); + final SearchResponse response = client.search(searchRequest).get(); + + final Collection queryIds = new ArrayList<>(); + + for(final SearchHit hit : response.getHits().getHits()) { + final String queryId = hit.getSourceAsMap().get("query_id").toString(); + queryIds.add(queryId); + } + + return queryIds; + + } + + public long getCountOfQueriesForUserQueryHavingResultInRankR(final String userQuery, final String objectId, final int rank) throws Exception { + + long countOfTimesShownAtRank = 0; + + // Get all query IDs matching this user query. + final Collection queryIds = getQueryIdsHavingUserQuery(userQuery); + + // For each query ID, get the events with action_name = "view" having a match on objectId and rank (position). + for(final String queryId : queryIds) { + + //LOGGER.info("userQuery = {}; queryId = {}; objectId = {}; rank = {}", userQuery, queryId, objectId, rank); + + final String query = "{\n" + + " \"bool\": {\n" + + " \"must\": [\n" + + " {\n" + + " \"term\": {\n" + + " \"query_id\": \"" + queryId + "\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"action_name\": \"view\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"event_attributes.position.index\": \"" + rank + "\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"event_attributes.object.object_id\": \"" + objectId + "\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }"; + + final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); + + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(qb); + + final String[] indexes = {INDEX_UBI_QUERIES}; + + final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); + final SearchResponse response = client.search(searchRequest).get(); + + countOfTimesShownAtRank += response.getHits().getTotalHits().value; + + } - int countOfTimesShownAtRank = 0; + //LOGGER.info("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); + + if(countOfTimesShownAtRank > 0) { + LOGGER.info("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); + } + + return countOfTimesShownAtRank; + + /* + + // This commented block was used to get the value using the ubi_queries index. + // We can now just use the ubi_events index. final String query = "{\"match\": {\"user_query\": \"" + userQuery + "\" }}"; final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); @@ -123,7 +217,7 @@ public int getCountOfQueriesForUserQueryHavingResultInRankR(final String userQue } - return countOfTimesShownAtRank; + */ } diff --git a/opensearch-search-quality-evaluation-plugin/useful_queries.txt b/opensearch-search-quality-evaluation-plugin/useful_queries.txt new file mode 100644 index 0000000..4ee840e --- /dev/null +++ b/opensearch-search-quality-evaluation-plugin/useful_queries.txt @@ -0,0 +1,151 @@ +DELETE ubi_events +DELETE ubi_queries + +GET ubi_events/_mapping +GET ubi_events/_search + +GET ubi_queries/_mapping +GET ubi_queries/_search + +DELETE judgments +GET judgments/_search + + +PUT ubi_queries +{ + "mappings": { + "properties": { + "timestamp": { "type": "date", "format": "strict_date_time" }, + "query_id": { "type": "keyword", "ignore_above": 100 }, + "query": { "type": "text" }, + "query_response_id": { "type": "keyword", "ignore_above": 100 }, + "query_response_hit_ids": { "type": "keyword" }, + "user_query": { "type": "keyword", "ignore_above": 256 }, + "query_attributes": { "type": "flat_object" }, + "client_id": { "type": "keyword", "ignore_above": 100 }, + "application": { "type": "keyword", "ignore_above": 100 } + } + } +} + +PUT ubi_events +{ +"mappings": { + "properties": { + "application": { "type": "keyword", "ignore_above": 256 }, + "action_name": { "type": "keyword", "ignore_above": 100 }, + "client_id": { "type": "keyword", "ignore_above": 100 }, + "query_id": { "type": "keyword", "ignore_above": 100 }, + "message": { "type": "keyword", "ignore_above": 1024 }, + "message_type": { "type": "keyword", "ignore_above": 100 }, + "timestamp": { + "type": "date", + "format":"strict_date_time", + "ignore_malformed": true, + "doc_values": true + }, + "event_attributes": { + "dynamic": true, + "properties": { + "position": { + "properties": { + "ordinal": { "type": "integer" }, + "x": { "type": "integer" }, + "y": { "type": "integer" }, + "page_depth": { "type": "integer" }, + "scroll_depth": { "type": "integer" }, + "trail": { "type": "text", + "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } + } + } + } + }, + "object": { + "properties": { + "internal_id": { "type": "keyword" }, + "object_id": { "type": "keyword", "ignore_above": 256 }, + "object_id_field": { "type": "keyword", "ignore_above": 100 }, + "name": { "type": "keyword", "ignore_above": 256 }, + "description": { "type": "text", + "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } + }, + "object_detail": { "type": "object" } + } + } + } + } + } + } +} + +GET ubi_events/_search +{ + "query": { + "range": { + "event_attributes.position.index": { + "lte": 20 + } + } + } +} + +GET ubi_queries/_search +{ + "query": { + "term": { + "user_query": "batteries" + } + } +} + +GET ubi_events/_search +{ + "query": { + "bool": { + "must": [ + { + "term": { + "query_id": "cdc01f67-0b24-4c96-bb56-a89234f4fb0c" + } + }, + { + "term": { + "action_name": "click" + } + }, + { + "term": { + "event_attributes.position.index": "0" + } + }, + { + "term": { + "event_attributes.object.object_id": "B0797J3DWK" + } + } + ] + } + } + } +} + +GET ubi_events/_search +{ + "size": 0, + "aggs": { + "By_Action": { + "terms": { + "field": "action_name", + "size": 20 + }, + "aggs": { + "By_Position": { + "terms": { + "field": "event_attributes.position.index", + "size": 20 + } + } + } + } + } +} \ No newline at end of file