Skip to content

Commit

Permalink
Changing queries to use the esci data better. Removing unneeded data …
Browse files Browse the repository at this point in the history
…scripts.
  • Loading branch information
jzonthemtn committed Nov 18, 2024
1 parent 6ef9bda commit 52620c9
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 93 deletions.
1 change: 1 addition & 0 deletions data/esci/.gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
ubi_queries_events_1000.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text
ubi_queries_events.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text
2 changes: 1 addition & 1 deletion data/esci/index.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash -e

curl -s -X POST "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events_1000.ndjson
curl -s -X POST "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events.ndjson
3 changes: 3 additions & 0 deletions data/esci/ubi_queries_events.ndjson.bz2
Git LFS file not shown
3 changes: 0 additions & 3 deletions data/esci/ubi_queries_events_1000.ndjson.bz2

This file was deleted.

4 changes: 0 additions & 4 deletions data/kddcup2012/README.md

This file was deleted.

51 changes: 0 additions & 51 deletions data/kddcup2012/kddcup2012-formatter.py

This file was deleted.

10 changes: 0 additions & 10 deletions data/sample-data/ubi_events.json

This file was deleted.

3 changes: 3 additions & 0 deletions data/ubi-chorus-data-generator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# UBI Data Generator for Chorus using the Ecommerce Index

You should likely use the `esci` dataset instead. It's up one directory level.
32 changes: 16 additions & 16 deletions opensearch-search-quality-evaluation-plugin/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,24 @@ services:
- "9600:9600"
networks:
- opensearch-net
# volumes:
# - opensearch-data1:/usr/share/opensearch/data
volumes:
- opensearch-data1:/usr/share/opensearch/data

# opensearch_sef_dashboards:
# image: opensearchproject/opensearch-dashboards:2.16.0
# container_name: opensearch_sef_dashboards
# ports:
# - "5601:5601"
# environment:
# OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
# DISABLE_SECURITY_DASHBOARDS_PLUGIN: "true"
# depends_on:
# - opensearch_sef
# networks:
# - opensearch-net
opensearch_sef_dashboards:
image: opensearchproject/opensearch-dashboards:2.18.0
container_name: opensearch_sef_dashboards
ports:
- "5601:5601"
environment:
OPENSEARCH_HOSTS: '["http://opensearch_sef:9200"]'
DISABLE_SECURITY_DASHBOARDS_PLUGIN: "true"
depends_on:
- opensearch_sef
networks:
- opensearch-net

#volumes:
# opensearch-data1:
volumes:
opensearch-data1:

networks:
opensearch-net:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ public long calculateCoec(final Map<Integer, Double> rankAggregatedClickThrough,
for(int r = 0; r < maxRank; r++) {

final double meanCtrAtRank = rankAggregatedClickThrough.getOrDefault(r, 0.0);
final int countOfTimesShownAtRank = openSearchHelper.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), r);
final long countOfTimesShownAtRank = openSearchHelper.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), r);

// System.out.println("rank = " + r);
// System.out.println("\tmeanCtrAtRank = " + meanCtrAtRank);
Expand Down Expand Up @@ -179,7 +179,7 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
// TODO: Use maxRank in place of the hardcoded 20.
// TODO: Allow for a time period and for a specific application.

/**
/*
* {
* "bool": {
* "should": [
Expand Down Expand Up @@ -234,7 +234,7 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
" }";

final BoolQueryBuilder queryBuilder = new BoolQueryBuilder().must(new WrapperQueryBuilder(query));
final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(1000);
final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(500);
final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(10L));

final SearchRequest searchRequest = Requests
Expand All @@ -243,6 +243,7 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
.scroll(scroll);

SearchResponse searchResponse = client.search(searchRequest).get();

String scrollId = searchResponse.getScrollId();
SearchHit[] searchHits = searchResponse.getHits().getHits();

Expand All @@ -254,6 +255,8 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank

final UbiEvent ubiEvent = AccessController.doPrivileged((PrivilegedAction<UbiEvent>) () -> gson.fromJson(hit.getSourceAsString(), UbiEvent.class));

//LOGGER.info("event: {}", ubiEvent.toString());

// We need to look up the user query for the query_id because two users can both search
// for "computer" and those searches will have different query IDs, but they are the same search.
final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId());
Expand All @@ -280,7 +283,12 @@ private Map<String, Set<ClickthroughRate>> getClickthroughRate(final int maxRank
final SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
scrollRequest.scroll(scroll);

//LOGGER.info("Doing scroll to next results");
// TODO: Getting a warning in the log that "QueryGroup _id can't be null, It should be set before accessing it. This is abnormal behaviour"
// https://github.com/opensearch-project/OpenSearch/blob/f105e4eb2ede1556b5dd3c743bea1ab9686ebccf/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java#L73
searchResponse = client.searchScroll(scrollRequest).get();
//LOGGER.info("Scroll complete.");

scrollId = searchResponse.getScrollId();

searchHits = searchResponse.getHits().getHits();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ public class EventObject {
@SerializedName("object_id")
private String objectId;

/**
 * Renders this object as {@code [objectIdField, objectId]} for log and debug output.
 *
 * @return the two fields, comma-separated and wrapped in square brackets
 */
@Override
public String toString() {
    final String fields = objectIdField + ", " + objectId;
    return "[" + fields + "]";
}

/**
 * Returns the value held in the field annotated with the {@code object_id} serialized name.
 *
 * @return the object id exactly as stored on this instance
 */
public String getObjectId() {
    return this.objectId;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ public class Position {
@SerializedName("index")
private int index;

/**
 * Renders this position as the decimal text of its index.
 *
 * @return the index formatted as a base-10 string
 */
@Override
public String toString() {
    return Integer.toString(index);
}

/**
 * Returns the value held in the field annotated with the {@code index} serialized name.
 *
 * @return the index exactly as stored on this instance
 */
public int getIndex() {
    return this.index;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public class UbiEvent {

/**
 * Renders this event as a single comma-separated line: action name, client id,
 * query id, event object, and position index.
 *
 * <p>Absent parts are printed as {@code null} instead of throwing, so the method
 * can be called from log statements on partially populated events.
 *
 * @return a one-line, human-readable summary of this event
 */
@Override
public String toString() {
    // NOTE(review): presumably attributes, object, or position can be missing on
    // deserialized events and are then left null — confirm against the event schema.
    String object = "null";
    String positionIndex = "null";
    if (eventAttributes != null) {
        // String.valueOf tolerates a null object; an explicit .toString() call would not.
        object = String.valueOf(eventAttributes.getObject());
        if (eventAttributes.getPosition() != null) {
            positionIndex = String.valueOf(eventAttributes.getPosition().getIndex());
        }
    }
    return actionName + ", " + clientId + ", " + queryId + ", " + object + ", " + positionIndex;
}

public String getActionName() {
Expand Down
Loading

0 comments on commit 52620c9

Please sign in to comment.