From a80ba102b86944e3c2deb20442ad673448bc74c3 Mon Sep 17 00:00:00 2001
From: jzonthemtn <jeff.zemerick@mtnfog.com>
Date: Sun, 10 Nov 2024 08:18:09 -0500
Subject: [PATCH] Adding ESCI for UBI.

---
Review notes (this section is ignored by `git am`):
- index.sh now decompresses the committed .bz2 archive instead of reading an
  .ndjson file that this patch never adds, and fails on HTTP errors.
- README.md documents how to index the data.
- The blob ids on the `index` lines for README.md and index.sh predate these
  revisions; regenerate the patch with `git format-patch` to refresh them.

 data/esci/.gitattributes                      |  1 +
 data/esci/README.md                           | 23 +++++++++++++++++++++++
 data/esci/index.sh                            | 12 ++++++++++++
 data/esci/ubi_queries_events_1000.ndjson.bz2  |  3 +++
 .../docker-compose.yaml                       |  2 ++
 5 files changed, 41 insertions(+)
 create mode 100644 data/esci/.gitattributes
 create mode 100644 data/esci/README.md
 create mode 100755 data/esci/index.sh
 create mode 100644 data/esci/ubi_queries_events_1000.ndjson.bz2

diff --git a/data/esci/.gitattributes b/data/esci/.gitattributes
new file mode 100644
index 0000000..8de9298
--- /dev/null
+++ b/data/esci/.gitattributes
@@ -0,0 +1 @@
+ubi_queries_events_1000.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text
diff --git a/data/esci/README.md b/data/esci/README.md
new file mode 100644
index 0000000..0db3532
--- /dev/null
+++ b/data/esci/README.md
@@ -0,0 +1,23 @@
+# ESCI Data in UBI Format
+
+This directory contains ESCI data in the UBI format, created using the UBI data generator:
+https://github.com/opensearch-project/user-behavior-insights/tree/main/ubi-data-generator
+
+Source dataset: https://github.com/amazon-science/esci-data
+
+## Indexing
+
+`ubi_queries_events_1000.ndjson.bz2` is stored with Git LFS, so run `git lfs pull` first.
+Then run `./index.sh` to decompress the archive and send it to the `_bulk` endpoint at `http://localhost:9200`.
+
+## Citation
+
+```
+@article{reddy2022shopping,
+title={Shopping Queries Dataset: A Large-Scale {ESCI} Benchmark for Improving Product Search},
+author={Chandan K. Reddy and Lluís Màrquez and Fran Valero and Nikhil Rao and Hugo Zaragoza and Sambaran Bandyopadhyay and Arnab Biswas and Anlu Xing and Karthik Subbian},
+year={2022},
+eprint={2206.06588},
+archivePrefix={arXiv}
+}
+```
diff --git a/data/esci/index.sh b/data/esci/index.sh
new file mode 100755
index 0000000..b6b5604
--- /dev/null
+++ b/data/esci/index.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Bulk-index the bundled ESCI UBI sample into the OpenSearch node at localhost:9200.
+set -euo pipefail
+
+# Resolve the archive relative to this script so it can be run from any directory.
+cd "$(dirname "$0")"
+
+# Only the .bz2 archive is committed (via Git LFS); stream the decompressed NDJSON to curl.
+# --fail turns an HTTP error response into a non-zero exit status.
+bzip2 -dc ubi_queries_events_1000.ndjson.bz2 |
+  curl --fail -X POST "http://localhost:9200/_bulk?pretty" \
+    -H "Content-Type: application/x-ndjson" --data-binary @-
diff --git a/data/esci/ubi_queries_events_1000.ndjson.bz2 b/data/esci/ubi_queries_events_1000.ndjson.bz2
new file mode 100644
index 0000000..5f21f01
--- /dev/null
+++ b/data/esci/ubi_queries_events_1000.ndjson.bz2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a67dbe2b866c247def62573dfe1eece797f14866760e730eba10b06c6c82770
+size 37387758
diff --git a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
index 9744165..1ff9c17 100644
--- a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
+++ b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
@@ -9,6 +9,8 @@ services:
       plugins.security.disabled: "true"
       logger.level: info
       OPENSEARCH_INITIAL_ADMIN_PASSWORD: SuperSecretPassword_123
+      http.max_content_length: 500mb
+      OPENSEARCH_JAVA_OPTS: "-Xms8192m -Xmx8192m"
     ulimits:
       memlock:
         soft: -1