From a80ba102b86944e3c2deb20442ad673448bc74c3 Mon Sep 17 00:00:00 2001
From: jzonthemtn <jeff.zemerick@mtnfog.com>
Date: Sun, 10 Nov 2024 08:18:09 -0500
Subject: [PATCH] Adding ESCI for UBI.

---
 data/esci/.gitattributes                          |  1 +
 data/esci/README.md                               | 15 +++++++++++++++
 data/esci/index.sh                                |  3 +++
 data/esci/ubi_queries_events_1000.ndjson.bz2      |  3 +++
 .../docker-compose.yaml                           |  2 ++
 5 files changed, 24 insertions(+)
 create mode 100644 data/esci/.gitattributes
 create mode 100644 data/esci/README.md
 create mode 100755 data/esci/index.sh
 create mode 100644 data/esci/ubi_queries_events_1000.ndjson.bz2

diff --git a/data/esci/.gitattributes b/data/esci/.gitattributes
new file mode 100644
index 0000000..8de9298
--- /dev/null
+++ b/data/esci/.gitattributes
@@ -0,0 +1 @@
+ubi_queries_events_1000.ndjson.bz2 filter=lfs diff=lfs merge=lfs -text
diff --git a/data/esci/README.md b/data/esci/README.md
new file mode 100644
index 0000000..0db3532
--- /dev/null
+++ b/data/esci/README.md
@@ -0,0 +1,15 @@
+# ESCI Data in UBI Format
+
+This directory contains ESCI data in the UBI format. The data was created using the [UBI data generator](https://github.com/opensearch-project/user-behavior-insights/tree/main/ubi-data-generator).
+
+The source data is the Shopping Queries Dataset (ESCI), available at https://github.com/amazon-science/esci-data and cited below:
+
+```
+@article{reddy2022shopping,
+title={Shopping Queries Dataset: A Large-Scale {ESCI} Benchmark for Improving Product Search},
+author={Chandan K. Reddy and Lluís Màrquez and Fran Valero and Nikhil Rao and Hugo Zaragoza and Sambaran Bandyopadhyay and Arnab Biswas and Anlu Xing and Karthik Subbian},
+year={2022},
+eprint={2206.06588},
+archivePrefix={arXiv}
+}
+```
diff --git a/data/esci/index.sh b/data/esci/index.sh
new file mode 100755
index 0000000..b6b5604
--- /dev/null
+++ b/data/esci/index.sh
@@ -0,0 +1,3 @@
+#!/bin/bash -e
+
+curl -X POST "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events_1000.ndjson
diff --git a/data/esci/ubi_queries_events_1000.ndjson.bz2 b/data/esci/ubi_queries_events_1000.ndjson.bz2
new file mode 100644
index 0000000..5f21f01
--- /dev/null
+++ b/data/esci/ubi_queries_events_1000.ndjson.bz2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a67dbe2b866c247def62573dfe1eece797f14866760e730eba10b06c6c82770
+size 37387758
diff --git a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
index 9744165..1ff9c17 100644
--- a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
+++ b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml
@@ -9,6 +9,8 @@ services:
       plugins.security.disabled: "true"
       logger.level: info
       OPENSEARCH_INITIAL_ADMIN_PASSWORD: SuperSecretPassword_123
+      http.max_content_length: 500mb  # raised above the 100mb default; presumably sized for the data/esci bulk upload
+      OPENSEARCH_JAVA_OPTS: "${OPENSEARCH_JAVA_OPTS:--Xms8192m -Xmx8192m}"  # 8 GiB heap unless overridden from the host environment
     ulimits:
       memlock:
         soft: -1