From d0b45544e18960969ec37b083e3ff4e7ec837109 Mon Sep 17 00:00:00 2001 From: Natasha Boyse Date: Fri, 7 Feb 2025 14:53:26 +0000 Subject: [PATCH] wip: more migration to opensearch --- django_app/.vscode/launch.json | 2 - django_app/.vscode/tasks.json | 6 -- docker-compose.yml | 2 +- docs/DEVELOPER_SETUP.md | 6 +- docs/architecture/index.md | 2 +- docs/architecture/transactions_and_schema.md | 10 ++-- docs/code_reference/models/settings.md | 12 +--- docs/installation/local.md | 8 +-- redbox-core/poetry.lock | 10 ++-- redbox-core/pyproject.toml | 1 - redbox-core/redbox/chains/components.py | 3 +- redbox-core/redbox/graph/nodes/tools.py | 3 +- redbox-core/redbox/loader/ingester.py | 17 +----- redbox-core/redbox/models/chain.py | 2 +- redbox-core/redbox/models/settings.py | 62 ++++++-------------- redbox-core/redbox/retriever/queries.py | 10 ++-- redbox-core/redbox/retriever/retrievers.py | 10 ++-- redbox-core/tests/conftest.py | 14 ++--- redbox-core/tests/test_ingest.py | 9 +-- redbox-core/tests/test_tools.py | 4 +- redbox-core/tests/test_transform.py | 2 +- 21 files changed, 68 insertions(+), 127 deletions(-) diff --git a/django_app/.vscode/launch.json b/django_app/.vscode/launch.json index b7540695c..998b7089e 100644 --- a/django_app/.vscode/launch.json +++ b/django_app/.vscode/launch.json @@ -15,7 +15,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost" } }, @@ -33,7 +32,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost" } }, ] diff --git a/django_app/.vscode/tasks.json b/django_app/.vscode/tasks.json index 432fdc576..3da8dce25 100644 --- a/django_app/.vscode/tasks.json +++ b/django_app/.vscode/tasks.json @@ -16,7 +16,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { @@ -34,7 +33,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { @@ -52,7 +50,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { @@ -70,7 +67,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { @@ -88,7 +84,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { @@ -106,7 +101,6 @@ "MINIO_HOST": "localhost", "POSTGRES_HOST": "localhost", "UNSTRUCTURED_HOST": "localhost", - "ELASTIC__HOST": "localhost", } }, "presentation": { diff --git a/docker-compose.yml b/docker-compose.yml index 5af21d2bc..eb1751937 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -120,7 +120,7 @@ services: start_period: 30s opensearch: - image: opensearchproject/opensearch:2.17.0 + image: opensearchproject/opensearch:2.18.0 environment: - discovery.type=single-node - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m diff --git a/docs/DEVELOPER_SETUP.md b/docs/DEVELOPER_SETUP.md index 492745524..417841c50 100644 --- a/docs/DEVELOPER_SETUP.md +++ b/docs/DEVELOPER_SETUP.md @@ -198,16 +198,16 @@ elasticdump \ --type=data ``` -### Loading data to Elasticsearch +### Loading data to Opensearch If you've been provided with a dump from the vector store, add it to [data/elastic-dumps/](../data/elastic-dumps/). 
The below assumes the existence of `redbox-data-chunk.json` in that directory. Consider dumping your existing indices if you don't want to have to re-embed data you're working on.
 
-Start the Elasticsearch service.
+Start the Opensearch service.
 
 ```console
-docker compose up -d elasticsearch
+docker compose up -d opensearch
 ```
 
 Load data from your JSONs, or your own file.
diff --git a/docs/architecture/index.md b/docs/architecture/index.md
index 9426fe4e3..d6b60ad06 100644
--- a/docs/architecture/index.md
+++ b/docs/architecture/index.md
@@ -34,7 +34,7 @@ The Retrieval Augmented Generation (RAG) architecture grounds our Large Language
 | Core API | ECS | App Service | Docker | FastAPI AI Interaction and DB Intermediary |
 | Worker | ECS | App Service | Docker | Queue fed file ingester and embedder |
 | Database | RDS/Postgres | Postgres | Postgres | Chat history & user data |
-| Vector Database | ElasticCloud | ElasticCloud | Elasticsearch | RAG Database |
+| Vector Database | ElasticCloud | ElasticCloud | Opensearch | RAG Database |
 | Container Registry | ECR | ACR | Harbor | Storage for app containers |
 | Embedding API | Azure OpenAI Service | Azure OpenAI Service | Huggingface Containers | Embedding for docs into VectorDB |
 | LLM API | Azure OpenAI Service | Azure OpenAI Service | Huggingface Containers | Chat model |
diff --git a/docs/architecture/transactions_and_schema.md b/docs/architecture/transactions_and_schema.md
index a62ca79b9..0e3c6ca72 100644
--- a/docs/architecture/transactions_and_schema.md
+++ b/docs/architecture/transactions_and_schema.md
@@ -16,9 +16,9 @@ sequenceDiagram
     Django->>S3: file key, content
     Django->>Core: file key
     Core->>Workers: file key
-    Core->>Elastic: file key
+    Core->>Opensearch: file key
     S3->>Workers: file content
-    Workers->>Elastic: chunk key, content
+    Workers->>Opensearch: chunk key, content
 ```
 
 ### Chat APIs
@@ -44,7 +44,7 @@ title: Transaction sequence - POST /chat/rag
 
 sequenceDiagram
     Django->> Core: ChatHistory.messages[], File[].uuid
-    Elastic->>Core: File[].Chunk[].embeddings
+    Opensearch->>Core: File[].Chunk[].embeddings
     Core->>LLM API: ChatHistory.messages[].embeddings, File[].Chunk[].embeddings
 ```
 
@@ -101,13 +101,13 @@ erDiagram
     ChatHistory }|--o{ FileRecord: "ChatHistory.files_retrieved"
 ```
 
-### Elastic Schema
+### Opensearch Schema
 
 Keeping things simple is the primary ethos here. We are storing the UUID of the parent file in the chunk. This allows us to easily query for all chunks of a file. We are also storing the text of the chunk, the metadata of the chunk, and the embedding of the chunk. The embedding is a float array that is generated by the embedding API.
 
 ```mermaid
 ---
-title: Elastic schema
+title: Opensearch schema
 ---
 
 erDiagram
diff --git a/docs/code_reference/models/settings.md b/docs/code_reference/models/settings.md
index 93b007c35..3d280dff6 100644
--- a/docs/code_reference/models/settings.md
+++ b/docs/code_reference/models/settings.md
@@ -4,14 +4,6 @@ Redbox used the `pydantic_settings` library to manage settings. This library all
 
 ::: redbox.models.settings.Settings
 
-# Elasticsearch Settings
+# OpenSearch Settings
 
-Depending on the deployment scenarios we have two different ways to configure Elasticsearch: `ElasticLocalSettings` and `ElasticCloudSettings`.
-
-## `ElasticLocalSettings`
-
-::: redbox.models.settings.ElasticLocalSettings
-
-## `ElasticCloudSettings`
-
-::: redbox.models.settings.ElasticCloudSettings
\ No newline at end of file
+We configure OpenSearch via the `OpenSearchSettings` model in `redbox-core/redbox/models/settings.py`.
\ No newline at end of file
diff --git a/docs/installation/local.md b/docs/installation/local.md
index 44d7c080f..fae4eaa36 100644
--- a/docs/installation/local.md
+++ b/docs/installation/local.md
@@ -24,7 +24,7 @@ As the project deploys, you should eventually see the following message:
 ```
 [+] Running 8/8
 ✔ Network redbox_redbox-app-network Created 0.0s
- ✔ Container redbox-elasticsearch-1 Healthy 22.7s
+ ✔ Container redbox-opensearch-1 Healthy 22.7s
 ✔ Container redbox-redis-1 Healthy 22.7s
 ✔ Container redbox-minio-1 Healthy 22.7s
 ✔ Container redbox-db-1 Healthy 22.7s
@@ -35,11 +35,11 @@ As the project deploys, you should eventually see the following message:
 
 Redbox utilises health checks to ensure that the services are running correctly.
 
-!!! info "Elastic and Minio failure"
-    If you see that the Elasticsearch or MinIO containers are unhealthy, this may be due to a permission issue with the directory they're mounted to. You can fix this by running the following command:
+!!! info "Opensearch and MinIO failure"
+    If you see that the Opensearch or MinIO containers are unhealthy, this may be due to a permission issue with the directory they're mounted to. You can fix this by running the following command:
 
     ```bash
-    chmod -R 777 ./data/elastic/
+    chmod -R 777 ./data/opensearch/
     chmod -R 777 ./data/objectstore/
     ```
 
diff --git a/redbox-core/poetry.lock b/redbox-core/poetry.lock
index da35d2463..81789b5fc 100644
--- a/redbox-core/poetry.lock
+++ b/redbox-core/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]] name = "aiohappyeyeballs" @@ -1212,13 +1212,13 @@ develop = ["aiohttp", "furo", "httpcore (<1.0.6)", "httpx", "opentelemetry-api", [[package]] name = "elasticsearch" -version = "8.16.0" +version = "8.17.1" description = "Python client for Elasticsearch" optional = false python-versions = ">=3.8" files = [ - {file = "elasticsearch-8.16.0-py3-none-any.whl", hash = "sha256:83d9fe09e8e95880559da43e44976c1e11cc63fe96bc0c0592f3d64f371772bf"}, - {file = "elasticsearch-8.16.0.tar.gz", hash = "sha256:d2aaa92f44ebea3c4147389aeba038c0b42a017f8c52ff35b1e7ebc34c49adb7"}, + {file = "elasticsearch-8.17.1-py3-none-any.whl", hash = "sha256:f1de0a075f12cc0fa377668eb4fb2ce02185c060ebb50cf2c3889242f9a5130e"}, + {file = "elasticsearch-8.17.1.tar.gz", hash = "sha256:057ab44cae8b3acffbf826a31678e46eafc38f26fcffa91015352d973299cdf0"}, ] [package.dependencies] @@ -4809,4 +4809,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.12,<3.13" -content-hash = "e068974df41e04dce020573cf08519942e6e6d34070798358b6f549aa9d3dd9f" +content-hash = "e2362b961c8e1d9df7fe2ad77e3556dc5c23dc8882f42f6c3180708a520a3125" diff --git a/redbox-core/pyproject.toml b/redbox-core/pyproject.toml index ef7c4c0fb..70b59aec6 100644 --- a/redbox-core/pyproject.toml +++ b/redbox-core/pyproject.toml @@ -13,7 +13,6 @@ readme = "../README.md" [tool.poetry.dependencies] python = ">=3.12,<3.13" pydantic = "^2.7.1" -elasticsearch = "^8.15.0" langchain-community = ">0.2.12" langchain = "^0.3.4" langchain_openai = ">0.1.21" diff --git a/redbox-core/redbox/chains/components.py b/redbox-core/redbox/chains/components.py index 328cb92d8..435d3efe4 100644 --- a/redbox-core/redbox/chains/components.py +++ b/redbox-core/redbox/chains/components.py @@ -11,7 +11,6 @@ from langchain_core.runnables import Runnable from langchain_core.utils import convert_to_secret_str -# from langchain_elasticsearch import ElasticsearchRetriever from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings @@ -98,7 +97,7 @@ def get_all_chunks_retriever(env: Settings) -> OpenSearchRetriever: def get_parameterised_retriever(env: Settings, embeddings: Embeddings | None = None): - """Creates an Elasticsearch retriever runnable. + """Creates an Opensearch retriever runnable. Runnable takes input of a dict keyed to question, file_uuids and user_uuid. 
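As an aside for reviewers, here is a minimal sketch of how the OpenSearch-backed retriever factories touched above are typically wired together. It is illustrative only: the constructor arguments mirror the test fixtures that appear later in this patch (`redbox-core/tests/conftest.py`), `FakeEmbeddings` stands in for the real embedding model, and the dimension value is a placeholder.

```python
# Illustrative sketch, not part of the patch; argument names follow the
# conftest.py fixtures shown later in this diff and may not match the factories exactly.
from langchain_core.embeddings.fake import FakeEmbeddings

from redbox.models.settings import Settings
from redbox.retriever.retrievers import ParameterisedElasticsearchRetriever

env = Settings()
client = env.opensearch_client()  # opensearchpy.OpenSearch built from OpenSearchSettings

retriever = ParameterisedElasticsearchRetriever(
    es_client=client,
    index_name=env.opensearch_alias,            # the "<root>-chunk-current" alias
    embedding_model=FakeEmbeddings(size=3072),  # placeholder model and dimension
    embedding_field_name=env.embedding_document_field_name,
)
# The retriever is then invoked with a RedboxState carrying the question,
# selected files and AI settings (see build_document_query in queries.py below).
```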
diff --git a/redbox-core/redbox/graph/nodes/tools.py b/redbox-core/redbox/graph/nodes/tools.py index e7679ae19..6001e3bc0 100644 --- a/redbox-core/redbox/graph/nodes/tools.py +++ b/redbox-core/redbox/graph/nodes/tools.py @@ -3,7 +3,6 @@ import numpy as np import requests import tiktoken -from elasticsearch import Elasticsearch from opensearchpy import OpenSearch from langchain_community.utilities import WikipediaAPIWrapper from langchain_core.documents import Document @@ -30,7 +29,7 @@ def build_search_documents_tool( - es_client: Union[Elasticsearch, OpenSearch], + es_client: OpenSearch, index_name: str, embedding_model: Embeddings, embedding_field_name: str, diff --git a/redbox-core/redbox/loader/ingester.py b/redbox-core/redbox/loader/ingester.py index 1164e57db..3c13b87a0 100644 --- a/redbox-core/redbox/loader/ingester.py +++ b/redbox-core/redbox/loader/ingester.py @@ -25,16 +25,9 @@ def get_elasticsearch_store(es, es_index_name: str): - # return ElasticsearchStore( - # index_name=es_index_name, - # embedding=get_embeddings(env), - # es_connection=es, - # query_field="text", - # vector_query_field=env.embedding_document_field_name, - # ) return OpenSearchVectorSearch( index_name=es_index_name, - opensearch_url=env.elastic.collection_endpoint, + opensearch_url=env.opensearch.collection_endpoint, embedding_function=get_embeddings(env), query_field="text", vector_query_field=env.embedding_document_field_name, @@ -42,16 +35,10 @@ def get_elasticsearch_store(es, es_index_name: str): def get_elasticsearch_store_without_embeddings(es, es_index_name: str): - # return ElasticsearchStore( - # index_name=es_index_name, - # es_connection=es, - # query_field="text", - # strategy=BM25Strategy(), - # ) return OpenSearchVectorSearch( index_name=es_index_name, - opensearch_url=env.elastic.collection_endpoint, + opensearch_url=env.opensearch.collection_endpoint, embedding_function=FakeEmbeddings(size=env.embedding_backend_vector_size), ) diff --git a/redbox-core/redbox/models/chain.py b/redbox-core/redbox/models/chain.py index 876001e4e..bbd6b84cf 100644 --- a/redbox-core/redbox/models/chain.py +++ b/redbox-core/redbox/models/chain.py @@ -60,7 +60,7 @@ class AISettings(BaseModel): chat_map_question_prompt: str = prompts.CHAT_MAP_QUESTION_PROMPT reduce_system_prompt: str = prompts.REDUCE_SYSTEM_PROMPT - # Elasticsearch RAG and boost values + # Opensearch RAG and boost values rag_k: int = 30 rag_num_candidates: int = 10 rag_gauss_scale_size: int = 3 diff --git a/redbox-core/redbox/models/settings.py b/redbox-core/redbox/models/settings.py index 6622d5689..aef0f29d2 100644 --- a/redbox-core/redbox/models/settings.py +++ b/redbox-core/redbox/models/settings.py @@ -7,9 +7,7 @@ import boto3 import environ from dotenv import load_dotenv -from elasticsearch import Elasticsearch from langchain.globals import set_debug -from openai import max_retries from opensearchpy import OpenSearch, RequestsHttpConnection from pydantic import AnyUrl, BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict @@ -42,30 +40,6 @@ class OpenSearchSettings(BaseModel): collection_endpoint__port_local: Optional[str] = "9200" # locally, the port number is 9200 -class ElasticLocalSettings(BaseModel): - """settings required for a local/ec2 instance of elastic""" - - model_config = SettingsConfigDict(frozen=True) - - host: str = "elasticsearch" - port: int = 9200 - scheme: str = "http" - user: str = "elastic" - version: str = "8.11.0" - password: str = "redboxpass" - subscription_level: str = "basic" - - -class 
ElasticCloudSettings(BaseModel): - """settings required for elastic-cloud""" - - model_config = SettingsConfigDict(frozen=True) - - api_key: str - cloud_id: str - subscription_level: str = "basic" - - class ChatLLMBackend(BaseModel): name: str = "gpt-4o" provider: str = "azure_openai" @@ -197,12 +171,12 @@ class Settings(BaseSettings): } @property - def elastic_chat_mesage_index(self): - return self.elastic_root_index + "-chat-mesage-log" + def opensearch_chat_mesage_index(self): + return self.opensearch_root_index + "-chat-mesage-log" @property - def elastic_alias(self): - return self.elastic_root_index + "-chunk-current" + def opensearch_alias(self): + return self.opensearch_root_index + "-chunk-current" # @lru_cache(1) #removing cache because pydantic object (index mapping) is not hashable def opensearch_client(self) -> OpenSearch: @@ -210,11 +184,11 @@ def opensearch_client(self) -> OpenSearch: client = OpenSearch( hosts=[ { - "host": self.elastic.collection_endpoint__host, - "port": self.elastic.collection_endpoint__port_local, + "host": self.opensearch.collection_endpoint__host, + "port": self.opensearch.collection_endpoint__port_local, } ], - http_auth=(self.elastic.collection_endpoint__username, self.elastic.collection_endpoint__password), + http_auth=(self.opensearch.collection_endpoint__username, self.opensearch.collection_endpoint__password), use_ssl=False, connection_class=RequestsHttpConnection, ) @@ -222,9 +196,9 @@ def opensearch_client(self) -> OpenSearch: else: client = OpenSearch( hosts=[ - {"host": self.elastic.collection_endpoint__host, "port": self.elastic.collection_endpoint__port} + {"host": self.opensearch.collection_endpoint__host, "port": self.opensearch.collection_endpoint__port} ], - http_auth=(self.elastic.collection_endpoint__username, self.elastic.collection_endpoint__password), + http_auth=(self.opensearch.collection_endpoint__username, self.opensearch.collection_endpoint__password), use_ssl=True, verify_certs=True, connection_class=RequestsHttpConnection, @@ -233,10 +207,10 @@ def opensearch_client(self) -> OpenSearch: timeout=120, ) - if not client.indices.exists_alias(name=self.elastic_alias): - chunk_index = f"{self.elastic_root_index}-chunk" + if not client.indices.exists_alias(name=self.opensearch_alias): + chunk_index = f"{self.opensearch_root_index}-chunk" # client.options(ignore_status=[400]).indices.create(index=chunk_index) - # client.indices.put_alias(index=chunk_index, name=self.elastic_alias) + # client.indices.put_alias(index=chunk_index, name=self.opensearch_alias) try: client.indices.create( index=chunk_index, body=self.index_mapping, ignore=400 @@ -245,18 +219,18 @@ def opensearch_client(self) -> OpenSearch: logger.error(f"Failed to create index {chunk_index}: {e}") try: - client.indices.put_alias(index=chunk_index, name=f"{self.elastic_root_index}-chunk-current") + client.indices.put_alias(index=chunk_index, name=f"{self.opensearch_root_index}-chunk-current") except Exception as e: - logger.error(f"Failed to set alias {self.elastic_root_index}-chunk-current: {e}") + logger.error(f"Failed to set alias {self.opensearch_root_index}-chunk-current: {e}") - if not client.indices.exists(index=self.elastic_chat_mesage_index): + if not client.indices.exists(index=self.opensearch_chat_mesage_index): try: client.indices.create( - index=self.elastic_chat_mesage_index, ignore=400 + index=self.opensearch_chat_mesage_index, ignore=400 ) # 400 is ignored to avoid index-already-exists errors except Exception as e: - logger.error(f"Failed to create index 
{self.elastic_chat_mesage_index}: {e}") - # client.indices.create(index=self.elastic_chat_mesage_index) + logger.error(f"Failed to create index {self.opensearch_chat_mesage_index}: {e}") + # client.indices.create(index=self.opensearch_chat_mesage_index) return client diff --git a/redbox-core/redbox/retriever/queries.py b/redbox-core/redbox/retriever/queries.py index 66c44e0e3..9acb80a78 100644 --- a/redbox-core/redbox/retriever/queries.py +++ b/redbox-core/redbox/retriever/queries.py @@ -10,12 +10,12 @@ def build_file_filter(file_names: list[str]) -> dict[str, Any]: - """Creates an Elasticsearch filter for file names.""" + """Creates an Opensearch filter for file names.""" return {"terms": {"metadata.uri.keyword": file_names}} def build_resolution_filter(chunk_resolution: ChunkResolution) -> dict[str, Any]: - """Creates an Elasticsearch filter for chunk resolutions.""" + """Creates an Opensearch filter for chunk resolutions.""" return {"term": {"metadata.chunk_resolution.keyword": str(chunk_resolution.normal)}} #add normal to fix error @@ -56,7 +56,7 @@ def get_all( state: RedboxState, ) -> dict[str, Any]: """ - Returns a parameterised elastic query that will return everything it matches. + Returns a parameterised opensearch query that will return everything it matches. As it's used in summarisation, it excludes embeddings. """ @@ -97,7 +97,7 @@ def build_document_query( selected_files: list[str] | None = None, chunk_resolution: ChunkResolution | None = None, ) -> dict[str, Any]: - """Builds a an Elasticsearch query that will return documents when called. + """Builds a an Opensearch query that will return documents when called. Searches the document: * Text, as a keyword and similarity @@ -165,7 +165,7 @@ def build_document_query( def scale_score(score: float, old_min: float, old_max: float, new_min=1.1, new_max: float = 2.0): - """Rescales an Elasticsearch score. + """Rescales an Opensearch score. Intended to turn the score into a multiplier to weight a Gauss function. 
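To make the query-builder changes above easier to follow, the sketch below restates the file filter exactly as it appears in this hunk and adds an assumed linear implementation of `scale_score`; the patch only touches its docstring, so the real body may differ.

```python
# Orientation sketch only. build_file_filter is copied from the hunk above;
# scale_score is an assumed linear rescale, not the repository's implementation.
from typing import Any


def build_file_filter(file_names: list[str]) -> dict[str, Any]:
    # OpenSearch "terms" filter over the stored file URIs
    return {"terms": {"metadata.uri.keyword": file_names}}


def scale_score(score: float, old_min: float, old_max: float, new_min: float = 1.1, new_max: float = 2.0) -> float:
    # Assumed: map [old_min, old_max] linearly onto [new_min, new_max] so the
    # result can act as the Gauss-function multiplier described above.
    return new_min + (score - old_min) * (new_max - new_min) / (old_max - old_min)


print(scale_score(0.5, 0.0, 1.0))  # a mid-range score becomes roughly a 1.55x multiplier
```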
diff --git a/redbox-core/redbox/retriever/retrievers.py b/redbox-core/redbox/retriever/retrievers.py index 1fa7d6f02..b7c393e7a 100644 --- a/redbox-core/redbox/retriever/retrievers.py +++ b/redbox-core/redbox/retriever/retrievers.py @@ -4,7 +4,6 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Union, cast import opensearchpy -from elasticsearch import Elasticsearch # from elasticsearch.helpers import scan from opensearchpy.helpers import scan @@ -14,7 +13,6 @@ from langchain_core.documents import Document from langchain_core.embeddings.embeddings import Embeddings from langchain_core.retrievers import BaseRetriever -from langchain_elasticsearch.retrievers import ElasticsearchRetriever import os from redbox.models.chain import RedboxState @@ -123,7 +121,7 @@ def hit_to_doc(hit: dict[str, Any]) -> Document: def query_to_documents( - es_client: Union[Elasticsearch, OpenSearch], index_name: str, query: dict[str, Any] + es_client: OpenSearch, index_name: str, query: dict[str, Any] ) -> list[Document]: """Runs an Elasticsearch query and returns Documents.""" logger.info("query to opensearch: from query_to_documents") @@ -171,7 +169,7 @@ def _filter_by_elbow(docs: list[Document]) -> list[Document]: class ParameterisedElasticsearchRetriever(BaseRetriever): """A modified ElasticsearchRetriever that allows configuration from RedboxState.""" - es_client: Union[Elasticsearch, OpenSearch] + es_client: OpenSearch index_name: str | Sequence[str] embedding_model: Embeddings embedding_field_name: str = "embedding" @@ -224,7 +222,7 @@ class AllElasticsearchRetriever(OpenSearchRetriever): chunk_resolution: ChunkResolution = ChunkResolution.largest - def __init__(self, es_client: Union[Elasticsearch, OpenSearch], **kwargs: Any) -> None: + def __init__(self, es_client: OpenSearch, **kwargs: Any) -> None: # Hack to pass validation before overwrite # Partly necessary due to how .with_config() interacts with a retriever kwargs["es_client"] = es_client @@ -255,7 +253,7 @@ class MetadataRetriever(OpenSearchRetriever): chunk_resolution: ChunkResolution = ChunkResolution.largest - def __init__(self, es_client: Union[Elasticsearch, OpenSearch], **kwargs: Any) -> None: + def __init__(self, es_client: OpenSearch, **kwargs: Any) -> None: # Hack to pass validation before overwrite # Partly necessary due to how .with_config() interacts with a retriever kwargs["body_func"] = get_metadata diff --git a/redbox-core/tests/conftest.py b/redbox-core/tests/conftest.py index e53b6c809..070152c76 100644 --- a/redbox-core/tests/conftest.py +++ b/redbox-core/tests/conftest.py @@ -5,9 +5,9 @@ import tiktoken from _pytest.fixtures import FixtureRequest from botocore.exceptions import ClientError -from elasticsearch import Elasticsearch from langchain_core.embeddings.fake import FakeEmbeddings from langchain_elasticsearch import ElasticsearchStore +from opensearchpy import OpenSearch from tiktoken.core import Encoding from redbox.models.settings import Settings @@ -63,17 +63,17 @@ def embedding_model(embedding_model_dim: int) -> FakeEmbeddings: @pytest.fixture(scope="session") def es_index(env: Settings) -> str: - return f"{env.elastic_root_index}-chunk" + return f"{env.opensearch_root_index}-chunk" @pytest.fixture(scope="session") -def es_client(env: Settings) -> Elasticsearch: +def es_client(env: Settings) -> OpenSearch: return env.opensearch_client() @pytest.fixture(scope="session") def es_vector_store( - es_client: Elasticsearch, es_index: str, embedding_model: FakeEmbeddings, env: Settings + es_client: 
OpenSearch, es_index: str, embedding_model: FakeEmbeddings, env: Settings ) -> ElasticsearchStore: return ElasticsearchStore( index_name=es_index, @@ -94,7 +94,7 @@ def create_index(env: Settings, es_index: str) -> Generator[None, None, None]: @pytest.fixture(scope="session") -def all_chunks_retriever(es_client: Elasticsearch, es_index: str) -> AllElasticsearchRetriever: +def all_chunks_retriever(es_client: OpenSearch, es_index: str) -> AllElasticsearchRetriever: return AllElasticsearchRetriever( es_client=es_client, index_name=es_index, @@ -103,7 +103,7 @@ def all_chunks_retriever(es_client: Elasticsearch, es_index: str) -> AllElastics @pytest.fixture(scope="session") def parameterised_retriever( - env: Settings, es_client: Elasticsearch, es_index: str, embedding_model: FakeEmbeddings + env: Settings, es_client: OpenSearch, es_index: str, embedding_model: FakeEmbeddings ) -> ParameterisedElasticsearchRetriever: return ParameterisedElasticsearchRetriever( es_client=es_client, @@ -114,7 +114,7 @@ def parameterised_retriever( @pytest.fixture(scope="session") -def metadata_retriever(es_client: Elasticsearch, es_index: str) -> MetadataRetriever: +def metadata_retriever(es_client: OpenSearch, es_index: str) -> MetadataRetriever: return MetadataRetriever(es_client=es_client, index_name=es_index) diff --git a/redbox-core/tests/test_ingest.py b/redbox-core/tests/test_ingest.py index 29907b718..50b34da50 100644 --- a/redbox-core/tests/test_ingest.py +++ b/redbox-core/tests/test_ingest.py @@ -5,7 +5,8 @@ import pytest from _pytest.monkeypatch import MonkeyPatch -from elasticsearch import Elasticsearch +from opensearchpy import OpenSearch + from elasticsearch.helpers import scan from langchain_core.embeddings.fake import FakeEmbeddings from langchain_core.language_models.fake_chat_models import GenericFakeChatModel @@ -199,7 +200,7 @@ def test_ingest_from_loader( resolution: ChunkResolution, has_embeddings: bool, monkeypatch: MonkeyPatch, - es_client: Elasticsearch, + es_client: OpenSearch, es_vector_store: ElasticsearchStore, es_index: str, s3_client: S3Client, @@ -308,7 +309,7 @@ def get_metadata(chunk: dict) -> dict: def test_ingest_file( mock_post: MagicMock, mock_llm: MagicMock, - es_client: Elasticsearch, + es_client: OpenSearch, s3_client: S3Client, monkeypatch: MonkeyPatch, env: Settings, @@ -322,7 +323,7 @@ def test_ingest_file( When I call ingest_file I Expect to see this file to be: 1. chunked - 2. written to Elasticsearch + 2. 
written to OpenSearch """ # Mock call to Unstructured mock_response = mock_post.return_value diff --git a/redbox-core/tests/test_tools.py b/redbox-core/tests/test_tools.py index 18be2968b..84e0fd05e 100644 --- a/redbox-core/tests/test_tools.py +++ b/redbox-core/tests/test_tools.py @@ -2,7 +2,7 @@ from uuid import uuid4 import pytest -from elasticsearch import Elasticsearch +from opensearchpy import OpenSearch from langchain_core.embeddings.fake import FakeEmbeddings from langchain_core.messages import AIMessage from langgraph.prebuilt import ToolNode @@ -24,7 +24,7 @@ def test_search_documents_tool( chain_params: dict, stored_file_parameterised: RedboxChatTestCase, - es_client: Elasticsearch, + es_client: OpenSearch, es_index: str, embedding_model: FakeEmbeddings, env: Settings, diff --git a/redbox-core/tests/test_transform.py b/redbox-core/tests/test_transform.py index 03912f423..2c1107c6f 100644 --- a/redbox-core/tests/test_transform.py +++ b/redbox-core/tests/test_transform.py @@ -124,7 +124,7 @@ ) def test_combine_documents(a: Document, b: Document, combined: Document): """ - Test that documents as pulled by the Elasticsearch retriever get properly mapped to source documents + Test that documents as pulled by the Opensearch retriever get properly mapped to source documents """ test_combined = combine_documents(a, b)
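For anyone verifying the migration locally, here is a minimal, hedged sketch of the client-and-alias bootstrap pattern that the `opensearch_client()` changes in `redbox/models/settings.py` converge on. The host, credentials and root index name below are placeholders for illustration, not values taken from this patch.

```python
# Illustrative local bootstrap for the single-node OpenSearch started by the
# docker-compose "opensearch" service. Host, credentials and index names are assumptions.
from opensearchpy import OpenSearch, RequestsHttpConnection

client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],
    http_auth=("admin", "admin"),  # placeholder credentials
    use_ssl=False,
    connection_class=RequestsHttpConnection,
)

root_index = "redbox-data"  # hypothetical opensearch_root_index value
chunk_index = f"{root_index}-chunk"
alias = f"{root_index}-chunk-current"

if not client.indices.exists_alias(name=alias):
    # ignore=400 mirrors the patch: index-already-exists errors are tolerated
    client.indices.create(index=chunk_index, ignore=400)
    client.indices.put_alias(index=chunk_index, name=alias)

print(client.info()["version"].get("distribution"))  # expected: "opensearch"
```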