Skip to content

Commit

Permalink
WIP: fixing some of test-redbox
Browse files: browse the repository at this point in the history
  • Loading branch information
nboyse committed Feb 10, 2025
1 parent 41316fc commit 4c6392e
Show file tree
Hide file tree
Showing 8 changed files with 3,409 additions and 3,191 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ test-ai: ## Test code with live LLM

.PHONY: test-redbox
test-redbox: ## Test redbox
cd redbox-core && poetry install && poetry run pytest -m "not ai" --cov=redbox -v --cov-report=term-missing --cov-fail-under=60
export PYTHONPATH=$(PWD)/django_app:$(PWD)/redbox-core
cd redbox-core && PYTHONPATH=$(PWD)/django_app:$(PWD)/redbox-core DJANGO_SETTINGS_MODULE=django_app.settings poetry install && poetry run pytest -m "not ai" --cov=redbox -v --cov-report=term-missing --cov-fail-under=60

.PHONY: test-django
test-django: ## Test django-app
Expand Down
6 changes: 3 additions & 3 deletions notebooks/rag_runnable.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@
"metadata": {},
"outputs": [],
"source": [
"es.options(ignore_status=[400,404]).indices.delete(index=\"summarisation-file\")\n",
"es.options(ignore_status=[400,404]).indices.delete(index=\"summarisation-chunk\")\n",
"es.indices.delete(index=\"summarisation-file\", ignore=[400,404])\n",
"es.indices.delete(index=\"summarisation-chunk\", ignore=[400,404])\n",
"\n",
"file = LocalFile(\n",
" filepath=Path(\"../data/Conservative-Manifesto-GE2024.pdf\"),\n",
Expand Down Expand Up @@ -341,4 +341,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
2,795 changes: 1,397 additions & 1,398 deletions poetry.lock

Large diffs are not rendered by default.

3,737 changes: 1,969 additions & 1,768 deletions redbox-core/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion redbox-core/redbox/test/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,4 @@ def bind_tools(
) -> "GenericFakeChatModelWithTools":
"""Bind tool-like objects to this chat model."""
self.tools = tools
return self
return self
37 changes: 22 additions & 15 deletions redbox-core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import tiktoken
from _pytest.fixtures import FixtureRequest
from botocore.exceptions import ClientError
from elasticsearch import Elasticsearch
from opensearchpy import OpenSearch
from langchain_core.embeddings.fake import FakeEmbeddings
from langchain_elasticsearch import ElasticsearchStore
from langchain_community.vectorstores import OpenSearchVectorSearch
from tiktoken.core import Encoding

from redbox.models.settings import Settings
Expand Down Expand Up @@ -53,7 +53,7 @@ def tokeniser() -> Encoding:

@pytest.fixture(scope="session")
def embedding_model_dim() -> int:
return 3072 # 3-large default size
return 1024


@pytest.fixture(scope="session")
Expand All @@ -67,20 +67,27 @@ def es_index(env: Settings) -> str:


@pytest.fixture(scope="session")
def es_client(env: Settings) -> Elasticsearch:
def es_client(env: Settings) -> OpenSearch:
return env.elasticsearch_client()


@pytest.fixture(scope="session")
def es_vector_store(
es_client: Elasticsearch, es_index: str, embedding_model: FakeEmbeddings, env: Settings
) -> ElasticsearchStore:
return ElasticsearchStore(
es_client: OpenSearch, es_index: str, embedding_model: FakeEmbeddings, env: Settings
) -> OpenSearchVectorSearch:
# return ElasticsearchStore(
# index_name=es_index,
# es_connection=es_client,
# query_field="text",
# vector_query_field=env.embedding_document_field_name,
# embedding=embedding_model,
# )
return OpenSearchVectorSearch(
index_name=es_index,
es_connection=es_client,
opensearch_url=env.elastic.collection_endpoint,
embedding_function=embedding_model,
query_field="text",
vector_query_field=env.embedding_document_field_name,
embedding=embedding_model,
)


Expand All @@ -94,7 +101,7 @@ def create_index(env: Settings, es_index: str) -> Generator[None, None, None]:


@pytest.fixture(scope="session")
def all_chunks_retriever(es_client: Elasticsearch, es_index: str) -> AllElasticsearchRetriever:
def all_chunks_retriever(es_client: OpenSearch, es_index: str) -> AllElasticsearchRetriever:
return AllElasticsearchRetriever(
es_client=es_client,
index_name=es_index,
Expand All @@ -103,7 +110,7 @@ def all_chunks_retriever(es_client: Elasticsearch, es_index: str) -> AllElastics

@pytest.fixture(scope="session")
def parameterised_retriever(
env: Settings, es_client: Elasticsearch, es_index: str, embedding_model: FakeEmbeddings
env: Settings, es_client: OpenSearch, es_index: str, embedding_model: FakeEmbeddings
) -> ParameterisedElasticsearchRetriever:
return ParameterisedElasticsearchRetriever(
es_client=es_client,
Expand All @@ -114,7 +121,7 @@ def parameterised_retriever(


@pytest.fixture(scope="session")
def metadata_retriever(es_client: Elasticsearch, es_index: str) -> MetadataRetriever:
def metadata_retriever(es_client: OpenSearch, es_index: str) -> MetadataRetriever:
return MetadataRetriever(es_client=es_client, index_name=es_index)


Expand All @@ -125,7 +132,7 @@ def metadata_retriever(es_client: Elasticsearch, es_index: str) -> MetadataRetri

@pytest.fixture(params=ALL_CHUNKS_RETRIEVER_CASES)
def stored_file_all_chunks(
request: FixtureRequest, es_vector_store: ElasticsearchStore
request: FixtureRequest, es_vector_store: OpenSearchVectorSearch
) -> Generator[RedboxChatTestCase, None, None]:
test_case: RedboxChatTestCase = request.param
doc_ids = es_vector_store.add_documents(test_case.docs)
Expand All @@ -135,7 +142,7 @@ def stored_file_all_chunks(

@pytest.fixture(params=PARAMETERISED_RETRIEVER_CASES)
def stored_file_parameterised(
request: FixtureRequest, es_vector_store: ElasticsearchStore
request: FixtureRequest, es_vector_store: OpenSearchVectorSearch
) -> Generator[RedboxChatTestCase, None, None]:
test_case: RedboxChatTestCase = request.param
doc_ids = es_vector_store.add_documents(test_case.docs)
Expand All @@ -145,7 +152,7 @@ def stored_file_parameterised(

@pytest.fixture(params=METADATA_RETRIEVER_CASES)
def stored_file_metadata(
request: FixtureRequest, es_vector_store: ElasticsearchStore
request: FixtureRequest, es_vector_store: OpenSearchVectorSearch
) -> Generator[RedboxChatTestCase, None, None]:
test_case: RedboxChatTestCase = request.param
doc_ids = es_vector_store.add_documents(test_case.docs)
Expand Down
12 changes: 7 additions & 5 deletions redbox-core/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
from elasticsearch import Elasticsearch
from opensearchpy import OpenSearch
from langchain_core.embeddings.fake import FakeEmbeddings
from langchain_core.messages import AIMessage
from langgraph.prebuilt import ToolNode
Expand All @@ -24,7 +25,7 @@
def test_search_documents_tool(
chain_params: dict,
stored_file_parameterised: RedboxChatTestCase,
es_client: Elasticsearch,
es_client: OpenSearch,
es_index: str,
embedding_model: FakeEmbeddings,
env: Settings,
Expand Down Expand Up @@ -100,18 +101,19 @@ def test_search_documents_tool(
)

if not permission:
# No new messages update emitted
assert result_state["messages"][0].content == ""
assert result_state["messages"][0].artifact == []
else:
print(result_state["messages"][0])
print('goodbye')
result_flat = result_state["messages"][0].artifact
print(f"DEBUG: result_flat = {result_flat}") # Debugging

# Check state update is formed as expected
assert result_flat is not None, "Error: result_flat is None"
assert isinstance(result_state, dict)
assert len(result_state) == 1

# Check flattened documents match expected, similar to retriever
assert len(result_flat) == chain_params["rag_k"]

assert {c.page_content for c in result_flat} <= {c.page_content for c in permitted_docs}
assert {c.metadata["uri"] for c in result_flat} <= set(stored_file_parameterised.query.permitted_s3_keys)

Expand Down
8 changes: 8 additions & 0 deletions tests/.env.test
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,11 @@ USER_EMAIL=

# OFFICIAL, OFFICIAL_SENSITIVE, SECRET or TOP_SECRET
MAX_SECURITY_CLASSIFICATION=OFFICIAL_SENSITIVE

EMBEDDING_BACKEND=amazon.titan-embed-text-v2:0

COLLECTION_ENDPOINT="http://admin:Opensearch2024^@localhost:9200"
OPENSEARCH_INITIAL_ADMIN_PASSWORD = "Opensearch2024^"

REDBOX_API_URL = http://localhost:8080/api/v0/
REDBOX_API_KEY = myapi

0 comments on commit 4c6392e

Please sign in to comment.