From 5bd47bce57bb9ac283772b955b1ac4d5dd8d594d Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Mon, 22 Jul 2024 15:40:23 +0000 Subject: [PATCH] Add source documents (api urls) to logged metrics --- chat/src/handlers/chat.py | 2 +- chat/src/helpers/metrics.py | 19 +++++++ chat/src/helpers/response.py | 19 +------ chat/test/helpers/test_metrics.py | 95 +++++++++++++++++++++++-------- 4 files changed, 93 insertions(+), 42 deletions(-) diff --git a/chat/src/handlers/chat.py b/chat/src/handlers/chat.py index 04e92242..db1c9ef0 100644 --- a/chat/src/handlers/chat.py +++ b/chat/src/handlers/chat.py @@ -13,7 +13,7 @@ RESPONSE_TYPES = { "base": ["answer", "ref"], "debug": ["answer", "attributes", "azure_endpoint", "deployment_name", "is_superuser", "k", "openai_api_version", "prompt", "question", "ref", "temperature", "text_key", "token_counts"], - "log": ["answer", "is_superuser", "k", "openai_api_version", "prompt", "question", "ref", "temperature", "token_counts"] + "log": ["answer", "deployment_name", "is_superuser", "k", "openai_api_version", "prompt", "question", "ref", "source_documents", "temperature", "token_counts"] } def handler(event, context): diff --git a/chat/src/helpers/metrics.py b/chat/src/helpers/metrics.py index 336895b9..cf6d9b6e 100644 --- a/chat/src/helpers/metrics.py +++ b/chat/src/helpers/metrics.py @@ -1,5 +1,24 @@ import tiktoken +def debug_response(config, response, original_question): + source_urls = [doc["api_link"] for doc in original_question.get("source_documents", [])] + + return { + "answer": response, + "attributes": config.attributes, + "azure_endpoint": config.azure_endpoint, + "deployment_name": config.deployment_name, + "is_superuser": config.api_token.is_superuser(), + "k": config.k, + "openai_api_version": config.openai_api_version, + "prompt": config.prompt_text, + "question": config.question, + "ref": config.ref, + "source_documents": source_urls, + "temperature": config.temperature, + "text_key": config.text_key, + "token_counts": token_usage(config, response, original_question), + } def token_usage(config, response, original_question): data = { diff --git a/chat/src/helpers/response.py b/chat/src/helpers/response.py index 79715a79..fdcf5348 100644 --- a/chat/src/helpers/response.py +++ b/chat/src/helpers/response.py @@ -1,4 +1,4 @@ -from helpers.metrics import token_usage +from helpers.metrics import debug_response from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableLambda, RunnablePassthrough @@ -16,23 +16,6 @@ def __init__(self, config): self.store = {} def debug_response_passthrough(self): - def debug_response(config, response, original_question): - return { - "answer": response, - "attributes": config.attributes, - "azure_endpoint": config.azure_endpoint, - "deployment_name": config.deployment_name, - "is_superuser": config.api_token.is_superuser(), - "k": config.k, - "openai_api_version": config.openai_api_version, - "prompt": config.prompt_text, - "question": config.question, - "ref": config.ref, - "temperature": config.temperature, - "text_key": config.text_key, - "token_counts": token_usage(config, response, original_question), - } - return RunnableLambda(lambda x: debug_response(self.config, x, self.original_question)) def original_question_passthrough(self): diff --git a/chat/test/helpers/test_metrics.py b/chat/test/helpers/test_metrics.py index 5c593b8a..0438a662 100644 --- a/chat/test/helpers/test_metrics.py +++ b/chat/test/helpers/test_metrics.py @@ -5,53 +5,102 @@ sys.path.append('./src') from unittest import TestCase, mock -from helpers.metrics import count_tokens, token_usage +from helpers.metrics import count_tokens, debug_response, token_usage from event_config import EventConfig -@mock.patch.dict( - os.environ, - { - "AZURE_OPENAI_RESOURCE_NAME": "test", - "WEAVIATE_URL": "http://test", - "WEAVIATE_API_KEY": "test" - }, -) class TestMetrics(TestCase): - def test_token_usage(self): - original_question = { - "question": "What is your name?", - "source_documents": [], + @mock.patch.dict( + os.environ, + { + "AZURE_OPENAI_RESOURCE_NAME": "test", + "WEAVIATE_URL": "http://test", + "WEAVIATE_API_KEY": "test" + }, + ) + def setUp(self): + self.question = "What is your name?" + self.original_question = { + "question": self.question, + "source_documents": [ + { + "accession_number": "SourceDoc:1", + "api_link": "https://api.dc.library.northwestern.edu/api/v2/works/881e8cae-67be-4e04-9970-7eafb52b2c5c", + "canonical_link": "https://dc.library.northwestern.edu/items/881e8cae-67be-4e04-9970-7eafb52b2c5c", + "title": "Source Document One!" + }, + { + "accession_number": "SourceDoc:2", + "api_link": "https://api.dc.library.northwestern.edu/api/v2/works/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1", + "canonical_link": "https://dc.library.northwestern.edu/items/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1", + "title": "Source Document Two!" + }, + { + "accession_number": "SourceDoc:3", + "api_link": "https://api.dc.library.northwestern.edu/api/v2/works/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5", + "canonical_link": "https://dc.library.northwestern.edu/items/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5", + "title": "Source Document Three!" + }, + { + "accession_number": "SourceDoc:4", + "api_link": "https://api.dc.library.northwestern.edu/api/v2/works/211eeeca-d56e-4c6e-9123-1612d72258f9", + "canonical_link": "https://dc.library.northwestern.edu/items/211eeeca-d56e-4c6e-9123-1612d72258f9", + "title": "Source Document Four!" + }, + { + "accession_number": "SourceDoc:5", + "api_link": "https://api.dc.library.northwestern.edu/api/v2/works/10e45e7a-8011-4ac5-97df-efa6a5439d0e", + "canonical_link": "https://dc.library.northwestern.edu/items/10e45e7a-8011-4ac5-97df-efa6a5439d0e", + "title": "Source Document Five!" + } + ], } - event = { + self.event = { "body": json.dumps({ "deployment_name": "test", "index": "test", - "k": 1, + "k": 5, "openai_api_version": "2019-05-06", "prompt": "This is a test prompt.", - "question": original_question, + "question": self.question, "ref": "test", "temperature": 0.5, "text_key": "text", "auth": "test123" }) } - config = EventConfig(event=event) - - response = { + self.config = EventConfig(event=self.event) + self.response = { "output_text": "This is a test response.", } + + def test_debug_response(self): + result = debug_response(self.config, self.response, self.original_question) + + self.assertEqual(result["k"], 5) + self.assertEqual(result["question"], self.question) + self.assertEqual(result["ref"], "test") + self.assertEqual( + result["source_documents"], + [ + "https://api.dc.library.northwestern.edu/api/v2/works/881e8cae-67be-4e04-9970-7eafb52b2c5c", + "https://api.dc.library.northwestern.edu/api/v2/works/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1", + "https://api.dc.library.northwestern.edu/api/v2/works/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5", + "https://api.dc.library.northwestern.edu/api/v2/works/211eeeca-d56e-4c6e-9123-1612d72258f9", + "https://api.dc.library.northwestern.edu/api/v2/works/10e45e7a-8011-4ac5-97df-efa6a5439d0e" + ] + ) - result = token_usage(config, response, original_question) + def test_token_usage(self): + result = token_usage(self.config, self.response, self.original_question) expected_result = { "answer": 12, "prompt": 314, - "question": 15, - "source_documents": 1, - "total": 342 + "question": 5, + "source_documents": 527, + "total": 858 } self.assertEqual(result, expected_result)