From 51fd705868ccaa1fbfd2224e30b0662d776757ec Mon Sep 17 00:00:00 2001
From: Nunkyl
Date: Mon, 20 Jan 2025 20:03:08 +0300
Subject: [PATCH] Update prompts for LLM requests

---
 README.md                                      |  2 +-
 modules/variables/prompts/prompts.py           | 78 +++++++------------
 pipelines/tests/collect_metrics_for_llm.py     | 25 +++++-
 .../tests/generate_collect_metrics_for_llm.py  | 42 +++++++---
 4 files changed, 85 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index 3991da1..5a6a587 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 The repository contains code and data for the LLM-based city management research.

-## Build an image for the Pipeline with RAG and start the container:
+## Build an image for the RAG pipelines and start the container:

 Create the `$NSS_NPA_TOKEN` environment variable on the server

diff --git a/modules/variables/prompts/prompts.py b/modules/variables/prompts/prompts.py
index a9d28c7..6ea39ca 100644
--- a/modules/variables/prompts/prompts.py
+++ b/modules/variables/prompts/prompts.py
@@ -1,58 +1,40 @@
-accessibility_sys_prompt = r"""Answer the question following rules below.
-For answer you must use provided by user context.
+accessibility_sys_prompt = r"""Answer the question by following the rules below.
+For the answer you must use the context provided by the user.
 Rules:
-1. You must use only provided information for the answer.
-2. Add a unit of measurement to an answer.
-3. For answer you should take only that information from context, which is relevant to
-user's question.
-4. If an interpretation is provided in the context for the data requested in the question,
+1. You must only use the provided information for the answer.
+2. Add a unit of measurement to the answer.
+3. For the answer you should take only the information from the context
+which is relevant to the user's question.
+4. If an interpretation is provided in the context
+for the data requested in the question,
 it should be added in the answer.
-5. If data for an answer is absent, answer that data was not provided or absent and
+5. If data for an answer is absent, reply that the data
+was not provided or is absent and
 mention for what field there was no data.
-5. If you do not know how to answer the questions, say so.
-6. Before give an answer to the user question, provide explanation. Mark the answer
-with keyword 'ANSWER', and explanation with 'EXPLANATION'.
-7. If the question is about complaints, answer about at least 5 complaints topics.
-8. Answer should be three sentences maximum.
+6. If you do not know how to answer the question, say so.
+7. Before giving an answer to the user's question,
+provide an explanation. Mark the answer
+with the keyword 'ANSWER', and the explanation with 'EXPLANATION'.
+8. If the question is about complaints,
+answer about at least 5 complaint topics.
+9. The answer should be three sentences maximum.
 """

-strategy_sys_prompt = r"""Answer the question following rules below. For answer you must
-use provided by user context.
+strategy_sys_prompt = r"""Answer the question following the rules below. For the answer
+you must use the context provided by the user.
 Rules:
 1. You must use only provided information for the answer.
-2. For answer you should take only that information from context, which is relevant to
-user's question.
-3. If data for an answer is absent, answer that data was not provided or absent and
+2. For the answer you should take only the information
+from the context which is relevant to
+the user's question.
+3. If data for an answer is absent, answer that
+the data was not provided or is absent and
 mention for what field there was no data.
 4. If you do not know how to answer the questions, say so.
-5. Before give an answer to the user question, provide explanation. Mark the answer
-with keyword 'ANSWER', and explanation with 'EXPLANATION'.
-6. The answer should consist of as many sentences as are necessary to answer the
-question given the context, but not more five sentences.
+5. Before giving an answer to the user's question,
+provide an explanation. Mark the answer
+with the keyword 'ANSWER', and the explanation with 'EXPLANATION'.
+6. The answer should consist of as many sentences
+as are necessary to answer the
+question given the context, but not more than five sentences.
 """
-
-buildings_sys_prompt = r"""Your name is Larry. You are smart AI assistant, You have high
-expertise in field of city building,
-urbanistic and Structure of St. Petersburg.
-Answer the question following rules below. For answer you must use
-provided by user context.
-Rules:
-1. You must use only provided information for the answer.
-2. Add a unit of measurement to an answer.
-3. If there are several organizations in the building, all of them should be mentioned
-in the answer.
-4. The building's address (street, house number, building) in the user's question
-should exactly match a building address from the context.
-5. For answer you should take only that information from context, which exactly match
-a building address (street, house number, building) from the user's question.
-6. If provided by user context for a given address has "null" or "None" for the property,
-it means the data about this property of the building is absent.
-7. In questions about building failure, 0 in the context's corresponding field means
-"no", and 1 - means "yes".
-8. If data for an answer is absent, answer that data was not provided or absent and
-mention for what field there was no data.
-9. If you do not know how to answer the questions, say it.
-10. Before give an answer to the user question, provide explanation. Mark the answer
- with keyword "ANSWER",
-and explanation with "EXPLANATION".
-11. Answer should be no longer than 3 sentences."""
diff --git a/pipelines/tests/collect_metrics_for_llm.py b/pipelines/tests/collect_metrics_for_llm.py
index fdfbb9c..70c468f 100644
--- a/pipelines/tests/collect_metrics_for_llm.py
+++ b/pipelines/tests/collect_metrics_for_llm.py
@@ -29,8 +29,29 @@
 correctness_metric = GEval(
     name="Correctness",
     criteria=(
-        "Correctness - determine if the actual output is factually "
-        "correct according to the expected output."
+        """1. Correctness and Relevance:
+- Compare the actual response against the expected response.
+Determine the extent to which the actual response
+captures the key elements and concepts of the expected response.
+- Assign higher scores to actual responses that accurately reflect
+the core information of the expected response, even if only partial.
+2. Numerical Accuracy and Interpretation:
+- Pay particular attention to any numerical values present
+in the expected response. Verify that these values are
+correctly included in the actual response and accurately
+interpreted within the context.
+- Ensure that units of measurement, scales, and numerical
+relationships are preserved and correctly conveyed.
+3. Allowance for Partial Information:
+- Do not heavily penalize the actual response for incompleteness
+if it covers significant aspects of the expected response.
+Prioritize the correctness of provided information over
+total completeness.
+4. Handling of Extraneous Information:
+- While additional information not present in the expected response
+should not necessarily reduce the score,
+ensure that such additions do not introduce inaccuracies
+or deviate from the context of the expected response."""
     ),
     evaluation_params=[
         LLMTestCaseParams.ACTUAL_OUTPUT,
diff --git a/pipelines/tests/generate_collect_metrics_for_llm.py b/pipelines/tests/generate_collect_metrics_for_llm.py
index 62da22e..04733e4 100644
--- a/pipelines/tests/generate_collect_metrics_for_llm.py
+++ b/pipelines/tests/generate_collect_metrics_for_llm.py
@@ -101,15 +101,17 @@ def get_answer_and_evaluate(


 if __name__ == "__main__":
-    system_prompt = """Answer the question following rules below. For answer you must
-use provided by user context.
-Rules:
-1. You must use only provided information for the answer.
-2. If you do not know how to answer the questions, say so.
-3. The answer should consist of as many sentences as are necessary to answer the
-question given the context, but not more five sentences.
-For each sentence in English language you will be fined for 100$, so in answers you
-must use only Russian language.
+    system_prompt = """You are a smart AI assistant. You have high expertise in the field
+of city building, urbanism and the structure of Saint Petersburg.
+Answer the question following the rules below.
+1. Before giving an answer to the user's question, provide an
+explanation. Mark the answer with the keyword 'ANSWER', and the
+explanation with 'EXPLANATION'. Both the answer and the explanation must be
+in the English language.
+2. If the question is about complaints, answer about at least 5
+complaint topics.
+3. The answer should be five sentences maximum.
+4. In answers you must use only the English language.
 """

     models_list = [
@@ -129,8 +131,26 @@ def get_answer_and_evaluate(
     correctness_metric = GEval(
         name="Correctness",
         criteria=(
-            "Correctness - determine if the actual output is factually "
-            "correct according to the expected output."
+            "1. Correctness and Relevance:"
+            "- Compare the actual response against the expected response. Determine the"
+            " extent to which the actual response captures the key elements and concepts of"
+            " the expected response."
+            "- Assign higher scores to actual responses that accurately reflect the core"
+            " information of the expected response, even if only partial."
+            "2. Numerical Accuracy and Interpretation:"
+            "- Pay particular attention to any numerical values present in the expected"
+            " response. Verify that these values are correctly included in the actual"
+            " response and accurately interpreted within the context."
+            "- Ensure that units of measurement, scales, and numerical relationships are"
+            " preserved and correctly conveyed."
+            "3. Allowance for Partial Information:"
+            "- Do not heavily penalize the actual response for incompleteness if it covers"
+            " significant aspects of the expected response. Prioritize the correctness of"
+            " provided information over total completeness."
+            "4. Handling of Extraneous Information:"
+            "- While additional information not present in the expected response should not"
+            " necessarily reduce the score, ensure that such additions do not introduce"
+            " inaccuracies or deviate from the context of the expected response."
         ),
         evaluation_params=[
             LLMTestCaseParams.ACTUAL_OUTPUT,