From 51fd705868ccaa1fbfd2224e30b0662d776757ec Mon Sep 17 00:00:00 2001
From: Nunkyl
Date: Mon, 20 Jan 2025 20:03:08 +0300
Subject: [PATCH] Update prompts for LLM requests

---
 README.md                                      |  2 +-
 modules/variables/prompts/prompts.py           | 78 +++++++------------
 pipelines/tests/collect_metrics_for_llm.py     | 25 +++++-
 .../tests/generate_collect_metrics_for_llm.py  | 42 +++++++---
 4 files changed, 85 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index 3991da1..5a6a587 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 The repository contains code and data for the LLM-based city management research.

-## Build an image for the Pipeline with RAG and start the container:
+## Build an image for the RAG pipelines and start the container:

 Create the `$NSS_NPA_TOKEN` environment variable on the server

diff --git a/modules/variables/prompts/prompts.py b/modules/variables/prompts/prompts.py
index a9d28c7..6ea39ca 100644
--- a/modules/variables/prompts/prompts.py
+++ b/modules/variables/prompts/prompts.py
@@ -1,58 +1,40 @@
-accessibility_sys_prompt = r"""Answer the question following rules below.
-For answer you must use provided by user context.
+accessibility_sys_prompt = r"""Answer the question by following the rules below.
+For the answer you must use the context provided by the user.
 Rules:
-1. You must use only provided information for the answer.
-2. Add a unit of measurement to an answer.
-3. For answer you should take only that information from context, which is relevant to
-user's question.
-4. If an interpretation is provided in the context for the data requested in the question,
+1. You must only use the provided information for the answer.
+2. Add a unit of measurement to the answer.
+3. For the answer you should take only the information from the context
+which is relevant to the user's question.
+4. If an interpretation is provided in the context
+for the data requested in the question,
 it should be added in the answer.
-5. If data for an answer is absent, answer that data was not provided or absent and
+5. If data for an answer is absent, reply that the data
+was not provided or is absent and
 mention for what field there was no data.
-5. If you do not know how to answer the questions, say so.
-6. Before give an answer to the user question, provide explanation. Mark the answer
-with keyword 'ANSWER', and explanation with 'EXPLANATION'.
-7. If the question is about complaints, answer about at least 5 complaints topics.
-8. Answer should be three sentences maximum.
+6. If you do not know how to answer the question, say so.
+7. Before giving an answer to the user's question,
+provide an explanation. Mark the answer
+with the keyword 'ANSWER', and the explanation with 'EXPLANATION'.
+8. If the question is about complaints,
+answer about at least 5 complaint topics.
+9. The answer should be three sentences maximum.
 """

-strategy_sys_prompt = r"""Answer the question following rules below. For answer you must
-use provided by user context.
+strategy_sys_prompt = r"""Answer the question following the rules below. For the answer
+you must use the context provided by the user.
 Rules:
 1. You must use only provided information for the answer.
-2. For answer you should take only that information from context, which is relevant to
-user's question.
-3. If data for an answer is absent, answer that data was not provided or absent and
+2. For the answer you should take only the information
+from the context which is relevant to
+the user's question.
+3. If data for an answer is absent, answer that
+the data was not provided or is absent and
 mention for what field there was no data.
 4. If you do not know how to answer the questions, say so.
-5. Before give an answer to the user question, provide explanation. Mark the answer
-with keyword 'ANSWER', and explanation with 'EXPLANATION'.
-6. The answer should consist of as many sentences as are necessary to answer the
-question given the context, but not more five sentences.
+5. Before giving an answer to the user's question,
+provide an explanation. Mark the answer
+with the keyword 'ANSWER', and the explanation with 'EXPLANATION'.
+6. The answer should consist of as many sentences
+as are necessary to answer the
+question given the context, but not more than five sentences.
 """
-
-buildings_sys_prompt = r"""Your name is Larry. You are smart AI assistant, You have high
-expertise in field of city building,
-urbanistic and Structure of St. Petersburg.
-Answer the question following rules below. For answer you must use
-provided by user context.
-Rules:
-1. You must use only provided information for the answer.
-2. Add a unit of measurement to an answer.
-3. If there are several organizations in the building, all of them should be mentioned
-in the answer.
-4. The building's address (street, house number, building) in the user's question
-should exactly match a building address from the context.
-5. For answer you should take only that information from context, which exactly match
-a building address (street, house number, building) from the user's question.
-6. If provided by user context for a given address has "null" or "None" for the property,
-it means the data about this property of the building is absent.
-7. In questions about building failure, 0 in the context's corresponding field means
-"no", and 1 - means "yes".
-8. If data for an answer is absent, answer that data was not provided or absent and
-mention for what field there was no data.
-9. If you do not know how to answer the questions, say it.
-10. Before give an answer to the user question, provide explanation. Mark the answer
- with keyword "ANSWER",
-and explanation with "EXPLANATION".
-11. Answer should be no longer than 3 sentences."""
diff --git a/pipelines/tests/collect_metrics_for_llm.py b/pipelines/tests/collect_metrics_for_llm.py
index fdfbb9c..70c468f 100644
--- a/pipelines/tests/collect_metrics_for_llm.py
+++ b/pipelines/tests/collect_metrics_for_llm.py
@@ -29,8 +29,29 @@
 correctness_metric = GEval(
     name="Correctness",
     criteria=(
-        "Correctness - determine if the actual output is factually "
-        "correct according to the expected output."
+        """1. Correctness and Relevance:
+- Compare the actual response against the expected response.
+Determine the extent to which the actual response
+captures the key elements and concepts of the expected response.
+- Assign higher scores to actual responses that accurately reflect
+the core information of the expected response, even if only partial.
+2. Numerical Accuracy and Interpretation:
+- Pay particular attention to any numerical values present
+in the expected response. Verify that these values are
+correctly included in the actual response and accurately
+interpreted within the context.
+- Ensure that units of measurement, scales, and numerical
+relationships are preserved and correctly conveyed.
+3. Allowance for Partial Information:
+- Do not heavily penalize the actual response for incompleteness
+if it covers significant aspects of the expected response.
+Prioritize the correctness of provided information over
+total completeness.
+4. Handling of Extraneous Information:
+- While additional information not present in the expected response
+should not necessarily reduce the score,
+ensure that such additions do not introduce inaccuracies
+or deviate from the context of the expected response."""
     ),
     evaluation_params=[
         LLMTestCaseParams.ACTUAL_OUTPUT,
diff --git a/pipelines/tests/generate_collect_metrics_for_llm.py b/pipelines/tests/generate_collect_metrics_for_llm.py
index 62da22e..04733e4 100644
--- a/pipelines/tests/generate_collect_metrics_for_llm.py
+++ b/pipelines/tests/generate_collect_metrics_for_llm.py
@@ -101,15 +101,17 @@ def get_answer_and_evaluate(


 if __name__ == "__main__":
-    system_prompt = """Answer the question following rules below. For answer you must
-use provided by user context.
-Rules:
-1. You must use only provided information for the answer.
-2. If you do not know how to answer the questions, say so.
-3. The answer should consist of as many sentences as are necessary to answer the
-question given the context, but not more five sentences.
-For each sentence in English language you will be fined for 100$, so in answers you
-must use only Russian language.
+    system_prompt = """You are a smart AI assistant. You have high expertise in the field
+of city building, urbanism and the structure of Saint Petersburg.
+Answer the question following the rules below.
+1. Before giving an answer to the user's question, provide an
+explanation. Mark the answer with the keyword 'ANSWER', and the
+explanation with 'EXPLANATION'. Both the answer and the explanation must be
+in the English language.
+2. If the question is about complaints, answer about at least 5
+complaint topics.
+3. The answer should be five sentences maximum.
+4. In answers you must use only the English language.
 """

     models_list = [
@@ -129,8 +131,26 @@ def get_answer_and_evaluate(
     correctness_metric = GEval(
         name="Correctness",
         criteria=(
-            "Correctness - determine if the actual output is factually "
-            "correct according to the expected output."
+            "1. Correctness and Relevance:"
+            "- Compare the actual response against the expected response. Determine the"
+            " extent to which the actual response captures the key elements and concepts of"
+            " the expected response."
+            "- Assign higher scores to actual responses that accurately reflect the core"
+            " information of the expected response, even if only partial."
+            "2. Numerical Accuracy and Interpretation:"
+            "- Pay particular attention to any numerical values present in the expected"
+            " response. Verify that these values are correctly included in the actual"
+            " response and accurately interpreted within the context."
+            "- Ensure that units of measurement, scales, and numerical relationships are"
+            " preserved and correctly conveyed."
+            "3. Allowance for Partial Information:"
+            "- Do not heavily penalize the actual response for incompleteness if it covers"
+            " significant aspects of the expected response. Prioritize the correctness of"
+            " provided information over total completeness."
+            "4. Handling of Extraneous Information:"
+            "- While additional information not present in the expected response should not"
+            " necessarily reduce the score, ensure that such additions do not introduce"
+            " inaccuracies or deviate from the context of the expected response."
         ),
         evaluation_params=[
             LLMTestCaseParams.ACTUAL_OUTPUT,