Add customizable title prompt to Memory
valentinfrlch committed Feb 25, 2025
1 parent 898c3ca commit 2f4d0d6
Showing 8 changed files with 49 additions and 21 deletions.
12 changes: 8 additions & 4 deletions custom_components/llmvision/__init__.py
@@ -23,6 +23,7 @@
CONG_MEMORY_IMAGES_ENCODED,
CONF_MEMORY_STRINGS,
CONF_SYSTEM_PROMPT,
CONF_TITLE_PROMPT,
CONF_AWS_ACCESS_KEY_ID,
CONF_AWS_SECRET_ACCESS_KEY,
CONF_AWS_REGION_NAME,
@@ -54,6 +55,7 @@
GENERATE_TITLE,
SENSOR_ENTITY,
DEFAULT_SYSTEM_PROMPT,
DEFAULT_TITLE_PROMPT,
DATA_EXTRACTION_PROMPT,
)
from .calendar import Timeline
@@ -101,6 +103,7 @@ async def async_setup_entry(hass, entry):
memory_images_encoded = entry.data.get(CONG_MEMORY_IMAGES_ENCODED)
memory_strings = entry.data.get(CONF_MEMORY_STRINGS)
system_prompt = entry.data.get(CONF_SYSTEM_PROMPT)
title_prompt = entry.data.get(CONF_TITLE_PROMPT)
aws_access_key_id = entry.data.get(CONF_AWS_ACCESS_KEY_ID)
aws_secret_access_key = entry.data.get(CONF_AWS_SECRET_ACCESS_KEY)
aws_region_name = entry.data.get(CONF_AWS_REGION_NAME)
@@ -139,6 +142,7 @@ async def async_setup_entry(hass, entry):
CONG_MEMORY_IMAGES_ENCODED: memory_images_encoded,
CONF_MEMORY_STRINGS: memory_strings,
CONF_SYSTEM_PROMPT: system_prompt,
CONF_TITLE_PROMPT: title_prompt,
CONF_AWS_ACCESS_KEY_ID: aws_access_key_id,
CONF_AWS_SECRET_ACCESS_KEY: aws_secret_access_key,
CONF_AWS_REGION_NAME: aws_region_name,
@@ -388,7 +392,7 @@ async def image_analyzer(data_call):
expose_images=call.expose_images,
)

call.memory = Memory(hass, fallback_prompt=DEFAULT_SYSTEM_PROMPT)
call.memory = Memory(hass)
await call.memory._update_memory()

# Validate configuration, input data and make the call
@@ -425,7 +429,7 @@ async def video_analyzer(data_call):
frigate_retry_attempts=call.frigate_retry_attempts,
frigate_retry_seconds=call.frigate_retry_seconds
)
call.memory = Memory(hass, fallback_prompt=DEFAULT_SYSTEM_PROMPT)
call.memory = Memory(hass)
await call.memory._update_memory()

response = await request.call(call)
@@ -460,7 +464,7 @@ async def stream_analyzer(data_call):
expose_images=call.expose_images,
)

call.memory = Memory(hass, fallback_prompt=DEFAULT_SYSTEM_PROMPT)
call.memory = Memory(hass)
await call.memory._update_memory()

response = await request.call(call)
@@ -520,7 +524,7 @@ async def data_analyzer(data_call):
include_filename=call.include_filename
)

call.memory = Memory(hass, fallback_prompt=DATA_EXTRACTION_PROMPT)
call.memory = Memory(hass, system_prompt=DATA_EXTRACTION_PROMPT)
await call.memory._update_memory()

response = await request.call(call)
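
Net effect in __init__.py: image_analyzer, video_analyzer and stream_analyzer now let Memory resolve the system prompt from the Memory config entry, while data_analyzer keeps its specialised instructions by passing them explicitly. A minimal sketch of the two call patterns after this commit (a summary of the change, not additional committed code):

    # Media analyzers: system prompt comes from the Memory entry, else DEFAULT_SYSTEM_PROMPT
    call.memory = Memory(hass)
    await call.memory._update_memory()

    # data_analyzer: explicit override so structured extraction keeps its own prompt
    call.memory = Memory(hass, system_prompt=DATA_EXTRACTION_PROMPT)
    await call.memory._update_memory()
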
8 changes: 8 additions & 0 deletions custom_components/llmvision/config_flow.py
@@ -46,6 +46,8 @@
CONF_OPENWEBUI_DEFAULT_MODEL,
ENDPOINT_OPENWEBUI,
DEFAULT_SYSTEM_PROMPT,
CONF_TITLE_PROMPT,
DEFAULT_TITLE_PROMPT,
)
import voluptuous as vol
import os
@@ -580,6 +582,12 @@ async def async_step_memory(self, user_input=None):
"multiple": False
}
}),
vol.Optional(CONF_TITLE_PROMPT, default=DEFAULT_TITLE_PROMPT): selector({
"text": {
"multiline": True,
"multiple": False
}
}),
})

if self.source == config_entries.SOURCE_RECONFIGURE:
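
The new field mirrors the existing system prompt option: optional, multiline, pre-filled with the default title prompt. A self-contained sketch of just this schema fragment, assuming the selector helper and constants are imported as elsewhere in this flow:

    import voluptuous as vol
    from homeassistant.helpers.selector import selector
    from .const import CONF_TITLE_PROMPT, DEFAULT_TITLE_PROMPT

    # Optional multiline text field, defaulting to DEFAULT_TITLE_PROMPT
    title_prompt_schema = vol.Schema({
        vol.Optional(CONF_TITLE_PROMPT, default=DEFAULT_TITLE_PROMPT): selector({
            "text": {"multiline": True, "multiple": False}
        })
    })
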
3 changes: 3 additions & 0 deletions custom_components/llmvision/const.py
@@ -26,6 +26,7 @@
CONG_MEMORY_IMAGES_ENCODED = 'memory_images_encoded'
CONF_MEMORY_STRINGS = 'memory_strings'
CONF_SYSTEM_PROMPT = 'system_prompt'
CONF_TITLE_PROMPT = 'title_prompt'
CONF_AWS_ACCESS_KEY_ID = 'aws_access_key_id'
CONF_AWS_SECRET_ACCESS_KEY = 'aws_secret_access_key'
CONF_AWS_REGION_NAME = 'aws_region_name'
@@ -71,8 +72,10 @@

# Defaults
DEFAULT_SYSTEM_PROMPT = "Your task is to analyze a series of images and provide a concise event description based on user instructions. Focus on identifying and describing the actions of people and dynamic objects (e.g., vehicles) rather than static background details. When multiple images are provided, track and summarize movements or changes over time (e.g., 'A person walks to the front door' or 'A car pulls out of the driveway'). Keep responses brief, objective, and aligned with the user's prompt. Avoid speculation and prioritize observable activity."
DEFAULT_TITLE_PROMPT = "Provide a concise event title based on the description provided. The title should summarize the key actions or events captured in the images and be suitable for use in a notification or alert. Keep the title clear, descriptive, and relevant to the content of the images. Avoid unnecessary details or subjective interpretations. The title should be in the format: '<Object> seen at <location>'. For example: 'Person seen at front door'."
DATA_EXTRACTION_PROMPT = "You are an advanced image analysis assistant specializing in extracting precise data from images captured by a home security camera. Your task is to analyze one or more images and extract specific information as requested by the user (e.g., the number of cars or a license plate). Provide only the requested information in your response, with no additional text or commentary. Your response must be a {data_format} Ensure the extracted data is accurate and reflects the content of the images."


# API Endpoints
ENDPOINT_OPENAI = "https://api.openai.com/v1/chat/completions"
ENDPOINT_ANTHROPIC = "https://api.anthropic.com/v1/messages"
28 changes: 20 additions & 8 deletions custom_components/llmvision/memory.py
@@ -4,7 +4,9 @@
CONG_MEMORY_IMAGES_ENCODED,
CONF_MEMORY_STRINGS,
CONF_SYSTEM_PROMPT,
DEFAULT_SYSTEM_PROMPT
CONF_TITLE_PROMPT,
DEFAULT_SYSTEM_PROMPT,
DEFAULT_TITLE_PROMPT,
)
import base64
import io
@@ -15,22 +17,28 @@


class Memory:
def __init__(self, hass, strings=[], paths=[], fallback_prompt=DEFAULT_SYSTEM_PROMPT):
def __init__(self, hass, strings=[], paths=[], system_prompt=None):
self.hass = hass
self.entry = self._find_memory_entry()
if self.entry is None:
self._system_prompt = fallback_prompt

self._system_prompt = system_prompt if system_prompt else DEFAULT_SYSTEM_PROMPT
self._title_prompt = DEFAULT_TITLE_PROMPT
self.memory_strings = strings
self.memory_paths = paths
self.memory_images = []


else:
self._system_prompt = self.entry.data.get(
CONF_SYSTEM_PROMPT, fallback_prompt)
self.memory_strings = self.entry.data.get(CONF_MEMORY_STRINGS, strings)
self._system_prompt = system_prompt if system_prompt else self.entry.data.get(
CONF_SYSTEM_PROMPT, DEFAULT_SYSTEM_PROMPT)
self._title_prompt = self.entry.data.get(
CONF_TITLE_PROMPT, DEFAULT_TITLE_PROMPT)
self.memory_strings = self.entry.data.get(
CONF_MEMORY_STRINGS, strings)
self.memory_paths = self.entry.data.get(CONF_MEMORY_PATHS, paths)
self.memory_images = self.entry.data.get(CONG_MEMORY_IMAGES_ENCODED, [])
self.memory_images = self.entry.data.get(
CONG_MEMORY_IMAGES_ENCODED, [])

_LOGGER.debug(self)

@@ -106,6 +114,10 @@ def _get_memory_images(self, memory_type="OpenAI") -> list:
def system_prompt(self) -> str:
return "System prompt: " + self._system_prompt

@property
def title_prompt(self) -> str:
return self._title_prompt

def _find_memory_entry(self):
memory_entry = None
for entry in self.hass.config_entries.async_entries(DOMAIN):
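
Prompt resolution in Memory now follows the same precedence whether or not a Memory config entry exists: an explicit system_prompt argument wins, then the value stored in the entry, then the built-in default, and the title prompt likewise falls back to DEFAULT_TITLE_PROMPT. A hedged usage sketch, assuming it runs inside the integration where hass, Memory and DATA_EXTRACTION_PROMPT are available:

    # No override: prompts come from the Memory entry if configured, else the defaults
    memory = Memory(hass)
    await memory._update_memory()
    system_prompt = memory.system_prompt  # "System prompt: " + configured or default text
    title_prompt = memory.title_prompt    # configured title prompt or DEFAULT_TITLE_PROMPT

    # Explicit override, as data_analyzer does for structured extraction
    memory = Memory(hass, system_prompt=DATA_EXTRACTION_PROMPT)
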
4 changes: 1 addition & 3 deletions custom_components/llmvision/providers.py
@@ -144,8 +144,6 @@ async def call(self, call):

self.validate(call)

gen_title_prompt = "Your job is to generate a title in the form '<object> seen' for texts. Do not mention the time, do not speculate. Generate a title for this text: {response}"

if provider == 'OpenAI':
api_key = config.get(CONF_OPENAI_API_KEY)
provider_instance = OpenAI(hass=self.hass, api_key=api_key)
@@ -246,7 +244,7 @@ async def call(self, call):
response_text = await provider_instance.vision_request(call)

if call.generate_title:
call.message = gen_title_prompt.format(response=response_text)
call.message = call.memory.title_prompt + " Create a title for this text: " + response_text
gen_title = await provider_instance.title_request(call)

return {"title": re.sub(r'[^a-zA-Z0-9\s]', '', gen_title), "response_text": response_text}
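
Title generation therefore uses the user-configurable title prompt in place of the removed hard-coded gen_title_prompt. Roughly, the follow-up title request is built like this (illustrative response text; the sanitising re.sub comes from the return statement above):

    response_text = "A person walks up to the front door and rings the bell."
    call.message = call.memory.title_prompt + " Create a title for this text: " + response_text
    gen_title = await provider_instance.title_request(call)
    # With DEFAULT_TITLE_PROMPT this yields a short title such as "Person seen at front door",
    # which is stripped of non-alphanumeric characters before being returned.
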
5 changes: 3 additions & 2 deletions custom_components/llmvision/strings.json
@@ -100,11 +100,12 @@
},
"memory": {
"title": "Memory",
"description": "Content in memory syncs across providers and is used to provide additional context to the model. \n Images and descriptions must be in the same order, and there must be as many descriptions as images.",
"description": "Content in memory syncs across providers and is used to provide additional context to the model. \n\n **Image file paths**: Provide the path to the image file. \n **Image descriptions**: Provide a description of the image (e.g.: 'This is Cookie, my dog'). \n Images and descriptions must be in the same order, and there must be as many descriptions as images. \n\n **System Prompt**: Use the system prompt to change how the model behaves and responds. \n **Title Prompt**: The instruction given to the model to generate a title.",
"data": {
"memory_paths": "Image file path",
"memory_strings": "Image description",
"system_prompt": "System prompt"
"system_prompt": "System prompt",
"title_prompt": "Title prompt"
}
}
},
5 changes: 3 additions & 2 deletions custom_components/llmvision/translations/de.json
@@ -100,11 +100,12 @@
},
"memory": {
"title": "Erinnerungen",
"description": "Erinnerungen sind providerübergreifend und werden verwended um dem Modell mehr kontext zur Verfügung zu stellen. \n Bilder und Beschreibungen müssen in der selben Reihenfolge sein und es müssen gleich viele Bilder wie Beschreibungen vorhanden sein.",
"description": "Erinnerungen sind providerübergreifend und werden verwended um dem Modell mehr kontext zur Verfügung zu stellen. \n\n **Image file paths**: Provide the path to the image file. \n **Image descriptions**: Provide a description of the image (e.g.: 'This is Cookie, my dog'). \n Images and descriptions must be in the same order, and there must be as many descriptions as images. \n\n **System Prompt**: Benutze den System Prompt um zu verändern wie das Modell sich verhält und antwortet. \n **Title Prompt**: Die Anweisung die dem Modell gegeben wird um einen Titel zu generieren.",
"data": {
"memory_paths": "Bildpfad",
"memory_strings": "Bildbeschreibung",
"system_prompt": "System Prompt"
"system_prompt": "System Prompt",
"title_prompt": "Title prompt"
}
}
},
5 changes: 3 additions & 2 deletions custom_components/llmvision/translations/en.json
@@ -100,11 +100,12 @@
},
"memory": {
"title": "Memory",
"description": "Content in memory syncs across providers and is used to provide additional context to the model. \n Images and descriptions must be in the same order, and there must be as many descriptions as images.",
"description": "Content in memory syncs across providers and is used to provide additional context to the model. \n\n **Image file paths**: Provide the path to the image file. \n **Image descriptions**: Provide a description of the image (e.g.: 'This is Cookie, my dog'). \n Images and descriptions must be in the same order, and there must be as many descriptions as images. \n\n **System Prompt**: Use the system prompt to change how the model behaves and responds. \n **Title Prompt**: The instruction given to the model to generate a title.",
"data": {
"memory_paths": "Image file path",
"memory_strings": "Image description",
"system_prompt": "System prompt"
"system_prompt": "System prompt",
"title_prompt": "Title prompt"
}
}
},