Skip to content

Commit

Permalink
Merge pull request #6 from valentinfrlch/dev
Browse files Browse the repository at this point in the history
Merging dev with main for v0.3.5
  • Loading branch information
valentinfrlch authored May 24, 2024
2 parents e561ad5 + 037245a commit e21edf3
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 58 deletions.
60 changes: 53 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,6 @@ Supported providers are OpenAI and [LocalAI](https://github.com/mudler/LocalAI).
## Resources
Check the [wiki](https://github.com/valentinfrlch/ha-gpt4vision/wiki/Usage-Examples) for examples on how you can integrate gpt4vision into your Home Assistant or join the [discussion](https://community.home-assistant.io/t/gpt-4o-vision-capabilities-in-home-assistant/729241) in the Home Assistant Community.

## API key
> [!IMPORTANT]
> If you're planning on using **OpenAI's API** you'll **need an API key**. You must obtain a valid OpenAI key from [here](https://platform.openai.com/api-keys).
A pricing calculator is available here: [https://openai.com/api/pricing/](https://openai.com/api/pricing/).


# Installation
### Installation via HACS (recommended)
1. Add this repository's url (https://github.com/valentinfrlch/ha-gpt4vision) to HACS under custom repositories.
Expand All @@ -58,6 +51,59 @@ A pricing calculator is available here: [https://openai.com/api/pricing/](https:
2. Add integration in Home Assistant Settings/Devices & services
3. Provide your API key or IP address and port of your LocalAI server


## Provider specific setup
### OpenAI
Simply obtain an API key from [OpenAI](https://platform.openai.com/api-keys) and enter it in the Home Assistant UI during setup.
A pricing calculator is available here: [https://openai.com/api/pricing/](https://openai.com/api/pricing/).


### LocalAI
To use LocalAI, you need to have a LocalAI server running. You can find the installation instructions [here](https://localai.io/basics/getting_started/). During setup you'll need to provide the IP address of your machine and the port on which LocalAI is running (default is 8080).

### Ollama
To use Ollama you need to install Ollama. You can download it from [here](https://ollama.com/). Once installed you need to run the following command to download the `llava` model:
```bash
ollama run llava
```
If your Home Assistant is **not** running on the same machine as Ollama, you need to set the `OLLAMA_HOST` environment variable.

**On Linux:**
1. Edit the systemd service by calling `systemctl edit ollama.service`. This will open an editor.
2. For each environment variable, add an `Environment` line under the `[Service]` section:

```
[Service]
Environment="OLLAMA_HOST=0.0.0.0"
```
3. Save and close the editor.
4. Reload systemd and restart Ollama
```bash
systemctl daemon-reload
systemctl restart ollama
```

**On Windows:**
1. Quit Ollama from the system tray
2. Open File Explorer
3. Right click on This PC and select Properties
4. Click on Advanced system settings
5. Select Environment Variables
6. Under User variables click New
7. For variable name enter `OLLAMA_HOST` and for value enter 0.0.0.0
8. Click OK and start Ollama again from the Start Menu

**On macOS:**
1. Open Terminal
2. Run the following command
```bash
launchctl setenv OLLAMA_HOST "0.0.0.0"
```
3. Restart Ollama

<br>

## Service call and usage
After restarting, the `gpt4vision.image_analyzer` service will be available. You can test it in the Developer Tools section in Home Assistant.
To get GPT's analysis of a local image, use the following service call.
Expand Down
77 changes: 48 additions & 29 deletions custom_components/gpt4vision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@
CONF_MODEL,
CONF_MESSAGE,
CONF_IMAGE_FILE,
CONF_IP_ADDRESS,
CONF_PORT,
CONF_LOCALAI_IP_ADDRESS,
CONF_LOCALAI_PORT,
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT,
CONF_TEMPERATURE
)
from .request_handlers import handle_localai_request, handle_openai_request
from .request_handlers import (
handle_localai_request,
handle_openai_request,
handle_ollama_request
)
import base64
import io
import os
Expand All @@ -26,8 +32,10 @@ async def async_setup_entry(hass, entry):
"""Set up gpt4vision from a config entry."""
# Get all entries from config flow
openai_api_key = entry.data.get(CONF_OPENAI_API_KEY)
ip_address = entry.data.get(CONF_IP_ADDRESS)
port = entry.data.get(CONF_PORT)
localai_ip_address = entry.data.get(CONF_LOCALAI_IP_ADDRESS)
localai_port = entry.data.get(CONF_LOCALAI_PORT)
ollama_ip_address = entry.data.get(CONF_OLLAMA_IP_ADDRESS)
ollama_port = entry.data.get(CONF_OLLAMA_PORT)

# Ensure DOMAIN exists in hass.data
if DOMAIN not in hass.data:
Expand All @@ -38,16 +46,18 @@ async def async_setup_entry(hass, entry):
key: value
for key, value in {
CONF_OPENAI_API_KEY: openai_api_key,
CONF_IP_ADDRESS: ip_address,
CONF_PORT: port,
CONF_LOCALAI_IP_ADDRESS: localai_ip_address,
CONF_LOCALAI_PORT: localai_port,
CONF_OLLAMA_IP_ADDRESS: ollama_ip_address,
CONF_OLLAMA_PORT: ollama_port
}.items()
if value is not None
})

return True


def validate(mode, api_key, ip_address, port, image_paths):
def validate(mode, api_key, image_paths, ip_address=None, port=None):
"""Validate the configuration for the component
Args:
Expand All @@ -59,14 +69,19 @@ def validate(mode, api_key, ip_address, port, image_paths):
Raises:
ServiceValidationError: if configuration is invalid
"""

if mode == "OpenAI":
# Checks for OpenAI
if mode == 'OpenAI':
if not api_key:
raise ServiceValidationError("openai_not_configured")
elif mode == "LocalAI":
# Checks for LocalAI
elif mode == 'LocalAI':
if not ip_address or not port:
raise ServiceValidationError("localai_not_configured")
# Check if image file exists
# Checks for Ollama
elif mode == 'Ollama':
if not ip_address or not port:
raise ServiceValidationError("ollama_not_configured")
# File path validation
for image_path in image_paths:
if not os.path.exists(image_path):
raise ServiceValidationError("invalid_image_path")
Expand All @@ -82,8 +97,10 @@ async def image_analyzer(data_call):

# Read from configuration (hass.data)
api_key = hass.data.get(DOMAIN, {}).get(CONF_OPENAI_API_KEY)
ip_address = hass.data.get(DOMAIN, {}).get(CONF_IP_ADDRESS)
port = hass.data.get(DOMAIN, {}).get(CONF_PORT)
localai_ip_address = hass.data.get(DOMAIN, {}).get(CONF_LOCALAI_IP_ADDRESS)
localai_port = hass.data.get(DOMAIN, {}).get(CONF_LOCALAI_PORT)
ollama_ip_address = hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_IP_ADDRESS)
ollama_port = hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_PORT)

# Read data from service call
mode = str(data_call.data.get(CONF_PROVIDER))
Expand All @@ -97,20 +114,20 @@ async def image_analyzer(data_call):
target_width = data_call.data.get(CONF_TARGET_WIDTH, 1280)
# Temperature parameter. Default is 0.5
temperature = float(data_call.data.get(CONF_TEMPERATURE, 0.5))

# Validate configuration
validate(mode, api_key, ip_address, port, image_paths)

if mode == "OpenAI":
# GPT model: Default model is gpt-4o for OpenAI
model= str(data_call.data.get(CONF_MODEL, "gpt-4o"))
# Maximum number of tokens used by model. Default is 100.
max_tokens= int(data_call.data.get(CONF_MAXTOKENS))
if mode == "LocalAI":
# GPT model: Default model is gpt-4-vision-preview for LocalAI
model= str(data_call.data.get(CONF_MODEL, "gpt-4-vision-preview"))
# Maximum number of tokens used by model. Default is 100.
max_tokens= int(data_call.data.get(CONF_MAXTOKENS))
# Maximum number of tokens used by model. Default is 100.
max_tokens = int(data_call.data.get(CONF_MAXTOKENS))

# Validate configuration and input data and set model
if mode == 'OpenAI':
validate(mode, api_key, image_paths)
model = str(data_call.data.get(CONF_MODEL, "gpt-4o"))
elif mode == 'LocalAI':
validate(mode, None, image_paths, localai_ip_address, localai_port)
model = str(data_call.data.get(CONF_MODEL, "gpt-4-vision-preview"))
elif mode == 'Ollama':
validate(mode, None, image_paths, ollama_ip_address, ollama_port)
model = str(data_call.data.get(CONF_MODEL, "llava"))


def encode_image(image_path):
"""Encode image as base64
Expand Down Expand Up @@ -151,11 +168,13 @@ def encode_image(image_path):
session= async_get_clientsession(hass)

if mode == "LocalAI":
response_text = await handle_localai_request(session, model, message, base64_images, ip_address, port, max_tokens, temperature)
response_text = await handle_localai_request(session, model, message, base64_images, localai_ip_address, localai_port, max_tokens, temperature)

elif mode == "OpenAI":
response_text = await handle_openai_request(session, model, message, base64_images, api_key, max_tokens, temperature)

elif mode == 'Ollama':
response_text = await handle_ollama_request(session, model, message, base64_images, ollama_ip_address, ollama_port, max_tokens, temperature)
return {"response_text": response_text}

hass.services.register(
Expand Down
76 changes: 62 additions & 14 deletions custom_components/gpt4vision/config_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
from homeassistant.helpers.selector import selector
from homeassistant.exceptions import ServiceValidationError
from homeassistant.helpers.aiohttp_client import async_get_clientsession
from .const import DOMAIN, CONF_OPENAI_API_KEY, CONF_IP_ADDRESS, CONF_PORT
from .const import (
DOMAIN,
CONF_PROVIDER,
CONF_OPENAI_API_KEY,
CONF_LOCALAI_IP_ADDRESS,
CONF_LOCALAI_PORT,
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT
)
import voluptuous as vol
import logging

Expand All @@ -11,20 +19,33 @@

async def validate_mode(user_input: dict):
# check CONF_MODE is not empty
if not user_input["provider"]:
if not user_input[CONF_PROVIDER]:
raise ServiceValidationError("empty_mode")


async def validate_localai(hass, user_input: dict):
# check CONF_IP_ADDRESS is not empty
if not user_input[CONF_IP_ADDRESS]:
if not user_input[CONF_LOCALAI_IP_ADDRESS]:
raise ServiceValidationError("empty_ip_address")

# check CONF_PORT is not empty
if not user_input[CONF_PORT]:
if not user_input[CONF_LOCALAI_PORT]:
raise ServiceValidationError("empty_port")
# perform handshake with LocalAI server
if not await validate_connection(hass, user_input[CONF_IP_ADDRESS], user_input[CONF_PORT]):
if not await validate_connection(hass, user_input[CONF_LOCALAI_IP_ADDRESS], user_input[CONF_LOCALAI_PORT], "/readyz"):
raise ServiceValidationError("handshake_failed")


async def validate_ollama(hass, user_input: dict):
    """Validate the Ollama settings entered in the config flow.

    Ensures both the IP address and the port fields are non-empty,
    then probes the server's ``/api/tags`` endpoint to confirm an
    Ollama instance is actually reachable there.

    Args:
        hass: Home Assistant instance (used to obtain the HTTP session).
        user_input: form data containing CONF_OLLAMA_IP_ADDRESS and
            CONF_OLLAMA_PORT.

    Raises:
        ServiceValidationError: "empty_ip_address" / "empty_port" for
            missing fields, "handshake_failed" when the server does not
            respond as expected.
    """
    ip_address = user_input[CONF_OLLAMA_IP_ADDRESS]
    port = user_input[CONF_OLLAMA_PORT]

    if not ip_address:
        raise ServiceValidationError("empty_ip_address")
    if not port:
        raise ServiceValidationError("empty_port")

    # Handshake with the Ollama server: /api/tags answers 200 when it is up.
    reachable = await validate_connection(hass, ip_address, port, "/api/tags")
    if not reachable:
        raise ServiceValidationError("handshake_failed")


Expand All @@ -34,12 +55,12 @@ def validate_openai(user_input: dict):
raise ServiceValidationError("empty_api_key")


async def validate_connection(hass, ip_address, port):
async def validate_connection(hass, ip_address, port, endpoint, expected_status=200):
session = async_get_clientsession(hass)
url = f'http://{ip_address}:{port}/readyz'
url = f'http://{ip_address}:{port}{endpoint}'
try:
response = await session.get(url)
if response.status == 200:
if response.status == expected_status:
return True
else:
return False
Expand All @@ -55,7 +76,7 @@ async def async_step_user(self, user_input=None):
data_schema = vol.Schema({
vol.Required("provider", default="OpenAI"): selector({
"select": {
"options": ["OpenAI", "LocalAI"],
"options": ["OpenAI", "LocalAI", "Ollama"],
"mode": "dropdown",
"sort": True,
"custom_value": False
Expand All @@ -65,10 +86,14 @@ async def async_step_user(self, user_input=None):

if user_input is not None:
self.init_info = user_input
if user_input["provider"] == "LocalAI":
if DOMAIN in self.hass.data and CONF_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_PORT in self.hass.data[DOMAIN]:
if user_input[CONF_PROVIDER] == "LocalAI":
if DOMAIN in self.hass.data and CONF_LOCALAI_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_LOCALAI_PORT in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
return await self.async_step_localai()
elif user_input[CONF_PROVIDER] == "Ollama":
if DOMAIN in self.hass.data and CONF_OLLAMA_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_OLLAMA_PORT in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
return await self.async_step_ollama()
else:
if DOMAIN in self.hass.data and CONF_OPENAI_API_KEY in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
Expand All @@ -82,13 +107,13 @@ async def async_step_user(self, user_input=None):

async def async_step_localai(self, user_input=None):
data_schema = vol.Schema({
vol.Required(CONF_IP_ADDRESS): str,
vol.Required(CONF_PORT, default=8080): int,
vol.Required(CONF_LOCALAI_IP_ADDRESS): str,
vol.Required(CONF_LOCALAI_PORT, default=8080): int,
})

if user_input is not None:
try:
validate_localai(self.hass, user_input)
await validate_localai(self.hass, user_input)
# add the mode to user_input
return self.async_create_entry(title="GPT4Vision LocalAI", data=user_input)
except ServiceValidationError as e:
Expand All @@ -103,6 +128,29 @@ async def async_step_localai(self, user_input=None):
data_schema=data_schema,
)

async def async_step_ollama(self, user_input=None):
    """Config-flow step that collects the Ollama server address and port.

    Shows the form on first visit; on submission validates the input
    (including a server handshake) and creates the config entry, or
    re-renders the form with an error if validation fails.
    """
    schema = vol.Schema({
        vol.Required(CONF_OLLAMA_IP_ADDRESS): str,
        vol.Required(CONF_OLLAMA_PORT, default=11434): int,
    })

    if user_input is None:
        # First visit: render the empty form.
        return self.async_show_form(
            step_id="ollama",
            data_schema=schema,
        )

    try:
        await validate_ollama(self.hass, user_input)
    except ServiceValidationError as e:
        # Validation failed; show the form again with an error banner.
        return self.async_show_form(
            step_id="ollama",
            data_schema=schema,
            errors={"base": "handshake_failed"}
        )

    # add the mode to user_input
    return self.async_create_entry(title="GPT4Vision Ollama", data=user_input)

async def async_step_openai(self, user_input=None):
data_schema = vol.Schema({
vol.Required(CONF_OPENAI_API_KEY): str,
Expand Down
11 changes: 9 additions & 2 deletions custom_components/gpt4vision/const.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
""" Constants for gpt4vision component"""

# Global values
DOMAIN = "gpt4vision"

# Configuration values from setup
CONF_PROVIDER = 'provider'
CONF_OPENAI_API_KEY = 'api_key'
CONF_IP_ADDRESS = 'localai_ip'
CONF_PORT = 'localai_port'
CONF_LOCALAI_IP_ADDRESS = 'localai_ip'
CONF_LOCALAI_PORT = 'localai_port'
CONF_OLLAMA_IP_ADDRESS = 'ollama_ip'
CONF_OLLAMA_PORT = 'ollama_port'

# Values from service call
CONF_MAXTOKENS = 'max_tokens'
CONF_TARGET_WIDTH = 'target_width'
CONF_MODEL = 'model'
Expand Down
2 changes: 1 addition & 1 deletion custom_components/gpt4vision/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
"issue_tracker": "https://github.com/valentinfrlch/ha-gpt4vision/issues",
"documentation": "https://github.com/valentinfrlch/ha-gpt4vision",
"iot_class": "cloud_polling",
"version": "0.3.0"
"version": "0.3.5"
}
Loading

0 comments on commit e21edf3

Please sign in to comment.