Skip to content

Commit

Permalink
Merge pull request #6 from valentinfrlch/dev
Browse files Browse the repository at this point in the history
Merging dev with main for v0.3.5
  • Loading branch information
valentinfrlch authored May 24, 2024
2 parents e561ad5 + 037245a commit e21edf3
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 58 deletions.
60 changes: 53 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,6 @@ Supported providers are OpenAI and [LocalAI](https://github.com/mudler/LocalAI).
## Resources
Check the [wiki](https://github.com/valentinfrlch/ha-gpt4vision/wiki/Usage-Examples) for examples on how you can integrate gpt4vision into your Home Assistant or join the [discussion](https://community.home-assistant.io/t/gpt-4o-vision-capabilities-in-home-assistant/729241) in the Home Assistant Community.

## API key
> [!IMPORTANT]
> If you're planning on using **OpenAI's API** you'll **need an API key**. You must obtain a valid OpenAI key from [here](https://platform.openai.com/api-keys).
A pricing calculator is available here: [https://openai.com/api/pricing/](https://openai.com/api/pricing/).


# Installation
### Installation via HACS (recommended)
1. Add this repository's url (https://github.com/valentinfrlch/ha-gpt4vision) to HACS under custom repositories.
Expand All @@ -58,6 +51,59 @@ A pricing calculator is available here: [https://openai.com/api/pricing/](https:
2. Add integration in Home Assistant Settings/Devices & services
3. Provide your API key or IP address and port of your LocalAI server


## Provider specific setup
### OpenAI
Simply obtain an API key from [OpenAI](https://platform.openai.com/api-keys) and enter it in the Home Assistant UI during setup.
A pricing calculator is available here: [https://openai.com/api/pricing/](https://openai.com/api/pricing/).


### LocalAI
To use LocalAI, you need to have a LocalAI server running. You can find the installation instructions [here](https://localai.io/basics/getting_started/). During setup you'll need to provide the IP address of your machine and the port on which LocalAI is running (default is 8080).

### Ollama
To use Ollama you need to install Ollama. You can download it from [here](https://ollama.com/). Once installed you need to run the following command to download the `llava` model:
```bash
ollama run llava
```
If your Home Assistant is **not** running on the same machine as Ollama, you need to set the `OLLAMA_HOST` environment variable.

**On Linux:**
1. Edit the systemd service by calling `systemctl edit ollama.service`. This will open an editor.
2. For each environment variable, add an `Environment` line under the `[Service]` section:

```
[Service]
Environment="OLLAMA_HOST=0.0.0.0"
```
3. Save and close the editor.
4. Reload systemd and restart Ollama
```bash
systemctl daemon-reload
systemctl restart ollama
```

**On Windows:**
1. Quit Ollama from the system tray
2. Open File Explorer
3. Right click on This PC and select Properties
4. Click on Advanced system settings
5. Select Environment Variables
6. Under User variables click New
7. For variable name enter `OLLAMA_HOST` and for value enter 0.0.0.0
8. Click OK and start Ollama again from the Start Menu

**On macOS:**
1. Open Terminal
2. Run the following command
```bash
launchctl setenv OLLAMA_HOST "0.0.0.0"
```
3. Restart Ollama

<br>

## Service call and usage
After restarting, the `gpt4vision.image_analyzer` service will be available. You can test it in the Developer Tools section in Home Assistant.
To get GPT's analysis of a local image, use the following service call.
Expand Down
77 changes: 48 additions & 29 deletions custom_components/gpt4vision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@
CONF_MODEL,
CONF_MESSAGE,
CONF_IMAGE_FILE,
CONF_IP_ADDRESS,
CONF_PORT,
CONF_LOCALAI_IP_ADDRESS,
CONF_LOCALAI_PORT,
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT,
CONF_TEMPERATURE
)
from .request_handlers import handle_localai_request, handle_openai_request
from .request_handlers import (
handle_localai_request,
handle_openai_request,
handle_ollama_request
)
import base64
import io
import os
Expand All @@ -26,8 +32,10 @@ async def async_setup_entry(hass, entry):
"""Set up gpt4vision from a config entry."""
# Get all entries from config flow
openai_api_key = entry.data.get(CONF_OPENAI_API_KEY)
ip_address = entry.data.get(CONF_IP_ADDRESS)
port = entry.data.get(CONF_PORT)
localai_ip_address = entry.data.get(CONF_LOCALAI_IP_ADDRESS)
localai_port = entry.data.get(CONF_LOCALAI_PORT)
ollama_ip_address = entry.data.get(CONF_OLLAMA_IP_ADDRESS)
ollama_port = entry.data.get(CONF_OLLAMA_PORT)

# Ensure DOMAIN exists in hass.data
if DOMAIN not in hass.data:
Expand All @@ -38,16 +46,18 @@ async def async_setup_entry(hass, entry):
key: value
for key, value in {
CONF_OPENAI_API_KEY: openai_api_key,
CONF_IP_ADDRESS: ip_address,
CONF_PORT: port,
CONF_LOCALAI_IP_ADDRESS: localai_ip_address,
CONF_LOCALAI_PORT: localai_port,
CONF_OLLAMA_IP_ADDRESS: ollama_ip_address,
CONF_OLLAMA_PORT: ollama_port
}.items()
if value is not None
})

return True


def validate(mode, api_key, ip_address, port, image_paths):
def validate(mode, api_key, image_paths, ip_address=None, port=None):
"""Validate the configuration for the component
Args:
Expand All @@ -59,14 +69,19 @@ def validate(mode, api_key, ip_address, port, image_paths):
Raises:
ServiceValidationError: if configuration is invalid
"""

if mode == "OpenAI":
# Checks for OpenAI
if mode == 'OpenAI':
if not api_key:
raise ServiceValidationError("openai_not_configured")
elif mode == "LocalAI":
# Checks for LocalAI
elif mode == 'LocalAI':
if not ip_address or not port:
raise ServiceValidationError("localai_not_configured")
# Check if image file exists
# Checks for Ollama
elif mode == 'Ollama':
if not ip_address or not port:
raise ServiceValidationError("ollama_not_configured")
# File path validation
for image_path in image_paths:
if not os.path.exists(image_path):
raise ServiceValidationError("invalid_image_path")
Expand All @@ -82,8 +97,10 @@ async def image_analyzer(data_call):

# Read from configuration (hass.data)
api_key = hass.data.get(DOMAIN, {}).get(CONF_OPENAI_API_KEY)
ip_address = hass.data.get(DOMAIN, {}).get(CONF_IP_ADDRESS)
port = hass.data.get(DOMAIN, {}).get(CONF_PORT)
localai_ip_address = hass.data.get(DOMAIN, {}).get(CONF_LOCALAI_IP_ADDRESS)
localai_port = hass.data.get(DOMAIN, {}).get(CONF_LOCALAI_PORT)
ollama_ip_address = hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_IP_ADDRESS)
ollama_port = hass.data.get(DOMAIN, {}).get(CONF_OLLAMA_PORT)

# Read data from service call
mode = str(data_call.data.get(CONF_PROVIDER))
Expand All @@ -97,20 +114,20 @@ async def image_analyzer(data_call):
target_width = data_call.data.get(CONF_TARGET_WIDTH, 1280)
# Temperature parameter. Default is 0.5
temperature = float(data_call.data.get(CONF_TEMPERATURE, 0.5))

# Validate configuration
validate(mode, api_key, ip_address, port, image_paths)

if mode == "OpenAI":
# GPT model: Default model is gpt-4o for OpenAI
model= str(data_call.data.get(CONF_MODEL, "gpt-4o"))
# Maximum number of tokens used by model. Default is 100.
max_tokens= int(data_call.data.get(CONF_MAXTOKENS))
if mode == "LocalAI":
# GPT model: Default model is gpt-4-vision-preview for LocalAI
model= str(data_call.data.get(CONF_MODEL, "gpt-4-vision-preview"))
# Maximum number of tokens used by model. Default is 100.
max_tokens= int(data_call.data.get(CONF_MAXTOKENS))
# Maximum number of tokens used by model. Default is 100.
max_tokens = int(data_call.data.get(CONF_MAXTOKENS))

# Validate configuration and input data and set model
if mode == 'OpenAI':
validate(mode, api_key, image_paths)
model = str(data_call.data.get(CONF_MODEL, "gpt-4o"))
elif mode == 'LocalAI':
validate(mode, None, image_paths, localai_ip_address, localai_port)
model = str(data_call.data.get(CONF_MODEL, "gpt-4-vision-preview"))
elif mode == 'Ollama':
validate(mode, None, image_paths, ollama_ip_address, ollama_port)
model = str(data_call.data.get(CONF_MODEL, "llava"))


def encode_image(image_path):
"""Encode image as base64
Expand Down Expand Up @@ -151,11 +168,13 @@ def encode_image(image_path):
session= async_get_clientsession(hass)

if mode == "LocalAI":
response_text = await handle_localai_request(session, model, message, base64_images, ip_address, port, max_tokens, temperature)
response_text = await handle_localai_request(session, model, message, base64_images, localai_ip_address, localai_port, max_tokens, temperature)

elif mode == "OpenAI":
response_text = await handle_openai_request(session, model, message, base64_images, api_key, max_tokens, temperature)

elif mode == 'Ollama':
response_text = await handle_ollama_request(session, model, message, base64_images, ollama_ip_address, ollama_port, max_tokens, temperature)
return {"response_text": response_text}

hass.services.register(
Expand Down
76 changes: 62 additions & 14 deletions custom_components/gpt4vision/config_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
from homeassistant.helpers.selector import selector
from homeassistant.exceptions import ServiceValidationError
from homeassistant.helpers.aiohttp_client import async_get_clientsession
from .const import DOMAIN, CONF_OPENAI_API_KEY, CONF_IP_ADDRESS, CONF_PORT
from .const import (
DOMAIN,
CONF_PROVIDER,
CONF_OPENAI_API_KEY,
CONF_LOCALAI_IP_ADDRESS,
CONF_LOCALAI_PORT,
CONF_OLLAMA_IP_ADDRESS,
CONF_OLLAMA_PORT
)
import voluptuous as vol
import logging

Expand All @@ -11,20 +19,33 @@

async def validate_mode(user_input: dict):
# check CONF_MODE is not empty
if not user_input["provider"]:
if not user_input[CONF_PROVIDER]:
raise ServiceValidationError("empty_mode")


async def validate_localai(hass, user_input: dict):
# check CONF_IP_ADDRESS is not empty
if not user_input[CONF_IP_ADDRESS]:
if not user_input[CONF_LOCALAI_IP_ADDRESS]:
raise ServiceValidationError("empty_ip_address")

# check CONF_PORT is not empty
if not user_input[CONF_PORT]:
if not user_input[CONF_LOCALAI_PORT]:
raise ServiceValidationError("empty_port")
# perform handshake with LocalAI server
if not await validate_connection(hass, user_input[CONF_IP_ADDRESS], user_input[CONF_PORT]):
if not await validate_connection(hass, user_input[CONF_LOCALAI_IP_ADDRESS], user_input[CONF_LOCALAI_PORT], "/readyz"):
raise ServiceValidationError("handshake_failed")


async def validate_ollama(hass, user_input: dict):
    """Validate the Ollama settings entered in the config flow.

    Ensures both the IP address and the port fields are non-empty,
    then probes the server's ``/api/tags`` endpoint to confirm an
    Ollama instance is actually reachable there.

    Args:
        hass: Home Assistant instance (used to obtain the HTTP session).
        user_input: form data containing CONF_OLLAMA_IP_ADDRESS and
            CONF_OLLAMA_PORT.

    Raises:
        ServiceValidationError: "empty_ip_address" / "empty_port" for
            missing fields, "handshake_failed" when the server does not
            respond as expected.
    """
    ip_address = user_input[CONF_OLLAMA_IP_ADDRESS]
    port = user_input[CONF_OLLAMA_PORT]

    if not ip_address:
        raise ServiceValidationError("empty_ip_address")
    if not port:
        raise ServiceValidationError("empty_port")

    # Handshake with the Ollama server: /api/tags answers 200 when it is up.
    reachable = await validate_connection(hass, ip_address, port, "/api/tags")
    if not reachable:
        raise ServiceValidationError("handshake_failed")


Expand All @@ -34,12 +55,12 @@ def validate_openai(user_input: dict):
raise ServiceValidationError("empty_api_key")


async def validate_connection(hass, ip_address, port):
async def validate_connection(hass, ip_address, port, endpoint, expected_status=200):
session = async_get_clientsession(hass)
url = f'http://{ip_address}:{port}/readyz'
url = f'http://{ip_address}:{port}{endpoint}'
try:
response = await session.get(url)
if response.status == 200:
if response.status == expected_status:
return True
else:
return False
Expand All @@ -55,7 +76,7 @@ async def async_step_user(self, user_input=None):
data_schema = vol.Schema({
vol.Required("provider", default="OpenAI"): selector({
"select": {
"options": ["OpenAI", "LocalAI"],
"options": ["OpenAI", "LocalAI", "Ollama"],
"mode": "dropdown",
"sort": True,
"custom_value": False
Expand All @@ -65,10 +86,14 @@ async def async_step_user(self, user_input=None):

if user_input is not None:
self.init_info = user_input
if user_input["provider"] == "LocalAI":
if DOMAIN in self.hass.data and CONF_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_PORT in self.hass.data[DOMAIN]:
if user_input[CONF_PROVIDER] == "LocalAI":
if DOMAIN in self.hass.data and CONF_LOCALAI_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_LOCALAI_PORT in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
return await self.async_step_localai()
elif user_input[CONF_PROVIDER] == "Ollama":
if DOMAIN in self.hass.data and CONF_OLLAMA_IP_ADDRESS in self.hass.data[DOMAIN] and CONF_OLLAMA_PORT in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
return await self.async_step_ollama()
else:
if DOMAIN in self.hass.data and CONF_OPENAI_API_KEY in self.hass.data[DOMAIN]:
return self.async_abort(reason="already_configured")
Expand All @@ -82,13 +107,13 @@ async def async_step_user(self, user_input=None):

async def async_step_localai(self, user_input=None):
data_schema = vol.Schema({
vol.Required(CONF_IP_ADDRESS): str,
vol.Required(CONF_PORT, default=8080): int,
vol.Required(CONF_LOCALAI_IP_ADDRESS): str,
vol.Required(CONF_LOCALAI_PORT, default=8080): int,
})

if user_input is not None:
try:
validate_localai(self.hass, user_input)
await validate_localai(self.hass, user_input)
# add the mode to user_input
return self.async_create_entry(title="GPT4Vision LocalAI", data=user_input)
except ServiceValidationError as e:
Expand All @@ -103,6 +128,29 @@ async def async_step_localai(self, user_input=None):
data_schema=data_schema,
)

async def async_step_ollama(self, user_input=None):
    """Config-flow step that collects the Ollama server address and port.

    Shows the form on first visit; on submission validates the input
    (including a server handshake) and creates the config entry, or
    re-renders the form with an error if validation fails.
    """
    schema = vol.Schema({
        vol.Required(CONF_OLLAMA_IP_ADDRESS): str,
        vol.Required(CONF_OLLAMA_PORT, default=11434): int,
    })

    if user_input is None:
        # First visit: render the empty form.
        return self.async_show_form(
            step_id="ollama",
            data_schema=schema,
        )

    try:
        await validate_ollama(self.hass, user_input)
    except ServiceValidationError as e:
        # Validation failed; show the form again with an error banner.
        return self.async_show_form(
            step_id="ollama",
            data_schema=schema,
            errors={"base": "handshake_failed"}
        )

    # add the mode to user_input
    return self.async_create_entry(title="GPT4Vision Ollama", data=user_input)

async def async_step_openai(self, user_input=None):
data_schema = vol.Schema({
vol.Required(CONF_OPENAI_API_KEY): str,
Expand Down
11 changes: 9 additions & 2 deletions custom_components/gpt4vision/const.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
""" Constants for gpt4vision component"""

# Global values
DOMAIN = "gpt4vision"

# Configuration values from setup
CONF_PROVIDER = 'provider'
CONF_OPENAI_API_KEY = 'api_key'
CONF_IP_ADDRESS = 'localai_ip'
CONF_PORT = 'localai_port'
CONF_LOCALAI_IP_ADDRESS = 'localai_ip'
CONF_LOCALAI_PORT = 'localai_port'
CONF_OLLAMA_IP_ADDRESS = 'ollama_ip'
CONF_OLLAMA_PORT = 'ollama_port'

# Values from service call
CONF_MAXTOKENS = 'max_tokens'
CONF_TARGET_WIDTH = 'target_width'
CONF_MODEL = 'model'
Expand Down
2 changes: 1 addition & 1 deletion custom_components/gpt4vision/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
"issue_tracker": "https://github.com/valentinfrlch/ha-gpt4vision/issues",
"documentation": "https://github.com/valentinfrlch/ha-gpt4vision",
"iot_class": "cloud_polling",
"version": "0.3.0"
"version": "0.3.5"
}
Loading

0 comments on commit e21edf3

Please sign in to comment.