diff --git a/src/helm/clients/auto_client.py b/src/helm/clients/auto_client.py
index 50eef34f57..a7bacd6cd7 100644
--- a/src/helm/clients/auto_client.py
+++ b/src/helm/clients/auto_client.py
@@ -71,6 +71,7 @@ def _get_client(self, model_deployment_name: str) -> Client:
             model_deployment.client_spec,
             constant_bindings={
                 "cache_config": cache_config,
+                "model_name": model_deployment.model_name,
                 "tokenizer_name": model_deployment.tokenizer_name,
             },
             provider_bindings={
diff --git a/src/helm/clients/azure_openai_client.py b/src/helm/clients/azure_openai_client.py
new file mode 100644
index 0000000000..97fed399be
--- /dev/null
+++ b/src/helm/clients/azure_openai_client.py
@@ -0,0 +1,40 @@
+import os
+from typing import Dict, Optional
+
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.proxy.retry import NonRetriableException
+from helm.tokenizers.tokenizer import Tokenizer
+
+try:
+    from openai import AzureOpenAI
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["openai"])
+
+
+class AzureOpenAIClient(OpenAIClient):
+    API_VERSION = "2024-07-01-preview"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+        api_version: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
+    ):
+        super().__init__(
+            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
+        )
+        azure_endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not azure_endpoint:
+            raise NonRetriableException("Must provide Azure endpoint through credentials.conf or AZURE_OPENAI_ENDPOINT")
+        self.client = AzureOpenAI(
+            api_key=api_key,
+            api_version=api_version or AzureOpenAIClient.API_VERSION,
+            azure_endpoint=azure_endpoint,
+            default_headers=default_headers,
+        )
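Note for reviewers: the wrapper above only preconfigures the official `openai` package's `AzureOpenAI` client. A minimal sketch of the equivalent direct usage, assuming the endpoint and key are set in the environment and that an Azure deployment named `gpt-4o-mini-2024-07-18` exists (both assumptions, not established by this diff):

```python
import os

from openai import AzureOpenAI

# Same defaults that AzureOpenAIClient applies: endpoint taken from the
# environment and the pinned API version.
client = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-07-01-preview",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
)

# For Azure, `model` names the *deployment*; here it is assumed to match
# the underlying model name.
response = client.chat.completions.create(
    model="gpt-4o-mini-2024-07-18",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)
```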
diff --git a/src/helm/clients/stanfordhealthcare_llama_client.py b/src/helm/clients/stanfordhealthcare_llama_client.py
new file mode 100644
index 0000000000..4d21634d62
--- /dev/null
+++ b/src/helm/clients/stanfordhealthcare_llama_client.py
@@ -0,0 +1,54 @@
+from typing import Optional
+
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.proxy.retry import NonRetriableException
+from helm.tokenizers.tokenizer import Tokenizer
+
+try:
+    from openai import OpenAI
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["openai"])
+
+
+class StanfordHealthCareLlamaClient(OpenAIClient):
+    """
+    Client for accessing Llama models hosted on Stanford Health Care's model API.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        model_name: str,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+    ):
+        super().__init__(
+            tokenizer=tokenizer, tokenizer_name=tokenizer_name, cache_config=cache_config, api_key="unused"
+        )
+        if not endpoint:
+            raise NonRetriableException("Must provide endpoint through credentials.conf")
+        if not api_key:
+            raise NonRetriableException("Must provide API key through credentials.conf")
+        # Guess the base URL path segment from the model name,
+        # e.g. "meta/llama-3.3-70b-instruct" -> "llama3370b". Maybe make this configurable instead?
+        base_url_part = model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")
+
+        base_url = f"{endpoint.strip('/')}/{base_url_part}/v1"
+        self.client = OpenAI(
+            api_key="dummy",
+            base_url=base_url,
+            default_headers={StanfordHealthCareLlamaClient.CREDENTIAL_HEADER_NAME: api_key},
+        )
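Since the base URL derivation is the most fragile part of this client, here is a small self-contained sketch of what it produces. Pulling it out as a standalone `base_url_part` function is illustrative only (the diff inlines it), and `str.removesuffix` requires Python 3.9+:

```python
def base_url_part(model_name: str) -> str:
    # Mirrors the derivation in StanfordHealthCareLlamaClient.__init__:
    # drop the creator prefix and the "-instruct" suffix, then strip dashes and dots.
    return model_name.split("/")[1].lower().removesuffix("-instruct").replace("-", "").replace(".", "")

assert base_url_part("meta/llama-3.3-70b-instruct") == "llama3370b"

# With stanfordhealthcareEndpoint set to https://your-domain-name/, the
# resulting OpenAI-compatible base URL is https://your-domain-name/llama3370b/v1
```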
diff --git a/src/helm/clients/stanfordhealthcare_openai_client.py b/src/helm/clients/stanfordhealthcare_openai_client.py
new file mode 100644
index 0000000000..c039abc51b
--- /dev/null
+++ b/src/helm/clients/stanfordhealthcare_openai_client.py
@@ -0,0 +1,42 @@
+from typing import Optional
+
+from helm.clients.azure_openai_client import AzureOpenAIClient
+from helm.common.cache import CacheConfig
+from helm.proxy.retry import NonRetriableException
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class StanfordHealthCareOpenAIClient(AzureOpenAIClient):
+    """
+    Client for accessing OpenAI models hosted on Stanford Health Care's model API.
+
+    Configure by setting the following in prod_env/credentials.conf:
+
+    ```
+    stanfordhealthcareEndpoint: https://your-domain-name/
+    stanfordhealthcareApiKey: your-private-key
+    ```
+    """
+
+    API_VERSION = "2023-05-15"
+    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+        endpoint: Optional[str] = None,
+    ):
+        if not api_key:
+            raise NonRetriableException("Must provide API key through credentials.conf")
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key="unused",
+            endpoint=endpoint,
+            api_version=StanfordHealthCareOpenAIClient.API_VERSION,
+            default_headers={StanfordHealthCareOpenAIClient.CREDENTIAL_HEADER_NAME: api_key},
+        )
diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml
index a2c2c854b9..32b7eaaecb 100644
--- a/src/helm/config/model_deployments.yaml
+++ b/src/helm/config/model_deployments.yaml
@@ -16,6 +16,22 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.simple_client.SimpleClient"
 
+  # Stanford Health Care
+  # Placed earlier in the file to make them non-default
+  - name: stanfordhealthcare/gpt-4o-mini-2024-07-18
+    model_name: openai/gpt-4o-mini-2024-07-18
+    tokenizer_name: openai/o200k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.stanfordhealthcare_openai_client.StanfordHealthCareOpenAIClient"
+
+  - name: stanfordhealthcare/llama-3.3-70b-instruct
+    model_name: meta/llama-3.3-70b-instruct
+    tokenizer_name: meta/llama-3.3-70b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.stanfordhealthcare_llama_client.StanfordHealthCareLlamaClient"
+
   # Adobe
   - name: adobe/giga-gan
     model_name: adobe/giga-gan
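One way to smoke-test the new deployments end to end is a small `helm-run` invocation. This is a usage sketch, assuming a HELM version whose run entries accept `model_deployment`; the scenario and suite names are placeholders:

```bash
helm-run \
  --run-entries "mmlu:subject=anatomy,model_deployment=stanfordhealthcare/gpt-4o-mini-2024-07-18" \
  --suite shc-smoke-test \
  --max-eval-instances 10
```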
diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml
index 0a4995a6d3..4089528380 100644
--- a/src/helm/config/model_metadata.yaml
+++ b/src/helm/config/model_metadata.yaml
@@ -1701,7 +1701,16 @@ models:
 
   - name: meta/llama-3.3-70b-instruct-turbo
     display_name: Llama 3.3 Instruct Turbo (70B)
-    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near-negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-12-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.3-70b-instruct
+    display_name: Llama 3.3 Instruct (70B)
+    description: Llama 3.3 Instruct (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000