diff --git a/garak/generators/watsonx.py b/garak/generators/watsonx.py
index 8edc49a05..49b767dde 100644
--- a/garak/generators/watsonx.py
+++ b/garak/generators/watsonx.py
@@ -2,20 +2,20 @@
 from garak.generators.base import Generator
 from typing import List, Union
 import os
-import importlib
+import requests
 
 
 class WatsonXGenerator(Generator):
     """
     This is a generator for watsonx.ai.
 
-    Make sure that you initialize the environment variables:
-    'WATSONX_TOKEN',
-    'WATSONX_URL',
-    and 'WATSONX_PROJECTID'.
-
-    To use a tuned model that is deployed, use 'deployment/deployment' for the -n flag and make sure
-    to also initialize the 'WATSONX_DEPLOYID' environment variable.
+    Make sure that you initialize the environment variables:
+    'WATSONX_TOKEN',
+    'WATSONX_URL',
+    and either 'WATSONX_PROJECTID' or 'WATSONX_DEPLOYID'.
+
+    To use a model that is in the "project" stage, initialize the WATSONX_PROJECTID variable with the Project ID of the model.
+    To use a tuned model that is deployed, initialize the WATSONX_DEPLOYID variable with the Deployment ID of the model.
     """
 
     ENV_VAR = "WATSONX_TOKEN"
@@ -24,93 +24,110 @@ class WatsonXGenerator(Generator):
     DID_ENV_VAR = "WATSONX_DEPLOYID"
     DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
         "uri": None,
-        "project_id": None,
-        "deployment_id": None,
-        "frequency_penalty": 0.5,
-        "logprobs": True,
-        "top_logprobs": 3,
-        "presence_penalty": 0.3,
-        "temperature": 0.7,
-        "max_tokens": 100,
-        "time_limit": 300000,
-        "top_p": 0.9,
-        "n": 1,
+        "project_id": "",
+        "deployment_id": "",
+        "prompt_variable": "input",
+        "bearer_token": "",
+        "max_tokens": 900,
     }
 
     generator_family_name = "watsonx"
 
     def __init__(self, name="", config_root=_config):
         super().__init__(name, config_root=config_root)
 
-        # Initialize and validate api_key
         if self.api_key is not None:
             os.environ[self.ENV_VAR] = self.api_key
-
+
+    def _set_bearer_token(self, iam_url="https://iam.cloud.ibm.com/identity/token"):
+        header = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Accept": "application/json",
+        }
+        body = (
+            "grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey=" + self.api_key
+        )
+        response = requests.post(url=iam_url, headers=header, data=body)
+        self.bearer_token = "Bearer " + response.json()["access_token"]
+
+    def _generate_with_project(self, payload):
+        # Generation via Project ID.
+
+        url = self.uri + "/ml/v1/text/generation?version=2023-05-29"
+
+        body = {
+            "input": payload,
+            "parameters": {
+                "decoding_method": "greedy",
+                "max_new_tokens": self.max_tokens,
+                "min_new_tokens": 0,
+                "repetition_penalty": 1,
+            },
+            "model_id": self.name,
+            "project_id": self.project_id,
+        }
+
+        headers = {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+            "Authorization": self.bearer_token,
+        }
+
+        response = requests.post(url=url, headers=headers, json=body)
+        return response.json()
+
+    def _generate_with_deployment(self, payload):
+        # Generation via Deployment ID.
+        url = (
+            self.uri
+            + "/ml/v1/deployments/"
+            + self.deployment_id
+            + "/text/generation?version=2021-05-01"
+        )
+        body = {"parameters": {"prompt_variables": {self.prompt_variable: payload}}}
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "Authorization": self.bearer_token,
+        }
+        response = requests.post(url=url, headers=headers, json=body)
+        return response.json()
+
+    def _validate_env_var(self):
         # Initialize and validate url.
         if self.uri is not None:
             pass
-        else :
+        else:
             self.uri = os.getenv("WATSONX_URL", None)
             if self.uri is None:
                 raise ValueError(
                     f"The {self.URI_ENV_VAR} environment variable is required. Please enter the URL corresponding to the region of your provisioned service instance. \n"
                 )
+
         # Initialize and validate project_id.
-        if self.project_id is not None:
+        if self.project_id:
             pass
-        else :
-            self.project_id = os.getenv("WATSONX_PROJECTID", None)
-            if self.project_id is None:
-                raise ValueError(
-                    f"The {self.PID_ENV_VAR} environment variable is required. Please enter the corresponding Project ID of the resource. \n"
-                )
-
-        # Import Foundation Models from ibm_watsonx_ai module. Import the Credentials function from the same module.
-        self.watsonx = importlib.import_module("ibm_watsonx_ai.foundation_models")
-        self.Credentials = getattr(
-            importlib.import_module("ibm_watsonx_ai"), "Credentials"
-        )
-
-    def get_model(self):
-        # Call Credentials function with the url and api_key.
-        credentials = self.Credentials(url=self.uri, api_key=self.api_key)
-        if self.name == "deployment/deployment":
-            self.deployment_id = os.getenv("WATSONX_DEPLOYID", None)
-            if self.deployment_id is None:
-                raise ValueError(
-                    f"The {self.DID_ENV_VAR} environment variable is required. Please enter the corresponding Deployment ID of the resource. \n"
-                )
-
-            return self.watsonx.ModelInference(
-                deployment_id=self.deployment_id,
-                credentials=credentials,
-                project_id=self.project_id,
-            )
+        else:
+            self.project_id = os.getenv("WATSONX_PROJECTID", "")
+
+        # Initialize and validate deployment_id.
+        if self.deployment_id:
+            pass
         else:
-            return self.watsonx.ModelInference(
-                model_id=self.name,
-                credentials=credentials,
-                project_id=self.project_id,
-                params=self.watsonx.schema.TextChatParameters(
-                    frequency_penalty=self.frequency_penalty,
-                    logprobs=self.logprobs,
-                    top_logprobs=self.top_logprobs,
-                    presence_penalty=self.presence_penalty,
-                    temperature=self.temperature,
-                    max_tokens=self.max_tokens,
-                    time_limit=self.time_limit,
-                    top_p=self.top_p,
-                    n=self.n,
-                ),
+            self.deployment_id = os.getenv("WATSONX_DEPLOYID", "")
+
+        # Check to ensure at least ONE of project_id or deployment_id is populated.
+        if not self.project_id and not self.deployment_id:
+            raise ValueError(
+                f"Either {self.PID_ENV_VAR} or {self.DID_ENV_VAR} is required. Please supply either a Project ID or Deployment ID. \n"
             )
+        return super()._validate_env_var()
 
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
-
-        # Get/Create Model
-        model = self.get_model()
+        if not self.bearer_token:
+            self._set_bearer_token()
 
         # Check if message is empty. If it is, append null byte.
         if not prompt:
@@ -119,8 +136,13 @@ def _call_model(
                 "WARNING: Empty prompt was found. Null byte character appended to prevent API failure."
             )
 
+        if self.deployment_id:
+            output = self._generate_with_deployment(prompt)
+        else:
+            output = self._generate_with_project(prompt)
+
         # Parse the output to only contain the output message from the model. Return a list containing that message.
- return ["".join(model.generate(prompt=prompt)["results"][0]["generated_text"])] + return ["".join(output["results"][0]["generated_text"])] DEFAULT_CLASS = "WatsonXGenerator" diff --git a/pyproject.toml b/pyproject.toml index 41ab46e0a..eccd8a931 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,6 @@ dependencies = [ "zalgolib>=0.2.2", "ecoji>=0.1.1", "deepl==1.17.0", - "ibm-watsonx-ai==1.1.25", "fschat>=0.2.36", "litellm>=1.41.21", "jsonpath-ng>=1.6.1", diff --git a/requirements.txt b/requirements.txt index 58e6f43c1..50de30fe5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,6 @@ deepl==1.17.0 fschat>=0.2.36 litellm>=1.41.21 jsonpath-ng>=1.6.1 -ibm-watsonx-ai==1.1.25 huggingface_hub>=0.21.0 python-magic-bin>=0.4.14; sys_platform == "win32" python-magic>=0.4.21; sys_platform != "win32" diff --git a/tests/generators/conftest.py b/tests/generators/conftest.py index 9a760d80f..52d89c163 100644 --- a/tests/generators/conftest.py +++ b/tests/generators/conftest.py @@ -18,3 +18,9 @@ def hf_endpoint_mocks(): """Mock responses for Huggingface InferenceAPI based endpoints""" with open(pathlib.Path(__file__).parents[0] / "hf_inference.json") as mock_openai: return json.load(mock_openai) + +@pytest.fixture +def watsonx_compat_mocks(): + """Mock responses for watsonx.ai based endpoints""" + with open(pathlib.Path(__file__).parents[0] / "watsonx.json") as mock_watsonx: + return json.load(mock_watsonx) diff --git a/tests/generators/test_generators.py b/tests/generators/test_generators.py index 6e55b5c66..d8573d153 100644 --- a/tests/generators/test_generators.py +++ b/tests/generators/test_generators.py @@ -184,6 +184,7 @@ def test_generator_structure(classname): if classname not in [ "generators.azure.AzureOpenAIGenerator", # requires additional env variables tested in own test class + "generators.watsonx.WatsonXGenerator", # requires additional env variables tested in own test class "generators.function.Multiple", # requires mock local function not implemented here "generators.function.Single", # requires mock local function not implemented here "generators.ggml.GgmlGenerator", # validates files on disk tested in own test class @@ -211,7 +212,6 @@ def test_instantiate_generators(classname): "org_id": "fake", # required for NeMo "uri": "https://example.com", # required for rest "provider": "fake", # required for LiteLLM - "project_id": "fake", # required for watsonx } } } diff --git a/tests/generators/test_watsonx.py b/tests/generators/test_watsonx.py new file mode 100644 index 000000000..e9683d559 --- /dev/null +++ b/tests/generators/test_watsonx.py @@ -0,0 +1,81 @@ +from garak.generators.watsonx import WatsonXGenerator +import os +import pytest +import requests_mock + + +DEFAULT_DEPLOYMENT_NAME = "ibm/granite-3-8b-instruct" + + +@pytest.fixture +def set_fake_env(request) -> None: + stored_env = { + WatsonXGenerator.ENV_VAR: os.getenv(WatsonXGenerator.ENV_VAR, None), + WatsonXGenerator.PID_ENV_VAR: os.getenv(WatsonXGenerator.PID_ENV_VAR, None), + WatsonXGenerator.URI_ENV_VAR: os.getenv(WatsonXGenerator.URI_ENV_VAR, None), + WatsonXGenerator.DID_ENV_VAR: os.getenv(WatsonXGenerator.DID_ENV_VAR, None), + } + + def restore_env(): + for k, v in stored_env.items(): + if v is not None: + os.environ[k] = v + else: + del os.environ[k] + + os.environ[WatsonXGenerator.ENV_VAR] = "XXXXXXXXXXXXX" + os.environ[WatsonXGenerator.PID_ENV_VAR] = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" + os.environ[WatsonXGenerator.DID_ENV_VAR] = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" + 
+    os.environ[WatsonXGenerator.URI_ENV_VAR] = "https://garak.example.com"
+    request.addfinalizer(restore_env)
+
+
+@pytest.mark.usefixtures("set_fake_env")
+def test_bearer_token(watsonx_compat_mocks):
+    with requests_mock.Mocker() as m:
+        mock_response = watsonx_compat_mocks["watsonx_bearer_token"]
+
+        extended_request = "identity/token"
+
+        m.post(
+            "https://garak.example.com/" + extended_request, json=mock_response["json"]
+        )
+
+        granite_llm = WatsonXGenerator(DEFAULT_DEPLOYMENT_NAME)
+        granite_llm._set_bearer_token(iam_url="https://garak.example.com/identity/token")
+
+        assert granite_llm.bearer_token == ("Bearer " + mock_response["json"]["access_token"])
+
+
+@pytest.mark.usefixtures("set_fake_env")
+def test_project(watsonx_compat_mocks):
+    with requests_mock.Mocker() as m:
+        mock_response = watsonx_compat_mocks["watsonx_generation"]
+        extended_request = "/ml/v1/text/generation?version=2023-05-29"
+
+        m.post(
+            "https://garak.example.com" + extended_request, json=mock_response["json"]
+        )
+
+        granite_llm = WatsonXGenerator(DEFAULT_DEPLOYMENT_NAME)
+        response = granite_llm._generate_with_project("What is this?")
+
+        assert granite_llm.name == response["model_id"]
+
+
+@pytest.mark.usefixtures("set_fake_env")
+def test_deployment(watsonx_compat_mocks):
+    with requests_mock.Mocker() as m:
+        mock_response = watsonx_compat_mocks["watsonx_generation"]
+        extended_request = "/ml/v1/deployments/"
+        extended_request += "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
+        extended_request += "/text/generation?version=2021-05-01"
+
+        m.post(
+            "https://garak.example.com" + extended_request, json=mock_response["json"]
+        )
+
+        granite_llm = WatsonXGenerator(DEFAULT_DEPLOYMENT_NAME)
+        response = granite_llm._generate_with_deployment("What is this?")
+
+        assert granite_llm.name == response["model_id"]
diff --git a/tests/generators/watsonx.json b/tests/generators/watsonx.json
new file mode 100644
index 000000000..6b1ef32ca
--- /dev/null
+++ b/tests/generators/watsonx.json
@@ -0,0 +1,29 @@
+{
+    "watsonx_bearer_token": {
+        "code": 200,
+        "json": {
+            "access_token": "fake_token1231231231",
+            "refresh_token": "not_supported",
+            "token_type": "Bearer",
+            "expires_in": 3600,
+            "expiration": 1737754747,
+            "scope": "ibm openid"
+        }
+    },
+    "watsonx_generation": {
+        "code": 200,
+        "json": {
+            "model_id": "ibm/granite-3-8b-instruct",
+            "model_version": "1.1.0",
+            "created_at": "2025-01-24T20:51:59.520Z",
+            "results": [
+                {
+                    "generated_text": "This is a test generation. :)",
+                    "generated_token_count": 32,
+                    "input_token_count": 6,
+                    "stop_reason": "eos_token"
+                }
+            ]
+        }
+    }
+}
\ No newline at end of file
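
Reviewer note: the sketch below is not part of the diff. It replays the two-step REST flow the rewritten generator implements (IAM token exchange, then text generation), using only the endpoint paths and payload fields that appear above. The environment variable values and the model id are placeholders, and error handling is omitted for brevity.

```python
# Standalone sketch of the watsonx.ai REST flow implemented by this PR.
# Assumes WATSONX_TOKEN, WATSONX_URL, and WATSONX_PROJECTID are exported;
# values shown to the API here are placeholders, not part of the diff.
import os
import requests

api_key = os.environ["WATSONX_TOKEN"]
base_url = os.environ["WATSONX_URL"]  # region-specific service instance URL

# Step 1: exchange the API key for a bearer token (mirrors _set_bearer_token).
iam = requests.post(
    "https://iam.cloud.ibm.com/identity/token",
    headers={
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "application/json",
    },
    data="grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey=" + api_key,
)
bearer_token = "Bearer " + iam.json()["access_token"]

# Step 2: project-scoped generation (mirrors _generate_with_project).
response = requests.post(
    base_url + "/ml/v1/text/generation?version=2023-05-29",
    headers={"Authorization": bearer_token, "Content-Type": "application/json"},
    json={
        "input": "What is this?",
        "parameters": {
            "decoding_method": "greedy",
            "max_new_tokens": 900,
            "min_new_tokens": 0,
            "repetition_penalty": 1,
        },
        "model_id": "ibm/granite-3-8b-instruct",  # placeholder model id
        "project_id": os.environ["WATSONX_PROJECTID"],
    },
)
print(response.json()["results"][0]["generated_text"])
```

The deployment path differs only in the URL (`/ml/v1/deployments/<id>/text/generation?version=2021-05-01`) and in sending the prompt under `parameters.prompt_variables`, as `_generate_with_deployment` shows; with the same variables exported, the generator should also be reachable through garak's usual model selection (e.g. `--model_type watsonx`).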