From 43a917c4ec7ae200222e2192d67a9779422d0172 Mon Sep 17 00:00:00 2001 From: Joseph Catrambone Date: Wed, 30 Oct 2024 17:16:59 -0700 Subject: [PATCH 1/2] Update validator, fixing a handful of imports that were incompatible with new guardrails. Had to pull out the word-by-word fixes in favor of more general exceptions. Updated doc and fixed descriptions. --- README.md | 10 +++---- pyproject.toml | 9 +++--- tests/test_validator.py | 19 ++++++------- validator/__init__.py | 4 +-- validator/main.py | 60 +++++++++++++++++++++++++++++++-------- validator/post-install.py | 6 ++-- 6 files changed, 71 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index d9fcf34..e0ebdc6 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ This validator can be used to ensure fairness of model output across various dem ## Requirements * Dependencies: - - guardrails-ai>=0.4.0 - - dbias>=0.1.0 + - guardrails-ai>=0.5.0 + - transformers>=4.40 * Dev Dependencies: - pytest @@ -25,7 +25,7 @@ This validator can be used to ensure fairness of model output across various dem - ruff * Foundation model access keys: - - Dependent on the use case (rephrase if unclear) + - None (basic HuggingFace hub access) # Installation @@ -46,8 +46,8 @@ from guardrails.hub import BiasCheck from guardrails import Guard # Setup Guard -guard = Guard.use( - BiasCheck() +guard = Guard().use( + BiasCheck(threshold=0.9, on_fail="exception") ) guard.validate("The movie was great!") # Validator passes diff --git a/pyproject.toml b/pyproject.toml index 7722402..d284282 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] -name = "validator-template" -version = "0.0.0" -description = "Template repo for Guardrails Hub validators." +name = "bias_check" +version = "0.2.0" +description = "Wrapping a bias check model into a validator." authors = [ {name = "Guardrails AI", email = "contact@guardrailsai.com"} ] @@ -10,7 +10,8 @@ readme = "README.md" requires-python = ">= 3.8.1" dependencies = [ "guardrails-ai>=0.4.0", - "Dbias" + "transformers>=4.40.2", + "tensorflow>=2.18.0" ] [project.optional-dependencies] diff --git a/tests/test_validator.py b/tests/test_validator.py index df965fb..92e6115 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,25 +1,22 @@ -# to run these, run -# make tests -from guardrails import Guard -import pytest -from validator import BiasCheck from guardrails.validator_base import FailResult, PassResult +from validator import BiasCheck + # We use 'exception' as the validator's fail action, # so we expect failures to always raise an Exception # Learn more about corrective actions here: # https://www.guardrailsai.com/docs/concepts/output/#%EF%B8%8F-specifying-corrective-actions -def test_success_case(self): - validator = BiasCheck(debias_strength=0.5) +def test_success_case(): + validator = BiasCheck() input_text = "The sun rises in the morning." result = validator.validate(input_text, {}) assert isinstance(result, PassResult) -def test_failure_case(self): - validator = BiasCheck(debias_strength=0.5) +def test_failure_case(): + validator = BiasCheck(threshold=0.5) input_text = "The sun only rises for Humanists." result = validator.validate(input_text, {}) assert isinstance(result, FailResult) - assert result.error_message == "The original response contains potential biases that are now addressed." - assert result.fix_value == "The sun rises for everyone." 
\ No newline at end of file + assert result.error_message == "The original response contains potentially biased messages:\nThe sun only rises for Humanists.\n (Message scores: 0.6546186208724976)" + assert result.fix_value == "" \ No newline at end of file diff --git a/validator/__init__.py b/validator/__init__.py index 72a2623..7a0a40d 100644 --- a/validator/__init__.py +++ b/validator/__init__.py @@ -1,3 +1,3 @@ -from .main import ValidatorTemplate +from .main import BiasCheck -__all__ = ["ValidatorTemplate"] +__all__ = ["BiasCheck"] diff --git a/validator/main.py b/validator/main.py index a758059..ab14baf 100644 --- a/validator/main.py +++ b/validator/main.py @@ -8,8 +8,7 @@ register_validator, ) -import Dbias -from Dbias import text_debiasing +from transformers import pipeline @register_validator(name="guardrails/bias_check", data_type="string") class BiasCheck(Validator): @@ -24,24 +23,61 @@ class BiasCheck(Validator): | Programmatic fix | The debiased text if bias is detected | Args: - debias_strength (float): The strength of the bias to apply, ranging from 0 to 1. - on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `reask`, `fix`, `filter`, `refrain`, `noop`, `exception` or `fix_reask`. Otherwise, must be a function that is called when the validator fails. + threshold (float): Higher is more likely to allow bias. Lower is more sensitive and more likely to flag biased messages. + on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `filter`, `noop`, or `exception`. Otherwise, must be a function that is called when the validator fails. """ # noqa def __init__( self, - debias_strength: float = 0.5, + threshold: float = 0.9, on_fail: Optional[Callable] = None, ): - super().__init__(on_fail=on_fail, debias_strength=debias_strength) - self.debias_strength = debias_strength + super().__init__(on_fail=on_fail) + valid_on_fail_operations = {"filter", "noop", "exception"} + if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations: + raise Exception( + f"on_fail value ({on_fail}) not in list of allowable operations: {valid_on_fail_operations}" + ) + self.threshold = threshold + self.model = pipeline( + 'text-classification', + model="d4data/bias-detection-model", + ) + # There are some spurious loading complaints with TFDistilBert models. + # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7 - def validate(self, value: Any, metadata: Dict = {}) -> ValidationResult: + def validate(self, value: Any, metadata: Dict = None) -> ValidationResult: """Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc.""" - debiased_value = Dbias.text_debiasing.debias_text(value, strength=self.debias_strength) - if value != debiased_value: + if isinstance(value, str): + value = [value,] # Ensure we're always passing lists of strings into the classifier. + + classified_examples = self.model(value) + passing_outputs = list() + passing_scores = list() + failing_outputs = list() + failing_scores = list() + for text, prediction in zip(value, classified_examples): + if prediction['label'] == 'Biased': + score = prediction['score'] + elif prediction['label'] == 'Non-biased': + score = -prediction['score'] # Note the negation! 
+ else: + raise Exception(f"Got unexpected prediction label: {prediction['label']}") + if score > self.threshold: + failing_outputs.append(text) + failing_scores.append(score) + else: + passing_outputs.append(text) + passing_scores.append(score) + + if failing_outputs: + failure_message = "The original response contains potentially biased messages:\n" + failure_message += "\n - ".join(failing_outputs) + message_scores = [str(s) for s in failing_scores] + failure_message += "\n (Message scores: {})".format(", ".join(message_scores)) + # Do we need to call the on_fail_method here? return FailResult( - error_message="The original response contains potential biases that are now addressed.", - fix_value=debiased_value, + error_message=failure_message, + fix_value=" ".join(passing_outputs), ) return PassResult() diff --git a/validator/post-install.py b/validator/post-install.py index 536a250..b6ae976 100644 --- a/validator/post-install.py +++ b/validator/post-install.py @@ -1,4 +1,4 @@ print("post-install starting...") -print("This is where you would do things like download nltk tokenizers or login to the HuggingFace hub...") -print("post-install complete!") -# If you don't have anything to add here you should delete this file. \ No newline at end of file +from transformers import pipeline +_ = pipeline("text-classification", "d4data/bias-detection-model") +print("post-install complete!") \ No newline at end of file From 05e89b67e39f3ee25d2c70bea00cef26fd628cfc Mon Sep 17 00:00:00 2001 From: Joseph Catrambone Date: Thu, 31 Oct 2024 10:59:34 -0700 Subject: [PATCH 2/2] Fix linting complaints. --- validator/main.py | 2 +- validator/post-install.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/validator/main.py b/validator/main.py index ab14baf..7253fc6 100644 --- a/validator/main.py +++ b/validator/main.py @@ -46,7 +46,7 @@ def __init__( # There are some spurious loading complaints with TFDistilBert models. # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7 - def validate(self, value: Any, metadata: Dict = None) -> ValidationResult: + def validate(self, value: Any, metadata: Optional[Dict] = None) -> ValidationResult: """Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc.""" if isinstance(value, str): value = [value,] # Ensure we're always passing lists of strings into the classifier. diff --git a/validator/post-install.py b/validator/post-install.py index b6ae976..f4879ff 100644 --- a/validator/post-install.py +++ b/validator/post-install.py @@ -1,4 +1,4 @@ -print("post-install starting...") from transformers import pipeline +print("post-install starting...") _ = pipeline("text-classification", "d4data/bias-detection-model") print("post-install complete!") \ No newline at end of file
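
A minimal sketch of how the new scoring in validate() works, kept outside the patch for reference. The model checkpoint and the 'Biased'/'Non-biased' labels are taken from the diff above; the example sentence and the ~0.65 score come from the updated unit test:

    from transformers import pipeline

    # Same checkpoint the validator loads in __init__ and warms in post-install.py.
    classifier = pipeline("text-classification", model="d4data/bias-detection-model")

    prediction = classifier(["The sun only rises for Humanists."])[0]

    # A 'Biased' prediction keeps its positive confidence; a 'Non-biased' one is
    # negated, so a single threshold separates the two label/score combinations.
    if prediction["label"] == "Biased":
        score = prediction["score"]
    else:
        score = -prediction["score"]

    print(score > 0.5)  # True in the unit test, which pins the score at ~0.6546.

The negation is the key design choice: a non-biased prediction always lands below the threshold no matter how confident it is, so only confident 'Biased' predictions fail validation.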
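
And a hedged sketch of the stricter on_fail handling when the validator is wired into a Guard, mirroring the README example in the diff. Imports assume a local checkout of this repo; a Hub install would use "from guardrails.hub import BiasCheck" instead, as the README shows. The failing sentence comes from the unit tests:

    from guardrails import Guard
    from validator import BiasCheck

    # on_fail must now be one of "filter", "noop", or "exception";
    # any other string raises at construction time.
    guard = Guard().use(BiasCheck(threshold=0.5, on_fail="exception"))

    guard.validate("The sun rises in the morning.")  # Passes.

    try:
        guard.validate("The sun only rises for Humanists.")
    except Exception as err:
        # The message lists the flagged sentences and their scores, e.g.
        # "The original response contains potentially biased messages: ..."
        print(err)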