Update validator, fixing a handful of imports that were incompatible with new guardrails. #1

Merged · 2 commits · Oct 31, 2024
10 changes: 5 additions & 5 deletions README.md
@@ -16,16 +16,16 @@ This validator can be used to ensure fairness of model output across various demographic groups
## Requirements

* Dependencies:
-    - guardrails-ai>=0.4.0
-    - dbias>=0.1.0
+    - guardrails-ai>=0.5.0
+    - transformers>=4.40

* Dev Dependencies:
    - pytest
    - pyright
    - ruff

* Foundation model access keys:
-    - Dependent on the use case (rephrase if unclear)
+    - None (basic HuggingFace hub access)


# Installation
@@ -46,8 +46,8 @@ from guardrails.hub import BiasCheck
from guardrails import Guard

# Setup Guard
-guard = Guard.use(
-    BiasCheck()
+guard = Guard().use(
+    BiasCheck(threshold=0.9, on_fail="exception")
)

guard.validate("The movie was great!") # Validator passes
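With `on_fail="exception"`, a failing validation raises instead of returning a result. A minimal sketch of both paths, reusing the sentences from this PR's tests; the threshold is lowered to 0.5 so the biased test sentence (which the model scores around 0.65) actually trips the check:

```python
from guardrails import Guard
from guardrails.hub import BiasCheck

# Threshold 0.5 instead of the README's 0.9, so the ~0.65-scoring
# biased sentence from the test suite is flagged.
guard = Guard().use(
    BiasCheck(threshold=0.5, on_fail="exception")
)

guard.validate("The sun rises in the morning.")  # passes, no exception

try:
    guard.validate("The sun only rises for Humanists.")  # flagged as biased
except Exception as e:
    print(e)  # the validator's failure message, listing flagged sentences
```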
9 changes: 5 additions & 4 deletions pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "validator-template"
version = "0.0.0"
description = "Template repo for Guardrails Hub validators."
name = "bias_check"
version = "0.2.0"
description = "Wrapping a bias check model into a validator."
authors = [
{name = "Guardrails AI", email = "[email protected]"}
]
@@ -10,7 +10,8 @@ readme = "README.md"
requires-python = ">= 3.8.1"
dependencies = [
    "guardrails-ai>=0.4.0",
-    "Dbias"
+    "transformers>=4.40.2",
+    "tensorflow>=2.18.0"
]

[project.optional-dependencies]
19 changes: 8 additions & 11 deletions tests/test_validator.py
@@ -1,25 +1,22 @@
# to run these, run
# make tests

-from guardrails import Guard
+import pytest
-from validator import BiasCheck
from guardrails.validator_base import FailResult, PassResult
+
+from validator import BiasCheck

# We use 'exception' as the validator's fail action,
# so we expect failures to always raise an Exception
# Learn more about corrective actions here:
# https://www.guardrailsai.com/docs/concepts/output/#%EF%B8%8F-specifying-corrective-actions
-def test_success_case(self):
-    validator = BiasCheck(debias_strength=0.5)
+def test_success_case():
+    validator = BiasCheck()
    input_text = "The sun rises in the morning."
    result = validator.validate(input_text, {})
    assert isinstance(result, PassResult)

-def test_failure_case(self):
-    validator = BiasCheck(debias_strength=0.5)
+def test_failure_case():
+    validator = BiasCheck(threshold=0.5)
    input_text = "The sun only rises for Humanists."
    result = validator.validate(input_text, {})
    assert isinstance(result, FailResult)
-    assert result.error_message == "The original response contains potential biases that are now addressed."
-    assert result.fix_value == "The sun rises for everyone."
+    assert result.error_message == "The original response contains potentially biased messages:\nThe sun only rises for Humanists.\n (Message scores: 0.6546186208724976)"
+    assert result.fix_value == ""
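The comment block above mentions the 'exception' fail action, but the updated tests call `validator.validate()` directly and assert on result types. A hypothetical companion test (not part of this PR) could exercise the exception path end to end through a `Guard`:

```python
import pytest
from guardrails import Guard

from validator import BiasCheck


def test_exception_fail_action():
    # Hypothetical extra test: route validation through a Guard so the
    # 'exception' on_fail policy is actually exercised.
    guard = Guard().use(BiasCheck(threshold=0.5, on_fail="exception"))
    guard.validate("The sun rises in the morning.")  # should not raise
    with pytest.raises(Exception):
        guard.validate("The sun only rises for Humanists.")
```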
4 changes: 2 additions & 2 deletions validator/__init__.py
@@ -1,3 +1,3 @@
-from .main import ValidatorTemplate
+from .main import BiasCheck

-__all__ = ["ValidatorTemplate"]
+__all__ = ["BiasCheck"]
60 changes: 48 additions & 12 deletions validator/main.py
@@ -8,8 +8,7 @@
    register_validator,
)

-import Dbias
-from Dbias import text_debiasing
+from transformers import pipeline

@register_validator(name="guardrails/bias_check", data_type="string")
class BiasCheck(Validator):
@@ -24,24 +23,61 @@ class BiasCheck(Validator):
    | Programmatic fix | The debiased text if bias is detected |

    Args:
-        debias_strength (float): The strength of the bias to apply, ranging from 0 to 1.
-        on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `reask`, `fix`, `filter`, `refrain`, `noop`, `exception` or `fix_reask`. Otherwise, must be a function that is called when the validator fails.
+        threshold (float): Higher is more likely to allow bias. Lower is more sensitive and more likely to flag biased messages.
+        on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `filter`, `noop`, or `exception`. Otherwise, must be a function that is called when the validator fails.
    """  # noqa

    def __init__(
        self,
-        debias_strength: float = 0.5,
+        threshold: float = 0.9,
        on_fail: Optional[Callable] = None,
    ):
-        super().__init__(on_fail=on_fail, debias_strength=debias_strength)
-        self.debias_strength = debias_strength
+        super().__init__(on_fail=on_fail)
+        valid_on_fail_operations = {"filter", "noop", "exception"}
+        if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations:
+            raise Exception(
+                f"on_fail value ({on_fail}) not in list of allowable operations: {valid_on_fail_operations}"
+            )
+        self.threshold = threshold
+        self.model = pipeline(
+            'text-classification',
+            model="d4data/bias-detection-model",
+        )
+        # There are some spurious loading complaints with TFDistilBert models.
+        # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7
-    def validate(self, value: Any, metadata: Dict = {}) -> ValidationResult:
+    def validate(self, value: Any, metadata: Optional[Dict] = None) -> ValidationResult:
        """Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc."""
-        debiased_value = Dbias.text_debiasing.debias_text(value, strength=self.debias_strength)
-        if value != debiased_value:
+        if isinstance(value, str):
+            value = [value,]  # Ensure we're always passing lists of strings into the classifier.
+
+        classified_examples = self.model(value)
+        passing_outputs = list()
+        passing_scores = list()
+        failing_outputs = list()
+        failing_scores = list()
+        for text, prediction in zip(value, classified_examples):
+            if prediction['label'] == 'Biased':
+                score = prediction['score']
+            elif prediction['label'] == 'Non-biased':
+                score = -prediction['score']  # Note the negation!
+            else:
+                raise Exception(f"Got unexpected prediction label: {prediction['label']}")
+            if score > self.threshold:
+                failing_outputs.append(text)
+                failing_scores.append(score)
+            else:
+                passing_outputs.append(text)
+                passing_scores.append(score)
+
+        if failing_outputs:
+            failure_message = "The original response contains potentially biased messages:\n"
+            failure_message += "\n - ".join(failing_outputs)
+            message_scores = [str(s) for s in failing_scores]
+            failure_message += "\n (Message scores: {})".format(", ".join(message_scores))
+            # Do we need to call the on_fail_method here?
            return FailResult(
-                error_message="The original response contains potential biases that are now addressed.",
-                fix_value=debiased_value,
+                error_message=failure_message,
+                fix_value=" ".join(passing_outputs),
            )
        return PassResult()
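For intuition about the scoring logic above: the HuggingFace pipeline returns one label ('Biased' or 'Non-biased') with a confidence per input, and negating the 'Non-biased' confidence folds both labels onto a single signed scale, roughly -1.0 (confidently non-biased) to +1.0 (confidently biased), which `threshold` then cuts. A sketch of inspecting the raw classifier output; the first score comes from the updated test, the second is illustrative:

```python
from transformers import pipeline

# The same checkpoint the validator loads in __init__.
classifier = pipeline("text-classification", model="d4data/bias-detection-model")

print(classifier(["The sun only rises for Humanists."]))
# [{'label': 'Biased', 'score': 0.6546...}] -> signed score +0.65; fails for threshold < 0.65

print(classifier(["The sun rises in the morning."]))
# e.g. [{'label': 'Non-biased', 'score': 0.97...}] -> signed score -0.97; passes any threshold
```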
6 changes: 3 additions & 3 deletions validator/post-install.py
@@ -1,4 +1,4 @@
+from transformers import pipeline
print("post-install starting...")
-print("This is where you would do things like download nltk tokenizers or login to the HuggingFace hub...")
-print("post-install complete!")
-# If you don't have anything to add here you should delete this file.
+_ = pipeline("text-classification", "d4data/bias-detection-model")
+print("post-install complete!")