From 0b5ea8b85ea4db218e30ee4472e030e2dfb67350 Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Wed, 4 Dec 2024 10:55:06 -0800
Subject: [PATCH 1/7] Update dependencies to bring the validator up to date.
 Download and cache models in post-install. Add back 'fix' functionality.

---
 pyproject.toml            |  14 ++--
 validator/main.py         | 144 +++++++++++++++++++++++++++++++++-----
 validator/post-install.py |   4 +-
 3 files changed, 138 insertions(+), 24 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d284282..29e4769 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,11 +7,17 @@ authors = [
 ]
 license = {file = "LICENSE"}
 readme = "README.md"
-requires-python = ">= 3.8.1"
+requires-python = ">= 3.9"
 dependencies = [
-    "guardrails-ai>=0.4.0",
-    "transformers>=4.40.2",
-    "tensorflow>=2.18.0"
+    "guardrails-ai>=0.5.15",
+    "transformers>=4.37.0",
+    "tf-keras",
+    "spacy[transformers]>=3.6.0",
+    "cached_path>=1.6.0",
+    "sentencepiece",
+    "tensorflow>=2.16.0",
+    #"tensorflow-macos>=2.16.0 ; platform_system == 'Darwin'",
+    #"tensorflow-metal>=1.0.0 ; platform_system == 'Darwin'"
 ]
 
 [project.optional-dependencies]
diff --git a/validator/main.py b/validator/main.py
index 7253fc6..9fd7692 100644
--- a/validator/main.py
+++ b/validator/main.py
@@ -1,5 +1,9 @@
-from typing import Any, Callable, Dict, Optional
+import os
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Tuple, Optional
 
+import spacy
+from cached_path import cached_path
 from guardrails.validator_base import (
     FailResult,
     PassResult,
@@ -7,8 +11,17 @@
     Validator,
     register_validator,
 )
+from guardrails.types import OnFailAction
+from transformers import pipeline, TFAutoModel, AutoTokenizer
+
+
+S3_SPACY_NLP_MODEL_PATH = "s3://guardrails-ai-public-read-only/bias_check/dbias_0_1_5_en_pipeline.tar.gz"
+
+MODEL_CACHE_DIR = os.environ.get(
+    "GUARDRAILS_MODEL_CACHE_PATH_OVERRIDE",
+    Path.home() / ".cache" / "guardrails_cache"
+)
 
-from transformers import pipeline
 
 @register_validator(name="guardrails/bias_check", data_type="string")
 class BiasCheck(Validator):
@@ -24,7 +37,9 @@ class BiasCheck(Validator):
 
     Args:
         threshold (float): Higher is more likely to allow bias. Lower is more sensitive and more likely to flag biased messages.
-        on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `filter`, `noop`, or `exception`. Otherwise, must be a function that is called when the validator fails.
+        on_fail (Callable): The policy to enact when a validator fails. If `str`,
+            must be one of `filter`, `noop`, `fix`, or `exception`. Otherwise, must be a
+            function that is called when the validator fails.
     """ # noqa
 
     def __init__(
@@ -33,51 +48,144 @@ def __init__(
         on_fail: Optional[Callable] = None,
     ):
         super().__init__(on_fail=on_fail)
-        valid_on_fail_operations = {"filter", "noop", "exception"}
+        valid_on_fail_operations = {"filter", "fix", "noop", "exception"}
         if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations:
             raise Exception(
                 f"on_fail value ({on_fail}) not in list of allowable operations: {valid_on_fail_operations}"
             )
        self.threshold = threshold
-        self.model = pipeline(
+
+        classification_model, bias_words_detector, masked_word_model = \
+            BiasCheck.prefetch_models()
+
+        # There are some spurious loading complaints with TFDistilBert models.
+        # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7
+        self.classification_model = classification_model
+
+        # These are used for the 'fix' operation:
+        # In the original DBias implementation, all of the detected bias words would be
+        # substituted with [MASK] and then a brute-force substitution would be applied.
+        self.bias_words_detector = bias_words_detector
+        self.unmasker = masked_word_model
+
+    @staticmethod
+    def prefetch_models():
+        print("Downloading bias classification model:")
+        # Despite passing `from_tf=True` into the pipeline, some versions of
+        # transformers will complain about loading from TF models. Using this wonky
+        # combination of TFAutoModel and tokenizer, we can get it to load.
+        classification_pipe = pipeline(
             'text-classification',
             model="d4data/bias-detection-model",
+            tokenizer="d4data/bias-detection-model",
         )
-        # There are some spurious loading complaints with TFDistilBert models.
-        # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7
+        print("Downloading bias words detector:")
+        bias_words_detector = spacy.load(cached_path(
+            f"{S3_SPACY_NLP_MODEL_PATH}!dbias_0_1_5_en_pipeline",
+            cache_dir=MODEL_CACHE_DIR, extract_archive=True
+        ))
+        print("Downloading masked word model:")
+        masked_word_model = pipeline('fill-mask', model='bert-base-cased')
+        return classification_pipe, bias_words_detector, masked_word_model
 
-    def validate(self, value: Any, metadata: Optional[Dict] = None) -> ValidationResult:
+    def validate(self, value: List[str], metadata: Optional[Dict] = None) -> ValidationResult:
         """Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc."""
         if isinstance(value, str):
             value = [value,] # Ensure we're always passing lists of strings into the classifier.
-        classified_examples = self.model(value)
+        scores = self._inference(value)
         passing_outputs = list()
         passing_scores = list()
         failing_outputs = list()
         failing_scores = list()
-        for text, prediction in zip(value, classified_examples):
-            if prediction['label'] == 'Biased':
-                score = prediction['score']
-            elif prediction['label'] == 'Non-biased':
-                score = -prediction['score'] # Note the negation!
-            else:
-                raise Exception(f"Got unexpected prediction label: {prediction['label']}")
+        all_outputs = list() # A list of (needs_fix, sentence) tuples.
+        for text, score in zip(value, scores):
             if score > self.threshold:
                 failing_outputs.append(text)
                 failing_scores.append(score)
             else:
                 passing_outputs.append(text)
                 passing_scores.append(score)
+            all_outputs.append((score > self.threshold, text))
         if failing_outputs:
             failure_message = "The original response contains potentially biased messages:\n"
             failure_message += "\n - ".join(failing_outputs)
             message_scores = [str(s) for s in failing_scores]
             failure_message += "\n (Message scores: {})".format(", ".join(message_scores))
-            # Do we need to call the on_fail_method here?
+            # Four paths: noop, exception, fix, filter.
+            # self.on_fail_method == NOOP or FILTER, return only passing outputs.
+            # EXCEPTION is handled farther up the stack, which leaves us only 'fix'.
+            if self.on_fail_method != OnFailAction.FIX:
+                fix_value = " ".join(passing_outputs)
+            else:
+                fix_value = ""
+                for needs_fix, text in all_outputs:
+                    if not needs_fix:
+                        fix_value += text + " "
+                    else:
+                        pass
             return FailResult(
                 error_message=failure_message,
-                fix_value=" ".join(passing_outputs),
+                fix_value=fix_value,
             )
         return PassResult()
+
+    # This normally will be called by _inference.
+    # Remote inference is unsupported for this model on account of the NER.
+    def _inference_local(self, sentences: List[str]) -> List[float]:
+        scores = list()
+        predictions = self.classification_model(sentences)
+        for pred in predictions:
+            if pred['label'] == 'Biased':
+                scores.append(pred['score'])
+            elif pred['label'] == 'Non-biased':
+                scores.append(-pred['score'])
+            else:
+                # This should never happen:
+                raise Exception("Unexpected prediction label: {}".format(pred['label']))
+        return scores
+
+    def fix_sentence(self, sentence: str) -> str:
+        # The original DBias implementation would brute-force all combinations of masks.
+        # We do a greedy search instead, picking the minimally charged option for each.
+        # Like the original, there's no guarantee of maintaining the pragmatics of the
+        # starting sentence, but to accomplish that we would need to train a new model.
+        start_sentence = sentence
+        starting_bias = self._inference_local([sentence,])[0]
+        if starting_bias < self.threshold:
+            pass # Should we raise an exception here? Starting under threshold?
+        charged_words = [t.text for t in self.bias_words_detector(sentence).ents]
+        for word_to_replace in charged_words:
+            for _ in range(0, start_sentence.count(word_to_replace)):
+                temp = start_sentence.replace(word_to_replace, "[MASK]", 1)
+                # Generate a bunch of candidate sentences:
+                candidate_sentences = [x['sequence'] for x in self.unmasker(temp)]
+                # Score them and take the best:
+                scores = self._inference_local(candidate_sentences)
+                best_score, best_text = argmin_pair(scores, candidate_sentences)
+                if best_score < self.threshold:
+                    return best_text
+                elif best_score < starting_bias:
+                    starting_bias = best_score
+                    start_sentence = temp
+        # We've tried changing everything and can't find a good unbiasing.
+        return ""
+
+
+def download_spacy_model():
+    # The '!dbias...' suffix tells cached_path to extract the archive and return the path to the extracted directory.
+    return cached_path(
+        f"{S3_SPACY_NLP_MODEL_PATH}!dbias_0_1_5_en_pipeline",
+        cache_dir=MODEL_CACHE_DIR, extract_archive=True
+    )
+
+
+def argmin_pair(scores, sentences):
+    min_score = float("inf")
+    min_text = ""
+    for score, text in zip(scores, sentences):
+        if score < min_score:
+            min_score = score
+            min_text = text
+    return min_score, min_text
\ No newline at end of file
diff --git a/validator/post-install.py b/validator/post-install.py
index f4879ff..dda3017 100644
--- a/validator/post-install.py
+++ b/validator/post-install.py
@@ -1,4 +1,4 @@
-from transformers import pipeline
+from validator import BiasCheck
 print("post-install starting...")
-_ = pipeline("text-classification", "d4data/bias-detection-model")
+BiasCheck.prefetch_models()
 print("post-install complete!")
\ No newline at end of file
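
A note on the scoring convention patch 1 adopts, as a standalone sketch (illustrative only, not part of any diff): the classifier emits a "Biased" or "Non-biased" label with a confidence, and the validator signs that confidence so a single threshold covers both labels.

    from transformers import pipeline

    # The same model and tokenizer the validator loads in prefetch_models().
    classifier = pipeline(
        "text-classification",
        model="d4data/bias-detection-model",
        tokenizer="d4data/bias-detection-model",
    )

    for pred in classifier(["The sun rises in the morning."]):
        # "Biased" keeps its positive confidence; "Non-biased" is negated.
        # A sentence passes whenever its signed score is <= the threshold (default 0.9).
        signed = pred["score"] if pred["label"] == "Biased" else -pred["score"]
        print(pred["label"], signed)
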
From 52d7f0ffd6ad0c7438aff62184ac6a3e42babf83 Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Fri, 6 Dec 2024 09:09:53 -0800
Subject: [PATCH 2/7] If the validator is passed a single sentence, it should
 return a single sentence, not a list.

---
 validator/main.py | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/validator/main.py b/validator/main.py
index 9fd7692..2c6a7fb 100644
--- a/validator/main.py
+++ b/validator/main.py
@@ -1,6 +1,7 @@
 import os
+import re
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Tuple, Optional
+from typing import Callable, Dict, List, Optional, Union
 
 import spacy
 from cached_path import cached_path
@@ -12,7 +13,7 @@
     register_validator,
 )
 from guardrails.types import OnFailAction
-from transformers import pipeline, TFAutoModel, AutoTokenizer
+from transformers import pipeline
 
 
 S3_SPACY_NLP_MODEL_PATH = "s3://guardrails-ai-public-read-only/bias_check/dbias_0_1_5_en_pipeline.tar.gz"
@@ -70,7 +71,6 @@ def __init__(
 
     @staticmethod
     def prefetch_models():
-        print("Downloading bias classification model:")
         # Despite passing `from_tf=True` into the pipeline, some versions of
         # transformers will complain about loading from TF models. Using this wonky
         # combination of TFAutoModel and tokenizer, we can get it to load.
@@ -79,18 +79,22 @@ def prefetch_models():
             model="d4data/bias-detection-model",
             tokenizer="d4data/bias-detection-model",
         )
-        print("Downloading bias words detector:")
         bias_words_detector = spacy.load(cached_path(
             f"{S3_SPACY_NLP_MODEL_PATH}!dbias_0_1_5_en_pipeline",
             cache_dir=MODEL_CACHE_DIR, extract_archive=True
         ))
-        print("Downloading masked word model:")
         masked_word_model = pipeline('fill-mask', model='bert-base-cased')
         return classification_pipe, bias_words_detector, masked_word_model
 
-    def validate(self, value: List[str], metadata: Optional[Dict] = None) -> ValidationResult:
+    def validate(
+        self,
+        value: Union[str, List[str]],
+        metadata: Optional[Dict] = None
+    ) -> ValidationResult:
         """Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc."""
+        single_sentence_passed = False
         if isinstance(value, str):
+            single_sentence_passed = True
             value = [value,] # Ensure we're always passing lists of strings into the classifier.
         scores = self._inference(value)
@@ -116,18 +120,18 @@ def validate(
             # Four paths: noop, exception, fix, filter.
             # self.on_fail_method == NOOP or FILTER, return only passing outputs.
             # EXCEPTION is handled farther up the stack, which leaves us only 'fix'.
-            if self.on_fail_method != OnFailAction.FIX:
-                fix_value = " ".join(passing_outputs)
+            if self.on_fail_descriptor != OnFailAction.FIX:
+                fix_value = passing_outputs
             else:
-                fix_value = ""
+                fix_value = list()
                 for needs_fix, text in all_outputs:
                     if not needs_fix:
-                        fix_value += text + " "
+                        fix_value.append(text)
                     else:
-                        pass
+                        fix_value.append(self.fix_sentence(text))
             return FailResult(
                 error_message=failure_message,
-                fix_value=fix_value,
+                fix_value=" ".join(fix_value) if single_sentence_passed else fix_value,
             )
         return PassResult()
 
@@ -147,20 +151,25 @@ def _inference_local(self, sentences: List[str]) -> List[float]:
         return scores
 
     def fix_sentence(self, sentence: str) -> str:
-        # The original DBias implementation would brute-force all combinations of masks.
-        # We do a greedy search instead, picking the minimally charged option for each.
-        # Like the original, there's no guarantee of maintaining the pragmatics of the
-        # starting sentence, but to accomplish that we would need to train a new model.
+        """The original DBias algorithm would brute-force all mask combinations, a
+        potentially O(2^n) operation. This performs a similar evaluation, but greedily
+        replaces words instead of trying all combinations. Like the original, it does
+        not guarantee that the semantics or pragmatics of the starting sentence are
+        preserved, but the two offer different theoretical guarantees about proximity to the original."""
         start_sentence = sentence
         starting_bias = self._inference_local([sentence,])[0]
         if starting_bias < self.threshold:
-            pass # Should we raise an exception here? Starting under threshold?
+            # Should we raise an exception here? Starting under threshold?
+            return start_sentence
         charged_words = [t.text for t in self.bias_words_detector(sentence).ents]
         for word_to_replace in charged_words:
             for _ in range(0, start_sentence.count(word_to_replace)):
                 temp = start_sentence.replace(word_to_replace, "[MASK]", 1)
                 # Generate a bunch of candidate sentences:
-                candidate_sentences = [x['sequence'] for x in self.unmasker(temp)]
+                candidate_sentences = list()
+                for x in self.unmasker(temp):
+                    if x['token_str'] not in charged_words:
+                        candidate_sentences.append(x['sequence'])
                 # Score them and take the best:
                 scores = self._inference_local(candidate_sentences)
                 best_score, best_text = argmin_pair(scores, candidate_sentences)
@@ -188,4 +197,4 @@ def argmin_pair(scores, sentences):
         if score < min_score:
             min_score = score
             min_text = text
-    return min_score, min_text
\ No newline at end of file
+    return min_score, min_text
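
For reference while reading `fix_sentence` above, a sketch of what the `fill-mask` pipeline (the validator's `unmasker`) returns: each candidate is a dict carrying the predicted token as both a vocabulary id (`token`) and a string (`token_str`), plus the completed `sequence`, which is why `token_str` is the right key to compare against `charged_words`. Illustrative only, not part of the diff:

    from transformers import pipeline

    # The same masked-word model patch 1 downloads for the 'fix' operation.
    unmasker = pipeline("fill-mask", model="bert-base-cased")

    for candidate in unmasker("The movie was [MASK]."):
        # Keys per candidate: score, token (vocab id), token_str, sequence.
        print(candidate["token_str"], "->", candidate["sequence"])
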
""" # noqa def __init__( @@ -49,7 +48,7 @@ def __init__( on_fail: Optional[Callable] = None, ): super().__init__(on_fail=on_fail) - valid_on_fail_operations = {"filter", "fix", "noop", "exception"} + valid_on_fail_operations = {"fix", "noop", "exception"} if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations: raise Exception( f"on_fail value ({on_fail}) not in list of allowable operations: {valid_on_fail_operations}" @@ -128,13 +127,27 @@ def validate( if not needs_fix: fix_value.append(text) else: - fix_value.append(self.fix_sentence(text)) + # The 'text' is a full paragraph, actually. + # Split it into sentences, evaluate each for bias, and join them + fix_value.append(self.fix_paragraph(text)) return FailResult( error_message=failure_message, fix_value=" ".join(fix_value) if single_sentence_passed else fix_value, ) return PassResult() + def fix_paragraph(self, text: str) -> str: + """Given a passage of text, split it into sentences, evaluate each for bias, + then recombine them and return a new paragraph. May not preserve whitespace + between sentences.""" + sentences = split_text_into_sentences(text, language='en') + scores = self._inference(sentences) + unbiased_sentences = list() + for score, sentence in zip(scores, sentences): + if score < self.threshold: + unbiased_sentences.append(sentence) + return " ".join(unbiased_sentences) + # This normally will be called by _inference. # Remote inference is unsupported for this model on account of the NER. def _inference_local(self, sentences: List[str]) -> List[float]: @@ -150,37 +163,6 @@ def _inference_local(self, sentences: List[str]) -> List[float]: raise Exception("Unexpected prediction label: {}".format(pred['label'])) return scores - def fix_sentence(self, sentence: str) -> str: - """The original DBias algorithm would brute-force, potentially O(2^n) operation. - This performs a similar evaluation, but greedily replaces words instead of - trying all combinations. Since the original did not preserve semantics or - pragmatics, these will approach something not dissimilar, but they have - different theoretical guarantees about proximity to the original.""" - start_sentence = sentence - starting_bias = self._inference_local([sentence,])[0] - if starting_bias < self.threshold: - # Should we raise an exception here? Starting under threshold? - return start_sentence - charged_words = [t.text for t in self.bias_words_detector(sentence).ents] - for word_to_replace in charged_words: - for _ in range(0, start_sentence.count(word_to_replace)): - temp = start_sentence.replace(word_to_replace, "[MASK]", 1) - # Generate a bunch of candidate sentences: - candidate_sentences = list() - for x in self.unmasker(temp): - if x['token'] not in charged_words: - candidate_sentences.append(x['sequence']) - # Score them and take the best: - scores = self._inference_local(candidate_sentences) - best_score, best_text = argmin_pair(scores, candidate_sentences) - if best_score < self.threshold: - return best_text - elif best_score < starting_bias: - starting_bias = best_score - start_sentence = temp - # We've tried changing everything and can't find a good unbiasing. - return "" - def download_spacy_model(): # The '!dbias...' tells cached_path to return a reference to an unmangled path. From b2afd009c4817ac202ba485fb16ca30c90a91427 Mon Sep 17 00:00:00 2001 From: Joseph Catrambone Date: Fri, 6 Dec 2024 12:12:55 -0800 Subject: [PATCH 4/7] Update validator to use a new fix behavior. Fix readme. Update deps to fix import issue. 
From b2afd009c4817ac202ba485fb16ca30c90a91427 Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Fri, 6 Dec 2024 12:12:55 -0800
Subject: [PATCH 4/7] Update validator to use a new fix behavior. Fix README.
 Update deps to fix import issue.

---
 README.md                 | 10 ++++--
 pyproject.toml            |  4 +--
 tests/test_validator.py   | 34 +++++++++++--------
 validator/main.py         | 71 +++++----------------------------------
 validator/post-install.py |  8 +++--
 5 files changed, 43 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index e0ebdc6..dfaef5d 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,11 @@ This validator can be used to ensure fairness of model output across various dem
 * Dependencies:
     - guardrails-ai>=0.5.0
-    - transformers>=4.40
+    - transformers>=4.37
+    - tf-keras
+    - sentencepiece
+    - tensorflow>=2.16
+    - sentence-splitter
 
 * Dev Dependencies:
     - pytest
@@ -50,5 +54,5 @@ guard = Guard().use(
     BiasCheck(threshold=0.9, on_fail="exception")
 )
 
-guard.validate("The movie was great!") # Validator passes
-guard.validate("Why do men always think the movie was great?") # Validator fails
\ No newline at end of file
+guard.validate("The movie was great.") # Validator passes
+guard.validate("Why do men think that movie was great?") # Validator fails
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b1946bf..46c29eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,10 +12,8 @@ dependencies = [
     "guardrails-ai>=0.5.15",
     "transformers>=4.37.0",
     "tf-keras",
-    "spacy[transformers]>=3.6.0",
-    "cached_path>=1.6.0",
     "sentencepiece",
-    "tensorflow>=2.16.0",
+    "tensorflow>=2.16.0", # Required for the dbias model, but not as a direct dependency.
     "sentence-splitter>=1.4"
 ]
 
diff --git a/tests/test_validator.py b/tests/test_validator.py
index 92e6115..ace4601 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -3,20 +3,26 @@
 from validator import BiasCheck
 
-# We use 'exception' as the validator's fail action,
-# so we expect failures to always raise an Exception
-# Learn more about corrective actions here:
-# https://www.guardrailsai.com/docs/concepts/output/#%EF%B8%8F-specifying-corrective-actions
+
 def test_success_case():
-  validator = BiasCheck()
-  input_text = "The sun rises in the morning."
-  result = validator.validate(input_text, {})
-  assert isinstance(result, PassResult)
+    validator = BiasCheck()
+    input_text = "The sun rises in the morning."
+    result = validator.validate(input_text, {})
+    assert isinstance(result, PassResult)
+
 
 def test_failure_case():
-  validator = BiasCheck(threshold=0.5)
-  input_text = "The sun only rises for Humanists."
-  result = validator.validate(input_text, {})
-  assert isinstance(result, FailResult)
-  assert result.error_message == "The original response contains potentially biased messages:\nThe sun only rises for Humanists.\n (Message scores: 0.6546186208724976)"
-  assert result.fix_value == ""
+    validator = BiasCheck(threshold=0.0)
+    input_text = "Men these days don't care about my arbitrary and deletarious standards of gender. They only care about emotional honesty and participating in a productive, healthy society. smh"
+    result = validator.validate(input_text, {})
+    assert isinstance(result, FailResult)
+    assert result.error_message == "The original response contains potentially biased messages:\nMen these days don't care about my arbitrary and deletarious standards of gender. They only care about emotional honesty and participating in a productive, healthy society. smh\n (Message scores: 0.9477301836013794)"
+    assert result.fix_value == ""
+
+
+def test_sentence_fix():
+    v = BiasCheck(on_fail='fix', threshold=0.9)
+    input_text = "Men these days don't care about my arbitrary and deletarious standards of gender. They only care about emotional honesty and participating in a productive, healthy society. smh"
+    out = v.validate(input_text)
+    assert isinstance(out, FailResult)
+    assert out.fix_value == "Men these days don't care about my arbitrary and deletarious standards of gender."
diff --git a/validator/main.py b/validator/main.py
index ce71b32..ccdc10f 100644
--- a/validator/main.py
+++ b/validator/main.py
@@ -1,10 +1,5 @@
-import os
-import re
-from pathlib import Path
 from typing import Callable, Dict, List, Optional, Union
 
-import spacy
-from cached_path import cached_path
 from guardrails.validator_base import (
     FailResult,
     PassResult,
@@ -17,14 +12,6 @@
     register_validator,
 )
 from guardrails.types import OnFailAction
 from sentence_splitter import split_text_into_sentences
 from transformers import pipeline
 
 
-S3_SPACY_NLP_MODEL_PATH = "s3://guardrails-ai-public-read-only/bias_check/dbias_0_1_5_en_pipeline.tar.gz"
-
-MODEL_CACHE_DIR = os.environ.get(
-    "GUARDRAILS_MODEL_CACHE_PATH_OVERRIDE",
-    Path.home() / ".cache" / "guardrails_cache"
-)
-
-
 @register_validator(name="guardrails/bias_check", data_type="string")
 class BiasCheck(Validator):
@@ -45,7 +32,7 @@ class BiasCheck(Validator):
     def __init__(
         self,
         threshold: float = 0.9,
-        on_fail: Optional[Callable] = None,
+        on_fail: Optional[Union[str, Callable]] = None,
     ):
         super().__init__(on_fail=on_fail)
         valid_on_fail_operations = {"fix", "noop", "exception"}
@@ -55,35 +42,13 @@ def __init__(
             )
         self.threshold = threshold
 
-        classification_model, bias_words_detector, masked_word_model = \
-            BiasCheck.prefetch_models()
-
         # There are some spurious loading complaints with TFDistilBert models.
         # See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7
-        self.classification_model = classification_model
-
-        # These are used for the 'fix' operation:
-        # In the original DBias implementation, all of the detected bias words would be
-        # substituted with [MASK] and then a brute-force substitution would be applied.
-        self.bias_words_detector = bias_words_detector
-        self.unmasker = masked_word_model
-
-    @staticmethod
-    def prefetch_models():
-        # Despite passing `from_tf=True` into the pipeline, some versions of
-        # transformers will complain about loading from TF models. Using this wonky
-        # combination of TFAutoModel and tokenizer, we can get it to load.
-        classification_pipe = pipeline(
+        self.classification_model = pipeline(
             'text-classification',
             model="d4data/bias-detection-model",
             tokenizer="d4data/bias-detection-model",
         )
-        bias_words_detector = spacy.load(cached_path(
-            f"{S3_SPACY_NLP_MODEL_PATH}!dbias_0_1_5_en_pipeline",
-            cache_dir=MODEL_CACHE_DIR, extract_archive=True
-        ))
-        masked_word_model = pipeline('fill-mask', model='bert-base-cased')
-        return classification_pipe, bias_words_detector, masked_word_model
 
     def validate(
         self,
@@ -116,9 +81,10 @@ def validate(
             failure_message += "\n - ".join(failing_outputs)
             message_scores = [str(s) for s in failing_scores]
             failure_message += "\n (Message scores: {})".format(", ".join(message_scores))
-            # Four paths: noop, exception, fix, filter.
-            # self.on_fail_method == NOOP or FILTER, return only passing outputs.
-            # EXCEPTION is handled farther up the stack, which leaves us only 'fix'.
+            # Three paths: noop, exception, fix.
+            # If on_fail == NOOP, return only the passing passages.
+            # If on_fail == FIX, split passages into sentences and drop the biased ones.
+            # EXCEPTION is handled farther up the stack.
             if self.on_fail_descriptor != OnFailAction.FIX:
                 fix_value = passing_outputs
             else:
                 fix_value = list()
                 for needs_fix, text in all_outputs:
                     if not needs_fix:
                         fix_value.append(text)
                     else:
-                        # The 'text' is actually a full paragraph.
-                        # Split it into sentences, evaluate each for bias, and rejoin them.
-                        fix_value.append(self.fix_paragraph(text))
+                        # The 'text' is a full document, passage, or paragraph.
+                        fix_value.append(self.fix_passage(text))
             return FailResult(
                 error_message=failure_message,
                 fix_value=" ".join(fix_value) if single_sentence_passed else fix_value,
             )
         return PassResult()
 
-    def fix_paragraph(self, text: str) -> str:
+    def fix_passage(self, text: str) -> str:
         """Given a passage of text, split it into sentences, evaluate each for bias,
         then recombine them and return a new paragraph. May not preserve whitespace
         between sentences."""
         sentences = split_text_into_sentences(text, language='en')
         scores = self._inference(sentences)
         unbiased_sentences = list()
         for score, sentence in zip(scores, sentences):
             if score < self.threshold:
                 unbiased_sentences.append(sentence)
         return " ".join(unbiased_sentences)
 
@@ -162,21 +127,3 @@ def _inference_local(self, sentences: List[str]) -> List[float]:
             raise Exception("Unexpected prediction label: {}".format(pred['label']))
         return scores
-
-
-def download_spacy_model():
-    # The '!dbias...' suffix tells cached_path to extract the archive and return the path to the extracted directory.
-    return cached_path(
-        f"{S3_SPACY_NLP_MODEL_PATH}!dbias_0_1_5_en_pipeline",
-        cache_dir=MODEL_CACHE_DIR, extract_archive=True
-    )
-
-
-def argmin_pair(scores, sentences):
-    min_score = float("inf")
-    min_text = ""
-    for score, text in zip(scores, sentences):
-        if score < min_score:
-            min_score = score
-            min_text = text
-    return min_score, min_text
diff --git a/validator/post-install.py b/validator/post-install.py
index dda3017..26ec5ef 100644
--- a/validator/post-install.py
+++ b/validator/post-install.py
@@ -1,4 +1,8 @@
-from validator import BiasCheck
+from transformers import pipeline
 print("post-install starting...")
-BiasCheck.prefetch_models()
+_ = pipeline(
+    'text-classification',
+    model="d4data/bias-detection-model",
+    tokenizer="d4data/bias-detection-model",
+)
 print("post-install complete!")
\ No newline at end of file
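
With patch 4 in place, the validator's end-to-end behavior looks roughly like the following sketch, which mirrors the README and tests (illustrative; exact output depends on the model's scores, and `validated_output` is assumed to be the standard Guardrails outcome field):

    from guardrails import Guard
    from validator import BiasCheck

    # With on_fail="fix", biased sentences are dropped from a passage rather
    # than rewritten.
    guard = Guard().use(BiasCheck(threshold=0.9, on_fail="fix"))
    outcome = guard.validate(
        "The movie was great. Why do men think that movie was great?"
    )
    print(outcome.validated_output) # expect the biased sentence to be dropped
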
From 6da73fc4ecee3a7b7316c1fb286735c76677cedb Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Fri, 6 Dec 2024 12:18:48 -0800
Subject: [PATCH 5/7] Also push the new version to PyPI on tag.

---
 .github/workflows/publish_pypi.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 .github/workflows/publish_pypi.yml

diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
new file mode 100644
index 0000000..5bbc17c
--- /dev/null
+++ b/.github/workflows/publish_pypi.yml
@@ -0,0 +1,18 @@
+name: Publish to Guardrails Hub
+
+on:
+  workflow_dispatch:
+  push:
+    # Publish when new releases are tagged.
+    tags:
+      - '*'
+
+jobs:
+  setup:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Build & Deploy
+        uses: guardrails-ai/guardrails/.github/actions/validator_pypi_publish@main
+        with:
+          guardrails_token: ${{ secrets.GR_GUARDRAILS_TOKEN }}
+          validator_id: guardrails/bias_check
\ No newline at end of file

From 8431686e04ab5f341b5ca73128d191cb9666b06e Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Fri, 6 Dec 2024 12:26:47 -0800
Subject: [PATCH 6/7] Linting.

---
 validator/main.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/validator/main.py b/validator/main.py
index ccdc10f..051238f 100644
--- a/validator/main.py
+++ b/validator/main.py
@@ -34,7 +34,7 @@ def __init__(
         threshold: float = 0.9,
         on_fail: Optional[Union[str, Callable]] = None,
     ):
-        super().__init__(on_fail=on_fail)
+        super().__init__(on_fail=on_fail) # type: ignore
         valid_on_fail_operations = {"fix", "noop", "exception"}
         if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations:
             raise Exception(
@@ -115,15 +115,17 @@ def fix_passage(self, text: str) -> str:
     # This normally will be called by _inference.
     # Remote inference is unsupported for this model on account of the NER.
-    def _inference_local(self, sentences: List[str]) -> List[float]:
+    def _inference_local(self, sentences: List[str]) -> List[float]: # type: ignore
         scores = list()
         predictions = self.classification_model(sentences)
         for pred in predictions:
-            if pred['label'] == 'Biased':
-                scores.append(pred['score'])
-            elif pred['label'] == 'Non-biased':
-                scores.append(-pred['score'])
+            label = pred['label'] # type: ignore
+            score = pred['score'] # type: ignore
+            if label == 'Biased':
+                scores.append(score)
+            elif label == 'Non-biased':
+                scores.append(-score)
             else:
                 # This should never happen:
-                raise Exception("Unexpected prediction label: {}".format(pred['label']))
+                raise Exception("Unexpected prediction label: {}".format(label))
         return scores

From 680a8f8a4144da04efcc62de79b93013d035a575 Mon Sep 17 00:00:00 2001
From: Joseph Catrambone
Date: Fri, 6 Dec 2024 12:58:49 -0800
Subject: [PATCH 7/7] Version bump.

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 46c29eb..96662b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "bias_check"
-version = "0.2.0"
+version = "0.3.0"
 description = "Wrapping a bias check model into a validator."
 authors = [
     {name = "Guardrails AI", email = "contact@guardrailsai.com"}