From d09ffbb64ebc44a6c7868e4b5ba51fac334c8730 Mon Sep 17 00:00:00 2001 From: zalkikar Date: Mon, 23 Dec 2024 16:40:57 -0500 Subject: [PATCH] version 0.1.6 - Update README and AutoConfig support --- README.md | 38 ++++++++++++++++---------- mlm_bias.py | 38 ++++++++++++-------------- mlm_bias/__init__.py | 10 ++----- mlm_bias/__version__.py | 2 +- mlm_bias/bias_datasets.py | 2 +- mlm_bias/bias_results.py | 12 ++++---- mlm_bias/compute_mlm_bias.py | 35 +++++++++++++++--------- mlm_bias/compute_mlms_relative_bias.py | 4 +-- mlm_bias/utils/__init__.py | 10 +++---- mlm_bias/utils/experiments.py | 3 +- mlm_bias/utils/measures.py | 8 +----- mlm_bias/utils/preprocess.py | 3 -- setup.cfg | 2 +- setup.py | 2 +- 14 files changed, 85 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 12cdf72..26cfd2c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # Measuring Biases in Masked Language Models for PyTorch Transformers +![pypi - status](https://img.shields.io/badge/status-stable-brightgreen) +![pypi - downloads](https://img.shields.io/pypi/dm/mlm-bias) +![pypi - version](https://img.shields.io/pypi/v/mlm-bias) + Evaluate biases in pre-trained or re-trained masked language models (MLMs), such as those available through [HuggingFace](https://huggingface.co/models). This package computes bias scores across various bias types, using benchmark datasets like [CrowS-Pairs (CPS)](https://github.com/nyu-mll/crows-pairs) and [StereoSet (SS)](https://github.com/moinnadeem/StereoSet) (intrasentence), or custom datasets. You can also compare relative bias between two MLMs, or evaluate re-trained MLMs versus their pre-trained base models. ## Evaluation Methods @@ -64,18 +68,18 @@ python3 -m pip install . Run the `mlm_bias.py` example script: ```bash -mlm_bias.py [-h] --data {cps,ss,custom} --model MODEL [--model2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E] +mlm_bias.py [-h] --data {cps,ss,custom} --model_name_or_path MODEL [--model_name_or_path_2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E] ``` Example arguments: ```bash # Single MLM -python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30 -python3 mlm_bias.py --data ss --model bert-base-uncased --start 0 --end 30 +python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30 +python3 mlm_bias.py --data ss --model_name_or_path bert-base-uncased --start 0 --end 30 # Relative between two MLMs -python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30 --model2 bert-base-uncased +python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30 --model_name_or_path_2 bert-base-uncased ``` Output directories (default arguments): @@ -85,24 +89,28 @@ Output directories (default arguments): ### Example Output: +```bash +python3 mlm_bias.py --data cps --model_name_or_path bert-base-uncased --start 0 --end 30 +``` + ```bash Created output directory. 
Created Data Directory |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s Downloaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s Loaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s -Evaluating Bias [roberta-base] |██████████████████████████████| 30/30 [100%] in 2m 46s ETA: 0s -Saved bias results for roberta-base in ./eval/roberta-base +Evaluating Bias [bert-base-uncased] |██████████████████████████████| 30/30 [100%] in 1m 4s ETA: 0s +Saved bias results for bert-base-uncased in ./eval/bert-base-uncased Saved scores in ./eval/out.txt -------------------------------------------------- -MLM: roberta-base -CRR total = 50.0 -CRRA total = 53.333 -ΔP total = 56.667 -ΔPA total = 56.667 -AUL total = 76.667 -AULA total = 70.0 -SSS total = 53.333 -CSPS total = 63.33 +MLM: bert-base-uncased +CRR total = 26.667 +CRRA total = 30.0 +ΔP total = 46.667 +ΔPA total = 43.333 +AUL total = 36.667 +AULA total = 40.0 +SSS total = 30.0 +CSPS total = 33.333 ``` ## Custom Datasets diff --git a/mlm_bias.py b/mlm_bias.py index 2bff040..54cb5e0 100644 --- a/mlm_bias.py +++ b/mlm_bias.py @@ -19,8 +19,6 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): for measure in res['bias_scores'].keys(): out += (f"{measure.replace('d','Δ').upper()} "+ f"total = {round(res['bias_scores'][measure]['total'],3)}\n") - if len(out) >= 2 and "\n" in out[-2:]: - out = out[:-2] else: for measure in res['bias_scores'].keys(): out += (f"Measure = {measure.replace('d','Δ').upper()}") @@ -41,13 +39,13 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): 'Provide bias types in "/bias_types.txt" and biased sentences in "/dis.txt" and "/adv.txt" accordingly.'), choices=['cps','ss','custom']) - parser.add_argument('--model', + parser.add_argument('--model_name_or_path', type=str, required=True, help=('Model (MLM) to compute bias measures for. 
'+ 'Must be supported by HuggingFace.')) - parser.add_argument('--model2', + parser.add_argument('--model_name_or_path_2', type=str, required=False, default="", @@ -107,45 +105,45 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): output_dir = os.path.dirname(args.output) out = "" - model = args.model + model_name_or_path = args.model_name_or_path try: - model_bias = BiasMLM(args.model, dataset) + model_bias = BiasMLM(args.model_name_or_path, dataset) except Exception as ex: - raise Exception(f"Could not load {args.model}\n{ex}") + raise Exception(f"Could not load {args.model_name_or_path}\n{ex}") if args.measures == 'all': res1 = model_bias.evaluate(inc_attention=True) else: res1 = model_bias.evaluate(measures=args.measures, inc_attention=True) - output_dir_res1 = os.path.join(output_dir, res1['model_name']) + output_dir_res1 = os.path.join(output_dir, res1['model_name_or_path']) res1.save(output_dir_res1) - print(f"Saved bias results for {res1['model_name']} in {output_dir_res1}") - out = pretty_print(res1, out, m_name=res1['model_name']) + print(f"Saved bias results for {res1['model_name_or_path']} in {output_dir_res1}") + out = pretty_print(res1, out, m_name=res1['model_name_or_path']) res2 = None - if args.model2 != "": - model = args.model2 - model_bias = BiasMLM(args.model2, dataset) + if args.model_name_or_path_2 != "": + model = args.model_name_or_path_2 + model_bias = BiasMLM(args.model_name_or_path_2, dataset) if args.measures == 'all': res2 = model_bias.evaluate(inc_attention=True) else: res2 = model_bias.evaluate(measures=args.measures, inc_attention=True) - output_dir_res2 = os.path.join(output_dir, res2['model_name']) + output_dir_res2 = os.path.join(output_dir, res2['model_name_or_path']) res2.save(output_dir_res2) - print(f"Saved bias results for {res2['model_name']} in {output_dir_res2}") - out = pretty_print(res2, out, m_name=res2['model_name']) + print(f"Saved bias results for {res2['model_name_or_path']} in {output_dir_res2}") + out = pretty_print(res2, out, m_name=res2['model_name_or_path']) if res2 is not None: mlm_bias_relative = RelativeBiasMLMs(res1, res2) res3 = mlm_bias_relative.evaluate() - output_dir_res3 = os.path.join(output_dir, f"{res1['model_name']}_{res2['model_name']}") + output_dir_res3 = os.path.join(output_dir, f"{res1['model_name_or_path']}_{res2['model_name_or_path']}") res3.save(output_dir_res3) - print(f"Saved bias results for {res1['model_name']} relative to {res2['model_name']} in {output_dir_res3}") - out = pretty_print(res3, out, m_name=f"Relative {res1['model_name']}, {res2['model_name']}") + print(f"Saved bias results for {res1['model_name_or_path']} relative to {res2['model_name_or_path']} in {output_dir_res3}") + out = pretty_print(res3, out, m_name=f"Relative {res1['model_name_or_path']}, {res2['model_name_or_path']}") with open(args.output, 'w+', encoding='utf-8') as f: f.write(out) print(f"Saved scores in {args.output}") - console_out = pretty_print(res1, "", m_name=res1['model_name'], total_only=True) + console_out = pretty_print(res1, "", m_name=res1['model_name_or_path'], total_only=True) print(console_out) diff --git a/mlm_bias/__init__.py b/mlm_bias/__init__.py index 3c0f595..c17194e 100644 --- a/mlm_bias/__init__.py +++ b/mlm_bias/__init__.py @@ -1,8 +1,4 @@ -import mlm_bias.utils.experiments -import mlm_bias.utils.measures -import mlm_bias.utils.preprocess -import mlm_bias.utils.constants -from mlm_bias.compute_mlm_bias import BiasMLM +from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, 
BiasLineByLineDataset from mlm_bias.bias_results import BiasResults, RelativeBiasResults -from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs -from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, BiasLineByLineDataset \ No newline at end of file +from mlm_bias.compute_mlm_bias import BiasMLM +from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs \ No newline at end of file diff --git a/mlm_bias/__version__.py b/mlm_bias/__version__.py index de49d1f..32efefd 100644 --- a/mlm_bias/__version__.py +++ b/mlm_bias/__version__.py @@ -1 +1 @@ -__version__ = "0.1.5" \ No newline at end of file +__version__ = "0.1.6" \ No newline at end of file diff --git a/mlm_bias/bias_datasets.py b/mlm_bias/bias_datasets.py index 9d6f57f..b511cec 100644 --- a/mlm_bias/bias_datasets.py +++ b/mlm_bias/bias_datasets.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from typing import Optional -from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline +from mlm_bias.utils import preprocess_benchmark, preprocess_linebyline class BiasDataset(): def __init__(self, bias_types: list, dis: list, adv: list): diff --git a/mlm_bias/bias_results.py b/mlm_bias/bias_results.py index 4a81e69..8a711af 100644 --- a/mlm_bias/bias_results.py +++ b/mlm_bias/bias_results.py @@ -6,19 +6,19 @@ class BiasResults(): - model_name = None + model_name_or_path = None measures = None eval_results = None bias_scores = None def __call__( self, - model_name: str, + model_name_or_path: str, measures: list, eval_results: dict, bias_scores: dict, ): - self.model_name = model_name + self.model_name_or_path = model_name_or_path self.measures = measures self.eval_results = eval_results self.bias_scores = bias_scores @@ -28,7 +28,7 @@ def __getitem__(self, key): def save(self, file_path: Optional[str] = None): if file_path is None: - fp = f'{self.model_name}.bias' + fp = f'{self.model_name_or_path}.bias' else: fp = file_path with open(fp, 'wb') as f: @@ -37,13 +37,13 @@ def save(self, file_path: Optional[str] = None): def load(self, file_path: Optional[str] = None): if file_path is None: - fp = f'{self.model_name}.bias' + fp = f'{self.model_name_or_path}.bias' else: fp = file_path with open(fp, 'rb') as f: data = pickle.load(f) f.close() - self.model_name = data['model_name'] + self.model_name_or_path = data['model_name_or_path'] self.measures = data['measures'] self.eval_results = data['eval_results'] self.bias_scores = data['bias_scores'] diff --git a/mlm_bias/compute_mlm_bias.py b/mlm_bias/compute_mlm_bias.py index 3cc4e81..a885072 100644 --- a/mlm_bias/compute_mlm_bias.py +++ b/mlm_bias/compute_mlm_bias.py @@ -5,13 +5,21 @@ import torch import numpy as np from typing import Optional -from transformers import AutoTokenizer, AutoModelForMaskedLM +from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer from mlm_bias.bias_datasets import BiasDataset from mlm_bias.bias_results import BiasResults -from mlm_bias.utils.experiments import get_mask_combinations, get_span -from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp -from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION -from mlm_bias.utils.progress import show_progress, end_progress +from mlm_bias.utils import ( + compute_aul, + compute_crr_dp, + compute_csps, + compute_sss, + end_progress, + get_mask_combinations, + get_span, + show_progress, + SUPPORTED_MEASURES, + SUPPORTED_MEASURES_ATTENTION +) class BiasMLM(): """ @@ -20,19 +28,20 @@ class BiasMLM(): def 
__init__( self, - model_name: str, + model_name_or_path: str, dataset: BiasDataset, device: Optional[str] = None, ): self.results = BiasResults() self.dataset = dataset - self.model_name = model_name - self.model = AutoModelForMaskedLM.from_pretrained( - self.model_name, + self.model_name_or_path = model_name_or_path + self.model_config = AutoConfig.from_pretrained( + pretrained_model_name_or_path=self.model_name_or_path, output_hidden_states=True, output_attentions=True, attn_implementation="eager") - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.model = AutoModelForMaskedLM.from_config(self.model_config) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path) self.mask_id = self.tokenizer.mask_token_id self.device = None if device is not None: @@ -119,7 +128,7 @@ def evaluate( start_time = time.time() for index in range(len(self.dataset)): - show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time) + show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time) bias_type, s1, s2 = self.dataset[index] self.eval_results["bias_types"].append(bias_type) if 'crr' in measures or 'dp' in measures: @@ -175,12 +184,12 @@ def evaluate( mj_adv = compute_sss(self.model, token_ids_adv, adv_spans, self.mask_id, log_softmax=True) self.eval_results[f'S1']['sss'].append(mj_dis['sss']) self.eval_results[f'S2']['sss'].append(mj_adv['sss']) - show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time) + show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time) end_progress() self.measures = measures self.scores() self.results( - self.model_name, + self.model_name_or_path, self.measures, self.eval_results, self.bias_scores diff --git a/mlm_bias/compute_mlms_relative_bias.py b/mlm_bias/compute_mlms_relative_bias.py index b6cc1d6..d665aac 100644 --- a/mlm_bias/compute_mlms_relative_bias.py +++ b/mlm_bias/compute_mlms_relative_bias.py @@ -54,8 +54,8 @@ def evaluate(self, measures: Optional[list] = None): mdifs = [((m21 - m11) - (m22 - m21)) for m22, m21, m12, m11 in zip(m2_s2, m2_s1, m1_s2, m1_s1)] self.bias_scores[m][b] = 100 * np.mean([1 if mdif > 0 else 0 for mdif in mdifs]) self.results( - self.mlm1_bias_results["model_name"], - self.mlm2_bias_results["model_name"], + self.mlm1_bias_results["model_name_or_path"], + self.mlm2_bias_results["model_name_or_path"], measures, self.bias_scores ) diff --git a/mlm_bias/utils/__init__.py b/mlm_bias/utils/__init__.py index 36b2ce8..e40d5ec 100644 --- a/mlm_bias/utils/__init__.py +++ b/mlm_bias/utils/__init__.py @@ -1,5 +1,5 @@ -from mlm_bias.utils.experiments import get_mask_combinations, get_span -from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline -from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp -from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION -from mlm_bias.utils.progress import show_progress, end_progress \ No newline at end of file +from .constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION +from .experiments import get_mask_combinations, get_span +from .measures import compute_sss, compute_csps, compute_aul, compute_crr_dp +from .preprocess import preprocess_benchmark, preprocess_linebyline +from .progress import show_progress, end_progress \ No newline at end of file diff --git a/mlm_bias/utils/experiments.py b/mlm_bias/utils/experiments.py index 
7542b72..4cd8b6e 100644 --- a/mlm_bias/utils/experiments.py +++ b/mlm_bias/utils/experiments.py @@ -2,12 +2,11 @@ # -*- coding: utf-8 -*- import difflib -import regex as re +import re def get_mask_combinations(sent, tokenizer, skip_space=False, rm_punc=True): sent_toks = [] gt = [] - mask_ind = 0 if rm_punc: sent = ' '.join(re.sub('[^A-Za-z0-9 _\-]+', '', sent).split()) sent_enc = tokenizer.encode(sent, add_special_tokens=False) diff --git a/mlm_bias/utils/measures.py b/mlm_bias/utils/measures.py index 8f91c6a..10461c3 100644 --- a/mlm_bias/utils/measures.py +++ b/mlm_bias/utils/measures.py @@ -6,7 +6,7 @@ def get_mlm_output(model, inputs): with torch.no_grad(): - output = model(inputs) + output = model(inputs, return_dict=True) return output @torch.no_grad() @@ -34,17 +34,12 @@ def compute_crr_dp( top_toks = torch.topk(mask_token_probs, mask_token_probs.shape[1], dim=1) top_toks = top_toks.indices[0].tolist() top_token = top_toks[0] - #top_token_decoded = tokenizer.decode([top_token]) top_token_score = mask_token_probs[:, top_token].tolist()[0] - top_token_rank = 1 tok_inds = list(range(mask_token_probs.shape[1])) - token_js = [] masked_token_index = tok_inds.index(masked_tok) - #masked_token_decoded = tokenizer.decode([masked_tok]) masked_token_score = mask_token_probs[:, masked_token_index].tolist()[0] masked_token_rank = top_toks.index(masked_tok) + 1 token_j = { - #"token": masked_token_decoded, "token_id": masked_tok, "score": masked_token_score, "rank": masked_token_rank @@ -67,7 +62,6 @@ def compute_crr_dp( token_j['dpa'] = dp_attns return { "prediction": { - #"token": top_token_decoded, "token_id": top_token, "score": top_token_score, "rank": 1 diff --git a/mlm_bias/utils/preprocess.py b/mlm_bias/utils/preprocess.py index b10c6d8..f79b78b 100644 --- a/mlm_bias/utils/preprocess.py +++ b/mlm_bias/utils/preprocess.py @@ -57,9 +57,6 @@ def preprocess_linebyline(data_dir): if not os.access(data_dir, os.R_OK): raise Exception("Can't Access Dataset") else: - bias_types_path = os.path.join(data_dir, "bias_types.txt") - dis_path = os.path.join(data_dir, "dis.txt") - adv_path = os.path.join(data_dir, "adv.txt") with open(os.path.join(data_dir, "bias_types.txt"), "r") as f: bias_types = f.read().splitlines() f.close() diff --git a/setup.cfg b/setup.cfg index 1156bdb..02f9910 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = mlm-bias -version = 0.1.5 +version = 0.1.6 author = Rahul Zalkikar author_email = rayzck9@gmail.com description = Bias Evaluation Methods for Masked Language Models implemented in PyTorch diff --git a/setup.py b/setup.py index 6d84086..9e54343 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='mlm-bias', - version='0.1.5', + version='0.1.6', author='Rahul Zalkikar', author_email='rayzck9@gmail.com', description='Bias Evaluation Methods for Masked Language Models implemented in PyTorch',
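A note on the new loading path in `compute_mlm_bias.py`: `AutoConfig.from_pretrained` fetches only the model configuration, and `AutoModelForMaskedLM.from_config` then builds a model with freshly initialized weights rather than the checkpoint weights. Since the package evaluates pre-trained MLMs, the customized config likely needs to be applied while the checkpoint is loaded. A minimal sketch of that pattern, assuming the intent is to keep pre-trained weights while still exposing hidden states and attentions:

```python
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

model_name_or_path = "bert-base-uncased"  # any HuggingFace MLM checkpoint

# Fetches the configuration only; no weights are loaded here.
config = AutoConfig.from_pretrained(
    model_name_or_path,
    output_hidden_states=True,
    output_attentions=True,
    attn_implementation="eager",
)

# from_config(config) would return a randomly initialized model.
# from_pretrained(..., config=config) loads the checkpoint weights
# and applies the customized config on top of them.
model = AutoModelForMaskedLM.from_pretrained(model_name_or_path, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model.eval()
```

If the `from_config` path is intentional, the reported bias scores describe a randomly initialized architecture rather than the published checkpoint.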
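Relatedly, the re-exports in `mlm_bias/__init__.py` support a programmatic flow that mirrors the `mlm_bias.py` script. A sketch built only from constructors and methods visible in this patch; the two sentence pairs are illustrative stand-ins, not benchmark data:

```python
from mlm_bias import BiasDataset, BiasMLM, RelativeBiasMLMs

# BiasDataset takes parallel lists: a bias type plus a disadvantaged-group
# and an advantaged-group sentence per example (see bias_datasets.py).
dataset = BiasDataset(
    bias_types=["gender", "gender"],
    dis=["Women can't drive well.", "She is too emotional to lead."],
    adv=["Men can't drive well.", "He is too emotional to lead."],
)

# Score a single MLM on all supported measures.
res1 = BiasMLM("bert-base-uncased", dataset).evaluate(inc_attention=True)
res1.save("./eval/bert-base-uncased")
print(res1["bias_scores"])

# Relative bias between two MLMs, as in the --model_name_or_path_2 path.
res2 = BiasMLM("roberta-base", dataset).evaluate(inc_attention=True)
rel = RelativeBiasMLMs(res1, res2).evaluate()
rel.save("./eval/bert-base-uncased_roberta-base")
```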
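For the `--data custom` option, the argparse help and `preprocess_linebyline` expect three aligned line-by-line files, `bias_types.txt`, `dis.txt`, and `adv.txt`, where line i of each file describes the same example. A sketch that writes such a directory; the directory name and sentences are hypothetical, and how the script locates the directory is not shown in this patch:

```python
import os

data_dir = "./data/custom"  # hypothetical location
os.makedirs(data_dir, exist_ok=True)

examples = [
    # (bias type, disadvantaged-group sentence, advantaged-group sentence)
    ("age", "The old clerk fumbled with the register.",
            "The young clerk fumbled with the register."),
    ("nationality", "The foreign student plagiarized the essay.",
                    "The local student plagiarized the essay."),
]

columns = {
    "bias_types.txt": [e[0] for e in examples],
    "dis.txt": [e[1] for e in examples],
    "adv.txt": [e[2] for e in examples],
}
for filename, lines in columns.items():
    with open(os.path.join(data_dir, filename), "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
```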
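Finally, one set of context lines in `compute_mlms_relative_bias.py` may deserve a second look: the comprehension unpacks `m22, m21, m12, m11` but never uses `m12`, and its second term is `(m22 - m21)`, which compares sentences within model 2 rather than scores across models. If the intended quantity is the cross-model change on the disadvantaged sentence minus the cross-model change on the advantaged sentence, the second term would be `(m22 - m12)`. A sketch of that reading, offered as an assumption about intent rather than a confirmed fix:

```python
import numpy as np

def relative_bias_scores(m1_s1, m1_s2, m2_s1, m2_s2):
    """Percent of examples where MLM 2 shifts toward the disadvantaged
    sentence (S1) more than toward the advantaged sentence (S2),
    relative to MLM 1. Inputs are per-example measure lists."""
    mdifs = [
        (m21 - m11) - (m22 - m12)  # Δ on S1 across models minus Δ on S2
        for m22, m21, m12, m11 in zip(m2_s2, m2_s1, m1_s2, m1_s1)
    ]
    return 100 * np.mean([1 if mdif > 0 else 0 for mdif in mdifs])
```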