From d09ffbb64ebc44a6c7868e4b5ba51fac334c8730 Mon Sep 17 00:00:00 2001 From: zalkikar Date: Mon, 23 Dec 2024 16:40:57 -0500 Subject: [PATCH] version 0.1.6 - Update README and AutoConfig support --- README.md | 38 ++++++++++++++++---------- mlm_bias.py | 38 ++++++++++++-------------- mlm_bias/__init__.py | 10 ++----- mlm_bias/__version__.py | 2 +- mlm_bias/bias_datasets.py | 2 +- mlm_bias/bias_results.py | 12 ++++---- mlm_bias/compute_mlm_bias.py | 35 +++++++++++++++--------- mlm_bias/compute_mlms_relative_bias.py | 4 +-- mlm_bias/utils/__init__.py | 10 +++---- mlm_bias/utils/experiments.py | 3 +- mlm_bias/utils/measures.py | 8 +----- mlm_bias/utils/preprocess.py | 3 -- setup.cfg | 2 +- setup.py | 2 +- 14 files changed, 85 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 12cdf72..26cfd2c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # Measuring Biases in Masked Language Models for PyTorch Transformers +![pypi - status](https://img.shields.io/badge/status-stable-brightgreen) +![pypi - downloads](https://img.shields.io/pypi/dm/mlm-bias) +![pypi - version](https://img.shields.io/pypi/v/mlm-bias) + Evaluate biases in pre-trained or re-trained masked language models (MLMs), such as those available through [HuggingFace](https://huggingface.co/models). This package computes bias scores across various bias types, using benchmark datasets like [CrowS-Pairs (CPS)](https://github.com/nyu-mll/crows-pairs) and [StereoSet (SS)](https://github.com/moinnadeem/StereoSet) (intrasentence), or custom datasets. You can also compare relative bias between two MLMs, or evaluate re-trained MLMs versus their pre-trained base models. ## Evaluation Methods @@ -64,18 +68,18 @@ python3 -m pip install . Run the `mlm_bias.py` example script: ```bash -mlm_bias.py [-h] --data {cps,ss,custom} --model MODEL [--model2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E] +mlm_bias.py [-h] --data {cps,ss,custom} --model_name_or_path MODEL [--model_name_or_path_2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E] ``` Example arguments: ```bash # Single MLM -python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30 -python3 mlm_bias.py --data ss --model bert-base-uncased --start 0 --end 30 +python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30 +python3 mlm_bias.py --data ss --model_name_or_path bert-base-uncased --start 0 --end 30 # Relative between two MLMs -python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30 --model2 bert-base-uncased +python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30 --model_name_or_path_2 bert-base-uncased ``` Output directories (default arguments): @@ -85,24 +89,28 @@ Output directories (default arguments): ### Example Output: +```bash +python3 mlm_bias.py --data cps --model_name_or_path bert-base-uncased --start 0 --end 30 +``` + ```bash Created output directory. 
Created Data Directory |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s Downloaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s Loaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s -Evaluating Bias [roberta-base] |██████████████████████████████| 30/30 [100%] in 2m 46s ETA: 0s -Saved bias results for roberta-base in ./eval/roberta-base +Evaluating Bias [bert-base-uncased] |██████████████████████████████| 30/30 [100%] in 1m 4s ETA: 0s +Saved bias results for bert-base-uncased in ./eval/bert-base-uncased Saved scores in ./eval/out.txt -------------------------------------------------- -MLM: roberta-base -CRR total = 50.0 -CRRA total = 53.333 -ΔP total = 56.667 -ΔPA total = 56.667 -AUL total = 76.667 -AULA total = 70.0 -SSS total = 53.333 -CSPS total = 63.33 +MLM: bert-base-uncased +CRR total = 26.667 +CRRA total = 30.0 +ΔP total = 46.667 +ΔPA total = 43.333 +AUL total = 36.667 +AULA total = 40.0 +SSS total = 30.0 +CSPS total = 33.333 ``` ## Custom Datasets diff --git a/mlm_bias.py b/mlm_bias.py index 2bff040..54cb5e0 100644 --- a/mlm_bias.py +++ b/mlm_bias.py @@ -19,8 +19,6 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): for measure in res['bias_scores'].keys(): out += (f"{measure.replace('d','Δ').upper()} "+ f"total = {round(res['bias_scores'][measure]['total'],3)}\n") - if len(out) >= 2 and "\n" in out[-2:]: - out = out[:-2] else: for measure in res['bias_scores'].keys(): out += (f"Measure = {measure.replace('d','Δ').upper()}") @@ -41,13 +39,13 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): 'Provide bias types in "/bias_types.txt" and biased sentences in "/dis.txt" and "/adv.txt" accordingly.'), choices=['cps','ss','custom']) - parser.add_argument('--model', + parser.add_argument('--model_name_or_path', type=str, required=True, help=('Model (MLM) to compute bias measures for. 
'+ 'Must be supported by HuggingFace.')) - parser.add_argument('--model2', + parser.add_argument('--model_name_or_path_2', type=str, required=False, default="", @@ -107,45 +105,45 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False): output_dir = os.path.dirname(args.output) out = "" - model = args.model + model_name_or_path = args.model_name_or_path try: - model_bias = BiasMLM(args.model, dataset) + model_bias = BiasMLM(args.model_name_or_path, dataset) except Exception as ex: - raise Exception(f"Could not load {args.model}\n{ex}") + raise Exception(f"Could not load {args.model_name_or_path}\n{ex}") if args.measures == 'all': res1 = model_bias.evaluate(inc_attention=True) else: res1 = model_bias.evaluate(measures=args.measures, inc_attention=True) - output_dir_res1 = os.path.join(output_dir, res1['model_name']) + output_dir_res1 = os.path.join(output_dir, res1['model_name_or_path']) res1.save(output_dir_res1) - print(f"Saved bias results for {res1['model_name']} in {output_dir_res1}") - out = pretty_print(res1, out, m_name=res1['model_name']) + print(f"Saved bias results for {res1['model_name_or_path']} in {output_dir_res1}") + out = pretty_print(res1, out, m_name=res1['model_name_or_path']) res2 = None - if args.model2 != "": - model = args.model2 - model_bias = BiasMLM(args.model2, dataset) + if args.model_name_or_path_2 != "": + model = args.model_name_or_path_2 + model_bias = BiasMLM(args.model_name_or_path_2, dataset) if args.measures == 'all': res2 = model_bias.evaluate(inc_attention=True) else: res2 = model_bias.evaluate(measures=args.measures, inc_attention=True) - output_dir_res2 = os.path.join(output_dir, res2['model_name']) + output_dir_res2 = os.path.join(output_dir, res2['model_name_or_path']) res2.save(output_dir_res2) - print(f"Saved bias results for {res2['model_name']} in {output_dir_res2}") - out = pretty_print(res2, out, m_name=res2['model_name']) + print(f"Saved bias results for {res2['model_name_or_path']} in {output_dir_res2}") + out = pretty_print(res2, out, m_name=res2['model_name_or_path']) if res2 is not None: mlm_bias_relative = RelativeBiasMLMs(res1, res2) res3 = mlm_bias_relative.evaluate() - output_dir_res3 = os.path.join(output_dir, f"{res1['model_name']}_{res2['model_name']}") + output_dir_res3 = os.path.join(output_dir, f"{res1['model_name_or_path']}_{res2['model_name_or_path']}") res3.save(output_dir_res3) - print(f"Saved bias results for {res1['model_name']} relative to {res2['model_name']} in {output_dir_res3}") - out = pretty_print(res3, out, m_name=f"Relative {res1['model_name']}, {res2['model_name']}") + print(f"Saved bias results for {res1['model_name_or_path']} relative to {res2['model_name_or_path']} in {output_dir_res3}") + out = pretty_print(res3, out, m_name=f"Relative {res1['model_name_or_path']}, {res2['model_name_or_path']}") with open(args.output, 'w+', encoding='utf-8') as f: f.write(out) print(f"Saved scores in {args.output}") - console_out = pretty_print(res1, "", m_name=res1['model_name'], total_only=True) + console_out = pretty_print(res1, "", m_name=res1['model_name_or_path'], total_only=True) print(console_out) diff --git a/mlm_bias/__init__.py b/mlm_bias/__init__.py index 3c0f595..c17194e 100644 --- a/mlm_bias/__init__.py +++ b/mlm_bias/__init__.py @@ -1,8 +1,4 @@ -import mlm_bias.utils.experiments -import mlm_bias.utils.measures -import mlm_bias.utils.preprocess -import mlm_bias.utils.constants -from mlm_bias.compute_mlm_bias import BiasMLM +from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, 
BiasLineByLineDataset from mlm_bias.bias_results import BiasResults, RelativeBiasResults -from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs -from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, BiasLineByLineDataset \ No newline at end of file +from mlm_bias.compute_mlm_bias import BiasMLM +from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs \ No newline at end of file diff --git a/mlm_bias/__version__.py b/mlm_bias/__version__.py index de49d1f..32efefd 100644 --- a/mlm_bias/__version__.py +++ b/mlm_bias/__version__.py @@ -1 +1 @@ -__version__ = "0.1.5" \ No newline at end of file +__version__ = "0.1.6" \ No newline at end of file diff --git a/mlm_bias/bias_datasets.py b/mlm_bias/bias_datasets.py index 9d6f57f..b511cec 100644 --- a/mlm_bias/bias_datasets.py +++ b/mlm_bias/bias_datasets.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from typing import Optional -from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline +from mlm_bias.utils import preprocess_benchmark, preprocess_linebyline class BiasDataset(): def __init__(self, bias_types: list, dis: list, adv: list): diff --git a/mlm_bias/bias_results.py b/mlm_bias/bias_results.py index 4a81e69..8a711af 100644 --- a/mlm_bias/bias_results.py +++ b/mlm_bias/bias_results.py @@ -6,19 +6,19 @@ class BiasResults(): - model_name = None + model_name_or_path = None measures = None eval_results = None bias_scores = None def __call__( self, - model_name: str, + model_name_or_path: str, measures: list, eval_results: dict, bias_scores: dict, ): - self.model_name = model_name + self.model_name_or_path = model_name_or_path self.measures = measures self.eval_results = eval_results self.bias_scores = bias_scores @@ -28,7 +28,7 @@ def __getitem__(self, key): def save(self, file_path: Optional[str] = None): if file_path is None: - fp = f'{self.model_name}.bias' + fp = f'{self.model_name_or_path}.bias' else: fp = file_path with open(fp, 'wb') as f: @@ -37,13 +37,13 @@ def save(self, file_path: Optional[str] = None): def load(self, file_path: Optional[str] = None): if file_path is None: - fp = f'{self.model_name}.bias' + fp = f'{self.model_name_or_path}.bias' else: fp = file_path with open(fp, 'rb') as f: data = pickle.load(f) f.close() - self.model_name = data['model_name'] + self.model_name_or_path = data['model_name_or_path'] self.measures = data['measures'] self.eval_results = data['eval_results'] self.bias_scores = data['bias_scores'] diff --git a/mlm_bias/compute_mlm_bias.py b/mlm_bias/compute_mlm_bias.py index 3cc4e81..a885072 100644 --- a/mlm_bias/compute_mlm_bias.py +++ b/mlm_bias/compute_mlm_bias.py @@ -5,13 +5,21 @@ import torch import numpy as np from typing import Optional -from transformers import AutoTokenizer, AutoModelForMaskedLM +from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer from mlm_bias.bias_datasets import BiasDataset from mlm_bias.bias_results import BiasResults -from mlm_bias.utils.experiments import get_mask_combinations, get_span -from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp -from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION -from mlm_bias.utils.progress import show_progress, end_progress +from mlm_bias.utils import ( + compute_aul, + compute_crr_dp, + compute_csps, + compute_sss, + end_progress, + get_mask_combinations, + get_span, + show_progress, + SUPPORTED_MEASURES, + SUPPORTED_MEASURES_ATTENTION +) class BiasMLM(): """ @@ -20,19 +28,20 @@ class BiasMLM(): def 
__init__( self, - model_name: str, + model_name_or_path: str, dataset: BiasDataset, device: Optional[str] = None, ): self.results = BiasResults() self.dataset = dataset - self.model_name = model_name - self.model = AutoModelForMaskedLM.from_pretrained( - self.model_name, + self.model_name_or_path = model_name_or_path + self.model_config = AutoConfig.from_pretrained( + pretrained_model_name_or_path=self.model_name_or_path, output_hidden_states=True, output_attentions=True, attn_implementation="eager") - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.model = AutoModelForMaskedLM.from_config(self.model_config) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path) self.mask_id = self.tokenizer.mask_token_id self.device = None if device is not None: @@ -119,7 +128,7 @@ def evaluate( start_time = time.time() for index in range(len(self.dataset)): - show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time) + show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time) bias_type, s1, s2 = self.dataset[index] self.eval_results["bias_types"].append(bias_type) if 'crr' in measures or 'dp' in measures: @@ -175,12 +184,12 @@ def evaluate( mj_adv = compute_sss(self.model, token_ids_adv, adv_spans, self.mask_id, log_softmax=True) self.eval_results[f'S1']['sss'].append(mj_dis['sss']) self.eval_results[f'S2']['sss'].append(mj_adv['sss']) - show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time) + show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time) end_progress() self.measures = measures self.scores() self.results( - self.model_name, + self.model_name_or_path, self.measures, self.eval_results, self.bias_scores diff --git a/mlm_bias/compute_mlms_relative_bias.py b/mlm_bias/compute_mlms_relative_bias.py index b6cc1d6..d665aac 100644 --- a/mlm_bias/compute_mlms_relative_bias.py +++ b/mlm_bias/compute_mlms_relative_bias.py @@ -54,8 +54,8 @@ def evaluate(self, measures: Optional[list] = None): mdifs = [((m21 - m11) - (m22 - m21)) for m22, m21, m12, m11 in zip(m2_s2, m2_s1, m1_s2, m1_s1)] self.bias_scores[m][b] = 100 * np.mean([1 if mdif > 0 else 0 for mdif in mdifs]) self.results( - self.mlm1_bias_results["model_name"], - self.mlm2_bias_results["model_name"], + self.mlm1_bias_results["model_name_or_path"], + self.mlm2_bias_results["model_name_or_path"], measures, self.bias_scores ) diff --git a/mlm_bias/utils/__init__.py b/mlm_bias/utils/__init__.py index 36b2ce8..e40d5ec 100644 --- a/mlm_bias/utils/__init__.py +++ b/mlm_bias/utils/__init__.py @@ -1,5 +1,5 @@ -from mlm_bias.utils.experiments import get_mask_combinations, get_span -from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline -from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp -from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION -from mlm_bias.utils.progress import show_progress, end_progress \ No newline at end of file +from .constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION +from .experiments import get_mask_combinations, get_span +from .measures import compute_sss, compute_csps, compute_aul, compute_crr_dp +from .preprocess import preprocess_benchmark, preprocess_linebyline +from .progress import show_progress, end_progress \ No newline at end of file diff --git a/mlm_bias/utils/experiments.py b/mlm_bias/utils/experiments.py index 
7542b72..4cd8b6e 100644 --- a/mlm_bias/utils/experiments.py +++ b/mlm_bias/utils/experiments.py @@ -2,12 +2,11 @@ # -*- coding: utf-8 -*- import difflib -import regex as re +import re def get_mask_combinations(sent, tokenizer, skip_space=False, rm_punc=True): sent_toks = [] gt = [] - mask_ind = 0 if rm_punc: sent = ' '.join(re.sub('[^A-Za-z0-9 _\-]+', '', sent).split()) sent_enc = tokenizer.encode(sent, add_special_tokens=False) diff --git a/mlm_bias/utils/measures.py b/mlm_bias/utils/measures.py index 8f91c6a..10461c3 100644 --- a/mlm_bias/utils/measures.py +++ b/mlm_bias/utils/measures.py @@ -6,7 +6,7 @@ def get_mlm_output(model, inputs): with torch.no_grad(): - output = model(inputs) + output = model(inputs, return_dict=True) return output @torch.no_grad() @@ -34,17 +34,12 @@ def compute_crr_dp( top_toks = torch.topk(mask_token_probs, mask_token_probs.shape[1], dim=1) top_toks = top_toks.indices[0].tolist() top_token = top_toks[0] - #top_token_decoded = tokenizer.decode([top_token]) top_token_score = mask_token_probs[:, top_token].tolist()[0] - top_token_rank = 1 tok_inds = list(range(mask_token_probs.shape[1])) - token_js = [] masked_token_index = tok_inds.index(masked_tok) - #masked_token_decoded = tokenizer.decode([masked_tok]) masked_token_score = mask_token_probs[:, masked_token_index].tolist()[0] masked_token_rank = top_toks.index(masked_tok) + 1 token_j = { - #"token": masked_token_decoded, "token_id": masked_tok, "score": masked_token_score, "rank": masked_token_rank @@ -67,7 +62,6 @@ def compute_crr_dp( token_j['dpa'] = dp_attns return { "prediction": { - #"token": top_token_decoded, "token_id": top_token, "score": top_token_score, "rank": 1 diff --git a/mlm_bias/utils/preprocess.py b/mlm_bias/utils/preprocess.py index b10c6d8..f79b78b 100644 --- a/mlm_bias/utils/preprocess.py +++ b/mlm_bias/utils/preprocess.py @@ -57,9 +57,6 @@ def preprocess_linebyline(data_dir): if not os.access(data_dir, os.R_OK): raise Exception("Can't Access Dataset") else: - bias_types_path = os.path.join(data_dir, "bias_types.txt") - dis_path = os.path.join(data_dir, "dis.txt") - adv_path = os.path.join(data_dir, "adv.txt") with open(os.path.join(data_dir, "bias_types.txt"), "r") as f: bias_types = f.read().splitlines() f.close() diff --git a/setup.cfg b/setup.cfg index 1156bdb..02f9910 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = mlm-bias -version = 0.1.5 +version = 0.1.6 author = Rahul Zalkikar author_email = rayzck9@gmail.com description = Bias Evaluation Methods for Masked Language Models implemented in PyTorch diff --git a/setup.py b/setup.py index 6d84086..9e54343 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='mlm-bias', - version='0.1.5', + version='0.1.6', author='Rahul Zalkikar', author_email='rayzck9@gmail.com', description='Bias Evaluation Methods for Masked Language Models implemented in PyTorch',
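A note on the new loading path in `compute_mlm_bias.py`: `AutoConfig.from_pretrained` fetches only the model configuration, and `AutoModelForMaskedLM.from_config` then builds a model with freshly initialized weights rather than the checkpoint weights. Since the package evaluates pre-trained MLMs, the customized config likely needs to be applied while the checkpoint is loaded. A minimal sketch of that pattern, assuming the intent is to keep pre-trained weights while still exposing hidden states and attentions:

```python
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

model_name_or_path = "bert-base-uncased"  # any HuggingFace MLM checkpoint

# Fetches the configuration only; no weights are loaded here.
config = AutoConfig.from_pretrained(
    model_name_or_path,
    output_hidden_states=True,
    output_attentions=True,
    attn_implementation="eager",
)

# from_config(config) would return a randomly initialized model.
# from_pretrained(..., config=config) loads the checkpoint weights
# and applies the customized config on top of them.
model = AutoModelForMaskedLM.from_pretrained(model_name_or_path, config=config)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model.eval()
```

If the `from_config` path is intentional, the reported bias scores describe a randomly initialized architecture rather than the published checkpoint.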
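Relatedly, the re-exports in `mlm_bias/__init__.py` support a programmatic flow that mirrors the `mlm_bias.py` script. A sketch built only from constructors and methods visible in this patch; the two sentence pairs are illustrative stand-ins, not benchmark data:

```python
from mlm_bias import BiasDataset, BiasMLM, RelativeBiasMLMs

# BiasDataset takes parallel lists: a bias type plus a disadvantaged-group
# and an advantaged-group sentence per example (see bias_datasets.py).
dataset = BiasDataset(
    bias_types=["gender", "gender"],
    dis=["Women can't drive well.", "She is too emotional to lead."],
    adv=["Men can't drive well.", "He is too emotional to lead."],
)

# Score a single MLM on all supported measures.
res1 = BiasMLM("bert-base-uncased", dataset).evaluate(inc_attention=True)
res1.save("./eval/bert-base-uncased")
print(res1["bias_scores"])

# Relative bias between two MLMs, as in the --model_name_or_path_2 path.
res2 = BiasMLM("roberta-base", dataset).evaluate(inc_attention=True)
rel = RelativeBiasMLMs(res1, res2).evaluate()
rel.save("./eval/bert-base-uncased_roberta-base")
```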
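For the `--data custom` option, the argparse help and `preprocess_linebyline` expect three aligned line-by-line files, `bias_types.txt`, `dis.txt`, and `adv.txt`, where line i of each file describes the same example. A sketch that writes such a directory; the directory name and sentences are hypothetical, and how the script locates the directory is not shown in this patch:

```python
import os

data_dir = "./data/custom"  # hypothetical location
os.makedirs(data_dir, exist_ok=True)

examples = [
    # (bias type, disadvantaged-group sentence, advantaged-group sentence)
    ("age", "The old clerk fumbled with the register.",
            "The young clerk fumbled with the register."),
    ("nationality", "The foreign student plagiarized the essay.",
                    "The local student plagiarized the essay."),
]

columns = {
    "bias_types.txt": [e[0] for e in examples],
    "dis.txt": [e[1] for e in examples],
    "adv.txt": [e[2] for e in examples],
}
for filename, lines in columns.items():
    with open(os.path.join(data_dir, filename), "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
```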
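Finally, one set of context lines in `compute_mlms_relative_bias.py` may deserve a second look: the comprehension unpacks `m22, m21, m12, m11` but never uses `m12`, and its second term is `(m22 - m21)`, which compares sentences within model 2 rather than scores across models. If the intended quantity is the cross-model change on the disadvantaged sentence minus the cross-model change on the advantaged sentence, the second term would be `(m22 - m12)`. A sketch of that reading, offered as an assumption about intent rather than a confirmed fix:

```python
import numpy as np

def relative_bias_scores(m1_s1, m1_s2, m2_s1, m2_s2):
    """Percent of examples where MLM 2 shifts toward the disadvantaged
    sentence (S1) more than toward the advantaged sentence (S2),
    relative to MLM 1. Inputs are per-example measure lists."""
    mdifs = [
        (m21 - m11) - (m22 - m12)  # Δ on S1 across models minus Δ on S2
        for m22, m21, m12, m11 in zip(m2_s2, m2_s1, m1_s2, m1_s1)
    ]
    return 100 * np.mean([1 if mdif > 0 else 0 for mdif in mdifs])
```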