version 0.1.6 - Update README and add AutoConfig support
zalkikar committed Dec 23, 2024
1 parent c29d1b5 commit d09ffbb
Showing 14 changed files with 85 additions and 84 deletions.
38 changes: 23 additions & 15 deletions README.md
@@ -1,5 +1,9 @@
# Measuring Biases in Masked Language Models for PyTorch Transformers

![pypi - status](https://img.shields.io/badge/status-stable-brightgreen)
![pypi - downloads](https://img.shields.io/pypi/dm/mlm-bias)
![pypi - version](https://img.shields.io/pypi/v/mlm-bias)

Evaluate biases in pre-trained or re-trained masked language models (MLMs), such as those available through [HuggingFace](https://huggingface.co/models). This package computes bias scores across various bias types, using benchmark datasets like [CrowS-Pairs (CPS)](https://github.com/nyu-mll/crows-pairs) and [StereoSet (SS)](https://github.com/moinnadeem/StereoSet) (intrasentence), or custom datasets. You can also compare relative bias between two MLMs, or evaluate re-trained MLMs versus their pre-trained base models.
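
The same evaluations can be run from Python. Below is a minimal sketch of the programmatic API; the `BiasBenchmarkDataset("cps")` constructor call is an assumption (its signature is not shown in this commit), while the `BiasMLM` and `RelativeBiasMLMs` usage mirrors `mlm_bias.py`:

```python
# Hedged sketch of the programmatic API. The BiasBenchmarkDataset("cps")
# call is assumed; the BiasMLM / RelativeBiasMLMs usage mirrors mlm_bias.py.
from mlm_bias import BiasBenchmarkDataset, BiasMLM, RelativeBiasMLMs

dataset = BiasBenchmarkDataset("cps")  # assumed constructor; see mlm_bias/bias_datasets.py

# Evaluate a single MLM and save its per-measure scores.
res1 = BiasMLM("roberta-base", dataset).evaluate(inc_attention=True)
res1.save("./eval/roberta-base")
print(res1["bias_scores"])

# Optionally evaluate a second MLM and compute relative bias between the two.
res2 = BiasMLM("bert-base-uncased", dataset).evaluate(inc_attention=True)
rel = RelativeBiasMLMs(res1, res2).evaluate()
print(rel["bias_scores"])
```

This mirrors what the CLI script below does with `--model_name_or_path` and `--model_name_or_path_2`.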

## Evaluation Methods
@@ -64,18 +68,18 @@ python3 -m pip install .
Run the `mlm_bias.py` example script:

```bash
mlm_bias.py [-h] --data {cps,ss,custom} --model MODEL [--model2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E]
mlm_bias.py [-h] --data {cps,ss,custom} --model_name_or_path MODEL [--model_name_or_path_2 MODEL2] [--output OUTPUT] [--measures {all,crr,crra,dp,dpa,aul,aula,csps,sss}] [--start S] [--end E]
```

Example arguments:

```bash
# Single MLM
python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30
python3 mlm_bias.py --data ss --model bert-base-uncased --start 0 --end 30
python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30
python3 mlm_bias.py --data ss --model_name_or_path bert-base-uncased --start 0 --end 30

# Relative between two MLMs
python3 mlm_bias.py --data cps --model roberta-base --start 0 --end 30 --model2 bert-base-uncased
python3 mlm_bias.py --data cps --model_name_or_path roberta-base --start 0 --end 30 --model_name_or_path_2 bert-base-uncased
```

Output directories (default arguments):
@@ -85,24 +89,28 @@ Output directories (default arguments):

### Example Output:

```bash
python3 mlm_bias.py --data cps --model_name_or_path roberta-base --model_name_or_path_2 bert-base-uncased --start 0 --end 30
```

```bash
Created output directory.
Created Data Directory |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s
Downloaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s
Loaded Data [CrowSPairs] |██████████████████████████████| 1/1 [100%] in 0s ETA: 0s
Evaluating Bias [roberta-base] |██████████████████████████████| 30/30 [100%] in 2m 46s ETA: 0s
Saved bias results for roberta-base in ./eval/roberta-base
Evaluating Bias [bert-base-uncased] |██████████████████████████████| 30/30 [100%] in 1m 4s ETA: 0s
Saved bias results for bert-base-uncased in ./eval/bert-base-uncased
Saved scores in ./eval/out.txt
--------------------------------------------------
MLM: roberta-base
CRR total = 50.0
CRRA total = 53.333
ΔP total = 56.667
ΔPA total = 56.667
AUL total = 76.667
AULA total = 70.0
SSS total = 53.333
CSPS total = 63.33
MLM: bert-base-uncased
CRR total = 26.667
CRRA total = 30.0
ΔP total = 46.667
ΔPA total = 43.333
AUL total = 36.667
AULA total = 40.0
SSS total = 30.0
CSPS total = 33.333
```
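
The saved result files are pickles written by `BiasResults.save`, so they can be reloaded later. A minimal sketch, assuming the default output paths printed above:

```python
# Hedged sketch: reload previously saved bias results.
# The path matches the "Saved bias results ... in ./eval/bert-base-uncased"
# line above; adjust it if a different --output directory was used.
from mlm_bias import BiasResults

results = BiasResults()
results.load("./eval/bert-base-uncased")
print(results.model_name_or_path)           # "bert-base-uncased"
print(results.bias_scores["crr"]["total"])  # e.g. the CRR total, if that measure was computed
```
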
## Custom Datasets
38 changes: 18 additions & 20 deletions mlm_bias.py
@@ -19,8 +19,6 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False):
for measure in res['bias_scores'].keys():
out += (f"{measure.replace('d','Δ').upper()} "+
f"total = {round(res['bias_scores'][measure]['total'],3)}\n")
if len(out) >= 2 and "\n" in out[-2:]:
out = out[:-2]
else:
for measure in res['bias_scores'].keys():
out += (f"Measure = {measure.replace('d','Δ').upper()}")
@@ -41,13 +39,13 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False):
'Provide bias types in "<data>/bias_types.txt" and biased sentences in "<data>/dis.txt" and "<data>/adv.txt" accordingly.'),
choices=['cps','ss','custom'])

parser.add_argument('--model',
parser.add_argument('--model_name_or_path',
type=str,
required=True,
help=('Model (MLM) to compute bias measures for. '+
'Must be supported by HuggingFace.'))

parser.add_argument('--model2',
parser.add_argument('--model_name_or_path_2',
type=str,
required=False,
default="",
@@ -107,45 +105,45 @@ def pretty_print(res, out, m_name, sep="\n", total_only=False):
output_dir = os.path.dirname(args.output)

out = ""
model = args.model
model_name_or_path = args.model_name_or_path
try:
model_bias = BiasMLM(args.model, dataset)
model_bias = BiasMLM(args.model_name_or_path, dataset)
except Exception as ex:
raise Exception(f"Could not load {args.model}\n{ex}")
raise Exception(f"Could not load {args.model_name_or_path}\n{ex}")
if args.measures == 'all':
res1 = model_bias.evaluate(inc_attention=True)
else:
res1 = model_bias.evaluate(measures=args.measures, inc_attention=True)
output_dir_res1 = os.path.join(output_dir, res1['model_name'])
output_dir_res1 = os.path.join(output_dir, res1['model_name_or_path'])
res1.save(output_dir_res1)
print(f"Saved bias results for {res1['model_name']} in {output_dir_res1}")
out = pretty_print(res1, out, m_name=res1['model_name'])
print(f"Saved bias results for {res1['model_name_or_path']} in {output_dir_res1}")
out = pretty_print(res1, out, m_name=res1['model_name_or_path'])

res2 = None
if args.model2 != "":
model = args.model2
model_bias = BiasMLM(args.model2, dataset)
if args.model_name_or_path_2 != "":
model = args.model_name_or_path_2
model_bias = BiasMLM(args.model_name_or_path_2, dataset)
if args.measures == 'all':
res2 = model_bias.evaluate(inc_attention=True)
else:
res2 = model_bias.evaluate(measures=args.measures, inc_attention=True)
output_dir_res2 = os.path.join(output_dir, res2['model_name'])
output_dir_res2 = os.path.join(output_dir, res2['model_name_or_path'])
res2.save(output_dir_res2)
print(f"Saved bias results for {res2['model_name']} in {output_dir_res2}")
out = pretty_print(res2, out, m_name=res2['model_name'])
print(f"Saved bias results for {res2['model_name_or_path']} in {output_dir_res2}")
out = pretty_print(res2, out, m_name=res2['model_name_or_path'])

if res2 is not None:
mlm_bias_relative = RelativeBiasMLMs(res1, res2)
res3 = mlm_bias_relative.evaluate()
output_dir_res3 = os.path.join(output_dir, f"{res1['model_name']}_{res2['model_name']}")
output_dir_res3 = os.path.join(output_dir, f"{res1['model_name_or_path']}_{res2['model_name_or_path']}")
res3.save(output_dir_res3)
print(f"Saved bias results for {res1['model_name']} relative to {res2['model_name']} in {output_dir_res3}")
out = pretty_print(res3, out, m_name=f"Relative {res1['model_name']}, {res2['model_name']}")
print(f"Saved bias results for {res1['model_name_or_path']} relative to {res2['model_name_or_path']} in {output_dir_res3}")
out = pretty_print(res3, out, m_name=f"Relative {res1['model_name_or_path']}, {res2['model_name_or_path']}")

with open(args.output, 'w+', encoding='utf-8') as f:
f.write(out)

print(f"Saved scores in {args.output}")

console_out = pretty_print(res1, "", m_name=res1['model_name'], total_only=True)
console_out = pretty_print(res1, "", m_name=res1['model_name_or_path'], total_only=True)
print(console_out)
10 changes: 3 additions & 7 deletions mlm_bias/__init__.py
@@ -1,8 +1,4 @@
import mlm_bias.utils.experiments
import mlm_bias.utils.measures
import mlm_bias.utils.preprocess
import mlm_bias.utils.constants
from mlm_bias.compute_mlm_bias import BiasMLM
from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, BiasLineByLineDataset
from mlm_bias.bias_results import BiasResults, RelativeBiasResults
from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs
from mlm_bias.bias_datasets import BiasDataset, BiasBenchmarkDataset, BiasLineByLineDataset
from mlm_bias.compute_mlm_bias import BiasMLM
from mlm_bias.compute_mlms_relative_bias import RelativeBiasMLMs
2 changes: 1 addition & 1 deletion mlm_bias/__version__.py
@@ -1 +1 @@
__version__ = "0.1.5"
__version__ = "0.1.6"
2 changes: 1 addition & 1 deletion mlm_bias/bias_datasets.py
@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

from typing import Optional
from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline
from mlm_bias.utils import preprocess_benchmark, preprocess_linebyline

class BiasDataset():
def __init__(self, bias_types: list, dis: list, adv: list):
12 changes: 6 additions & 6 deletions mlm_bias/bias_results.py
@@ -6,19 +6,19 @@

class BiasResults():

model_name = None
model_name_or_path = None
measures = None
eval_results = None
bias_scores = None

def __call__(
self,
model_name: str,
model_name_or_path: str,
measures: list,
eval_results: dict,
bias_scores: dict,
):
self.model_name = model_name
self.model_name_or_path = model_name_or_path
self.measures = measures
self.eval_results = eval_results
self.bias_scores = bias_scores
@@ -28,7 +28,7 @@ def __getitem__(self, key):

def save(self, file_path: Optional[str] = None):
if file_path is None:
fp = f'{self.model_name}.bias'
fp = f'{self.model_name_or_path}.bias'
else:
fp = file_path
with open(fp, 'wb') as f:
@@ -37,13 +37,13 @@ def save(self, file_path: Optional[str] = None):

def load(self, file_path: Optional[str] = None):
if file_path is None:
fp = f'{self.model_name}.bias'
fp = f'{self.model_name_or_path}.bias'
else:
fp = file_path
with open(fp, 'rb') as f:
data = pickle.load(f)
f.close()
self.model_name = data['model_name']
self.model_name_or_path = data['model_name_or_path']
self.measures = data['measures']
self.eval_results = data['eval_results']
self.bias_scores = data['bias_scores']
35 changes: 22 additions & 13 deletions mlm_bias/compute_mlm_bias.py
@@ -5,13 +5,21 @@
import torch
import numpy as np
from typing import Optional
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer
from mlm_bias.bias_datasets import BiasDataset
from mlm_bias.bias_results import BiasResults
from mlm_bias.utils.experiments import get_mask_combinations, get_span
from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp
from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION
from mlm_bias.utils.progress import show_progress, end_progress
from mlm_bias.utils import (
compute_aul,
compute_crr_dp,
compute_csps,
compute_sss,
end_progress,
get_mask_combinations,
get_span,
show_progress,
SUPPORTED_MEASURES,
SUPPORTED_MEASURES_ATTENTION
)

class BiasMLM():
"""
@@ -20,19 +28,20 @@ class BiasMLM():

def __init__(
self,
model_name: str,
model_name_or_path: str,
dataset: BiasDataset,
device: Optional[str] = None,
):
self.results = BiasResults()
self.dataset = dataset
self.model_name = model_name
self.model = AutoModelForMaskedLM.from_pretrained(
self.model_name,
self.model_name_or_path = model_name_or_path
self.model_config = AutoConfig.from_pretrained(
pretrained_model_name_or_path=self.model_name_or_path,
output_hidden_states=True,
output_attentions=True,
attn_implementation="eager")
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
self.model = AutoModelForMaskedLM.from_config(self.model_config)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
self.mask_id = self.tokenizer.mask_token_id
self.device = None
if device is not None:
@@ -119,7 +128,7 @@ def evaluate(

start_time = time.time()
for index in range(len(self.dataset)):
show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time)
show_progress(index, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time)
bias_type, s1, s2 = self.dataset[index]
self.eval_results["bias_types"].append(bias_type)
if 'crr' in measures or 'dp' in measures:
@@ -175,12 +184,12 @@ def evaluate(
mj_adv = compute_sss(self.model, token_ids_adv, adv_spans, self.mask_id, log_softmax=True)
self.eval_results[f'S1']['sss'].append(mj_dis['sss'])
self.eval_results[f'S2']['sss'].append(mj_adv['sss'])
show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name}]", start_time)
show_progress(index+1, len(self.dataset), f"Evaluating Bias [{self.model_name_or_path}]", start_time)
end_progress()
self.measures = measures
self.scores()
self.results(
self.model_name,
self.model_name_or_path,
self.measures,
self.eval_results,
self.bias_scores
4 changes: 2 additions & 2 deletions mlm_bias/compute_mlms_relative_bias.py
@@ -54,8 +54,8 @@ def evaluate(self, measures: Optional[list] = None):
mdifs = [((m21 - m11) - (m22 - m21)) for m22, m21, m12, m11 in zip(m2_s2, m2_s1, m1_s2, m1_s1)]
self.bias_scores[m][b] = 100 * np.mean([1 if mdif > 0 else 0 for mdif in mdifs])
self.results(
self.mlm1_bias_results["model_name"],
self.mlm2_bias_results["model_name"],
self.mlm1_bias_results["model_name_or_path"],
self.mlm2_bias_results["model_name_or_path"],
measures,
self.bias_scores
)
10 changes: 5 additions & 5 deletions mlm_bias/utils/__init__.py
@@ -1,5 +1,5 @@
from mlm_bias.utils.experiments import get_mask_combinations, get_span
from mlm_bias.utils.preprocess import preprocess_benchmark, preprocess_linebyline
from mlm_bias.utils.measures import compute_sss, compute_csps, compute_aul, compute_crr_dp
from mlm_bias.utils.constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION
from mlm_bias.utils.progress import show_progress, end_progress
from .constants import SUPPORTED_MEASURES, SUPPORTED_MEASURES_ATTENTION
from .experiments import get_mask_combinations, get_span
from .measures import compute_sss, compute_csps, compute_aul, compute_crr_dp
from .preprocess import preprocess_benchmark, preprocess_linebyline
from .progress import show_progress, end_progress
3 changes: 1 addition & 2 deletions mlm_bias/utils/experiments.py
@@ -2,12 +2,11 @@
# -*- coding: utf-8 -*-

import difflib
import regex as re
import re

def get_mask_combinations(sent, tokenizer, skip_space=False, rm_punc=True):
sent_toks = []
gt = []
mask_ind = 0
if rm_punc:
sent = ' '.join(re.sub('[^A-Za-z0-9 _\-]+', '', sent).split())
sent_enc = tokenizer.encode(sent, add_special_tokens=False)
8 changes: 1 addition & 7 deletions mlm_bias/utils/measures.py
@@ -6,7 +6,7 @@

def get_mlm_output(model, inputs):
with torch.no_grad():
output = model(inputs)
output = model(inputs, return_dict=True)
return output

@torch.no_grad()
@@ -34,17 +34,12 @@ def compute_crr_dp(
top_toks = torch.topk(mask_token_probs, mask_token_probs.shape[1], dim=1)
top_toks = top_toks.indices[0].tolist()
top_token = top_toks[0]
#top_token_decoded = tokenizer.decode([top_token])
top_token_score = mask_token_probs[:, top_token].tolist()[0]
top_token_rank = 1
tok_inds = list(range(mask_token_probs.shape[1]))
token_js = []
masked_token_index = tok_inds.index(masked_tok)
#masked_token_decoded = tokenizer.decode([masked_tok])
masked_token_score = mask_token_probs[:, masked_token_index].tolist()[0]
masked_token_rank = top_toks.index(masked_tok) + 1
token_j = {
#"token": masked_token_decoded,
"token_id": masked_tok,
"score": masked_token_score,
"rank": masked_token_rank
@@ -67,7 +62,6 @@ def compute_crr_dp(
token_j['dpa'] = dp_attns
return {
"prediction": {
#"token": top_token_decoded,
"token_id": top_token,
"score": top_token_score,
"rank": 1
3 changes: 0 additions & 3 deletions mlm_bias/utils/preprocess.py
@@ -57,9 +57,6 @@ def preprocess_linebyline(data_dir):
if not os.access(data_dir, os.R_OK):
raise Exception("Can't Access Dataset")
else:
bias_types_path = os.path.join(data_dir, "bias_types.txt")
dis_path = os.path.join(data_dir, "dis.txt")
adv_path = os.path.join(data_dir, "adv.txt")
with open(os.path.join(data_dir, "bias_types.txt"), "r") as f:
bias_types = f.read().splitlines()
f.close()