run_demo_updates.py
# This is a wrapper that builds a bunch of (g, h) pairs for different demographic groups and feeds them into the
# updater algorithm so as to simulate a bunch of bounty hunters.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

import model
import updater
import verifier


# building the bounty hunters' models_to_update
def bounty_hunter_models(x, y, group_functions, dt_depth, classifier):
    # learn the indices first, since this is an inefficient operation
    indices = [x.apply(g, axis=1) == 1 for g in group_functions]
    # then pull the particular rows from the dataframe
    training_xs = [x[i] for i in indices]
    training_ys = [y[i] for i in indices]
    models = []
    for i in range(len(training_xs)):
        if classifier == "Logistic Regression":
            log_reg = LogisticRegression(penalty='none', max_iter=2000)  # note: newer scikit-learn spells this penalty=None
            log_reg.fit(training_xs[i], training_ys[i])
            models.append(log_reg.predict)
        elif classifier == "Decision Tree":
            dt = DecisionTreeClassifier(max_depth=dt_depth, random_state=0)  # setting random state for replicability
            dt.fit(training_xs[i], training_ys[i])
            models.append(dt.predict)
    return models
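

# Illustrative only (not part of the original script): each g in group_functions is expected to be
# a row-level predicate returning 1 for rows in its group and 0 otherwise, so that
# x.apply(g, axis=1) == 1 above selects exactly that group's rows. The "age" column below is an
# assumed example, not necessarily a column in this repo's dataset.
def _example_group_function(row):
    return 1 if row["age"] >= 65 else 0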
# run all the different updates
def run_updates(initial_model, group_functions, models, group_indicators, test_x, test_y, train_x, train_y):
    # stick the gs and hs into a form that the updater accepts
    bounty_hunters = [[group_functions[i], models[i], group_indicators[i]] for i in range(len(group_functions))]
    all_groups = [lambda x: 1] + group_functions
    print("Building initial model")
    # build the initial model
    f = model.PointerDecisionList(initial_model.predict, all_groups)
    f.test_errors[0] = updater.measure_all_group_errors(f, all_groups, test_x, test_y)
    f.train_errors[0] = updater.measure_all_group_errors(f, all_groups, train_x, train_y)
    # run the updater
    i = 0
    for b in bounty_hunters:
        print("Running on group %s/%s" % (i + 1, len(bounty_hunters)))
        print("Group running on %s" % group_indicators[i])
        # check whether this proposed (g, h) pair should be accepted
        if verifier.is_proposed_group_good(f, test_x, test_y, b[1], b[0]):
            # the update step:
            updater.iterative_update(f, b[1], b[0], train_x, train_y, test_x, test_y, b[2], all_groups,
                                     group_indicators)
        # if the update didn't help, just copy the errors and PDLs at this round.
        else:
            print("Group %s is rejected" % group_indicators[i])
            f.num_rounds += 1
            f.track_rejects.append(0)
        i += 1
        print(".....................................................")
    test_errors = pd.DataFrame(f.test_errors)
    train_errors = pd.DataFrame(f.train_errors)
    return [test_errors, train_errors]
###############################################################
# given the initial model and a bunch of group functions, feed them into the updater in a random order
def updater_wrapper(initial_model, group_functions, group_indicators, test_x, test_y, train_x, train_y,
                    classifier="Decision Tree", dt_depth=10):
    # generate the models_to_update using the group functions you built here
    models = bounty_hunter_models(train_x, train_y, group_functions, dt_depth, classifier)
    rng = np.random.default_rng(12345)
    r_indices = np.arange(len(group_functions))
    rng.shuffle(r_indices)
    r_group_functions = [group_functions[i] for i in r_indices]
    r_models = [models[i] for i in r_indices]
    r_group_indicators = [group_indicators[i] for i in r_indices]
    [test_errors, train_errors] = run_updates(initial_model, r_group_functions, r_models, r_group_indicators,
                                              test_x, test_y, train_x, train_y)
    return [test_errors, train_errors]
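

###############################################################
# Minimal usage sketch (not part of the original script). Everything below is an assumed
# example: the CSV path, the "label" column, and the group columns "sex" and "age" are
# placeholders, not the dataset or schema this repo actually ships with.
if __name__ == "__main__":
    from sklearn.model_selection import train_test_split

    data = pd.read_csv("data.csv")  # hypothetical dataset with a binary "label" column
    x, y = data.drop(columns=["label"]), data["label"]
    train_x, test_x, train_y, test_y = train_test_split(x, y, random_state=0)

    # any fitted classifier exposing .predict works as the initial model f
    initial_model = DecisionTreeClassifier(max_depth=5, random_state=0).fit(train_x, train_y)

    # group functions are row-level predicates over dataframe rows; these columns are assumptions
    group_functions = [lambda row: int(row["sex"] == 1),
                       lambda row: int(row["age"] >= 65)]
    group_indicators = ["sex == 1", "age >= 65"]

    test_errors, train_errors = updater_wrapper(initial_model, group_functions, group_indicators,
                                                test_x, test_y, train_x, train_y)
    print(test_errors)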