run_demo_updates.py
# This is a wrapper that builds a bunch of (g, h) pairs for different demographic groups and feeds them into the
# updater algorithm so as to simulate a bunch of bounty hunters.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

import model
import updater
import verifier


# building the bounty hunters' models_to_update
def bounty_hunter_models(x, y, group_functions, dt_depth, classifier):
    # learn the indices first, since this is an inefficient operation
    indices = [x.apply(g, axis=1) == 1 for g in group_functions]
    # then pull the particular rows from the dataframe
    training_xs = [x[i] for i in indices]
    training_ys = [y[i] for i in indices]
    models = []
    for i in range(len(training_xs)):
        if classifier == "Logistic Regression":
            log_reg = LogisticRegression(penalty='none', max_iter=2000)  # note: newer scikit-learn spells this penalty=None
            log_reg.fit(training_xs[i], training_ys[i])
            models.append(log_reg.predict)
        elif classifier == "Decision Tree":
            dt = DecisionTreeClassifier(max_depth=dt_depth, random_state=0)  # setting random state for replicability
            dt.fit(training_xs[i], training_ys[i])
            models.append(dt.predict)
    return models
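

# Illustrative only (not part of the original script): each g in group_functions is expected to be
# a row-level predicate returning 1 for rows in its group and 0 otherwise, so that
# x.apply(g, axis=1) == 1 above selects exactly that group's rows. The "age" column below is an
# assumed example, not necessarily a column in this repo's dataset.
def _example_group_function(row):
    return 1 if row["age"] >= 65 else 0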
# run all the different updates
def run_updates(initial_model, group_functions, models, group_indicators, test_x, test_y, train_x, train_y):
    # stick the gs and hs into a form that the updater accepts
    bounty_hunters = [[group_functions[i], models[i], group_indicators[i]] for i in range(len(group_functions))]
    all_groups = [lambda x: 1] + group_functions
    print("Building initial model")
    # build the initial model
    f = model.PointerDecisionList(initial_model.predict, all_groups)
    f.test_errors[0] = updater.measure_all_group_errors(f, all_groups, test_x, test_y)
    f.train_errors[0] = updater.measure_all_group_errors(f, all_groups, train_x, train_y)
    # run the updater
    i = 0
    for b in bounty_hunters:
        print("Running on group %s/%s" % (i + 1, len(bounty_hunters)))
        print("Group running on %s" % group_indicators[i])
        # check whether this proposed (g, h) pair should be accepted
        if verifier.is_proposed_group_good(f, test_x, test_y, b[1], b[0]):
            # the update step:
            updater.iterative_update(f, b[1], b[0], train_x, train_y, test_x, test_y, b[2], all_groups,
                                     group_indicators)
        # if the update didn't help, just copy the errors and PDLs at this round.
        else:
            print("Group %s is rejected" % group_indicators[i])
            f.num_rounds += 1
            f.track_rejects.append(0)
        i += 1
        print(".....................................................")
    test_errors = pd.DataFrame(f.test_errors)
    train_errors = pd.DataFrame(f.train_errors)
    return [test_errors, train_errors]
###############################################################
# given the initial model and a bunch of group functions, feed them into the updater in a random order
def updater_wrapper(initial_model, group_functions, group_indicators, test_x, test_y, train_x, train_y,
                    classifier="Decision Tree", dt_depth=10):
    # generate the models_to_update using the group functions you built here
    models = bounty_hunter_models(train_x, train_y, group_functions, dt_depth, classifier)
    rng = np.random.default_rng(12345)
    r_indices = np.arange(len(group_functions))
    rng.shuffle(r_indices)
    r_group_functions = [group_functions[i] for i in r_indices]
    r_models = [models[i] for i in r_indices]
    r_group_indicators = [group_indicators[i] for i in r_indices]
    [test_errors, train_errors] = run_updates(initial_model, r_group_functions, r_models, r_group_indicators,
                                              test_x, test_y, train_x, train_y)
    return [test_errors, train_errors]
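

###############################################################
# Minimal usage sketch (not part of the original script). Everything below is an assumed
# example: the CSV path, the "label" column, and the group columns "sex" and "age" are
# placeholders, not the dataset or schema this repo actually ships with.
if __name__ == "__main__":
    from sklearn.model_selection import train_test_split

    data = pd.read_csv("data.csv")  # hypothetical dataset with a binary "label" column
    x, y = data.drop(columns=["label"]), data["label"]
    train_x, test_x, train_y, test_y = train_test_split(x, y, random_state=0)

    # any fitted classifier exposing .predict works as the initial model f
    initial_model = DecisionTreeClassifier(max_depth=5, random_state=0).fit(train_x, train_y)

    # group functions are row-level predicates over dataframe rows; these columns are assumptions
    group_functions = [lambda row: int(row["sex"] == 1),
                       lambda row: int(row["age"] >= 65)]
    group_indicators = ["sex == 1", "age >= 65"]

    test_errors, train_errors = updater_wrapper(initial_model, group_functions, group_indicators,
                                                test_x, test_y, train_x, train_y)
    print(test_errors)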