Skip to content

Commit

Permalink
GridSearch and Reductions Rename (interpretml#91)
Browse files (browse the repository at this point in the history)
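- Make naming in `reductions` match `ExponentiatedGradient`: the `GridSearch` argument `disparity_metric` is renamed to `constraints`, and the base class `ReductionsEstimator` is renamed to `Reduction`.
- Remove the `posterior_predict` and `posterior_predict_proba` methods from `GridSearch` (and from the base `Reduction` class).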
  • Loading branch information
riedgar-ms authored Oct 21, 2019
1 parent 70d95a3 commit 6471620
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 49 deletions.
2 changes: 1 addition & 1 deletion fairlearn/reductions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from .reduction import Reduction # noqa: F401
from .grid_search.grid_search import GridSearch # noqa: F401
from .reductions_estimator import ReductionsEstimator # noqa: F401
from .exponentiated_gradient.exponentiated_gradient import ExponentiatedGradient # noqa: F401
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import logging
import numpy as np
import pandas as pd
from fairlearn.reductions import ReductionsEstimator
from fairlearn.reductions import Reduction
from ._constants import _ACCURACY_MUL, _REGRET_CHECK_START_T, _REGRET_CHECK_INCREASE_T, \
_SHRINK_REGRET, _SHRINK_ETA, _MIN_T, _RUN_LP_STEP, _PRECISION, _INDENTATION
from ._lagrangian import _Lagrangian
Expand Down Expand Up @@ -52,7 +52,7 @@ def _as_dict(self):
}


class ExponentiatedGradient(ReductionsEstimator):
class ExponentiatedGradient(Reduction):
def __init__(self, estimator, constraints, eps=0.01, T=50, nu=None, eta_mul=2.0):
self._estimator = estimator
self._constraints = constraints
Expand Down
36 changes: 15 additions & 21 deletions fairlearn/reductions/grid_search/grid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import pandas as pd

from fairlearn.reductions.reductions_estimator import ReductionsEstimator
from fairlearn.reductions import Reduction
from fairlearn.reductions.grid_search import GridSearchResult
from fairlearn.reductions.moments.moment import Moment, ClassificationMoment
from fairlearn import _KW_SENSITIVE_FEATURES
Expand Down Expand Up @@ -74,7 +74,7 @@ def accumulate_integer_grid(self, index, max_val):
self.accumulate_integer_grid(index+1, max_val-abs(current_value))


class GridSearch(ReductionsEstimator):
class GridSearch(Reduction):
"""Learner to perform a grid search given a blackbox algorithm.
The supplied algorithm must implement a method
fit(X, y, sample_weight=[...])
Expand All @@ -91,16 +91,16 @@ class GridSearch(ReductionsEstimator):

def __init__(self,
estimator,
disparity_metric,
constraints,
selection_rule=TRADEOFF_OPTIMIZATION,
constraint_weight=0.5,
grid_size=10,
grid_limit=2.0,
grid=None):
self.estimator = estimator
if not isinstance(disparity_metric, Moment):
if not isinstance(constraints, Moment):
raise RuntimeError("Unsupported disparity metric")
self.disparity_metric = disparity_metric
self.constraints = constraints

if (selection_rule == TRADEOFF_OPTIMIZATION):
if not (0.0 <= constraint_weight <= 1.0):
Expand Down Expand Up @@ -142,24 +142,24 @@ def fit(self, X, y, **kwargs):
if X_rows != sensitive.shape[0]:
raise RuntimeError(self._MESSAGE_X_SENSITIVE_ROWS)

if isinstance(self.disparity_metric, ClassificationMoment):
if isinstance(self.constraints, ClassificationMoment):
# We have a classification problem
# Need to make sure that y is binary (for now)
unique_labels = np.unique(y_vector)
if not set(unique_labels).issubset({0, 1}):
raise RuntimeError(self._MESSAGE_Y_NOT_BINARY)

# Prep the disparity metric and objective
self.disparity_metric.load_data(X, y_vector, **kwargs)
objective = self.disparity_metric.default_objective()
self.constraints.load_data(X, y_vector, **kwargs)
objective = self.constraints.default_objective()
objective.load_data(X, y_vector, **kwargs)
is_classification_reduction = isinstance(self.disparity_metric, ClassificationMoment)
is_classification_reduction = isinstance(self.constraints, ClassificationMoment)

# Basis information
pos_basis = self.disparity_metric.pos_basis
neg_basis = self.disparity_metric.neg_basis
neg_allowed = self.disparity_metric.neg_basis_present
objective_in_the_span = (self.disparity_metric.default_objective_lambda_vec is not None)
pos_basis = self.constraints.pos_basis
neg_basis = self.constraints.neg_basis
neg_allowed = self.constraints.neg_basis_present
objective_in_the_span = (self.constraints.default_objective_lambda_vec is not None)

if self.grid is None:
grid = _GridGenerator(self.grid_size,
Expand All @@ -175,7 +175,7 @@ def fit(self, X, y, **kwargs):
self.all_results = []
for i in grid.columns:
lambda_vec = grid[i]
weights = self.disparity_metric.signed_weights(lambda_vec)
weights = self.constraints.signed_weights(lambda_vec)
if not objective_in_the_span:
weights = weights + objective.signed_weights()
if is_classification_reduction:
Expand All @@ -191,7 +191,7 @@ def predict_fct(X): return current_estimator.predict(X)
nxt = GridSearchResult(current_estimator,
lambda_vec,
objective.gamma(predict_fct)[0],
self.disparity_metric.gamma(predict_fct))
self.constraints.gamma(predict_fct))
self.all_results.append(nxt)

if self.selection_rule == TRADEOFF_OPTIMIZATION:
Expand All @@ -209,12 +209,6 @@ def predict(self, X):
def predict_proba(self, X):
return self.best_result.predictor.predict_proba(X)

def posterior_predict(self, X):
return [r.predictor.predict(X) for r in self.all_results]

def posterior_predict_proba(self, X):
return [r.predictor.predict_proba(X) for r in self.all_results]

def _make_vector(self, formless, formless_name):
formed_vector = None
if isinstance(formless, list):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the MIT License.


class ReductionsEstimator:
class Reduction:
def fit(self, X, y, **kwargs):
raise NotImplementedError()

Expand All @@ -11,9 +11,3 @@ def predict(self, X):

def predict_proba(self, X):
raise NotImplementedError()

def posterior_predict(self, X):
raise NotImplementedError()

def posterior_predict_proba(self, X):
raise NotImplementedError()
4 changes: 2 additions & 2 deletions notebooks/Grid Search for Binary Classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@
"outputs": [],
"source": [
"first_sweep=GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
" disparity_metric=DemographicParity(),\n",
" constraints=DemographicParity(),\n",
" grid_size=7)\n",
"\n",
"first_sweep.fit(X, Y, sensitive_features=A)"
Expand Down Expand Up @@ -330,7 +330,7 @@
"outputs": [],
"source": [
"second_sweep=GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
" disparity_metric=DemographicParity(),\n",
" constraints=DemographicParity(),\n",
" grid=multiplier_df)\n",
"\n",
"second_sweep.fit(X, Y, sensitive_features=A)"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/Grid Search with Census Data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
"outputs": [],
"source": [
"sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
" disparity_metric=moments.DemographicParity(),\n",
" constraints=moments.DemographicParity(),\n",
" grid_size=71)"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_demographicparity_fair_uneven_populations():
a0_label, a1_label)

target = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
disparity_metric=moments.DemographicParity(),
constraints=moments.DemographicParity(),
grid_size=11)

target.fit(X, Y, sensitive_features=A)
Expand All @@ -69,12 +69,6 @@ def test_demographicparity_fair_uneven_populations():
sample_results = target.all_results[0].predictor.predict(test_X)
assert np.array_equal(sample_results, [1, 0])

all_results = target.posterior_predict(test_X)
assert len(all_results) == 11

all_proba = target.posterior_predict_proba(test_X)
assert len(all_proba) == 11


def test_lambda_vec_zero_unchanged_model():
score_threshold = 0.6
Expand Down Expand Up @@ -104,7 +98,7 @@ def test_lambda_vec_zero_unchanged_model():
grid_df = pd.DataFrame(lagrange_zero_series)

target = GridSearch(estimator,
disparity_metric=moments.DemographicParity(),
constraints=moments.DemographicParity(),
grid=grid_df)
target.fit(X, y, sensitive_features=A)
assert len(target.all_results) == 1
Expand Down Expand Up @@ -143,11 +137,11 @@ def test_can_specify_and_generate_lambda_vecs():
axis=1)

target1 = GridSearch(copy.deepcopy(estimator),
disparity_metric=moments.DemographicParity(),
constraints=moments.DemographicParity(),
grid_size=3)

target2 = GridSearch(copy.deepcopy(estimator),
disparity_metric=moments.DemographicParity(),
constraints=moments.DemographicParity(),
grid=grid_df)

# Try both ways of specifying the Lagrange multipliers
Expand Down
10 changes: 5 additions & 5 deletions test/unit/reductions/grid_search/test_grid_search_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_bgl_unfair():

bgl_square_loss = moments.GroupLossMoment(moments.SquareLoss(-np.inf, np.inf))
target = GridSearch(LinearRegression(),
disparity_metric=bgl_square_loss,
constraints=bgl_square_loss,
grid_size=7)

target.fit(X, Y, sensitive_features=A)
Expand All @@ -64,7 +64,7 @@ def test_bgl_unfair():
best_predict = target.predict(test_X)
assert np.allclose([-1.91764706, 9.61176471], best_predict)

all_predict = target.posterior_predict(test_X)
all_predict = [r.predictor.predict(test_X) for r in target.all_results]
assert np.allclose([[3.2, 11.2],
[-3.47346939, 10.64897959],
[-2.68, 10.12],
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_bgl_unmitigated_same():
grid_df = pd.DataFrame(lagrange_balanced_series)

target = GridSearch(estimator,
disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
grid=grid_df)
target.fit(X, y, sensitive_features=A)

Expand Down Expand Up @@ -141,11 +141,11 @@ def test_bgl_lagrange_specifications():
axis=1)

target1 = GridSearch(copy.deepcopy(estimator),
disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
grid_size=5)

target2 = GridSearch(copy.deepcopy(estimator),
disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
grid=grid_df)

tradeoffs = [0, 0.25, 0.5, 0.75, 1]
Expand Down

0 comments on commit 6471620

Please sign in to comment.