GridSearch and Reductions Rename (interpretml#91)

Make naming in `reductions` match `ExponentiatedGradient`. Remove the `posterior` methods from `GridSearch` (and from the base `Reduction` class).
rihorn2 · Oct 21, 2019 · 6471620 · 6471620
1 parent 70d95a3
commit 6471620
Show file tree

Hide file tree

Showing 8 changed files with 31 additions and 49 deletions.
diff --git a/fairlearn/reductions/__init__.py b/fairlearn/reductions/__init__.py
@@ -1,6 +1,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+from .reduction import Reduction  # noqa: F401
 from .grid_search.grid_search import GridSearch  # noqa: F401
-from .reductions_estimator import ReductionsEstimator  # noqa: F401
 from .exponentiated_gradient.exponentiated_gradient import ExponentiatedGradient  # noqa: F401
diff --git a/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py b/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py
@@ -12,7 +12,7 @@
 import logging
 import numpy as np
 import pandas as pd
-from fairlearn.reductions import ReductionsEstimator
+from fairlearn.reductions import Reduction
 from ._constants import _ACCURACY_MUL, _REGRET_CHECK_START_T, _REGRET_CHECK_INCREASE_T, \
     _SHRINK_REGRET, _SHRINK_ETA, _MIN_T, _RUN_LP_STEP, _PRECISION, _INDENTATION
 from ._lagrangian import _Lagrangian
@@ -52,7 +52,7 @@ def _as_dict(self):
         }
 
 
-class ExponentiatedGradient(ReductionsEstimator):
+class ExponentiatedGradient(Reduction):
     def __init__(self, estimator, constraints, eps=0.01, T=50, nu=None, eta_mul=2.0):
         self._estimator = estimator
         self._constraints = constraints

diff --git a/fairlearn/reductions/grid_search/grid_search.py b/fairlearn/reductions/grid_search/grid_search.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pandas as pd
 
-from fairlearn.reductions.reductions_estimator import ReductionsEstimator
+from fairlearn.reductions import Reduction
 from fairlearn.reductions.grid_search import GridSearchResult
 from fairlearn.reductions.moments.moment import Moment, ClassificationMoment
 from fairlearn import _KW_SENSITIVE_FEATURES
@@ -74,7 +74,7 @@ def accumulate_integer_grid(self, index, max_val):
                 self.accumulate_integer_grid(index+1, max_val-abs(current_value))
 
 
-class GridSearch(ReductionsEstimator):
+class GridSearch(Reduction):
     """Learner to perform a grid search given a blackbox algorithm.
     The supplied algorithm must implement a method
     fit(X, y, sample_weight=[...])
@@ -91,16 +91,16 @@ class GridSearch(ReductionsEstimator):
 
     def __init__(self,
                  estimator,
-                 disparity_metric,
+                 constraints,
                  selection_rule=TRADEOFF_OPTIMIZATION,
                  constraint_weight=0.5,
                  grid_size=10,
                  grid_limit=2.0,
                  grid=None):
         self.estimator = estimator
-        if not isinstance(disparity_metric, Moment):
+        if not isinstance(constraints, Moment):
             raise RuntimeError("Unsupported disparity metric")
-        self.disparity_metric = disparity_metric
+        self.constraints = constraints
 
         if (selection_rule == TRADEOFF_OPTIMIZATION):
             if not (0.0 <= constraint_weight <= 1.0):
@@ -142,24 +142,24 @@ def fit(self, X, y, **kwargs):
         if X_rows != sensitive.shape[0]:
             raise RuntimeError(self._MESSAGE_X_SENSITIVE_ROWS)
 
-        if isinstance(self.disparity_metric, ClassificationMoment):
+        if isinstance(self.constraints, ClassificationMoment):
             # We have a classification problem
             # Need to make sure that y is binary (for now)
             unique_labels = np.unique(y_vector)
             if not set(unique_labels).issubset({0, 1}):
                 raise RuntimeError(self._MESSAGE_Y_NOT_BINARY)
 
         # Prep the disparity metric and objective
-        self.disparity_metric.load_data(X, y_vector, **kwargs)
-        objective = self.disparity_metric.default_objective()
+        self.constraints.load_data(X, y_vector, **kwargs)
+        objective = self.constraints.default_objective()
         objective.load_data(X, y_vector, **kwargs)
-        is_classification_reduction = isinstance(self.disparity_metric, ClassificationMoment)
+        is_classification_reduction = isinstance(self.constraints, ClassificationMoment)
 
         # Basis information
-        pos_basis = self.disparity_metric.pos_basis
-        neg_basis = self.disparity_metric.neg_basis
-        neg_allowed = self.disparity_metric.neg_basis_present
-        objective_in_the_span = (self.disparity_metric.default_objective_lambda_vec is not None)
+        pos_basis = self.constraints.pos_basis
+        neg_basis = self.constraints.neg_basis
+        neg_allowed = self.constraints.neg_basis_present
+        objective_in_the_span = (self.constraints.default_objective_lambda_vec is not None)
 
         if self.grid is None:
             grid = _GridGenerator(self.grid_size,
@@ -175,7 +175,7 @@ def fit(self, X, y, **kwargs):
         self.all_results = []
         for i in grid.columns:
             lambda_vec = grid[i]
-            weights = self.disparity_metric.signed_weights(lambda_vec)
+            weights = self.constraints.signed_weights(lambda_vec)
             if not objective_in_the_span:
                 weights = weights + objective.signed_weights()
             if is_classification_reduction:
@@ -191,7 +191,7 @@ def predict_fct(X): return current_estimator.predict(X)
             nxt = GridSearchResult(current_estimator,
                                    lambda_vec,
                                    objective.gamma(predict_fct)[0],
-                                   self.disparity_metric.gamma(predict_fct))
+                                   self.constraints.gamma(predict_fct))
             self.all_results.append(nxt)
 
         if self.selection_rule == TRADEOFF_OPTIMIZATION:
@@ -209,12 +209,6 @@ def predict(self, X):
     def predict_proba(self, X):
         return self.best_result.predictor.predict_proba(X)
 
-    def posterior_predict(self, X):
-        return [r.predictor.predict(X) for r in self.all_results]
-
-    def posterior_predict_proba(self, X):
-        return [r.predictor.predict_proba(X) for r in self.all_results]
-
     def _make_vector(self, formless, formless_name):
         formed_vector = None
         if isinstance(formless, list):

diff --git a/fairlearn/reductions/reductions_estimator.py → fairlearn/reductions/reduction.py b/fairlearn/reductions/reductions_estimator.py → fairlearn/reductions/reduction.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 
 
-class ReductionsEstimator:
+class Reduction:
     def fit(self, X, y, **kwargs):
         raise NotImplementedError()
 
@@ -11,9 +11,3 @@ def predict(self, X):
 
     def predict_proba(self, X):
         raise NotImplementedError()
-
-    def posterior_predict(self, X):
-        raise NotImplementedError()
-
-    def posterior_predict_proba(self, X):
-        raise NotImplementedError()
diff --git a/notebooks/Grid Search for Binary Classification.ipynb b/notebooks/Grid Search for Binary Classification.ipynb
@@ -188,7 +188,7 @@
    "outputs": [],
    "source": [
     "first_sweep=GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
-    "                       disparity_metric=DemographicParity(),\n",
+    "                       constraints=DemographicParity(),\n",
     "                       grid_size=7)\n",
     "\n",
     "first_sweep.fit(X, Y, sensitive_features=A)"
@@ -330,7 +330,7 @@
    "outputs": [],
    "source": [
     "second_sweep=GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
-    "                        disparity_metric=DemographicParity(),\n",
+    "                        constraints=DemographicParity(),\n",
     "                        grid=multiplier_df)\n",
     "\n",
     "second_sweep.fit(X, Y, sensitive_features=A)"

diff --git a/notebooks/Grid Search with Census Data.ipynb b/notebooks/Grid Search with Census Data.ipynb
@@ -182,7 +182,7 @@
    "outputs": [],
    "source": [
     "sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),\n",
-    "                   disparity_metric=moments.DemographicParity(),\n",
+    "                   constraints=moments.DemographicParity(),\n",
     "                   grid_size=71)"
    ]
   },

diff --git a/test/unit/reductions/grid_search/test_grid_search_demographicparity.py b/test/unit/reductions/grid_search/test_grid_search_demographicparity.py
@@ -52,7 +52,7 @@ def test_demographicparity_fair_uneven_populations():
                                      a0_label, a1_label)
 
     target = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
-                        disparity_metric=moments.DemographicParity(),
+                        constraints=moments.DemographicParity(),
                         grid_size=11)
 
     target.fit(X, Y, sensitive_features=A)
@@ -69,12 +69,6 @@ def test_demographicparity_fair_uneven_populations():
     sample_results = target.all_results[0].predictor.predict(test_X)
     assert np.array_equal(sample_results, [1, 0])
 
-    all_results = target.posterior_predict(test_X)
-    assert len(all_results) == 11
-
-    all_proba = target.posterior_predict_proba(test_X)
-    assert len(all_proba) == 11
-
 
 def test_lambda_vec_zero_unchanged_model():
     score_threshold = 0.6
@@ -104,7 +98,7 @@ def test_lambda_vec_zero_unchanged_model():
     grid_df = pd.DataFrame(lagrange_zero_series)
 
     target = GridSearch(estimator,
-                        disparity_metric=moments.DemographicParity(),
+                        constraints=moments.DemographicParity(),
                         grid=grid_df)
     target.fit(X, y, sensitive_features=A)
     assert len(target.all_results) == 1
@@ -143,11 +137,11 @@ def test_can_specify_and_generate_lambda_vecs():
                         axis=1)
 
     target1 = GridSearch(copy.deepcopy(estimator),
-                         disparity_metric=moments.DemographicParity(),
+                         constraints=moments.DemographicParity(),
                          grid_size=3)
 
     target2 = GridSearch(copy.deepcopy(estimator),
-                         disparity_metric=moments.DemographicParity(),
+                         constraints=moments.DemographicParity(),
                          grid=grid_df)
 
     # Try both ways of specifying the Lagrange multipliers

diff --git a/test/unit/reductions/grid_search/test_grid_search_regression.py b/test/unit/reductions/grid_search/test_grid_search_regression.py
@@ -50,7 +50,7 @@ def test_bgl_unfair():
 
     bgl_square_loss = moments.GroupLossMoment(moments.SquareLoss(-np.inf, np.inf))
     target = GridSearch(LinearRegression(),
-                        disparity_metric=bgl_square_loss,
+                        constraints=bgl_square_loss,
                         grid_size=7)
 
     target.fit(X, Y, sensitive_features=A)
@@ -64,7 +64,7 @@ def test_bgl_unfair():
     best_predict = target.predict(test_X)
     assert np.allclose([-1.91764706,  9.61176471], best_predict)
 
-    all_predict = target.posterior_predict(test_X)
+    all_predict = [r.predictor.predict(test_X) for r in target.all_results]
     assert np.allclose([[3.2, 11.2],
                         [-3.47346939, 10.64897959],
                         [-2.68, 10.12],
@@ -100,7 +100,7 @@ def test_bgl_unmitigated_same():
     grid_df = pd.DataFrame(lagrange_balanced_series)
 
     target = GridSearch(estimator,
-                        disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
+                        constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
                         grid=grid_df)
     target.fit(X, y, sensitive_features=A)
 
@@ -141,11 +141,11 @@ def test_bgl_lagrange_specifications():
                         axis=1)
 
     target1 = GridSearch(copy.deepcopy(estimator),
-                         disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
+                         constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
                          grid_size=5)
 
     target2 = GridSearch(copy.deepcopy(estimator),
-                         disparity_metric=moments.GroupLossMoment(moments.ZeroOneLoss()),
+                         constraints=moments.GroupLossMoment(moments.ZeroOneLoss()),
                          grid=grid_df)
 
     tradeoffs = [0, 0.25, 0.5, 0.75, 1]