From 0fff38037194a4dd277fe0c6555a52415e417b7b Mon Sep 17 00:00:00 2001 From: gaugup <47334368+gaugup@users.noreply.github.com> Date: Thu, 7 Jan 2021 13:26:48 -0800 Subject: [PATCH] Add replication metric computation in MimicExplainer (#364) * Add replication metric computation in MimicExplainer Signed-off-by: Gaurav Gupta * Addressed code review comments Signed-off-by: Gaurav Gupta --- .../interpret_community/common/exception.py | 15 +++++ .../mimic/mimic_explainer.py | 56 +++++++++++++++++-- test/test_mimic_explainer.py | 35 +++++++----- 3 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 python/interpret_community/common/exception.py diff --git a/python/interpret_community/common/exception.py b/python/interpret_community/common/exception.py new file mode 100644 index 00000000..1e827e57 --- /dev/null +++ b/python/interpret_community/common/exception.py @@ -0,0 +1,15 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +"""Defines different types of exceptions that this package can raise.""" + + +class ScenarioNotSupportedException(Exception): + """An exception indicating that some scenario is not supported. + + :param exception_message: A message describing the error. 
+ :type exception_message: str + """ + + _error_code = "Unsupported scenario" diff --git a/python/interpret_community/mimic/mimic_explainer.py b/python/interpret_community/mimic/mimic_explainer.py index f55b61cf..0744db07 100644 --- a/python/interpret_community/mimic/mimic_explainer.py +++ b/python/interpret_community/mimic/mimic_explainer.py @@ -12,8 +12,10 @@ import numpy as np from scipy.sparse import issparse +from sklearn.metrics import accuracy_score, r2_score from ..common.explanation_utils import _order_imp +from ..common.exception import ScenarioNotSupportedException from ..common.model_wrapper import _wrap_model from .._internal.raw_explain.raw_explain_utils import get_datamapper_and_transformed_data, \ transform_with_datamapper @@ -313,14 +315,14 @@ def __init__(self, model, initialization_examples, explainable_model, explainabl self._original_eval_examples = None self._allow_all_transformations = allow_all_transformations - def _get_surrogate_model_predictions(self, evaluation_examples): - """Return the predictions given by the surrogate model. + def _get_transformed_data(self, evaluation_examples): + """Return the transformed data for some evaluation data. :param evaluation_examples: A matrix of feature vector examples (# examples x # features) on which to explain the model's output. If specified, computes feature importance through aggregation. :type evaluation_examples: numpy.array or pandas.DataFrame or scipy.sparse.csr_matrix - :return: predictions of the surrogate model. - :rtype: numpy.array + :return: Transformed data. 
+ :rtype: numpy.array or pandas.DataFrame or scipy.sparse.csr_matrix + """ if self.transformations is not None: _, transformed_evaluation_examples = get_datamapper_and_transformed_data( @@ -329,6 +331,18 @@ def _get_surrogate_model_predictions(self, evaluation_examples): else: transformed_evaluation_examples = evaluation_examples + return transformed_evaluation_examples + + def _get_surrogate_model_predictions(self, evaluation_examples): + """Return the predictions given by the surrogate model. + + :param evaluation_examples: A matrix of feature vector examples (# examples x # features) on which to + explain the model's output. If specified, computes feature importance through aggregation. + :type evaluation_examples: numpy.array or pandas.DataFrame or scipy.sparse.csr_matrix + :return: predictions of the surrogate model. + :rtype: numpy.array + """ + transformed_evaluation_examples = self._get_transformed_data(evaluation_examples) if self.classes is not None and len(self.classes) == 2: index_predictions = _inverse_soft_logit(self.surrogate_model.predict(transformed_evaluation_examples)) actual_predictions = [] @@ -338,6 +352,18 @@ def _get_surrogate_model_predictions(self, evaluation_examples): else: return self.surrogate_model.predict(transformed_evaluation_examples) + def _get_teacher_model_predictions(self, evaluation_examples): + """Return the predictions given by the teacher model. + + :param evaluation_examples: A matrix of feature vector examples (# examples x # features) on which to + explain the model's output. If specified, computes feature importance through aggregation. + :type evaluation_examples: numpy.array or pandas.DataFrame or scipy.sparse.csr_matrix + :return: predictions of the teacher model. 
+ :rtype: numpy.array + """ + transformed_evaluation_examples = self._get_transformed_data(evaluation_examples) + return self.model.predict(transformed_evaluation_examples) + def _supports_categoricals(self, explainable_model): return issubclass(explainable_model, LGBMExplainableModel) @@ -709,3 +735,25 @@ def __setstate__(self, state): """ self.__dict__.update(state) self._logger = logging.getLogger(__name__) + + def _get_surrogate_model_replication_measure(self, training_data): + """Return the metric which tells how well the surrogate model replicates the teacher model. + :param training_data: The data for getting the replication metric. + :type training_data: numpy.array or pandas.DataFrame or iml.datatypes.DenseData or + scipy.sparse.csr_matrix + :return: Metric that tells how well the surrogate model replicates the behavior of teacher model. + :rtype: float + """ + if self.classes is None and training_data.shape[0] == 1: + raise ScenarioNotSupportedException( + "Replication measure for regression surrogate not supported " + "because of single instance in training data") + + surrogate_model_predictions = self._get_surrogate_model_predictions(training_data) + teacher_model_predictions = self._get_teacher_model_predictions(training_data) + + if self.classes is not None: + replication_measure = accuracy_score(teacher_model_predictions, surrogate_model_predictions) + else: + replication_measure = r2_score(teacher_model_predictions, surrogate_model_predictions) + return replication_measure diff --git a/test/test_mimic_explainer.py b/test/test_mimic_explainer.py index 412299de..0684396f 100644 --- a/test/test_mimic_explainer.py +++ b/test/test_mimic_explainer.py @@ -18,6 +18,7 @@ from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split from sys import platform +from interpret_community.common.exception import ScenarioNotSupportedException from interpret_community.common.constants import ShapValuesOutput, ModelTask from 
interpret_community.mimic.models.lightgbm_model import LGBMExplainableModel from interpret_community.mimic.models.linear_model import LinearExplainableModel @@ -396,6 +397,23 @@ def test_explain_raw_feats_regression(self, mimic_explainer): # There should be an explanation for each row assert len(local_explanation.local_importance_values) == num_rows * test_size + def _verify_predictions_and_replication_metric(self, mimic_explainer, data): + predictions_main_model = mimic_explainer._get_teacher_model_predictions(data) + predictions_surrogate_model = mimic_explainer._get_surrogate_model_predictions(data) + replication_score = mimic_explainer._get_surrogate_model_replication_measure(data) + + assert predictions_main_model is not None + assert predictions_surrogate_model is not None + if mimic_explainer.classes is not None: + assert mimic_explainer.classes == np.unique(predictions_main_model).tolist() + assert mimic_explainer.classes == np.unique(predictions_surrogate_model).tolist() + assert replication_score is not None and isinstance(replication_score, float) + + if mimic_explainer.classes is None: + with pytest.raises(ScenarioNotSupportedException): + mimic_explainer._get_surrogate_model_replication_measure( + data[0].reshape(1, len(data[0]))) + def test_explain_model_string_classes(self, mimic_explainer): adult_census_income = retrieve_dataset('AdultCensusIncome.csv', skipinitialspace=True) X = adult_census_income.drop(['income'], axis=1) @@ -433,11 +451,7 @@ def test_explain_model_string_classes(self, mimic_explainer): global_explanation = explainer.explain_global(X.iloc[:1000]) assert global_explanation.method == LINEAR_METHOD - predictions_main_model = model.predict(X_train) - assert classes == np.unique(predictions_main_model).tolist() - - predictions_surrogate_model = explainer._get_surrogate_model_predictions(X.iloc[:1000]) - assert classes == np.unique(predictions_surrogate_model).tolist() + self._verify_predictions_and_replication_metric(explainer, 
X.iloc[:1000]) def test_linear_explainable_model_regression(self, mimic_explainer): num_features = 3 @@ -455,11 +469,7 @@ def test_linear_explainable_model_regression(self, mimic_explainer): global_explanation = explainer.explain_global(x_train) assert global_explanation.method == LINEAR_METHOD - predictions_main_model = model.predict(x_train) - assert predictions_main_model is not None - - predictions_surrogate_model = explainer._get_surrogate_model_predictions(x_train) - assert predictions_surrogate_model is not None + self._verify_predictions_and_replication_metric(explainer, x_train) @pytest.mark.parametrize('if_multiclass', [True, False]) @pytest.mark.parametrize('raw_feature_transformations', [True, False]) @@ -518,10 +528,9 @@ def test_linear_explainable_model_classification(self, mimic_explainer, if_multi assert global_explanation.method == LINEAR_METHOD if if_multiclass: if raw_feature_transformations: - predictions_surrogate_model = explainer._get_surrogate_model_predictions(data_x) + self._verify_predictions_and_replication_metric(explainer, data_x) else: - predictions_surrogate_model = explainer._get_surrogate_model_predictions(encoded_cat_features) - assert classes == np.unique(predictions_surrogate_model).tolist() + self._verify_predictions_and_replication_metric(explainer, encoded_cat_features) def test_dense_wide_data(self, mimic_explainer): # use 6000 rows instead for real performance testing