diff --git a/src/evidently/analyzers/classification_performance_analyzer.py b/src/evidently/analyzers/classification_performance_analyzer.py
index 81525f554d..7fa164c97f 100644
--- a/src/evidently/analyzers/classification_performance_analyzer.py
+++ b/src/evidently/analyzers/classification_performance_analyzer.py
@@ -39,7 +39,7 @@ class ClassificationPerformanceAnalyzerResults(BaseAnalyzerResult):
 
 
 def classification_performance_metrics(
-    target: pd.Series, prediction: pd.Series, target_names: Optional[List[str]]
+    target: pd.Series, prediction: pd.Series, target_names: Optional[Dict[Union[str, int], str]]
 ) -> ClassificationPerformanceMetrics:
     # calculate metrics matrix
     metrics_matrix = metrics.classification_report(target, prediction, output_dict=True)
@@ -52,7 +52,7 @@ def classification_performance_metrics(
     # calculate confusion matrix
     confusion_matrix = metrics.confusion_matrix(target, prediction)
     # get labels from data mapping or get all values kinds from target and prediction columns
-    labels = target_names if target_names else sorted(set(target) | set(prediction))
+    labels = list(target_names.keys()) if target_names else sorted(set(target) | set(prediction))
     confusion_by_classes = calculate_confusion_by_classes(confusion_matrix, labels)
     return ClassificationPerformanceMetrics(
         accuracy=accuracy_score,
@@ -70,7 +70,7 @@ def _calculate_performance_metrics(
     data: pd.DataFrame,
     target_column: Union[str, Sequence[str]],
     prediction_column: Union[str, Sequence[str]],
-    target_names: Optional[List[str]],
+    target_names: Optional[Dict[Union[str, int], str]],
 ) -> ClassificationPerformanceMetrics:
     # remove all rows with infinite and NaN values from the dataset
     target_and_preds = [target_column]
diff --git a/src/evidently/calculations/classification_performance.py b/src/evidently/calculations/classification_performance.py
index 3b4ea169c6..88558ede79 100644
--- a/src/evidently/calculations/classification_performance.py
+++ b/src/evidently/calculations/classification_performance.py
@@ -113,20 +113,13 @@ def get_prediction_data(
             labels=prediction,
         )
 
-    # calculate labels as np.array - for better negative label calculations for binary classification
-    if data_columns.target_names is not None:
-        # if target_names is specified, get labels from it
-        labels = data_columns.target_names.copy()
+    if isinstance(prediction, str) and not is_float_dtype(data[prediction]):
+        # if prediction is not probas, get unique values from it and target
+        labels = np.union1d(data[target].unique(), data[prediction].unique()).tolist()
     else:
-        # if target_names is not specified, try to get labels from target and/or prediction
-        if isinstance(prediction, str) and not is_float_dtype(data[prediction]):
-            # if prediction is not probas, get unique values from it and target
-            labels = np.union1d(data[target].unique(), data[prediction].unique()).tolist()
-
-        else:
-            # if prediction is probas, get unique values from target only
-            labels = data[target].unique().tolist()
+        # if prediction is probas, get unique values from target only
+        labels = data[target].unique().tolist()
 
     # binary classification
     # prediction in mapping is a list of two columns:
diff --git a/src/evidently/calculations/data_drift.py b/src/evidently/calculations/data_drift.py
index 33ce58d771..60d6ab7369 100644
--- a/src/evidently/calculations/data_drift.py
+++ b/src/evidently/calculations/data_drift.py
@@ -203,6 +203,24 @@ def get_one_column_drift(
             reversed(list(map(list, zip(*sorted(current_counts.items(), key=lambda x: str(x[0]))))))
         )
     if column_type != "text":
+        if (
+            column_type == "cat"
+            and dataset_columns.target_names is not None
+            and (
+                column_name == dataset_columns.utility_columns.target
+                or (
+                    isinstance(dataset_columns.utility_columns.prediction, str)
+                    and column_name == dataset_columns.utility_columns.prediction
+                )
+            )
+        ):
+            column_values = np.union1d(current_column.unique(), reference_column.unique())
+            new_values = np.setdiff1d(list(dataset_columns.target_names), column_values)
+            if len(new_values) > 0:
+                raise ValueError(f"Values {new_values} not presented in 'target_names'")
+            else:
+                current_column = current_column.map(dataset_columns.target_names)
+                reference_column = reference_column.map(dataset_columns.target_names)
         current_distribution, reference_distribution = get_distribution_for_column(
             column_type=column_type,
             current=current_column,
diff --git a/src/evidently/metrics/classification_performance/class_balance_metric.py b/src/evidently/metrics/classification_performance/class_balance_metric.py
index 59755d219e..96029b15cb 100644
--- a/src/evidently/metrics/classification_performance/class_balance_metric.py
+++ b/src/evidently/metrics/classification_performance/class_balance_metric.py
@@ -25,10 +25,17 @@ def calculate(self, data: InputData) -> ClassificationClassBalanceResult:
         prediction_name = dataset_columns.utility_columns.prediction
         if target_name is None or prediction_name is None:
             raise ValueError("The columns 'target' and 'prediction' columns should be present")
+        curr_target = data.current_data[target_name]
         ref_target = None
 
         if data.reference_data is not None:
             ref_target = data.reference_data[target_name]
-        plot_data = make_hist_for_cat_plot(data.current_data[target_name], ref_target)
+        target_names = dataset_columns.target_names
+        if target_names is not None:
+            curr_target = curr_target.map(target_names)
+            if ref_target is not None:
+                ref_target = ref_target.map(target_names)
+
+        plot_data = make_hist_for_cat_plot(curr_target, ref_target)
 
         return ClassificationClassBalanceResult(plot_data=plot_data)
diff --git a/src/evidently/metrics/classification_performance/confusion_matrix_metric.py b/src/evidently/metrics/classification_performance/confusion_matrix_metric.py
index c3335ff63e..6c6dcd7a2f 100644
--- a/src/evidently/metrics/classification_performance/confusion_matrix_metric.py
+++ b/src/evidently/metrics/classification_performance/confusion_matrix_metric.py
@@ -1,4 +1,5 @@
 import dataclasses
+from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Union
@@ -21,6 +22,7 @@ class ClassificationConfusionMatrixResult:
     current_matrix: ConfusionMatrix
     reference_matrix: Optional[ConfusionMatrix]
+    target_names: Optional[Dict[Union[str, int], str]] = None
 
 
 class ClassificationConfusionMatrix(ThresholdClassificationMetric[ClassificationConfusionMatrixResult]):
@@ -36,7 +38,9 @@ def __init__(
 
     def calculate(self, data: InputData) -> ClassificationConfusionMatrixResult:
         current_target_data, current_pred = self.get_target_prediction_data(data.current_data, data.column_mapping)
-
+        target_names = data.column_mapping.target_names
+        if target_names is not None and current_pred.prediction_probas is None:
+            target_names = data.column_mapping.target_names
         current_results = calculate_matrix(
             current_target_data,
             current_pred.predictions,
@@ -46,6 +50,7 @@ def calculate(self, data: InputData) -> ClassificationConfusionMatrixResult:
         reference_results = None
         if data.reference_data is not None:
             ref_target_data, ref_pred = self.get_target_prediction_data(data.reference_data, data.column_mapping)
+
             reference_results = calculate_matrix(
                 ref_target_data,
                 ref_pred.predictions,
@@ -55,6 +60,7 @@ def calculate(self, data: InputData) -> ClassificationConfusionMatrixResult:
         return ClassificationConfusionMatrixResult(
             current_matrix=current_results,
             reference_matrix=reference_results,
+            target_names=target_names,
         )
 
@@ -65,7 +71,15 @@ def render_json(self, obj: ClassificationConfusionMatrix) -> dict:
 
     def render_html(self, obj: ClassificationConfusionMatrix) -> List[BaseWidgetInfo]:
         metric_result = obj.get_result()
-        fig = plot_conf_mtrx(metric_result.current_matrix, metric_result.reference_matrix)
+        target_names = metric_result.target_names
+        curr_matrix = metric_result.current_matrix
+        ref_matrix = metric_result.reference_matrix
+        if target_names is not None:
+            curr_matrix.labels = [target_names[x] for x in curr_matrix.labels]
+            if ref_matrix is not None:
+                ref_matrix.labels = [target_names[x] for x in ref_matrix.labels]
+
+        fig = plot_conf_mtrx(curr_matrix, ref_matrix)
         fig.for_each_xaxis(lambda axis: axis.update(title_text="Predicted Value"))
         fig.update_layout(yaxis_title="Actual Value")
         return [header_text(label="Confusion Matrix"), plotly_figure(figure=fig, title="")]
diff --git a/src/evidently/metrics/classification_performance/quality_by_class_metric.py b/src/evidently/metrics/classification_performance/quality_by_class_metric.py
index 1fb5228db2..c899e51cfe 100644
--- a/src/evidently/metrics/classification_performance/quality_by_class_metric.py
+++ b/src/evidently/metrics/classification_performance/quality_by_class_metric.py
@@ -100,8 +100,11 @@ def render_html(self, obj: ClassificationQualityByClass) -> List[BaseWidgetInfo]
         reference_roc_aucs = metric_result.reference_roc_aucs
 
         metrics_frame = pd.DataFrame(current_metrics)
+        names = metrics_frame.columns.tolist()[:-3]
+        if columns.target_names is not None:
+            names = [columns.target_names[int(x)] for x in names]
         z = metrics_frame.iloc[:-1, :-3].values
-        x = columns.target_names if columns.target_names else metrics_frame.columns.tolist()[:-3]
+        x = names
         y = ["precision", "recall", "f1-score"]
         if current_roc_aucs is not None and len(current_roc_aucs) > 2:
             z = np.append(z, [current_roc_aucs], axis=0)
@@ -130,7 +133,7 @@
 
         if reference_metrics is not None:
             ref_metrics_frame = pd.DataFrame(reference_metrics)
             z = ref_metrics_frame.iloc[:-1, :-3].values
-            x = columns.target_names if columns.target_names else metrics_frame.columns.tolist()[:-3]
+            x = names
             y = ["precision", "recall", "f1-score"]
             if current_roc_aucs is not None and len(current_roc_aucs) > 2:
diff --git a/src/evidently/model_monitoring/monitors/classification_performance.py b/src/evidently/model_monitoring/monitors/classification_performance.py
index fc24ac8576..0832c65c55 100644
--- a/src/evidently/model_monitoring/monitors/classification_performance.py
+++ b/src/evidently/model_monitoring/monitors/classification_performance.py
@@ -59,8 +59,8 @@ def _yield_metrics(
         )
 
         # try to move classes names to readable names via ColumnMapping settings
-        if columns.target_names:
-            classes_names = columns.target_names
+        if columns.target_names is not None:
+            classes_names = list(columns.target_names.keys())
 
         else:
             # get classes list from the matrix data
diff --git a/src/evidently/model_monitoring/monitors/prob_classification_performance.py b/src/evidently/model_monitoring/monitors/prob_classification_performance.py
index ab6e7038df..d3e267ea08 100644
--- a/src/evidently/model_monitoring/monitors/prob_classification_performance.py
+++ b/src/evidently/model_monitoring/monitors/prob_classification_performance.py
@@ -63,8 +63,8 @@ def _yield_metrics(
         )
 
         # try to move classes names to readable names via ColumnMapping settings
-        if columns.target_names:
-            classes_names = columns.target_names
+        if columns.target_names is not None:
+            classes_names = list(columns.target_names.keys())
 
         else:
             # get classes list from the matrix data
diff --git a/src/evidently/pipeline/column_mapping.py b/src/evidently/pipeline/column_mapping.py
index 61c8f25f58..b8965e7df2 100644
--- a/src/evidently/pipeline/column_mapping.py
+++ b/src/evidently/pipeline/column_mapping.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Sequence
@@ -19,7 +20,7 @@ class ColumnMapping:
     numerical_features: Optional[List[str]] = None
     categorical_features: Optional[List[str]] = None
     datetime_features: Optional[List[str]] = None
-    target_names: Optional[List[str]] = None
+    target_names: Optional[Dict[Union[str, int], str]] = None
     task: Optional[str] = None
     pos_label: Optional[Union[str, int]] = 1
     text_features: Optional[List[str]] = None
diff --git a/src/evidently/utils/data_operations.py b/src/evidently/utils/data_operations.py
index 50e83f64f5..974f339b69 100644
--- a/src/evidently/utils/data_operations.py
+++ b/src/evidently/utils/data_operations.py
@@ -45,7 +45,7 @@ class DatasetColumns:
     cat_feature_names: List[str]
     text_feature_names: List[str]
     datetime_feature_names: List[str]
-    target_names: Optional[List[str]]
+    target_names: Optional[Dict[Union[str, int], str]]
     task: Optional[str]
 
     def as_dict(self) -> Dict[str, Union[str, Optional[List[str]], Dict]]:
diff --git a/src/evidently/utils/data_preprocessing.py b/src/evidently/utils/data_preprocessing.py
index bffe21c01f..831dded386 100644
--- a/src/evidently/utils/data_preprocessing.py
+++ b/src/evidently/utils/data_preprocessing.py
@@ -77,7 +77,7 @@ class DataDefinition:
     _datetime_column: Optional[ColumnDefinition]
     _task: Optional[str]
-    _classification_labels: Optional[Sequence[str]]
+    _classification_labels: Optional[Dict[Union[str, int], str]]
 
     def __init__(
         self,
@@ -87,7 +87,7 @@ def __init__(
         id_column: Optional[ColumnDefinition],
        datetime_column: Optional[ColumnDefinition],
         task: Optional[str],
-        classification_labels: Optional[Sequence[str]],
+        classification_labels: Optional[Dict[Union[str, int], str]],
     ):
         self._columns = {column.column_name: column for column in columns}
         self._id_column = id_column
@@ -132,7 +132,7 @@ def get_datetime_column(self) -> Optional[ColumnDefinition]:
     def task(self) -> Optional[str]:
         return self._task
 
-    def classification_labels(self) -> Optional[Sequence[str]]:
+    def classification_labels(self) -> Optional[Dict[Union[str, int], str]]:
         return self._classification_labels
 
@@ -162,7 +162,6 @@ def _process_column(
 def _prediction_column(
     prediction: Optional[Union[str, int, Sequence[int], Sequence[str]]],
     target_type: Optional[ColumnType],
-    target_names: Optional[List[str]],
     task: Optional[str],
     data: _InputData,
     mapping: Optional[ColumnMapping] = None,
@@ -193,9 +192,6 @@
             return PredictionColumns(prediction_probas=[ColumnDefinition(prediction, prediction_type)])
         return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type))
     if isinstance(prediction, list):
-        if target_names is not None:
-            if prediction != target_names:
-                raise ValueError("List of prediction columns should be equal to target_names if both set")
         presence = [_get_column_presence(column, data) for column in prediction]
         if all([item == ColumnPresenceState.Missing for item in presence]):
             return None
@@ -224,7 +220,7 @@ def create_data_definition(
         prediction_columns = _prediction_column(
             mapping.prediction,
             target_column.column_type if target_column is not None else None,
-            mapping.target_names,
+            # mapping.target_names,
             mapping.task,
             data,
             mapping,
diff --git a/tests/analyzers/test_classification_performance_analyzer.py b/tests/analyzers/test_classification_performance_analyzer.py
index 2512e48ae7..4731fd7c57 100644
--- a/tests/analyzers/test_classification_performance_analyzer.py
+++ b/tests/analyzers/test_classification_performance_analyzer.py
@@ -266,19 +266,19 @@ def test_classification_performance_metrics_with_reference_data(
         # simple target names mapping
         (
             pd.DataFrame({"target": [1, 0, 1, 1, 0, 1], "prediction": [1, 1, 0, 1, 0, 1]}),
-            column_mapping.ColumnMapping(target_names=["false", "true"]),
+            column_mapping.ColumnMapping(target_names={0: "false", 1: "true"}),
         ),
         # test with mapping for target and prediction and target names
         (
             pd.DataFrame({"another_target": [1, 0, 1, 1, 0, 1], "another_prediction": [1, 1, 0, 1, 0, 1]}),
             column_mapping.ColumnMapping(
-                target="another_target", prediction="another_prediction", target_names=["false", "true"]
+                target="another_target", prediction="another_prediction", target_names={0: "false", 1: "true"}
             ),
         ),
         # second class is in prediction column only
         (
             pd.DataFrame({"another_target": [0, 0, 0, 0, 0], "prediction": [0, 1, 0, 0, 0]}),
-            column_mapping.ColumnMapping(target="another_target", target_names=["false", "true"]),
+            column_mapping.ColumnMapping(target="another_target", target_names={0: "false", 1: "true"}),
         ),
     ),
 )
@@ -292,11 +292,11 @@ def test_classification_analyser_with_target_names(
         current_data=None,
         column_mapping=data_mapping,
     )
-    assert result.columns.target_names == ["false", "true"]
+    assert result.columns.target_names == {0: "false", 1: "true"}
     # target_names now changes labels for confusion matrix only
     assert "0" in result.reference_metrics.metrics_matrix
     assert "1" in result.reference_metrics.metrics_matrix
-    assert result.reference_metrics.confusion_matrix.labels == ["false", "true"]
+    assert result.reference_metrics.confusion_matrix.labels == [0, 1]
 
 
 @pytest.mark.parametrize(
diff --git a/tests/analyzers/test_data_drift_analyzer.py b/tests/analyzers/test_data_drift_analyzer.py
index d5c12f5575..e87533aeea 100644
--- a/tests/analyzers/test_data_drift_analyzer.py
+++ b/tests/analyzers/test_data_drift_analyzer.py
@@ -49,7 +49,6 @@ def test_data_drift_analyzer_as_dict_format(data_drift_analyzer: DataDriftAnalyz
     data_columns = ColumnMapping()
     data_columns.numerical_features = ["numerical_feature_1", "numerical_feature_2"]
     data_columns.categorical_features = ["categorical_feature_1", "categorical_feature_2"]
-    data_columns.target_names = ["drift_target"]
     result = data_drift_analyzer.calculate(test_data[:2], test_data, data_columns)
     assert result.options is not None
     assert result.columns is not None
@@ -64,7 +63,6 @@
     assert "numerical_feature_3" not in result.metrics.drift_by_columns
 
     # check data drift results
-    assert result.columns.target_names == ["drift_target"]
     assert result.metrics.dataset_drift is True
 
 
diff --git a/tests/model_profile/sections/test_data_drift_profile_section.py b/tests/model_profile/sections/test_data_drift_profile_section.py
index d0bbb06dc7..012c3c9e42 100644
--- a/tests/model_profile/sections/test_data_drift_profile_section.py
+++ b/tests/model_profile/sections/test_data_drift_profile_section.py
@@ -33,7 +33,6 @@ def test_data_drift_profile_section_with_calculated_results():
     data_columns = ColumnMapping(
         numerical_features=["numerical_feature"],
         categorical_features=["categorical_feature"],
-        target_names=["drift_target_result"],
     )
 
     data_drift_profile_section_result = calculate_section_results(
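
Usage note (reviewer sketch, not part of the diff): with this change, target_names in ColumnMapping becomes a dict that maps the raw class values found in the target/prediction columns to human-readable labels, instead of a positional list. The minimal sketch below mirrors the updated test fixtures; the toy DataFrame is hypothetical and only illustrates the new call style.

import pandas as pd

from evidently.pipeline.column_mapping import ColumnMapping

# hypothetical toy frame, shaped like the fixtures in the updated tests
data = pd.DataFrame({"target": [1, 0, 1, 1, 0, 1], "prediction": [1, 1, 0, 1, 0, 1]})

# before this PR: target_names=["false", "true"]  (positional list)
# after this PR:  keys are the raw class values, values are the display names
mapping = ColumnMapping(target_names={0: "false", 1: "true"})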