Update Distributed components and integrate them with FT pipelines and Model Eval fixes #3202

Merged 13 commits on Aug 2, 2024
@@ -488,9 +488,9 @@ outputs:
description: output folder containing _best_ finetuned model in mlflow format.
mode: rw_mount

# evaluation_result:
# type: uri_folder
# description: Test Data Evaluation Results
evaluation_result:
type: uri_folder
description: Test Data Evaluation Results

jobs:
ft_nlp_common_validation:
@@ -627,34 +627,30 @@ jobs:
# converted_model: '${{parent.jobs.chat_completion_finetune.outputs.mlflow_model_folder}}'
outputs:
mlflow_model_folder: '${{parent.outputs.mlflow_model_folder}}'
# model_prediction:
# type: command
# component: azureml:model_prediction:0.0.21
# compute: '${{parent.inputs.compute_model_evaluation}}'
# resources:
# instance_type: '${{parent.inputs.instance_type_model_evaluation}}'
# inputs:
# task: chat-completion
# test_data: '${{parent.jobs.chat_completion_datapreprocess.outputs.output_dir}}'
# label_column_name: ''
# input_column_names: "''"
# batch_size: '${{parent.inputs.per_device_train_batch_size}}'
# device: auto
# mlflow_model: '${{parent.jobs.chat_completion_model_converter.outputs.mlflow_model_folder}}'
# compute_metrics:
# type: command
# component: azureml:compute_metrics:0.0.21
# compute: '${{parent.inputs.compute_model_evaluation}}'
# resources:
# instance_type: '${{parent.inputs.instance_type_model_evaluation}}'
# inputs:
# task: chat-completion
# ground_truth: '${{parent.jobs.model_prediction.outputs.ground_truth}}'
# ground_truth_column_name: '${{parent.inputs.answers_key}}'
# prediction: '${{parent.jobs.model_prediction.outputs.predictions}}'
# prediction_column_name: predictions
# prediction_probabilities: '${{parent.jobs.model_prediction.outputs.prediction_probabilities}}'
# evaluation_config: '${{parent.inputs.evaluation_config}}'
# evaluation_config_params: '${{parent.inputs.evaluation_config_params}}'
# outputs:
# evaluation_result: '${{parent.outputs.evaluation_result}}'
model_prediction:
type: command
component: azureml:model_prediction_with_container:0.0.2
compute: '${{parent.inputs.compute_model_evaluation}}'
resources:
instance_type: '${{parent.inputs.instance_type_model_evaluation}}'
inputs:
task: chat-completion
test_data: '${{parent.jobs.chat_completion_datapreprocess.outputs.output_dir}}'
label_column_name: messages
mlflow_model: '${{parent.jobs.chat_completion_model_converter.outputs.mlflow_model_folder}}'
evaluation_config_params: '${{parent.inputs.evaluation_config_params}}'
compute_metrics:
type: command
component: azureml:compute_metrics:0.0.31
compute: '${{parent.inputs.compute_model_evaluation}}'
resources:
instance_type: '${{parent.inputs.instance_type_model_evaluation}}'
inputs:
task: chat-completion
ground_truth: '${{parent.jobs.model_prediction.outputs.ground_truth}}'
prediction: '${{parent.jobs.model_prediction.outputs.predictions}}'
prediction_probabilities: '${{parent.jobs.model_prediction.outputs.prediction_probabilities}}'
evaluation_config: '${{parent.inputs.evaluation_config}}'
evaluation_config_params: '${{parent.inputs.evaluation_config_params}}'
outputs:
evaluation_result: '${{parent.outputs.evaluation_result}}'
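For context, a minimal sketch (not part of this diff) of how a consumer could resolve the bumped components from the azureml registry with the azure-ai-ml SDK. The credential choice and variable names are assumptions; the version pins mirror the specs in this PR.

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Client scoped to the shared "azureml" registry rather than a single workspace.
registry_client = MLClient(credential=DefaultAzureCredential(), registry_name="azureml")

# Version pins match the pipeline specs above.
prediction_component = registry_client.components.get(
    name="model_prediction_with_container", version="0.0.2")
metrics_component = registry_client.components.get(
    name="compute_metrics", version="0.0.31")
print(prediction_component.display_name, metrics_component.display_name)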
@@ -667,7 +667,7 @@ jobs:
mlflow_model_folder: '${{parent.outputs.mlflow_model_folder}}'
model_prediction:
type: command
component: azureml:model_prediction:0.0.30
component: azureml:model_prediction_with_container:0.0.2
compute: '${{parent.inputs.compute_model_evaluation}}'
resources:
instance_type: '${{parent.inputs.instance_type_model_evaluation}}'
@@ -676,10 +676,7 @@
test_data: '${{parent.jobs.text_generation_datapreprocess.outputs.output_dir}}'
label_column_name: '${{parent.inputs.ground_truth_key}}'
input_column_names: '${{parent.inputs.text_key}}'
batch_size: '${{parent.inputs.per_device_train_batch_size}}'
device: auto
mlflow_model: '${{parent.jobs.text_generation_model_converter.outputs.mlflow_model_folder}}'
evaluation_config: '${{parent.inputs.evaluation_config}}'
mlflow_model: '${{parent.jobs.ft_nlp_model_converter.outputs.mlflow_model_folder}}'
evaluation_config_params: '${{parent.inputs.evaluation_config_params}}'
compute_metrics:
type: command
@@ -3,7 +3,7 @@ name: compute_metrics
display_name: Compute Metrics
description: Calculate model performance metrics, given ground truth and prediction data.

version: 0.0.30
version: 0.0.31
type: command
tags:
type: evaluation
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: model_prediction_with_container
version: 0.0.1
version: 0.0.2
type: command
display_name: Distributed Model Prediction
description: "Optimized Distributed inference component for LLMs."
@@ -69,7 +69,7 @@ outputs:


code: ../../src_distributed
environment: azureml://registries/azureml/environments/foundation-model-inference/versions/42
environment: azureml://registries/azureml/environments/foundation-model-inference/versions/46
command: >-
python download_extra_dependency.py
--mlflow-model '${{inputs.mlflow_model}}' ;
@@ -3,7 +3,7 @@ name: model_prediction
display_name: Model Prediction
description: Generate predictions on a given mlflow model for supported tasks.

version: 0.0.30
version: 0.0.31
type: command
tags:
type: evaluation
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: model_evaluation_pipeline
version: 0.0.30
version: 0.0.31
type: pipeline
display_name: Model Evaluation Pipeline
description: Pipeline component for model evaluation for supported tasks. \
@@ -87,7 +87,7 @@ outputs:
jobs:
validation_trigger_model_evaluation:
type: command
component: azureml:validation_trigger_model_evaluation:0.0.30
component: azureml:validation_trigger_model_evaluation:0.0.31
compute: '${{parent.inputs.compute_name}}'
resources:
instance_type: '${{parent.inputs.instance_type}}'
@@ -111,7 +111,7 @@ jobs:

model_prediction:
type: command
component: azureml:model_prediction:0.0.30
component: azureml:model_prediction:0.0.31
compute: '${{parent.inputs.compute_name}}'
resources:
instance_type: '${{parent.inputs.instance_type}}'
@@ -128,7 +128,7 @@

compute_metrics:
type: command
component: azureml:compute_metrics:0.0.30
component: azureml:compute_metrics:0.0.31
compute: '${{parent.inputs.compute_name}}'
resources:
instance_type: '${{parent.inputs.instance_type}}'
@@ -3,7 +3,7 @@ name: validation_trigger_model_evaluation
display_name: Validation Trigger Model Evaluation
description: Component for enabling validation of model evaluation pipeline.

version: 0.0.30
version: 0.0.31
type: command
tags:
type: evaluation
43 changes: 36 additions & 7 deletions assets/training/model_evaluation/src/evaluators/evaluators.py
@@ -617,16 +617,45 @@ def evaluate(self, y_test, y_pred, **kwargs):
"""
# dataframe with 2 columns predictions and predictions appended to the conversation
if len(y_pred.columns) > 1:
y_pred_formatted = [
list(item[ChatCompletionConstants.OUTPUT_FULL_CONVERSATION][0].values())[0]
for idx, item in y_pred.iterrows()
]
logger.info("Found more than 1 col. Trying to fetch conversation.")

def check_item(row_item: pd.Series):
"""Convert input data to correct format for metrics package.

Args:
row_item (pd.Series): Single row input from Dataframe
"""
item = row_item.get(ChatCompletionConstants.OUTPUT_FULL_CONVERSATION, None)
if item is None:
return row_item
if isinstance(item, list) and isinstance(item[0], dict):
if item[0].get("role", False) and item[0].get("content", False):
return item
else:
if item[0].get("0", False):
return item["0"]
return item

y_pred_formatted = y_pred.apply(check_item, axis=1).tolist()
# dataframe with just predictions appended to conversations
else:
y_pred_formatted = y_pred.values.tolist()[0]
# if ground truth is passed
y_pred_formatted = y_pred.values.tolist()
# if ground truth is passed
if y_test is not None and len(y_test) > 0:
y_test = y_test.iloc[:, 0].apply(lambda x: [x]).tolist()

def check_y_test(row_item: pd.Series):
"""Convert ground truth into correct format for metrics package.

Args:
row_item (pd.Series): Single row input from Dataframe
"""
item = row_item.get(y_test.columns[0])
if isinstance(item, (str, dict)):
return [item]
if isinstance(item, list):
return item

y_test = y_test.apply(check_y_test, axis=1).tolist()
metrics = compute_metrics(task_type=constants.Tasks.CHAT_COMPLETION, y_pred=y_pred_formatted,
y_test=y_test, **self.metrics_config)
else:
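To make the accepted prediction shapes concrete, a small illustration with made-up rows; the column names come from ChatCompletionConstants, everything else is hypothetical.

import pandas as pd

# Two hypothetical rows in the shapes check_item handles: a plain chat
# transcript, and a {"0": conversation} wrapper around one.
chat_row = [{"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"}]
wrapped_row = [{"0": [{"role": "assistant", "content": "hello"}]}]

y_pred = pd.DataFrame({
    "prediction_appended": [chat_row, wrapped_row],
    "predictions": ["hello", "hello"],
})
# y_pred.apply(check_item, axis=1).tolist() then yields plain conversations
# in the format the azureml-metrics chat-completion evaluator expects.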
@@ -24,7 +24,12 @@ def ner_predictor_for_transformers(X_test, params=None):
Returns:
_type_: _description_
"""
transformers_class._override_model_config(params)
try:
    transformers_class._override_model_config(params)
except AttributeError:
    # Newer mlflow releases replace _override_model_config with
    # _merge_model_config_with_params on _TransformersWrapper.
    logger.info("Using newer version of mlflow.transformers._TransformersWrapper "
                "model config override API")
    transformers_class._merge_model_config_with_params(transformers_class.model_config, params)
from azureml.evaluate.mlflow.hftransformers._task_based_predictors import NERPredictor
predictor = NERPredictor(task_type="token-classification", model=transformers_class.pipeline.model,
tokenizer=transformers_class.pipeline.tokenizer,
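The same version guard could be written as feature detection instead of exception handling; a sketch under the assumption that only these two private mlflow APIs are in play.

# Hypothetical alternative: probe for the older private API before calling it.
if hasattr(transformers_class, "_override_model_config"):
    transformers_class._override_model_config(params)
else:
    transformers_class._merge_model_config_with_params(
        transformers_class.model_config, params)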
28 changes: 25 additions & 3 deletions assets/training/model_evaluation/src_distributed/data_utils.py
@@ -17,9 +17,9 @@
import glob

from mltable import load


from logging_utilities import get_logger
from exceptions import DataLoaderException
from error_definitions import BadLabelColumnData
from logging_utilities import get_logger, get_azureml_exception, log_traceback

logger = get_logger(name=__name__)

@@ -180,6 +180,28 @@ def read_multiple_files(path):
return iter([data])


def prepare_chat_data_from_ft_pipeline(data: pd.DataFrame):
"""Prepare Chat completion data from FT pipeline.

Args:
data: pd.DataFrame
"""
try:
messages_col = data[local_constants.LLM_FT_CHAT_COMPLETION_KEY]
except Exception as e:
logger.error(f"'{local_constants.LLM_FT_CHAT_COMPLETION_KEY}' not found in FT test dataset.")
exception = get_azureml_exception(DataLoaderException, BadLabelColumnData, e, error=repr(e))
log_traceback(exception, logger)
raise exception
X_test, y_test = {local_constants.LLM_FT_CHAT_COMPLETION_KEY: []}, []
for message in messages_col.to_list():
X_test[local_constants.LLM_FT_CHAT_COMPLETION_KEY].append(message[:-1])
y_test.append(message[-1]["content"])
X_test = pd.DataFrame(X_test)
y_test = pd.Series(y_test)
return X_test, y_test.values
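As a worked example (data values made up), a two-turn conversation splits so that X_test keeps every turn but the last, and y_test keeps only the final message's content.

import pandas as pd

data = pd.DataFrame({"messages": [[
    {"role": "user", "content": "What is 2+2?"},
    {"role": "assistant", "content": "4"},
]]})
X_test, y_test = prepare_chat_data_from_ft_pipeline(data)
# X_test["messages"][0] -> [{"role": "user", "content": "What is 2+2?"}]
# y_test[0] -> "4"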


def prepare_data(data, task, label_column_name=None, _has_multiple_output=False, extra_y_test_cols=None):
"""Prepare data.

@@ -4,7 +4,7 @@
"""File to create AzureML Based Exceptions for Model Evaluation."""

from azureml.exceptions import AzureMLException
from constants import ExceptionLiterals
from local_constants import ExceptionLiterals


class ModelEvaluationException(AzureMLException):
@@ -12,6 +12,7 @@
MLTABLE_FILE_NAME = "MLTable"
LLM_FT_PREPROCESS_FILENAME = "preprocess_args.json"
LLM_FT_TEST_DATA_KEY = "raw_test_data_fname"
LLM_FT_CHAT_COMPLETION_KEY = "messages"

# default values
class ModelPath:
@@ -194,4 +195,10 @@ class TASK:
FILTER_MODEL_PREDICTION_PARAMS = [
"tokenizer_config",
"generator_config"
]
]

class ChatCompletionConstants:
"""Chat completion constants."""

OUTPUT = "predictions"
OUTPUT_FULL_CONVERSATION = "prediction_appended"