Bhavanatumma/ta2614748 (Azure#1294)
* [bhavanatumma/ta2614748] - import model modification for OSS models

* [bhavanatumma/ta2614748] - Converting to MLFlow OSS transformers flavor

* [bhavanatumma/ta2614748] - Converting to MLFlow OSS transformers flavor

* [bhavanatumma/ta2614748] - Converting to MLFlow OSS transformers flavor

* [bhavanatumma/ta2614748] - Converting to MLFlow OSS transformers flavor - lint fix

* [bhavanatumma/ta2614748] - Converting to MLFlow OSS transformers flavor - v2, adding conda list approach

* [bhavanatumma/ta2614748] - lint fix

* [bhavanatumma/ta2614748] - review comments

* [bhavanatumma/ta2614748] - current progress

* [bhavanatumma/ta2614748] - current progress v2

* Merge branch 'main' of https://github.com/Azure/azureml-assets into bhavanatumma/ta2614748

# Conflicts: Resolved
#	assets/common/components/mlflow_model_local_validation/spec.yaml
#	assets/training/model_management/components/convert_model_to_mlflow/spec.yaml
#	assets/training/model_management/components/import_model/spec.yaml

* [bhavanatumma/ta2614748] - ref point

* [bhavanatumma/ta2614748] - ref point

* [bhavanatumma/ta2614748] - ref point v2

* [bhavanatumma/ta2614748] - added OSS and HF transformers support variable - model flavor

* [bhavanatumma/ta2614748] - lint fixes

* [bhavanatumma/ta2614748] - test fixes

* [bhavanatumma/ta2614748] - test fixes

* [bhavanatumma/ta2614748] - test fixes

* [bhavanatumma/ta2614748] - review comments and adding Whisper model

* [bhavanatumma/ta2614748] - review comments and adding Whisper model

* [bhavanatumma/ta2614748] - changing version

* [bhavanatumma/ta2614748] - changing versions

* [bhavanatumma/ta2614748] - changing signatures

* [bhavanatumma/ta2614748] - adding new env

* [bhavanatumma/ta2614748] - adding new task text2text

* [bhavanatumma/ta2614748] - adding new task text2text

* [bhavanatumma/ta2614748] - supporting t2t tasks

* [bhavanatumma/ta2614748] - whisper changes

* [bhavanatumma/ta2614748] - whisper changes + convertors

* [bhavanatumma/ta2614748] - fixing tests

* [bhavanatumma/summarization-oss] - version change

* [bhavanatumma/ta2614748] - lint fix

* [bhavanatumma/ta2614748] - signature fix

* [bhavanatumma/ta2614748] - signature fix

* pl

---------

Co-authored-by: bhavanatumma <[email protected]>
BhavanaTumma and bhavanatumma authored Mar 7, 2024
1 parent 1ff1c9d commit a65574a
Showing 11 changed files with 290 additions and 87 deletions.
32 changes: 30 additions & 2 deletions assets/common/components/mlflow_model_local_validation/spec.yaml
@@ -1,7 +1,7 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: mlflow_model_local_validation
version: 0.0.6
version: 0.0.7
type: command

is_deterministic: True
@@ -17,6 +17,7 @@ command: >-
--model-path ${{inputs.model_path}}
$[[--test-data-path ${{inputs.test_data_path}}]]
$[[--column-rename-map "${{inputs.column_rename_map}}"]]
$[[--task-name ${{inputs.task_name}}]]
--output-model-path ${{outputs.mlflow_model_folder}}
inputs:
@@ -35,7 +36,34 @@ inputs:
optional: true
description: |
Provide mapping of dataset column names that should be renamed before inferencing.
eg: col1:ren1; col2:ren2; col3:ren3
eg: col1:ren1; col2:ren2; col3:ren3
task_name:
  description: A Hugging Face task the model was trained on
enum:
- fill-mask
- token-classification
- question-answering
- summarization
- text-generation
- text2text-generation
- text-classification
- translation
- image-classification
- image-classification-multilabel
- image-object-detection
- image-instance-segmentation
- image-to-text
- text-to-image
- text-to-image-inpainting
- image-text-to-text
- image-to-image
- zero-shot-image-classification
- mask-generation
- video-multi-object-tracking
- visual-question-answering
optional: true
type: string

outputs:
mlflow_model_folder:
12 changes: 10 additions & 2 deletions assets/common/src/scripts/validate_model.py
@@ -6,6 +6,7 @@
# Script uses minimal required dependency to process data and load mlflow model.
# Please refrain from adding any additional dependencies that could cause failure while running the script.

import os
import argparse
import json
import mlflow
@@ -20,6 +21,7 @@
KV_COLON_SEP = ":"
ITEM_COMMA_SEP = ","
ITEM_SEMI_COLON_SEP = ";"
SUMMARIZATION = 'summarization'


def get_dict_from_comma_separated_str(dict_str: str, item_sep: str, kv_sep: str) -> Dict:
@@ -65,7 +67,7 @@ def _load_and_prepare_data(test_data_path: Path, mlmodel: Dict, col_rename_map:
elif ext == ".csv":
data = pd.read_csv(test_data_path)
else:
raise Exception("Unsupported file type {ext}")
raise Exception(f"Unsupported file type {ext}")

# translations
if col_rename_map:
@@ -91,12 +93,15 @@ def _load_and_prepare_data(test_data_path: Path, mlmodel: Dict, col_rename_map:
return data


def _load_and_infer_model(model_dir, data):
def _load_and_infer_model(model_dir, data, task_name):
if data is None:
print("Data not shared. Could not infer the loaded model")
return

try:
if task_name == SUMMARIZATION:
os.environ["MLFLOW_HUGGINGFACE_USE_DEVICE_MAP"] = "False"

model = mlflow.pyfunc.load_model(str(model_dir))
except Exception as e:
raise Exception(f"Error in loading mlflow model: {e}")
@@ -114,6 +119,7 @@ def _get_parser():
parser.add_argument("--model-path", type=Path, required=True, help="Model input path")
parser.add_argument("--test-data-path", type=Path, required=False, help="Test dataset path")
parser.add_argument("--column-rename-map", type=str, required=False, help="Column rename map as string")
parser.add_argument("--task-name", type=str, required=False, help="Hugging Face task type")
return parser


@@ -124,6 +130,7 @@ def _get_parser():
model_dir: Path = args.model_path
test_data_path: Path = args.test_data_path
col_rename_map_str: str = args.column_rename_map
task_name: str = args.task_name

mlmodel_file_path = model_dir / MLMODEL_FILE_NAME
conda_env_file_path = model_dir / CONDA_YAML_FILE_NAME
@@ -144,6 +151,7 @@ def _get_parser():
mlmodel=mlmodel_dict,
col_rename_map=col_rename_map,
),
task_name=task_name,
)

print("Local validation completed")
assets/training/model_management/components/convert_model_to_mlflow/spec.yaml
@@ -1,7 +1,7 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: convert_model_to_mlflow
version: 0.0.21
version: 0.0.22
type: command

is_deterministic: True
@@ -19,7 +19,7 @@ command: |
pip_pkg_str="${pip_pkgs[*]}"
if [[ -n "$pip_pkg_str" ]]; then echo "Installing $pip_pkg_str"; pip install $pip_pkg_str; echo "pip installation completed. For any installation error please check above logs"; fi;
echo "Running model conversion ... "
python -u run_model_preprocess.py $[[--model-id ${{inputs.model_id}}]] $[[--task-name ${{inputs.task_name}}]] $[[--model-download-metadata ${{inputs.model_download_metadata}}]] $[[--license-file-path ${{inputs.license_file_path}}]] $[[--hf-config-args "${{inputs.hf_config_args}}"]] $[[--hf-tokenizer-args "${{inputs.hf_tokenizer_args}}"]] $[[--hf-model-args "${{inputs.hf_model_args}}"]] $[[--hf-pipeline-args "${{inputs.hf_pipeline_args}}"]] $[[--hf-config-class ${{inputs.hf_config_class}}]] $[[--hf-model-class ${{inputs.hf_model_class}}]] $[[--hf-tokenizer-class ${{inputs.hf_tokenizer_class}}]] $[[--hf-use-experimental-features ${{inputs.hf_use_experimental_features}}]] $[[--extra-pip-requirements "${{inputs.extra_pip_requirements}}"]] $[[--inference-base-image "${{inputs.inference_base_image}}"]] --model-framework ${{inputs.model_framework}} --model-path ${{inputs.model_path}} --mlflow-model-output-dir ${{outputs.mlflow_model_folder}} --model-import-job-path ${{outputs.model_import_job_path}}
python -u run_model_preprocess.py $[[--model-id ${{inputs.model_id}}]] $[[--task-name ${{inputs.task_name}}]] $[[--model-download-metadata ${{inputs.model_download_metadata}}]] $[[--license-file-path ${{inputs.license_file_path}}]] $[[--hf-config-args "${{inputs.hf_config_args}}"]] $[[--hf-tokenizer-args "${{inputs.hf_tokenizer_args}}"]] $[[--hf-model-args "${{inputs.hf_model_args}}"]] $[[--hf-pipeline-args "${{inputs.hf_pipeline_args}}"]] $[[--hf-config-class ${{inputs.hf_config_class}}]] $[[--hf-model-class ${{inputs.hf_model_class}}]] $[[--hf-tokenizer-class ${{inputs.hf_tokenizer_class}}]] $[[--hf-use-experimental-features ${{inputs.hf_use_experimental_features}}]] $[[--extra-pip-requirements "${{inputs.extra_pip_requirements}}"]] $[[--inference-base-image "${{inputs.inference_base_image}}"]] --model-framework ${{inputs.model_framework}} --model-path ${{inputs.model_path}} --mlflow-model-output-dir ${{outputs.mlflow_model_folder}} --model-import-job-path ${{outputs.model_import_job_path}} --model-flavor ${{inputs.model_flavor}}
echo "Completed model conversion ... "
inputs:
@@ -28,6 +28,15 @@ inputs:
description: Huggingface model id (https://huggingface.co/<model_id>). A required parameter for Huggingface model framework. Can be provided as input here or in model_download_metadata JSON file.
optional: true

model_flavor:
type: string
enum:
- HFTransformersV2
- OSS
default: HFTransformersV2
optional: false
  description: MLflow flavor to which the model is converted.

model_framework:
type: string
enum:
@@ -42,12 +51,12 @@ inputs:
task_name:
type: string
enum:
- text-classification
- fill-mask
- token-classification
- question-answering
- summarization
- text-generation
- text2text-generation
- text-classification
- translation
- image-classification
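The `model_flavor` input added above chooses between AzureML's `HFTransformersV2` flavor and the open-source MLflow `transformers` flavor. The sketch below only illustrates the kind of branching this implies; it is not the component's actual converter (`run_model_preprocess.py` is not part of this excerpt), and the function name, model id, and output path are hypothetical.

```python
import mlflow.transformers
from transformers import pipeline


def convert_to_mlflow(model_id: str, task_name: str, output_dir: str, model_flavor: str) -> None:
    """Branch on the model_flavor input (illustrative only)."""
    if model_flavor == "OSS":
        # Open-source MLflow "transformers" flavor.
        hf_pipeline = pipeline(task=task_name, model=model_id)
        mlflow.transformers.save_model(
            transformers_model=hf_pipeline,
            path=output_dir,
            task=task_name,
        )
    else:  # "HFTransformersV2"
        # AzureML's custom Hugging Face flavor; the component delegates this to
        # its own preprocessing code, which is not reproduced here.
        raise NotImplementedError("Handled by the AzureML-specific converter")


convert_to_mlflow("facebook/bart-large-cnn", "summarization", "./mlflow_model_folder", "OSS")
```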
53 changes: 37 additions & 16 deletions assets/training/model_management/components/import_model/spec.yaml
@@ -4,7 +4,7 @@ type: pipeline
name: import_model
display_name: Import model
description: Import a model into a workspace or a registry
version: 0.0.25
version: 0.0.26

# Pipeline inputs
inputs:
@@ -38,6 +38,15 @@ inputs:
type: string
description: A valid model id for the model source selected. For example you can specify `bert-base-uncased` for importing HuggingFace bert base uncased model. Please specify the complete URL if **GIT** or **AzureBlob** is selected in `model_source`

model_flavor:
type: string
enum:
- HFTransformersV2
- OSS
default: HFTransformersV2
optional: false
    description: MLflow flavor to which the model is converted.

token:
type: string
    description: If set, the token is used to access private models or authenticate the user. For example, a user can get a token for a private HF model by creating a Hugging Face account, accepting the conditions for models that need to be downloaded, and creating an access token from the browser. For more details please visit - https://huggingface.co/docs/hub/security-tokens
@@ -52,18 +61,27 @@ inputs:
task_name:
    description: A Hugging Face task the model was trained on
enum:
- text-classification
- fill-mask
- token-classification
- question-answering
- summarization
- text-generation
- text-classification
- translation
- image-classification
- text-to-image
- zero-shot-image-classification
- video-multi-object-tracking
- fill-mask
- token-classification
- question-answering
- summarization
- text-generation
- text2text-generation
- text-classification
- translation
- image-classification
- image-classification-multilabel
- image-object-detection
- image-instance-segmentation
- image-to-text
- text-to-image
- text-to-image-inpainting
- image-text-to-text
- image-to-image
- zero-shot-image-classification
- mask-generation
- video-multi-object-tracking
- visual-question-answering
optional: true
type: string

@@ -182,7 +200,7 @@ outputs:

jobs:
validation_trigger_import:
component: azureml:validation_trigger_import:0.0.4
component: azureml:validation_trigger_import:0.0.5
compute: ${{parent.inputs.compute}}
resources:
instance_type: '${{parent.inputs.instance_type}}'
@@ -194,6 +212,7 @@ jobs:
model_source: ${{parent.inputs.model_source}}
model_id: ${{parent.inputs.model_id}}
model_version: ${{parent.inputs.model_version}}
model_flavor: ${{parent.inputs.model_flavor}}
model_description: ${{parent.inputs.model_description}}
model_metadata: ${{parent.inputs.model_metadata}}
registry_name: ${{parent.inputs.registry_name}}
@@ -234,12 +253,13 @@ jobs:
type: uri_folder

convert_model_to_mlflow:
component: azureml:convert_model_to_mlflow:0.0.21
component: azureml:convert_model_to_mlflow:0.0.22
compute: ${{parent.inputs.compute}}
resources:
instance_type: '${{parent.inputs.instance_type}}'
inputs:
task_name: ${{parent.inputs.task_name}}
model_flavor: ${{parent.inputs.model_flavor}}
license_file_path: ${{parent.inputs.license_file_path}}
model_download_metadata: ${{parent.jobs.download_model.outputs.model_download_metadata}}
model_path: ${{parent.jobs.download_model.outputs.model_output}}
@@ -259,14 +279,15 @@ jobs:
type: uri_file

mlflow_model_local_validation:
component: azureml:mlflow_model_local_validation:0.0.6
component: azureml:mlflow_model_local_validation:0.0.7
compute: ${{parent.inputs.compute}}
resources:
instance_type: '${{parent.inputs.instance_type}}'
inputs:
model_path: ${{parent.jobs.convert_model_to_mlflow.outputs.mlflow_model_folder}}
test_data_path: ${{parent.inputs.local_validation_test_data}}
column_rename_map: ${{parent.inputs.local_validation_column_rename_map}}
task_name: ${{parent.inputs.task_name}}
outputs:
mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}}

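For callers, the new pipeline-level `model_flavor` input is passed like any other `import_model` input. A hedged sketch using the Azure ML SDK v2 follows; the workspace details, compute target, and model id are placeholders, and it assumes the component is consumed from the shared `azureml` registry at the version bumped in this change (0.0.26).

```python
from azure.ai.ml import MLClient
from azure.ai.ml.dsl import pipeline
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()

# Component assumed to be consumed from the shared "azureml" registry.
registry_client = MLClient(credential, registry_name="azureml")
ws_client = MLClient(
    credential,
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace>",
)

import_model = registry_client.components.get(name="import_model", version="0.0.26")


@pipeline()
def import_hf_model():
    import_model(
        model_id="facebook/bart-large-cnn",   # placeholder model
        model_source="Huggingface",           # value assumed; check the component's model_source enum
        task_name="summarization",
        model_flavor="OSS",                   # new input introduced by this change
        compute="cpu-cluster",                # placeholder compute target
    )


job = ws_client.jobs.create_or_update(import_hf_model(), experiment_name="model-import")
print(job.studio_url)
```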
@@ -4,7 +4,7 @@ type: command
name: validation_trigger_import
display_name: Validation Trigger Import model
description: Component for enabling validation of import pipeline.
version: 0.0.4
version: 0.0.5

# Pipeline inputs
inputs:
@@ -38,6 +38,15 @@ inputs:
type: string
description: A valid model id for the model source selected. For example you can specify `bert-base-uncased` for importing HuggingFace bert base uncased model. Please specify the complete URL if **GIT** or **AzureBlob** is selected in `model_source`

model_flavor:
type: string
enum:
- HFTransformersV2
- OSS
default: HFTransformersV2
optional: false
    description: MLflow flavor to which the model is converted.

## Inputs for the MlFLow conversion
license_file_path:
type: uri_file
@@ -69,6 +69,7 @@ class SupportedNLPTasks(_CustomEnum):
TEXT_GENERATION = "text-generation"
TEXT_CLASSIFICATION = "text-classification"
TRANSLATION = "translation"
TEXT2TEXT_GENERATION = "text2text-generation"


class SupportedASRModelFamily(_CustomEnum):
@@ -88,6 +89,7 @@ class SupportedTasks(_CustomEnum):
TEXT_GENERATION = "text-generation"
TEXT_CLASSIFICATION = "text-classification"
TRANSLATION = "translation"
TEXT2TEXT_GENERATION = "text2text-generation"
# Vision tasks
IMAGE_CLASSIFICATION = "image-classification"
# ASR
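The enum additions register `text2text-generation` as a supported NLP task. Since `_CustomEnum` itself is outside this diff, the sketch below uses a plain `Enum` with the same members to show the membership check these constants enable; treat it as illustrative only.

```python
from enum import Enum


class SupportedNLPTasks(Enum):
    """Illustrative stand-in for the real _CustomEnum-based class."""
    TEXT_GENERATION = "text-generation"
    TEXT_CLASSIFICATION = "text-classification"
    TRANSLATION = "translation"
    TEXT2TEXT_GENERATION = "text2text-generation"  # value added in this change


def is_supported_nlp_task(task_name: str) -> bool:
    return task_name in {task.value for task in SupportedNLPTasks}


assert is_supported_nlp_task("text2text-generation")
```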