diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/README.md b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/README.md new file mode 100644 index 0000000000..73405b4eee --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/README.md @@ -0,0 +1,5 @@ +# ai-ml-automl-dnn-forecasting-gpu Docker Environment + +## Overview +This environment is used by Azure ML AutoML for training models. +It is not intended for use in other scenarios and is subject to change without notice. diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/asset.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/asset.yaml new file mode 100644 index 0000000000..e7ba952933 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/asset.yaml @@ -0,0 +1,11 @@ +name: ai-ml-automl-dnn-forecasting-gpu +version: auto +type: environment +spec: spec.yaml +extra_config: environment.yaml +test: + pytest: + enabled: true + pip_requirements: tests/requirements.txt + tests_dir: tests +categories: ["AutoML", "Training"] \ No newline at end of file diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/context/Dockerfile b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/context/Dockerfile new file mode 100644 index 0000000000..3bbed8c0d9 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/context/Dockerfile @@ -0,0 +1,70 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:{{latest-image-tag}} + +ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-forecasting-gpu +# Prepend path to AzureML conda environment +ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH + +COPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py + +ENV MLFLOW_MODEL_FOLDER="mlflow-model" +# ENV AML_APP_ROOT="/var/mlflow_resources" +# ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py" + +ENV ENABLE_METADATA=true + +# begin conda create +# Create conda environment +RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + python=3.9 \ + # begin conda dependencies + pip=22.1.2 \ + numpy~=1.23.5 \ + scikit-learn=1.5.1 \ + pandas~=1.3.5 \ + scipy=1.10.1 \ + 'psutil>=5.2.2,<6.0.0' \ + tqdm \ + setuptools=72.1.0 \ + wheel=0.44.0 \ + # Install pytorch separately to speed up image build + -c conda-forge -c pytorch -c anaconda && \ + conda install -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + pytorch=1.13.1 \ + pytorch-cuda=11.6 \ + -c pytorch -c nvidia -y && \ + # end conda dependencies + conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH && \ + conda clean -a -y +# end conda create + +# begin pip install +# Install pip dependencies +# GitPython>=3.1.41 is required for https://github.com/advisories/GHSA-2mqj-m65w-jghx and is not available in conda +RUN pip install \ + # begin pypi dependencies + azureml-core=={{latest-pypi-version}} \ + azureml-mlflow=={{latest-pypi-version}} \ + azureml-defaults=={{latest-pypi-version}} \ + azureml-telemetry=={{latest-pypi-version}} \ + azureml-interpret=={{latest-pypi-version}} \ + azureml-responsibleai=={{latest-pypi-version}} \ + azureml-automl-core=={{latest-pypi-version}} \ + azureml-automl-runtime=={{latest-pypi-version}} \ + azureml-train-automl-client=={{latest-pypi-version}} \ + azureml-train-automl-runtime=={{latest-pypi-version}} \ + 
azureml-dataset-runtime=={{latest-pypi-version}} \ + azureml-train-automl=={{latest-pypi-version}} \ + azureml-contrib-automl-dnn-forecasting==1.57.0 \ + 'azure-identity>=1.16.1' \ + 'inference-schema' \ + 'horovod==0.28.1' \ + 'xgboost==1.5.2' \ + 'cryptography>=42.0.5' \ + 'requests>=2.31.0' \ + 'certifi>=2023.07.22' \ + 'spacy==3.7.4' \ + 'GitPython>=3.1.41' \ + 'https://aka.ms/automl-resources/packages/en_core_web_sm-3.7.1.tar.gz' \ + 'py-cpuinfo==5.0.0' + # end pypi dependencies +# end pip install diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/environment.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/environment.yaml new file mode 100644 index 0000000000..1b4312f2b5 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/environment.yaml @@ -0,0 +1,11 @@ +image: + name: azureml/curated/ai-ml-automl-dnn-forecasting-gpu + os: linux + context: + dir: context + dockerfile: Dockerfile + template_files: + - Dockerfile + publish: + location: mcr + visibility: public diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/spec.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/spec.yaml new file mode 100644 index 0000000000..596d070220 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/spec.yaml @@ -0,0 +1,20 @@ +$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json + +description: >- + An environment used by Azure ML AutoML for training models. + +name: "{{asset.name}}" +version: "{{asset.version}}" + +build: + path: "{{image.context.path}}" + dockerfile_path: "{{image.dockerfile.path}}" + +os_type: linux + +tags: + OS: Ubuntu20.04 + Training: "" + Preview: "" + OpenMpi: "4.1.0" + Python: "3.9" diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/automl_sample_test.py b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/automl_sample_test.py new file mode 100644 index 0000000000..b3b4567c36 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/automl_sample_test.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Smoke tests running a job in the ai-ml-automl-dnn-forecasting-gpu environment.""" +# This only tests that scikit-learn training can be done in the environment.
+# After the environment is deployed, notebook tests should be run before updating +# the "Prod" label in the Jasmine service +import os +import time +from pathlib import Path +from azure.ai.ml import command, Input, MLClient +from azure.ai.ml._restclient.models import JobStatus +from azure.ai.ml.entities import Environment, BuildContext +from azure.identity import AzureCliCredential + +BUILD_CONTEXT = Path("../context") +JOB_SOURCE_CODE = "src" +TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 60)) +STD_LOG = Path("artifacts/user_logs/std_log.txt") + + +def test_azure_ai_ml_automl(): + """Tests a sample job using ai-ml-automl-dnn-forecasting-gpu as the environment.""" + this_dir = Path(__file__).parent + + subscription_id = os.environ.get("subscription_id") + resource_group = os.environ.get("resource_group") + workspace_name = os.environ.get("workspace") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + env_name = "ai-ml-automl-dnn-forecasting-gpu" + + env_docker_context = Environment( + build=BuildContext(path=this_dir / BUILD_CONTEXT), + name="ai-ml-automl-dnn-forecasting-gpu", + description="ai-ml-automl-dnn-forecasting-gpu environment created from a Docker context.", + ) + ml_client.environments.create_or_update(env_docker_context) + + # create the command + job = command( + code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored + command="python main.py --diabetes-csv ${{inputs.diabetes}}", + inputs={ + "diabetes": Input( + type="uri_file", + path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv", + ) + }, + environment=f"{env_name}@latest", + compute=os.environ.get("gpu_cluster"), + display_name="sklearn-diabetes-example", + description="A test run of the ai-ml-automl-dnn-forecasting-gpu curated environment", + experiment_name="sklearnExperiment" + ) + + returned_job = ml_client.create_or_update(job) + assert returned_job is not None + + # Poll until final status is reached or timed out + timeout = time.time() + (TIMEOUT_MINUTES * 60) + while time.time() <= timeout: + job = ml_client.jobs.get(returned_job.name) + status = job.status + if status in [JobStatus.COMPLETED, JobStatus.FAILED]: + break + time.sleep(30) # sleep 30 seconds + else: + # Timeout + ml_client.jobs.cancel(returned_job.name) + raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. 
" + f"Last status was {status}.") + + if status == JobStatus.FAILED: + ml_client.jobs.download(returned_job.name) + if STD_LOG.exists(): + print(f"*** BEGIN {STD_LOG} ***") + with open(STD_LOG, "r") as f: + print(f.read(), end="") + print(f"*** END {STD_LOG} ***") + else: + ml_client.jobs.stream(returned_job.name) + + assert status == JobStatus.COMPLETED diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/requirements.txt b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/requirements.txt new file mode 100644 index 0000000000..f333fe4fc6 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/requirements.txt @@ -0,0 +1,2 @@ +azure-ai-ml==1.2.0 +azure.identity==1.10.0 diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/src/main.py b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/src/main.py new file mode 100644 index 0000000000..1fc846111f --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-forecasting-gpu/tests/src/main.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Simple Sklearn Test.""" +# imports +import mlflow +import argparse +import pandas as pd + +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + + +# define functions +def main(args): + """Run and evaluate model.""" + # enable auto logging + mlflow.autolog() + # setup parameters + params = { + "fit_intercept": args.fit_intercept, + "positive": args.positive, + } + # read in data + df = pd.read_csv(args.diabetes_csv) + + # process data + X_train, X_test, y_train, y_test = process_data(df, args.random_state) + + # train model + model = train_model(params, X_train, X_test, y_train, y_test) + + print(model) + + +def process_data(df, random_state): + """Process data.""" + # split dataframe into X and y + X = df.drop(["target"], axis=1) + y = df["target"] + + # train/test split + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=random_state + ) + + # return splits and encoder + return X_train, X_test, y_train, y_test + + +def train_model(params, X_train, X_test, y_train, y_test): + """Train the model.""" + # train model + model = LinearRegression(**params) + model = model.fit(X_train, y_train) + + # return model + return model + + +def parse_args(): + """Parse arguments.""" + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--diabetes-csv", type=str) + parser.add_argument("--random_state", type=int, default=42) + parser.add_argument("--fit_intercept", type=bool, default=True) + parser.add_argument("--positive", type=bool, default=False) + parser.add_argument("--intel-extension", type=bool, default=False) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + if (args.intel_extension): + from sklearnex import patch_sklearn + patch_sklearn() + # run main function + main(args) diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/README.md b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/README.md new file mode 100644 index 0000000000..5c60e3cb52 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/README.md @@ -0,0 +1,5 @@ +# ai-ml-automl-dnn-text-gpu-ptca Docker Environment + 
+## Overview +This environment is used by Azure ML AutoML for training models. +It is not intended for use in other scenarios and is subject to change without notice. diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/asset.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/asset.yaml new file mode 100644 index 0000000000..cdc805976e --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/asset.yaml @@ -0,0 +1,11 @@ +name: ai-ml-automl-dnn-text-gpu-ptca +version: auto +type: environment +spec: spec.yaml +extra_config: environment.yaml +test: + pytest: + enabled: true + pip_requirements: tests/requirements.txt + tests_dir: tests +categories: ["AutoML", "Training"] \ No newline at end of file diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/context/Dockerfile b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/context/Dockerfile new file mode 100644 index 0000000000..07e099d388 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/context/Dockerfile @@ -0,0 +1,26 @@ +FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu117-py310-torch1131:{{latest-image-tag}} + +USER root:root + +RUN pip install 'azureml-automl-dnn-nlp=={{latest-pypi-version}}' +RUN pip install 'azureml-defaults=={{latest-pypi-version}}' + +RUN pip install torch==1.13.1 +RUN pip uninstall -y onnxruntime-training +RUN pip install onnxruntime-training==1.15.1 +RUN pip install torch-ort && \ + TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" python -m torch_ort.configure + +RUN pip install transformers==4.36.2 +RUN pip install optimum==1.16.1 +RUN pip install accelerate==0.26.1 +RUN pip install deepspeed==0.13.1 +RUN pip install numpy==1.22.0 + +# Address vulnerabilities +RUN pip install pyarrow==14.0.1 +RUN pip install aiohttp==3.10.2 +RUN pip install idna==3.7 +RUN pip install requests==2.32.3 + +# dummy number to change when needing to force rebuild without changing the definition: 14 \ No newline at end of file diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/environment.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/environment.yaml new file mode 100644 index 0000000000..437069968f --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/environment.yaml @@ -0,0 +1,11 @@ +image: + name: azureml/curated/ai-ml-automl-dnn-text-gpu-ptca + os: linux + context: + dir: context + dockerfile: Dockerfile + template_files: + - Dockerfile + publish: + location: mcr + visibility: public diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/spec.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/spec.yaml new file mode 100644 index 0000000000..596d070220 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/spec.yaml @@ -0,0 +1,20 @@ +$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json + +description: >- + An environment used by Azure ML AutoML for training models. 
+ +name: "{{asset.name}}" +version: "{{asset.version}}" + +build: + path: "{{image.context.path}}" + dockerfile_path: "{{image.dockerfile.path}}" + +os_type: linux + +tags: + OS: Ubuntu20.04 + Training: "" + Preview: "" + OpenMpi: "4.1.0" + Python: "3.10" diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/automl_sample_test.py b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/automl_sample_test.py new file mode 100644 index 0000000000..b9752ae088 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/automl_sample_test.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Smoke tests running a job in the ai-ml-automl-dnn-text-gpu-ptca environment.""" +# This only tests that scikit-learn training can be done in the environment. +# After the environment is deployed, notebook tests should be run before updating +# the "Prod" label in the Jasmine service +import os +import time +from pathlib import Path +from azure.ai.ml import command, Input, MLClient +from azure.ai.ml._restclient.models import JobStatus +from azure.ai.ml.entities import Environment, BuildContext +from azure.identity import AzureCliCredential + +BUILD_CONTEXT = Path("../context") +JOB_SOURCE_CODE = "src" +TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 45)) +STD_LOG = Path("artifacts/user_logs/std_log.txt") + + +def test_azure_ai_ml_automl(): + """Tests a sample job using ai-ml-automl-dnn-text-gpu-ptca as the environment.""" + this_dir = Path(__file__).parent + + subscription_id = os.environ.get("subscription_id") + resource_group = os.environ.get("resource_group") + workspace_name = os.environ.get("workspace") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + env_name = "ai-ml-automl-dnn-text-gpu-ptca" + + env_docker_context = Environment( + build=BuildContext(path=this_dir / BUILD_CONTEXT), + name="ai-ml-automl-dnn-text-gpu-ptca", + description="ai-ml-automl-dnn-text-gpu-ptca environment created from a Docker context.", + ) + ml_client.environments.create_or_update(env_docker_context) + + # create the command + job = command( + code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored + command="python main.py --diabetes-csv ${{inputs.diabetes}}", + inputs={ + "diabetes": Input( + type="uri_file", + path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv", + ) + }, + environment=f"{env_name}@latest", + compute=os.environ.get("gpu_cluster"), + display_name="sklearn-diabetes-example", + description="A test run of the ai-ml-automl-dnn-text-gpu-ptca curated environment", + experiment_name="sklearnExperiment" + ) + + returned_job = ml_client.create_or_update(job) + assert returned_job is not None + + # Poll until final status is reached or timed out + timeout = time.time() + (TIMEOUT_MINUTES * 60) + while time.time() <= timeout: + job = ml_client.jobs.get(returned_job.name) + status = job.status + if status in [JobStatus.COMPLETED, JobStatus.FAILED]: + break + time.sleep(30) # sleep 30 seconds + else: + # Timeout + ml_client.jobs.cancel(returned_job.name) + raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. 
" + f"Last status was {status}.") + + if status == JobStatus.FAILED: + ml_client.jobs.download(returned_job.name) + if STD_LOG.exists(): + print(f"*** BEGIN {STD_LOG} ***") + with open(STD_LOG, "r") as f: + print(f.read(), end="") + print(f"*** END {STD_LOG} ***") + else: + ml_client.jobs.stream(returned_job.name) + + assert status == JobStatus.COMPLETED diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/requirements.txt b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/requirements.txt new file mode 100644 index 0000000000..f333fe4fc6 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/requirements.txt @@ -0,0 +1,2 @@ +azure-ai-ml==1.2.0 +azure.identity==1.10.0 diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/src/main.py b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/src/main.py new file mode 100644 index 0000000000..1fc846111f --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu-ptca/tests/src/main.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +"""Simple Sklearn Test.""" +# imports +import mlflow +import argparse +import pandas as pd + +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + + +# define functions +def main(args): + """Run and evaluate model.""" + # enable auto logging + mlflow.autolog() + # setup parameters + params = { + "fit_intercept": args.fit_intercept, + "positive": args.positive, + } + # read in data + df = pd.read_csv(args.diabetes_csv) + + # process data + X_train, X_test, y_train, y_test = process_data(df, args.random_state) + + # train model + model = train_model(params, X_train, X_test, y_train, y_test) + + print(model) + + +def process_data(df, random_state): + """Process data.""" + # split dataframe into X and y + X = df.drop(["target"], axis=1) + y = df["target"] + + # train/test split + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=random_state + ) + + # return splits and encoder + return X_train, X_test, y_train, y_test + + +def train_model(params, X_train, X_test, y_train, y_test): + """Train the model.""" + # train model + model = LinearRegression(**params) + model = model.fit(X_train, y_train) + + # return model + return model + + +def parse_args(): + """Parse arguments.""" + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--diabetes-csv", type=str) + parser.add_argument("--random_state", type=int, default=42) + parser.add_argument("--fit_intercept", type=bool, default=True) + parser.add_argument("--positive", type=bool, default=False) + parser.add_argument("--intel-extension", type=bool, default=False) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + if (args.intel_extension): + from sklearnex import patch_sklearn + patch_sklearn() + # run main function + main(args) diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/README.md b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/README.md new file mode 100644 index 0000000000..811c4b6e85 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/README.md @@ -0,0 +1,5 @@ +# ai-ml-automl-dnn-text-gpu Docker Environment + +## Overview +This environment 
is used by Azure ML AutoML for training models. +It is not intended for use in other scenarios and is subject to change without notice. diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/asset.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/asset.yaml new file mode 100644 index 0000000000..0d61bc06ae --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/asset.yaml @@ -0,0 +1,11 @@ +name: ai-ml-automl-dnn-text-gpu +version: auto +type: environment +spec: spec.yaml +extra_config: environment.yaml +test: + pytest: + enabled: true + pip_requirements: tests/requirements.txt + tests_dir: tests +categories: ["AutoML", "Training"] \ No newline at end of file diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/context/Dockerfile b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/context/Dockerfile new file mode 100644 index 0000000000..fb92420eae --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/context/Dockerfile @@ -0,0 +1,73 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:{{latest-image-tag}} + +ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-text-gpu +# Prepend path to AzureML conda environment +ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH + +COPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py + +ENV MLFLOW_MODEL_FOLDER="mlflow-model" +# ENV AML_APP_ROOT="/var/mlflow_resources" +# ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py" + +ENV ENABLE_METADATA=true + +# begin conda create +# Create conda environment +RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + python=3.9 \ + # begin conda dependencies + pip=22.1.2 \ + numpy~=1.23.5 \ + scikit-learn=1.5.1 \ + pandas~=1.3.5 \ + setuptools=72.1.0 \ + wheel=0.44.0 \ + scipy=1.10.1 \ + pybind11=2.10.1 \ + # end conda dependencies + -c conda-forge -c anaconda + +# Ensure additional conda and pip install commands apply to our conda env of interest. +SHELL ["conda", "run", "-p", "$AZUREML_CONDA_ENVIRONMENT_PATH", "/bin/bash", "-c"] + +# Conda installs from extra channels. Separate these to avoid solver OOMs. +RUN conda install pytorch=1.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia && conda clean -a -y +# end conda create + + +# begin pip install +# Install pip dependencies +# Here, we pin sentencepiece since 0.1.98 breaks training. Earlier versions of horovod contain a sev2 vulnerability, +# and earlier versions of tokenizers cause log spam with transformers==4.16.0. +RUN pip install \ + # begin pypi dependencies + azureml-core=={{latest-pypi-version}} \ + azureml-mlflow=={{latest-pypi-version}} \ + azureml-automl-core=={{latest-pypi-version}} \ + azureml-automl-dnn-nlp=={{latest-pypi-version}} \ + azureml-automl-runtime=={{latest-pypi-version}} \ + azureml-train-automl-client=={{latest-pypi-version}} \ + azureml-train-automl-runtime=={{latest-pypi-version}} \ + azureml-defaults=={{latest-pypi-version}} \ + 'azure-identity>=1.16.1' \ + 'horovod==0.28.1' \ + 'sentencepiece==0.1.97' \ + 'cryptography>=42.0.5'\ + 'urllib3>=1.26.18' \ + 'requests>=2.31.0' \ + 'certifi>=2023.07.22' + # end pypi dependencies + +# Separate updates for fixing vulnerabilities. 
+# Doing it separately from pip install above to avoid conflict with other packages +# We should aim for this list to be empty with new and patched releases +# by fixing dependencies in the base packages +RUN pip list && \ + pip install pyarrow==14.0.2 \ + 'transformers[sentencepiece,torch]==4.36.2' \ + aiohttp==3.10.2 + +# end pip install + +# Force new version - 1
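Reviewer note: the smoke test below only exercises scikit-learn, so the pinned DNN stack (torch, transformers, sentencepiece) is never imported at test time. A minimal sanity check that could be run inside the built image to confirm the pins above resolved as intended — a sketch only, not part of this diff:

```python
# Sketch: verify the text-gpu image's pinned DNN stack imports and matches the pins above.
# Not part of this diff; run inside the built container.
import torch
import transformers
import sentencepiece

# Dockerfile pins: pytorch=1.13.1 (conda), transformers[sentencepiece,torch]==4.36.2,
# and sentencepiece==0.1.97 (0.1.98 breaks training, per the comment in the Dockerfile).
assert torch.__version__.startswith("1.13"), torch.__version__
assert transformers.__version__ == "4.36.2", transformers.__version__
assert sentencepiece.__version__ == "0.1.97", sentencepiece.__version__

# Should be True on a GPU node; may be False on the build agent.
print("CUDA available:", torch.cuda.is_available())
```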
diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/environment.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/environment.yaml new file mode 100644 index 0000000000..b00e79065c --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/environment.yaml @@ -0,0 +1,11 @@ +image: + name: azureml/curated/ai-ml-automl-dnn-text-gpu + os: linux + context: + dir: context + dockerfile: Dockerfile + template_files: + - Dockerfile + publish: + location: mcr + visibility: public diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/spec.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/spec.yaml new file mode 100644 index 0000000000..596d070220 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/spec.yaml @@ -0,0 +1,20 @@ +$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json + +description: >- + An environment used by Azure ML AutoML for training models. + +name: "{{asset.name}}" +version: "{{asset.version}}" + +build: + path: "{{image.context.path}}" + dockerfile_path: "{{image.dockerfile.path}}" + +os_type: linux + +tags: + OS: Ubuntu20.04 + Training: "" + Preview: "" + OpenMpi: "4.1.0" + Python: "3.9" diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/automl_sample_test.py b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/automl_sample_test.py new file mode 100644 index 0000000000..26492d8c44 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/automl_sample_test.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Smoke tests running a job in the ai-ml-automl-dnn-text-gpu environment.""" +# This only tests that scikit-learn training can be done in the environment. +# After the environment is deployed, notebook tests should be run before updating +# the "Prod" label in the Jasmine service +import os +import time +from pathlib import Path +from azure.ai.ml import command, Input, MLClient +from azure.ai.ml._restclient.models import JobStatus +from azure.ai.ml.entities import Environment, BuildContext +from azure.identity import AzureCliCredential + +BUILD_CONTEXT = Path("../context") +JOB_SOURCE_CODE = "src" +TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 60)) +STD_LOG = Path("artifacts/user_logs/std_log.txt") + + +def test_azure_ai_ml_automl(): + """Tests a sample job using ai-ml-automl-dnn-text-gpu as the environment.""" + this_dir = Path(__file__).parent + + subscription_id = os.environ.get("subscription_id") + resource_group = os.environ.get("resource_group") + workspace_name = os.environ.get("workspace") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + env_name = "ai-ml-automl-dnn-text-gpu" + + env_docker_context = Environment( + build=BuildContext(path=this_dir / BUILD_CONTEXT), + name="ai-ml-automl-dnn-text-gpu", + description="ai-ml-automl-dnn-text-gpu environment created from a Docker context.", + ) + ml_client.environments.create_or_update(env_docker_context) + + # create the command + job = command( + code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored + command="python main.py --diabetes-csv ${{inputs.diabetes}}", + inputs={ + "diabetes": Input( + type="uri_file", + path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv", + ) + }, + environment=f"{env_name}@latest", + compute=os.environ.get("gpu_cluster"), + display_name="sklearn-diabetes-example", + description="A test run of the ai-ml-automl-dnn-text-gpu curated environment", + experiment_name="sklearnExperiment" + ) + + returned_job = ml_client.create_or_update(job) + assert returned_job is not None + + # Poll until final status is reached or timed out + timeout = time.time() + (TIMEOUT_MINUTES * 60) + while time.time() <= timeout: + job = ml_client.jobs.get(returned_job.name) + status = job.status + if status in [JobStatus.COMPLETED, JobStatus.FAILED]: + break + time.sleep(30) # sleep 30 seconds + else: + # Timeout + ml_client.jobs.cancel(returned_job.name) + raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. " + f"Last status was {status}.") + + if status == JobStatus.FAILED: + ml_client.jobs.download(returned_job.name) + if STD_LOG.exists(): + print(f"*** BEGIN {STD_LOG} ***") + with open(STD_LOG, "r") as f: + print(f.read(), end="") + print(f"*** END {STD_LOG} ***") + else: + ml_client.jobs.stream(returned_job.name) + + assert status == JobStatus.COMPLETED diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/requirements.txt b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/requirements.txt new file mode 100644 index 0000000000..f333fe4fc6 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/requirements.txt @@ -0,0 +1,2 @@ +azure-ai-ml==1.2.0 +azure.identity==1.10.0 diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/src/main.py b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/src/main.py new file mode 100644 index 0000000000..1fc846111f --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-text-gpu/tests/src/main.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. +"""Simple Sklearn Test.""" +# imports +import mlflow +import argparse +import pandas as pd + +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + + +# define functions +def main(args): + """Run and evaluate model.""" + # enable auto logging + mlflow.autolog() + # setup parameters + params = { + "fit_intercept": args.fit_intercept, + "positive": args.positive, + } + # read in data + df = pd.read_csv(args.diabetes_csv) + + # process data + X_train, X_test, y_train, y_test = process_data(df, args.random_state) + + # train model + model = train_model(params, X_train, X_test, y_train, y_test) + + print(model) + + +def process_data(df, random_state): + """Process data.""" + # split dataframe into X and y + X = df.drop(["target"], axis=1) + y = df["target"] + + # train/test split + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=random_state + ) + + # return splits and encoder + return X_train, X_test, y_train, y_test + + +def train_model(params, X_train, X_test, y_train, y_test): + """Train the model.""" + # train model + model = LinearRegression(**params) + model = model.fit(X_train, y_train) + + # return model + return model + + +def parse_args(): + """Parse arguments.""" + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--diabetes-csv", type=str) + parser.add_argument("--random_state", type=int, default=42) + parser.add_argument("--fit_intercept", type=bool, default=True) + parser.add_argument("--positive", type=bool, default=False) + parser.add_argument("--intel-extension", type=bool, default=False) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + if (args.intel_extension): + from sklearnex import patch_sklearn + patch_sklearn() + # run main function + main(args) diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/README.md b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/README.md new file mode 100644 index 0000000000..ed47e04fee --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/README.md @@ -0,0 +1,5 @@ +# ai-ml-automl-dnn-vision-gpu Docker Environment + +## Overview +This environment is used by Azure ML AutoML for training models. +It is not intended for use in other scenarios and is subject to change without notice. 
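Reviewer note: the same poll-sleep-cancel loop is duplicated across all four automl_sample_test.py files in this PR. If these tests are ever consolidated, the loop could live in a shared helper along these lines (a sketch; wait_for_terminal_status is illustrative and not part of this diff):

```python
# Sketch of a shared polling helper for the duplicated loop in the automl_sample_test.py files.
# Illustrative only; not part of this diff.
import time

from azure.ai.ml import MLClient
from azure.ai.ml._restclient.models import JobStatus


def wait_for_terminal_status(ml_client: MLClient, job_name: str,
                             timeout_minutes: int, poll_seconds: int = 30) -> str:
    """Poll a job until COMPLETED or FAILED; cancel it and raise on timeout."""
    deadline = time.time() + timeout_minutes * 60
    status = None
    while time.time() <= deadline:
        status = ml_client.jobs.get(job_name).status
        if status in [JobStatus.COMPLETED, JobStatus.FAILED]:
            return status
        time.sleep(poll_seconds)
    ml_client.jobs.cancel(job_name)
    raise TimeoutError(f"Job took longer than {timeout_minutes} minutes; "
                       f"last status was {status}.")
```

Each test would then reduce to `status = wait_for_terminal_status(ml_client, returned_job.name, TIMEOUT_MINUTES)` followed by the existing log-download branch.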
diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/asset.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/asset.yaml new file mode 100644 index 0000000000..366a7fa43e --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/asset.yaml @@ -0,0 +1,11 @@ +name: ai-ml-automl-dnn-vision-gpu +version: auto +type: environment +spec: spec.yaml +extra_config: environment.yaml +test: + pytest: + enabled: true + pip_requirements: tests/requirements.txt + tests_dir: tests +categories: ["AutoML", "Training"] \ No newline at end of file diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/context/Dockerfile b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/context/Dockerfile new file mode 100644 index 0000000000..a64d6dc64d --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/context/Dockerfile @@ -0,0 +1,74 @@ +FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu121-py310-torch222:{{latest-image-tag}} + + +ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-vision-gpu +# Prepend path to AzureML conda environment +ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH + +COPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py + +ENV MLFLOW_MODEL_FOLDER="mlflow-model" +# ENV AML_APP_ROOT="/var/mlflow_resources" +# ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py" + +# Inference requirements +COPY --from=mcr.microsoft.com/azureml/o16n-base/python-assets:20230419.v1 /artifacts /var/ +RUN /var/requirements/install_system_requirements.sh && \ + cp /var/configuration/rsyslog.conf /etc/rsyslog.conf && \ + cp /var/configuration/nginx.conf /etc/nginx/sites-available/app && \ + ln -sf /etc/nginx/sites-available/app /etc/nginx/sites-enabled/app && \ + rm -f /etc/nginx/sites-enabled/default +ENV SVDIR=/var/runit +ENV WORKER_TIMEOUT=400 +EXPOSE 5001 8883 8888 + +ENV ENABLE_METADATA=true + +# Create conda environment +# begin conda create +RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + python=3.9 \ + # begin conda dependencies + pip=21.3.1 \ + numpy~=1.23.5 \ + libffi=3.3 \ + pycocotools=2.0.4 \ + shap=0.39.0 \ + llvmlite=0.39.1 \ + scipy=1.10.1 \ + setuptools=72.1.0 \ + wheel=0.44.0 \ + tbb=2021.1.1 \ + # end conda dependencies + -c conda-forge -c cerebis && \ + conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH && \ + conda clean -a -y +# end conda create + +# begin pip install + +# Install pip dependencies +RUN pip install \ + # begin pypi dependencies + azureml-mlflow=={{latest-pypi-version}} \ + azureml-dataset-runtime=={{latest-pypi-version}} \ + azureml-telemetry=={{latest-pypi-version}} \ + azureml-responsibleai=={{latest-pypi-version}} \ + azureml-automl-core=={{latest-pypi-version}} \ + azureml-automl-runtime=={{latest-pypi-version}} \ + azureml-train-automl-client=={{latest-pypi-version}} \ + azureml-defaults=={{latest-pypi-version}} \ + azureml-interpret=={{latest-pypi-version}} \ + azureml-train-automl-runtime=={{latest-pypi-version}} \ + azureml-automl-dnn-vision=={{latest-pypi-version}} \ + 'azureml-dataprep>=2.24.4' \ + 'azure-identity>=1.16.1' + # end pypi dependencies + +# Update cryptography and pyarrow for fixing vulnerabilities. Doing it separately from pip install to avoid conflict with other packages +RUN pip install cryptography>=42.0.5 \ + pyarrow==14.0.2 \ + aiohttp>=3.9.4 + +# end pip install +ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
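Reviewer note: since the vulnerability patches above are applied in a second pip pass, it may be worth confirming the earlier azureml-* install did not pull the old versions back in. A hypothetical post-build check — not part of this diff; `packaging` is assumed to be available in the environment:

```python
# Hypothetical check that the second-pass vulnerability patches above took effect.
# Not part of this diff; run inside the built image.
import aiohttp
import cryptography
import pyarrow
from packaging.version import Version

assert pyarrow.__version__ == "14.0.2", pyarrow.__version__
# cryptography and aiohttp are lower-bounded rather than pinned, so check minimums.
assert Version(cryptography.__version__) >= Version("42.0.5"), cryptography.__version__
assert Version(aiohttp.__version__) >= Version("3.9.4"), aiohttp.__version__
```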
diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/environment.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/environment.yaml new file mode 100644 index 0000000000..9146315c31 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/environment.yaml @@ -0,0 +1,11 @@ +image: + name: azureml/curated/ai-ml-automl-dnn-vision-gpu + os: linux + context: + dir: context + dockerfile: Dockerfile + template_files: + - Dockerfile + publish: + location: mcr + visibility: public diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/spec.yaml b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/spec.yaml new file mode 100644 index 0000000000..596d070220 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/spec.yaml @@ -0,0 +1,20 @@ +$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json + +description: >- + An environment used by Azure ML AutoML for training models. + +name: "{{asset.name}}" +version: "{{asset.version}}" + +build: + path: "{{image.context.path}}" + dockerfile_path: "{{image.dockerfile.path}}" + +os_type: linux + +tags: + OS: Ubuntu20.04 + Training: "" + Preview: "" + OpenMpi: "4.1.0" + Python: "3.9" diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/automl_sample_test.py b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/automl_sample_test.py new file mode 100644 index 0000000000..60a1a9e8a7 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/automl_sample_test.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Smoke tests running a job in the ai-ml-automl-dnn-vision-gpu environment.""" +# This only tests that scikit-learn training can be done in the environment. 
+# After the environment is deployed, notebook tests should be run before updating +# the "Prod" label in the Jasmine service +import os +import time +from pathlib import Path +from azure.ai.ml import command, Input, MLClient +from azure.ai.ml._restclient.models import JobStatus +from azure.ai.ml.entities import Environment, BuildContext +from azure.identity import AzureCliCredential + +BUILD_CONTEXT = Path("../context") +JOB_SOURCE_CODE = "src" +TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 60)) +STD_LOG = Path("artifacts/user_logs/std_log.txt") + + +def test_azure_ai_ml_automl(): + """Tests a sample job using ai-ml-automl-dnn-vision-gpu as the environment.""" + this_dir = Path(__file__).parent + + subscription_id = os.environ.get("subscription_id") + resource_group = os.environ.get("resource_group") + workspace_name = os.environ.get("workspace") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + env_name = "ai-ml-automl-dnn-vision-gpu" + + env_docker_context = Environment( + build=BuildContext(path=this_dir / BUILD_CONTEXT), + name="ai-ml-automl-dnn-vision-gpu", + description="ai-ml-automl-dnn-vision-gpu environment created from a Docker context.", + ) + ml_client.environments.create_or_update(env_docker_context) + + # create the command + job = command( + code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored + command="python main.py --diabetes-csv ${{inputs.diabetes}}", + inputs={ + "diabetes": Input( + type="uri_file", + path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv", + ) + }, + environment=f"{env_name}@latest", + compute=os.environ.get("gpu_cluster"), + display_name="sklearn-diabetes-example", + description="A test run of the ai-ml-automl-dnn-vision-gpu curated environment", + experiment_name="sklearnExperiment" + ) + + returned_job = ml_client.create_or_update(job) + assert returned_job is not None + + # Poll until final status is reached or timed out + timeout = time.time() + (TIMEOUT_MINUTES * 60) + while time.time() <= timeout: + job = ml_client.jobs.get(returned_job.name) + status = job.status + if status in [JobStatus.COMPLETED, JobStatus.FAILED]: + break + time.sleep(30) # sleep 30 seconds + else: + # Timeout + ml_client.jobs.cancel(returned_job.name) + raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. " + f"Last status was {status}.") + + if status == JobStatus.FAILED: + ml_client.jobs.download(returned_job.name) + if STD_LOG.exists(): + print(f"*** BEGIN {STD_LOG} ***") + with open(STD_LOG, "r") as f: + print(f.read(), end="") + print(f"*** END {STD_LOG} ***") + else: + ml_client.jobs.stream(returned_job.name) + + assert status == JobStatus.COMPLETED diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/requirements.txt b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/requirements.txt new file mode 100644 index 0000000000..f333fe4fc6 --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/requirements.txt @@ -0,0 +1,2 @@ +azure-ai-ml==1.2.0 +azure.identity==1.10.0 diff --git a/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/src/main.py b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/src/main.py new file mode 100644 index 0000000000..1fc846111f --- /dev/null +++ b/assets/training/automl/environments/ai-ml-automl-dnn-vision-gpu/tests/src/main.py @@ -0,0 +1,87 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. +"""Simple Sklearn Test.""" +# imports +import mlflow +import argparse +import pandas as pd + +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + + +# define functions +def main(args): + """Run and evaluate model.""" + # enable auto logging + mlflow.autolog() + # setup parameters + params = { + "fit_intercept": args.fit_intercept, + "positive": args.positive, + } + # read in data + df = pd.read_csv(args.diabetes_csv) + + # process data + X_train, X_test, y_train, y_test = process_data(df, args.random_state) + + # train model + model = train_model(params, X_train, X_test, y_train, y_test) + + print(model) + + +def process_data(df, random_state): + """Process data.""" + # split dataframe into X and y + X = df.drop(["target"], axis=1) + y = df["target"] + + # train/test split + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=random_state + ) + + # return splits and encoder + return X_train, X_test, y_train, y_test + + +def train_model(params, X_train, X_test, y_train, y_test): + """Train the model.""" + # train model + model = LinearRegression(**params) + model = model.fit(X_train, y_train) + + # return model + return model + + +def parse_args(): + """Parse arguments.""" + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--diabetes-csv", type=str) + parser.add_argument("--random_state", type=int, default=42) + parser.add_argument("--fit_intercept", type=bool, default=True) + parser.add_argument("--positive", type=bool, default=False) + parser.add_argument("--intel-extension", type=bool, default=False) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + if (args.intel_extension): + from sklearnex import patch_sklearn + patch_sklearn() + # run main function + main(args)
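Reviewer note: one caveat in the shared tests/src/main.py used by all four environments: argparse's `type=bool` converts via `bool(str)`, so any non-empty value — including "False" — parses as True. The smoke tests only rely on the defaults, so behavior is unchanged, but if these flags are ever passed explicitly, a converter along these lines would be needed (`str2bool` is illustrative, not part of this diff):

```python
# Illustrative converter for the `type=bool` arguments in tests/src/main.py.
# argparse would call bool("False"), which is True; this parses the text instead.
import argparse


def str2bool(value: str) -> bool:
    """Map common textual booleans to bool."""
    if value.lower() in ("true", "1", "yes"):
        return True
    if value.lower() in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--fit_intercept", type=str2bool, default=True)
parser.add_argument("--positive", type=str2bool, default=False)

print(parser.parse_args(["--fit_intercept", "False"]))
# Namespace(fit_intercept=False, positive=False)
```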