Added automl forecasting, text and vision environments (#3341)
* Added automl forecasting, text and vision environments
jeff-shepherd authored Sep 6, 2024
1 parent c4e647e commit 980a4a9
Showing 32 changed files with 1,135 additions and 0 deletions.
@@ -0,0 +1,5 @@
# ai-ml-automl-dnn-forecasting-gpu Docker Environment

## Overview
This environment is used by Azure ML AutoML for training models.
It is not intended for use in other scenarios and is subject to change without notice.
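For context, curated environments such as this one are addressed by name when a job is submitted with the `azure-ai-ml` SDK. The sketch below is illustrative only and is not part of this commit; the registry URI, compute name, and source path are assumed placeholders.

```python
# Minimal sketch (assumptions: the environment is published to the "azureml" registry;
# "gpu-cluster" and ./src are placeholders, not values from this commit).
from azure.ai.ml import MLClient, command
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(), "<subscription-id>", "<resource-group>", "<workspace>"
)

job = command(
    code="./src",
    command="python train.py",
    # Curated environments are referenced by asset URI; "latest" resolves the newest version.
    environment="azureml://registries/azureml/environments/ai-ml-automl-dnn-forecasting-gpu/labels/latest",
    compute="gpu-cluster",
)
ml_client.create_or_update(job)
```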
@@ -0,0 +1,11 @@
name: ai-ml-automl-dnn-forecasting-gpu
version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["AutoML", "Training"]
@@ -0,0 +1,70 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:{{latest-image-tag}}

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-forecasting-gpu
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

COPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py

ENV MLFLOW_MODEL_FOLDER="mlflow-model"
# ENV AML_APP_ROOT="/var/mlflow_resources"
# ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py"

ENV ENABLE_METADATA=true

# begin conda create
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    python=3.9 \
    # begin conda dependencies
    pip=22.1.2 \
    numpy~=1.23.5 \
    scikit-learn=1.5.1 \
    pandas~=1.3.5 \
    scipy=1.10.1 \
    'psutil>=5.2.2,<6.0.0' \
    tqdm \
    setuptools=72.1.0 \
    wheel=0.44.0 \
    # Install pytorch separately to speed up image build
    -c conda-forge -c pytorch -c anaconda && \
    conda install -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    pytorch=1.13.1 \
    pytorch-cuda=11.6 \
    -c pytorch -c nvidia -y && \
    # end conda dependencies
    conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH && \
    conda clean -a -y
# end conda create

# begin pip install
# Install pip dependencies
# GitPython>=3.1.41 is required for https://github.com/advisories/GHSA-2mqj-m65w-jghx and is not available in conda
RUN pip install \
    # begin pypi dependencies
    azureml-core=={{latest-pypi-version}} \
    azureml-mlflow=={{latest-pypi-version}} \
    azureml-defaults=={{latest-pypi-version}} \
    azureml-telemetry=={{latest-pypi-version}} \
    azureml-interpret=={{latest-pypi-version}} \
    azureml-responsibleai=={{latest-pypi-version}} \
    azureml-automl-core=={{latest-pypi-version}} \
    azureml-automl-runtime=={{latest-pypi-version}} \
    azureml-train-automl-client=={{latest-pypi-version}} \
    azureml-train-automl-runtime=={{latest-pypi-version}} \
    azureml-dataset-runtime=={{latest-pypi-version}} \
    azureml-train-automl=={{latest-pypi-version}} \
    azureml-contrib-automl-dnn-forecasting==1.57.0 \
    'azure-identity>=1.16.1' \
    'inference-schema' \
    'horovod==0.28.1' \
    'xgboost==1.5.2' \
    'cryptography>=42.0.5' \
    'requests>=2.31.0' \
    'certifi>=2023.07.22' \
    'spacy==3.7.4' \
    'GitPython>=3.1.41' \
    'https://aka.ms/automl-resources/packages/en_core_web_sm-3.7.1.tar.gz' \
    'py-cpuinfo==5.0.0'
# end pypi dependencies
# end pip install
@@ -0,0 +1,11 @@
image:
  name: azureml/curated/ai-ml-automl-dnn-forecasting-gpu
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
  publish:
    location: mcr
    visibility: public
@@ -0,0 +1,20 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  An environment used by Azure ML AutoML for training models.
name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu20.04
  Training: ""
  Preview: ""
  OpenMpi: "4.1.0"
  Python: "3.9"
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Smoke tests running a job in the ai-ml-automl-dnn-forecasting-gpu environment."""
# This only tests that scikit-learn training can be done in the environment.
# After the environment is deployed, notebook tests should be run before updating
# the "Prod" label in the Jasmine service.
import os
import time
from pathlib import Path
from azure.ai.ml import command, Input, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

BUILD_CONTEXT = Path("../context")
JOB_SOURCE_CODE = "src"
TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 60))
STD_LOG = Path("artifacts/user_logs/std_log.txt")


def test_azure_ai_ml_automl():
"""Tests a sample job using ai-ml-automl-dnn-forecasting-gpu as the environment."""
this_dir = Path(__file__).parent

subscription_id = os.environ.get("subscription_id")
resource_group = os.environ.get("resource_group")
workspace_name = os.environ.get("workspace")

ml_client = MLClient(
AzureCliCredential(), subscription_id, resource_group, workspace_name
)

env_name = "ai-ml-automl-dnn-forecasting-gpu"

env_docker_context = Environment(
build=BuildContext(path=this_dir / BUILD_CONTEXT),
name="ai-ml-automl-dnn-forecasting-gpu",
description="ai-ml-automl-dnn-forecasting-gpu environment created from a Docker context.",
)
ml_client.environments.create_or_update(env_docker_context)

# create the command
job = command(
code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored
command="python main.py --diabetes-csv ${{inputs.diabetes}}",
inputs={
"diabetes": Input(
type="uri_file",
path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv",
)
},
environment=f"{env_name}@latest",
compute=os.environ.get("gpu_cluster"),
display_name="sklearn-diabetes-example",
description="A test run of the ai-ml-automl-dnn-forecasting-gpu curated environment",
experiment_name="sklearnExperiment"
)

returned_job = ml_client.create_or_update(job)
assert returned_job is not None

# Poll until final status is reached or timed out
timeout = time.time() + (TIMEOUT_MINUTES * 60)
while time.time() <= timeout:
job = ml_client.jobs.get(returned_job.name)
status = job.status
if status in [JobStatus.COMPLETED, JobStatus.FAILED]:
break
time.sleep(30) # sleep 30 seconds
else:
# Timeout
ml_client.jobs.cancel(returned_job.name)
raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}.")

if status == JobStatus.FAILED:
ml_client.jobs.download(returned_job.name)
if STD_LOG.exists():
print(f"*** BEGIN {STD_LOG} ***")
with open(STD_LOG, "r") as f:
print(f.read(), end="")
print(f"*** END {STD_LOG} ***")
else:
ml_client.jobs.stream(returned_job.name)

assert status == JobStatus.COMPLETED
@@ -0,0 +1,2 @@
azure-ai-ml==1.2.0
azure.identity==1.10.0
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Simple Sklearn Test."""
# imports
import mlflow
import argparse
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# define functions
def main(args):
"""Run and evaluate model."""
# enable auto logging
mlflow.autolog()
# setup parameters
params = {
"fit_intercept": args.fit_intercept,
"positive": args.positive,
}
# read in data
df = pd.read_csv(args.diabetes_csv)

# process data
X_train, X_test, y_train, y_test = process_data(df, args.random_state)

# train model
model = train_model(params, X_train, X_test, y_train, y_test)

print(model)


def process_data(df, random_state):
"""Process data."""
# split dataframe into X and y
X = df.drop(["target"], axis=1)
y = df["target"]

# train/test split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=random_state
)

# return splits and encoder
return X_train, X_test, y_train, y_test


def train_model(params, X_train, X_test, y_train, y_test):
"""Train the model."""
# train model
model = LinearRegression(**params)
model = model.fit(X_train, y_train)

# return model
return model


def parse_args():
"""Parse arguments."""
# setup arg parser
parser = argparse.ArgumentParser()

# add arguments
parser.add_argument("--diabetes-csv", type=str)
parser.add_argument("--random_state", type=int, default=42)
parser.add_argument("--fit_intercept", type=bool, default=True)
parser.add_argument("--positive", type=bool, default=False)
parser.add_argument("--intel-extension", type=bool, default=False)

# parse args
args = parser.parse_args()

# return args
return args


# run script
if __name__ == "__main__":
# parse args
args = parse_args()
if (args.intel_extension):
from sklearnex import patch_sklearn
patch_sklearn()
# run main function
main(args)
@@ -0,0 +1,5 @@
# ai-ml-automl-dnn-text-gpu-ptca Docker Environment

## Overview
This environment is used by Azure ML AutoML for training models.
It is not intended for use in other scenarios and is subject to change without notice.
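As with the forecasting environment above, the image can be registered from its Docker build context for testing. A minimal sketch mirroring the smoke-test pattern shown earlier in this commit (the workspace identifiers and context path are placeholders):

```python
# Minimal sketch (illustrative): register this environment from its Docker context,
# following the same pattern as the forecasting smoke test above.
from pathlib import Path

from azure.ai.ml import MLClient
from azure.ai.ml.entities import BuildContext, Environment
from azure.identity import AzureCliCredential

ml_client = MLClient(
    AzureCliCredential(), "<subscription-id>", "<resource-group>", "<workspace>"
)

env = Environment(
    build=BuildContext(path=Path("context")),  # folder containing this Dockerfile
    name="ai-ml-automl-dnn-text-gpu-ptca",
    description="ai-ml-automl-dnn-text-gpu-ptca environment created from a Docker context.",
)
ml_client.environments.create_or_update(env)
```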
@@ -0,0 +1,11 @@
name: ai-ml-automl-dnn-text-gpu-ptca
version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["AutoML", "Training"]
@@ -0,0 +1,26 @@
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu117-py310-torch1131:{{latest-image-tag}}

USER root:root

RUN pip install 'azureml-automl-dnn-nlp=={{latest-pypi-version}}'
RUN pip install 'azureml-defaults=={{latest-pypi-version}}'

RUN pip install torch==1.13.1
RUN pip uninstall -y onnxruntime-training
RUN pip install onnxruntime-training==1.15.1
RUN pip install torch-ort && \
    TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" python -m torch_ort.configure

RUN pip install transformers==4.36.2
RUN pip install optimum==1.16.1
RUN pip install accelerate==0.26.1
RUN pip install deepspeed==0.13.1
RUN pip install numpy==1.22.0

# Address vulnerabilities
RUN pip install pyarrow==14.0.1
RUN pip install aiohttp==3.10.2
RUN pip install idna==3.7
RUN pip install requests==2.32.3

# dummy number to bump when a rebuild must be forced without changing the definition: 14
@@ -0,0 +1,11 @@
image:
  name: azureml/curated/ai-ml-automl-dnn-text-gpu-ptca
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
  publish:
    location: mcr
    visibility: public
@@ -0,0 +1,20 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  An environment used by Azure ML AutoML for training models.
name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu20.04
  Training: ""
  Preview: ""
  OpenMpi: "4.1.0"
  Python: "3.9"