Added automl forecasting, text and vision environments (#3341)
* Added automl forecasting, text and vision environments
jeff-shepherd authored Sep 6, 2024
1 parent c4e647e commit 980a4a9
Showing 32 changed files with 1,135 additions and 0 deletions.
@@ -0,0 +1,5 @@
# ai-ml-automl-dnn-forecasting-gpu Docker Environment

## Overview
This environment is used by Azure ML AutoML for training models.
It is not intended for use in other scenarios and is subject to change without notice.
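For context, curated environments such as this one are addressed by name when a job is submitted with the `azure-ai-ml` SDK. The sketch below is illustrative only and is not part of this commit; the registry URI, compute name, and source path are assumed placeholders.

```python
# Minimal sketch (assumptions: the environment is published to the "azureml" registry;
# "gpu-cluster" and ./src are placeholders, not values from this commit).
from azure.ai.ml import MLClient, command
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(), "<subscription-id>", "<resource-group>", "<workspace>"
)

job = command(
    code="./src",
    command="python train.py",
    # Curated environments are referenced by asset URI; "latest" resolves the newest version.
    environment="azureml://registries/azureml/environments/ai-ml-automl-dnn-forecasting-gpu/labels/latest",
    compute="gpu-cluster",
)
ml_client.create_or_update(job)
```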
@@ -0,0 +1,11 @@
name: ai-ml-automl-dnn-forecasting-gpu
version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["AutoML", "Training"]
@@ -0,0 +1,70 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:{{latest-image-tag}}

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-forecasting-gpu
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

COPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py

ENV MLFLOW_MODEL_FOLDER="mlflow-model"
# ENV AML_APP_ROOT="/var/mlflow_resources"
# ENV AZUREML_ENTRY_SCRIPT="mlflow_score_script.py"

ENV ENABLE_METADATA=true

# begin conda create
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    python=3.9 \
    # begin conda dependencies
    pip=22.1.2 \
    numpy~=1.23.5 \
    scikit-learn=1.5.1 \
    pandas~=1.3.5 \
    scipy=1.10.1 \
    'psutil>=5.2.2,<6.0.0' \
    tqdm \
    setuptools=72.1.0 \
    wheel=0.44.0 \
    # Install pytorch separately to speed up image build
    -c conda-forge -c pytorch -c anaconda && \
    conda install -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    pytorch=1.13.1 \
    pytorch-cuda=11.6 \
    -c pytorch -c nvidia -y && \
    # end conda dependencies
    conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH && \
    conda clean -a -y
# end conda create

# begin pip install
# Install pip dependencies
# GitPython>=3.1.41 is required for https://github.com/advisories/GHSA-2mqj-m65w-jghx and is not available in conda
RUN pip install \
    # begin pypi dependencies
    azureml-core=={{latest-pypi-version}} \
    azureml-mlflow=={{latest-pypi-version}} \
    azureml-defaults=={{latest-pypi-version}} \
    azureml-telemetry=={{latest-pypi-version}} \
    azureml-interpret=={{latest-pypi-version}} \
    azureml-responsibleai=={{latest-pypi-version}} \
    azureml-automl-core=={{latest-pypi-version}} \
    azureml-automl-runtime=={{latest-pypi-version}} \
    azureml-train-automl-client=={{latest-pypi-version}} \
    azureml-train-automl-runtime=={{latest-pypi-version}} \
    azureml-dataset-runtime=={{latest-pypi-version}} \
    azureml-train-automl=={{latest-pypi-version}} \
    azureml-contrib-automl-dnn-forecasting==1.57.0 \
    'azure-identity>=1.16.1' \
    'inference-schema' \
    'horovod==0.28.1' \
    'xgboost==1.5.2' \
    'cryptography>=42.0.5' \
    'requests>=2.31.0' \
    'certifi>=2023.07.22' \
    'spacy==3.7.4' \
    'GitPython>=3.1.41' \
    'https://aka.ms/automl-resources/packages/en_core_web_sm-3.7.1.tar.gz' \
    'py-cpuinfo==5.0.0'
# end pypi dependencies
# end pip install
@@ -0,0 +1,11 @@
image:
  name: azureml/curated/ai-ml-automl-dnn-forecasting-gpu
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
  publish:
    location: mcr
    visibility: public
@@ -0,0 +1,20 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  An environment used by Azure ML AutoML for training models.
name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu20.04
  Training: ""
  Preview: ""
  OpenMpi: "4.1.0"
  Python: "3.9"
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Smoke tests running a job in the ai-ml-automl-dnn-forecasting-gpu environment."""
# This only tests that scikit-learn training can be done in the environment.
# After the environment is deployed, notebook tests should be run before updating
# the "Prod" label in the Jasmine service.
import os
import time
from pathlib import Path
from azure.ai.ml import command, Input, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

BUILD_CONTEXT = Path("../context")
JOB_SOURCE_CODE = "src"
TIMEOUT_MINUTES = int(os.environ.get("timeout_minutes", 60))
STD_LOG = Path("artifacts/user_logs/std_log.txt")


def test_azure_ai_ml_automl():
"""Tests a sample job using ai-ml-automl-dnn-forecasting-gpu as the environment."""
this_dir = Path(__file__).parent

subscription_id = os.environ.get("subscription_id")
resource_group = os.environ.get("resource_group")
workspace_name = os.environ.get("workspace")

ml_client = MLClient(
AzureCliCredential(), subscription_id, resource_group, workspace_name
)

env_name = "ai-ml-automl-dnn-forecasting-gpu"

env_docker_context = Environment(
build=BuildContext(path=this_dir / BUILD_CONTEXT),
name="ai-ml-automl-dnn-forecasting-gpu",
description="ai-ml-automl-dnn-forecasting-gpu environment created from a Docker context.",
)
ml_client.environments.create_or_update(env_docker_context)

# create the command
job = command(
code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored
command="python main.py --diabetes-csv ${{inputs.diabetes}}",
inputs={
"diabetes": Input(
type="uri_file",
path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv",
)
},
environment=f"{env_name}@latest",
compute=os.environ.get("gpu_cluster"),
display_name="sklearn-diabetes-example",
description="A test run of the ai-ml-automl-dnn-forecasting-gpu curated environment",
experiment_name="sklearnExperiment"
)

returned_job = ml_client.create_or_update(job)
assert returned_job is not None

# Poll until final status is reached or timed out
timeout = time.time() + (TIMEOUT_MINUTES * 60)
while time.time() <= timeout:
job = ml_client.jobs.get(returned_job.name)
status = job.status
if status in [JobStatus.COMPLETED, JobStatus.FAILED]:
break
time.sleep(30) # sleep 30 seconds
else:
# Timeout
ml_client.jobs.cancel(returned_job.name)
raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}.")

if status == JobStatus.FAILED:
ml_client.jobs.download(returned_job.name)
if STD_LOG.exists():
print(f"*** BEGIN {STD_LOG} ***")
with open(STD_LOG, "r") as f:
print(f.read(), end="")
print(f"*** END {STD_LOG} ***")
else:
ml_client.jobs.stream(returned_job.name)

assert status == JobStatus.COMPLETED
@@ -0,0 +1,2 @@
azure-ai-ml==1.2.0
azure.identity==1.10.0
@@ -0,0 +1,87 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Simple Sklearn Test."""
# imports
import mlflow
import argparse
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


# define functions
def main(args):
"""Run and evaluate model."""
# enable auto logging
mlflow.autolog()
# setup parameters
params = {
"fit_intercept": args.fit_intercept,
"positive": args.positive,
}
# read in data
df = pd.read_csv(args.diabetes_csv)

# process data
X_train, X_test, y_train, y_test = process_data(df, args.random_state)

# train model
model = train_model(params, X_train, X_test, y_train, y_test)

print(model)


def process_data(df, random_state):
"""Process data."""
# split dataframe into X and y
X = df.drop(["target"], axis=1)
y = df["target"]

# train/test split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=random_state
)

# return splits and encoder
return X_train, X_test, y_train, y_test


def train_model(params, X_train, X_test, y_train, y_test):
"""Train the model."""
# train model
model = LinearRegression(**params)
model = model.fit(X_train, y_train)

# return model
return model


def parse_args():
"""Parse arguments."""
# setup arg parser
parser = argparse.ArgumentParser()

# add arguments
parser.add_argument("--diabetes-csv", type=str)
parser.add_argument("--random_state", type=int, default=42)
parser.add_argument("--fit_intercept", type=bool, default=True)
parser.add_argument("--positive", type=bool, default=False)
parser.add_argument("--intel-extension", type=bool, default=False)

# parse args
args = parser.parse_args()

# return args
return args


# run script
if __name__ == "__main__":
# parse args
args = parse_args()
if (args.intel_extension):
from sklearnex import patch_sklearn
patch_sklearn()
# run main function
main(args)
@@ -0,0 +1,5 @@
# ai-ml-automl-dnn-text-gpu-ptca Docker Environment

## Overview
This environment is used by Azure ML AutoML for training models.
It is not intended for use in other scenarios and is subject to change without notice.
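As with the forecasting environment above, the image can be registered from its Docker build context for testing. A minimal sketch mirroring the smoke-test pattern shown earlier in this commit (the workspace identifiers and context path are placeholders):

```python
# Minimal sketch (illustrative): register this environment from its Docker context,
# following the same pattern as the forecasting smoke test above.
from pathlib import Path

from azure.ai.ml import MLClient
from azure.ai.ml.entities import BuildContext, Environment
from azure.identity import AzureCliCredential

ml_client = MLClient(
    AzureCliCredential(), "<subscription-id>", "<resource-group>", "<workspace>"
)

env = Environment(
    build=BuildContext(path=Path("context")),  # folder containing this Dockerfile
    name="ai-ml-automl-dnn-text-gpu-ptca",
    description="ai-ml-automl-dnn-text-gpu-ptca environment created from a Docker context.",
)
ml_client.environments.create_or_update(env)
```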
@@ -0,0 +1,11 @@
name: ai-ml-automl-dnn-text-gpu-ptca
version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
test:
  pytest:
    enabled: true
    pip_requirements: tests/requirements.txt
    tests_dir: tests
categories: ["AutoML", "Training"]
@@ -0,0 +1,26 @@
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu117-py310-torch1131:{{latest-image-tag}}

USER root:root

RUN pip install 'azureml-automl-dnn-nlp=={{latest-pypi-version}}'
RUN pip install 'azureml-defaults=={{latest-pypi-version}}'

RUN pip install torch==1.13.1
RUN pip uninstall -y onnxruntime-training
RUN pip install onnxruntime-training==1.15.1
RUN pip install torch-ort && \
    TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" python -m torch_ort.configure

RUN pip install transformers==4.36.2
RUN pip install optimum==1.16.1
RUN pip install accelerate==0.26.1
RUN pip install deepspeed==0.13.1
RUN pip install numpy==1.22.0

# Address vulnerabilities
RUN pip install pyarrow==14.0.1
RUN pip install aiohttp==3.10.2
RUN pip install idna==3.7
RUN pip install requests==2.32.3

# dummy number to bump when a rebuild must be forced without changing the definition: 14
@@ -0,0 +1,11 @@
image:
  name: azureml/curated/ai-ml-automl-dnn-text-gpu-ptca
  os: linux
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
  publish:
    location: mcr
    visibility: public
@@ -0,0 +1,20 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

description: >-
  An environment used by Azure ML AutoML for training models.
name: "{{asset.name}}"
version: "{{asset.version}}"

build:
  path: "{{image.context.path}}"
  dockerfile_path: "{{image.dockerfile.path}}"

os_type: linux

tags:
  OS: Ubuntu20.04
  Training: ""
  Preview: ""
  OpenMpi: "4.1.0"
  Python: "3.9"