Skip to content

Commit

Permalink
Merge branch 'main' into ui-flow-v11
Browse files Browse the repository at this point in the history
  • Loading branch information
MilesHolland authored Feb 25, 2025
2 parents f085188 + c6e4cf3 commit f4c385e
Show file tree
Hide file tree
Showing 10 changed files with 30 additions and 23 deletions.
7 changes: 5 additions & 2 deletions assets/models/system/Phi-4/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ properties:
inference-recommended-sku: Standard_NC24ads_A100_v4, Standard_NC48ads_A100_v4, Standard_NC96ads_A100_v4, Standard_ND96asr_v4, Standard_ND96amsr_A100_v4
finetuning-tasks: chat-completion
finetune-min-sku-spec: 24|1|220|64
finetune-recommended-sku: Standard_NC24ads_A100_v4, Standard_NC48ads_A100_v4, Standard_NC96ads_A100_v4, Standard_ND96asr_v4, Standard_ND96amsr_A100_v4
finetune-recommended-sku: Standard_NC24ads_A100_v4, Standard_NC48ads_A100_v4, Standard_NC96ads_A100_v4, Standard_ND96asr_v4, Standard_ND96amsr_A100_v4, Standard_NC40ads_H100_v5, Standard_NC80adis_H100_v5, Standard_ND96isr_H100_v5
languages: en
SharedComputeCapacityEnabled: true

Expand Down Expand Up @@ -52,7 +52,10 @@ tags:
Standard_NC48ads_A100_v4,
Standard_NC96ads_A100_v4,
Standard_ND96asr_v4,
Standard_ND96amsr_A100_v4
Standard_ND96amsr_A100_v4,
Standard_NC40ads_H100_v5,
Standard_NC80adis_H100_v5,
Standard_ND96isr_H100_v5
]
model_specific_defaults:
apply_deepspeed: "true"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu121-py310-torch222:{{latest-image-tag}}
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2204-cu118-py310-torch222:{{latest-image-tag}}

USER root:root

Expand All @@ -15,7 +15,7 @@ RUN pip install torch-ort==1.18.0 && TORCH_CUDA_ARCH_LIST="5.2;6.0;7.0;8.0;8.6;9

RUN pip uninstall -y onnxruntime

RUN pip install transformers==4.38.2
RUN pip install transformers==4.48.0
RUN pip install optimum==1.21.4
RUN pip install accelerate==0.33.0
RUN pip install deepspeed~=0.15.1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
from pathlib import Path
from azure.ai.ml import command, Input, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.operations._run_history_constants import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

Expand Down Expand Up @@ -54,7 +54,7 @@ def test_azure_ai_ml_automl():
compute=os.environ.get("gpu_cluster"),
display_name="sklearn-diabetes-example",
description="A test run of the ai-ml-automl-dnn-text-gpu-ptca curated environment",
experiment_name="sklearnExperiment"
experiment_name="sklearnExperiment",
)

returned_job = ml_client.create_or_update(job)
Expand All @@ -70,9 +70,11 @@ def test_azure_ai_ml_automl():
time.sleep(30) # sleep 30 seconds
else:
# Timeout
ml_client.jobs.cancel(returned_job.name)
raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}.")
ml_client.jobs.begin_cancel(returned_job.name)
raise Exception(
f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}."
)

if status == JobStatus.FAILED:
ml_client.jobs.download(returned_job.name)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
azure-ai-ml==1.2.0
azure-ai-ml==1.25.0
azure.identity==1.10.0
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu121-py310-torch222:{{latest-image-tag}}
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2204-cu118-py310-torch222:{{latest-image-tag}}

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl-dnn-text-gpu
# Prepend path to AzureML conda environment
Expand Down Expand Up @@ -62,7 +62,7 @@ RUN HOROVOD_WITH_PYTORCH=1 pip install --no-cache-dir git+https://github.com/hor
# by fixing dependencies in the base packages
RUN pip list && \
pip install pyarrow==14.0.2 \
'transformers[sentencepiece,torch]==4.37.2' \
'transformers[sentencepiece,torch]==4.48.0' \
aiohttp==3.10.2

# end pip install
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
from pathlib import Path
from azure.ai.ml import command, Input, MLClient
from azure.ai.ml._restclient.models import JobStatus
from azure.ai.ml.operations._run_history_constants import JobStatus
from azure.ai.ml.entities import Environment, BuildContext
from azure.identity import AzureCliCredential

Expand Down Expand Up @@ -54,7 +54,7 @@ def test_azure_ai_ml_automl():
compute=os.environ.get("gpu_cluster"),
display_name="sklearn-diabetes-example",
description="A test run of the ai-ml-automl-dnn-text-gpu curated environment",
experiment_name="sklearnExperiment"
experiment_name="sklearnExperiment",
)

returned_job = ml_client.create_or_update(job)
Expand All @@ -70,9 +70,11 @@ def test_azure_ai_ml_automl():
time.sleep(30) # sleep 30 seconds
else:
# Timeout
ml_client.jobs.cancel(returned_job.name)
raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}.")
ml_client.jobs.begin_cancel(returned_job.name)
raise Exception(
f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. "
f"Last status was {status}."
)

if status == JobStatus.FAILED:
ml_client.jobs.download(returned_job.name)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
azure-ai-ml==1.2.0
azure-ai-ml==1.25.0
azure.identity==1.10.0
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:{{latest-image-tag}}
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04:{{latest-image-tag}}

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/automl
# Prepend path to AzureML conda environment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ dependencies:
- numpy~=1.22.3
- pandas~=1.5.3
- py-xgboost=1.3.3
- pyopenssl=24.2.1
- cryptography=42.0.2
- pyopenssl=25.0.0
- cryptography=44.0.1
- 'psutil>=5.2.2,<6.0.0'
- tqdm
- setuptools=72.1.0
- wheel=0.44.0
- openssl=3.0.15
- openssl=3.4.0
- pip:
- inference-schema
- azureml-core=={{latest-pypi-version}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ image:
- requirements.txt
publish:
location: mcr
visibility: unlisted
visibility: public

0 comments on commit f4c385e

Please sign in to comment.