New Guide: adding python sdk for NIM on AzureML (#39)
Co-authored-by: Mohit Ayani <[email protected]>
Showing 4 changed files with 536 additions and 0 deletions.
21 changes: 21 additions & 0 deletions
cloud-service-providers/azure/azureml/python_sdk/README.md
@@ -0,0 +1,21 @@
# Instructions for deploying NIM models on AzureML using Python SDK

In this example, we will deploy the LLAMA3 8B model on AzureML using the Python SDK.

**Prerequisites:**
- [NGC API Key](https://catalog.ngc.nvidia.com/)
- [AzureML workspace](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace?view=azureml-api-2&tabs=python)

1. Provision the compute instance using the Jupyter notebook `provision aml-compute.ipynb`. This sets up a 1x A100 GPU compute instance on AzureML. You can run this notebook from your local machine (see the sketch after these steps for the core provisioning call).

2. When the notebook completes successfully, it prints the URL of the Jupyter server running on the AzureML compute, as shown below (note: your URL will have a different name). Paste this URL into your local machine's browser.

```bash
{'display_name': 'Jupyter Lab', 'endpoint_uri': 'https://mayani-gpu-ci.swedencentral.instances.azureml.ms/lab'}].....
```

3. Run the notebook `nim-azureml-compute.ipynb` from this repository on the Jupyter server running on the AzureML compute node, as shown in the image below.
 | ||
|
||
|
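
Below is a minimal sketch of the provisioning step using the `azure-ai-ml` v2 SDK. It is illustrative, not a substitute for `provision aml-compute.ipynb`: the workspace identifiers, the compute name `nim-gpu-ci`, and the VM size `Standard_NC24ads_A100_v4` (a 1x A100 80GB SKU) are placeholder assumptions you should adapt.

```python
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ComputeInstance
from azure.identity import DefaultAzureCredential

# Connect to your AzureML workspace (fill in your own identifiers).
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace>",
)

# Request a single-GPU A100 compute instance (assumed size; any 1x A100 SKU works).
compute = ComputeInstance(name="nim-gpu-ci", size="Standard_NC24ads_A100_v4")
ml_client.compute.begin_create_or_update(compute).result()

# The instance's applications (e.g. the Jupyter Lab endpoint shown in step 2)
# are assumed here to be listed under the compute object's `services`.
print(ml_client.compute.get("nim-gpu-ci").services)
```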
309 changes: 309 additions & 0 deletions
cloud-service-providers/azure/azureml/python_sdk/nim-azureml-compute.ipynb
@@ -0,0 +1,309 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "be9cbb03-afe2-4c0a-96c2-bcfa2dfb7e65",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Thu Jul 11 21:57:03 2024 \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 535.171.04 Driver Version: 535.171.04 CUDA Version: 12.2 |\n",
      "|-----------------------------------------+----------------------+----------------------+\n",
      "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
      "| | | MIG M. |\n",
      "|=========================================+======================+======================|\n",
      "| 0 NVIDIA A100 80GB PCIe On | 00000001:00:00.0 Off | 0 |\n",
      "| N/A 32C P0 41W / 300W | 0MiB / 81920MiB | 0% Default |\n",
      "| | | Disabled |\n",
      "+-----------------------------------------+----------------------+----------------------+\n",
      " \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| Processes: |\n",
      "| GPU GI CI PID Type Process name GPU Memory |\n",
      "| ID ID Usage |\n",
      "|=======================================================================================|\n",
      "| No running processes found |\n",
      "+---------------------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "247b5c45-c8f3-4cb4-8eea-823a04d3c3ea",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NGC API Key: ········\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Login Succeeded\n",
      "\n",
      "WARNING! Your password will be stored unencrypted in /home/azureuser/.docker/config.json.\n",
      "Configure a credential helper to remove this warning. See\n",
      "https://docs.docker.com/engine/reference/commandline/login/#credentials-store\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import getpass\n",
    "import subprocess\n",
    "import os\n",
    "\n",
    "# Prompt for NGC API key\n",
    "ngc_api_key = getpass.getpass(\"NGC API Key: \")\n",
    "\n",
    "# Log in to the Docker registry\n",
    "login_command = f\"echo {ngc_api_key} | docker login nvcr.io -u '$oauthtoken' --password-stdin\"\n",
    "login_result = subprocess.run(login_command, shell=True, capture_output=True, text=True)\n",
    "print(login_result.stdout)\n",
    "print(login_result.stderr)\n",
    "\n",
    "# Check if login was successful\n",
    "if login_result.returncode != 0:\n",
    "    raise Exception(\"Docker login failed\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "3990ed25-a430-40b1-bd3b-d0e6c4532709",
   "metadata": {
    "tags": []
   },
   "outputs": [],
"source": [ | ||
"# Set environment variables\n", | ||
"os.environ[\"CONTAINER_NAME\"] = \"llama3-8b-instruct\"\n", | ||
"os.environ[\"IMG_NAME\"] = f\"nvcr.io/nim/meta/{os.environ['CONTAINER_NAME']}:1.0.0\"\n", | ||
"os.environ[\"NGC_API_KEY\"]=\"<Your NGC API key>\"\n", | ||
"os.environ[\"LOCAL_NIM_CACHE\"] = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\"\n", | ||
"\n", | ||
"# Create the cache directory\n", | ||
"os.makedirs(os.environ[\"LOCAL_NIM_CACHE\"], exist_ok=True)\n", | ||
"\n", | ||
"# Define the docker run command without -it and with -d\n", | ||
"docker_command = [\n", | ||
" \"docker\", \"run\", \"-d\", \"--rm\",\n", | ||
" f\"--name={os.environ['CONTAINER_NAME']}\",\n", | ||
" \"--gpus\", \"all\",\n", | ||
" \"-e\", f\"{os.environ['NGC_API_KEY']}\",\n", | ||
" \"-v\", f\"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache\",\n", | ||
" \"-u\", str(os.getuid()),\n", | ||
" \"-p\", \"8000:8000\",\n", | ||
" os.environ[\"IMG_NAME\"]\n", | ||
"]\n", | ||
"\n", | ||
"# Execute the docker run command\n", | ||
"result = subprocess.run(docker_command, capture_output=True, text=True)\n" | ||
] | ||
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "da55dd96-0d73-4039-a561-aa40f10fcaef",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "stdout: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
      "\n",
      "stderr: \n",
      "Container started successfully with ID: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
      "Container logs:\n",
      "\n",
      "===========================================\n",
      "== NVIDIA Inference Microservice LLM NIM ==\n",
      "===========================================\n",
      "\n",
      "NVIDIA Inference Microservice LLM NIM Version 1.0.0\n",
      "Model: nim/meta/llama3-8b-instruct\n",
      "\n",
      "Container image Copyright (c) 2016-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n",
      "\n",
      "This NIM container is governed by the NVIDIA AI Product Agreement here:\n",
      "https://www.nvidia.com/en-us/data-center/products/nvidia-ai-enterprise/eula/.\n",
      "A copy of this license can be found under /opt/nim/LICENSE.\n",
      "\n",
      "The use of this model is governed by the AI Foundation Models Community License\n",
      "here: https://docs.nvidia.com/ai-foundation-models-community-license.pdf.\n",
      "\n",
      "ADDITIONAL INFORMATION: Meta Llama 3 Community License, Built with Meta Llama 3. \n",
      "A copy of the Llama 3 license can be found under /opt/nim/MODEL_LICENSE.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import subprocess\n",
    "import os\n",
    "import getpass\n",
    "\n",
    "# Prompt for NGC API key if not set\n",
    "if \"NGC_API_KEY\" not in os.environ:\n",
    "    os.environ[\"NGC_API_KEY\"] = getpass.getpass(\"NGC API Key: \")\n",
    "\n",
    "# Set environment variables\n",
    "container_name = \"llama3-8b-instruct\"\n",
    "img_name = f\"nvcr.io/nim/meta/{container_name}:1.0.0\"\n",
    "local_nim_cache = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\"  # this should be the path where you want to store the cache\n",
    "\n",
    "# Create the cache directory\n",
    "os.makedirs(local_nim_cache, exist_ok=True)\n",
    "\n",
    "# Define the docker run command without -it and with -d\n",
    "docker_command = [\n",
    "    \"docker\", \"run\", \"-d\", \"--rm\",\n",
    "    f\"--name={container_name}\",\n",
    "    \"--gpus\", \"all\",\n",
    "    \"-e\", f\"NGC_API_KEY={os.environ['NGC_API_KEY']}\",\n",
    "    \"-v\", f\"{local_nim_cache}:/opt/nim/.cache\",\n",
    "    \"-u\", str(os.getuid()),\n",
    "    \"-p\", \"8000:8000\",\n",
    "    img_name\n",
    "]\n",
    "\n",
    "# Execute the docker run command\n",
    "result = subprocess.run(docker_command, capture_output=True, text=True)\n",
    "print(\"stdout:\", result.stdout)\n",
    "print(\"stderr:\", result.stderr)\n",
    "\n",
    "# Check if the container started successfully\n",
    "if result.returncode == 0:\n",
    "    container_id = result.stdout.strip()\n",
    "    print(f\"Container started successfully with ID: {container_id}\")\n",
    "\n",
    "    # Optionally, check the logs of the container\n",
    "    logs_command = [\"docker\", \"logs\", container_id]\n",
    "    logs_result = subprocess.run(logs_command, capture_output=True, text=True)\n",
    "    print(\"Container logs:\")\n",
    "    print(logs_result.stdout)\n",
    "    print(logs_result.stderr)\n",
    "else:\n",
    "    print(\"Failed to start the container\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ed95bad4-d477-4c58-a0b8-fbf5732575eb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\n",
      "32fdd3fe65a4 localhost/c3:latest \"/usr/local/bin/ice\" About an hour ago Up About an hour c3-progenitor\n"
     ]
    }
   ],
   "source": [
    "!docker container ps -a"
   ]
  },
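  {
   "cell_type": "markdown",
   "id": "nim-health-ready-note",
   "metadata": {},
   "source": [
    "Before sending inference requests, you can poll the NIM readiness endpoint until the model has finished downloading and loading. The next cell is an added illustrative sketch: it assumes the container started above publishes port 8000 on this host and exposes the standard NIM `/v1/health/ready` route."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "nim-health-ready-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import requests\n",
    "\n",
    "# Poll the NIM readiness endpoint (assumes port 8000 is published, as above).\n",
    "for _ in range(60):\n",
    "    try:\n",
    "        r = requests.get(\"http://localhost:8000/v1/health/ready\", timeout=5)\n",
    "        if r.status_code == 200:\n",
    "            print(\"NIM is ready\")\n",
    "            break\n",
    "    except requests.exceptions.ConnectionError:\n",
    "        pass  # server is not accepting connections yet\n",
    "    time.sleep(10)\n",
    "else:\n",
    "    print(\"Timed out waiting for NIM to become ready\")"
   ]
  },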
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "ee094db9-c986-4e84-a023-56782d6f8837",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status Code: 200\n",
      "Response Body: {'id': 'cmpl-9f5372bd74444ce09a68b6335c6f9905', 'object': 'text_completion', 'created': 1720736060, 'model': 'meta/llama3-8b-instruct', 'choices': [{'index': 0, 'text': ', there was a lovely little girl name Sophie. She was eight years old and lived in a small town in the countryside. Sophie had a big heart and always tried to do the right thing, even if it meant going against the crowd.\\nOne day, Sophie discovered that her cat, Mr. Whiskers, was', 'logprobs': None, 'finish_reason': 'length', 'stop_reason': None}], 'usage': {'prompt_tokens': 5, 'total_tokens': 69, 'completion_tokens': 64}}\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "# Define the URL and headers\n",
    "url = 'http://0.0.0.0:8000/v1/completions'\n",
    "headers = {\n",
    "    'accept': 'application/json',\n",
    "    'Content-Type': 'application/json'\n",
    "}\n",
    "\n",
    "# Define the payload\n",
    "payload = {\n",
    "    \"model\": \"meta/llama3-8b-instruct\",\n",
    "    \"prompt\": \"Once upon a time\",\n",
    "    \"max_tokens\": 64\n",
    "}\n",
    "\n",
    "# Make the POST request\n",
    "response = requests.post(url, headers=headers, data=json.dumps(payload))\n",
    "\n",
    "# Print the response\n",
    "print(\"Status Code:\", response.status_code)\n",
    "print(\"Response Body:\", response.json())"
   ]
  },
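  {
   "cell_type": "markdown",
   "id": "nim-chat-completions-note",
   "metadata": {},
   "source": [
    "NIM also serves an OpenAI-compatible chat route alongside the completions endpoint used above. The cell below is an added illustrative sketch, assuming the standard `/v1/chat/completions` route on the same server; it reuses `requests` and `headers` from the previous cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "nim-chat-completions-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal chat-completions sketch against the same NIM server.\n",
    "chat_payload = {\n",
    "    \"model\": \"meta/llama3-8b-instruct\",\n",
    "    \"messages\": [{\"role\": \"user\", \"content\": \"Write a haiku about GPUs.\"}],\n",
    "    \"max_tokens\": 64\n",
    "}\n",
    "\n",
    "chat_response = requests.post('http://0.0.0.0:8000/v1/chat/completions', headers=headers, json=chat_payload)\n",
    "print(\"Status Code:\", chat_response.status_code)\n",
    "print(\"Response Body:\", chat_response.json())"
   ]
  },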
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3437882-6834-4ffa-a959-8b1a4cbc4786",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8 - AzureML",
   "language": "python",
   "name": "python38-azureml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
} |