Commit

New Guide: adding python sdk for NIM on AzureML (#39)
Co-authored-by: Mohit Ayani <[email protected]>
mayani-nv and Mohit Ayani authored Jul 12, 2024
1 parent 69bb506 commit 24d7fe9
Showing 4 changed files with 536 additions and 0 deletions.
21 changes: 21 additions & 0 deletions cloud-service-providers/azure/azureml/python_sdk/README.md
@@ -0,0 +1,21 @@
# Instructions for deploying NIM models on AzureML using the Python SDK

In this example, we will deploy the Llama 3 8B Instruct model on AzureML using the Python SDK.

**Prerequisites:**
- [NGC API Key](https://catalog.ngc.nvidia.com/)
- [AzureML workspace](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace?view=azureml-api-2&tabs=python)

1. Provision the compute instance using the Jupyter notebook `provision aml-compute.ipynb`. This sets up a 1xA100 GPU compute instance on AzureML. You can run this notebook from your local machine (a provisioning sketch follows this list).

2. When this notebook completes successfully, it prints the URL of the Jupyter server running on the AzureML compute, as shown below (note: your URL will have a different name). Paste this URL into your local machine's browser (a retrieval sketch also follows this list).
```bash
{'display_name': 'Jupyter Lab', 'endpoint_uri': 'https://mayani-gpu-ci.swedencentral.instances.azureml.ms/lab'}].....
```

3. Run the notebook `nim-azureml-compute.ipynb` from this repository on the Jupyter server running on the AzureML compute node, as shown in the image below.
![image](imgs/browser.png)
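
For reference, here is a minimal sketch of the kind of provisioning `provision aml-compute.ipynb` performs with the AzureML Python SDK v2 (`azure-ai-ml`). The subscription, resource group, workspace, instance name, and VM size below are placeholder assumptions; substitute your own values.

```python
# Hedged sketch: provision a single-A100 compute instance with azure-ai-ml.
# All "<...>" values and the instance name are placeholders.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ComputeInstance
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace-name>",
)

# Standard_NC24ads_A100_v4 is one Azure VM size with a single A100 80GB GPU.
compute = ComputeInstance(name="nim-gpu-ci", size="Standard_NC24ads_A100_v4")
ml_client.begin_create_or_update(compute).result()
```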

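The Jupyter URL printed in step 2 corresponds to the compute instance's hosted applications. A minimal sketch of retrieving it with the same `ml_client`, assuming the instance name used above:

```python
# Fetch the provisioned compute instance and print its hosted applications;
# the 'Jupyter Lab' entry carries the endpoint_uri to open in your browser.
ci = ml_client.compute.get("nim-gpu-ci")
print(ci.services)
```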

309 changes: 309 additions & 0 deletions cloud-service-providers/azure/azureml/python_sdk/nim-azureml-compute.ipynb
@@ -0,0 +1,309 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "be9cbb03-afe2-4c0a-96c2-bcfa2dfb7e65",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thu Jul 11 21:57:03 2024 \n",
"+---------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 535.171.04 Driver Version: 535.171.04 CUDA Version: 12.2 |\n",
"|-----------------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+======================+======================|\n",
"| 0 NVIDIA A100 80GB PCIe On | 00000001:00:00.0 Off | 0 |\n",
"| N/A 32C P0 41W / 300W | 0MiB / 81920MiB | 0% Default |\n",
"| | | Disabled |\n",
"+-----------------------------------------+----------------------+----------------------+\n",
" \n",
"+---------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=======================================================================================|\n",
"| No running processes found |\n",
"+---------------------------------------------------------------------------------------+\n"
]
}
],
"source": [
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "247b5c45-c8f3-4cb4-8eea-823a04d3c3ea",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"NGC API Key: ········\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Login Succeeded\n",
"\n",
"WARNING! Your password will be stored unencrypted in /home/azureuser/.docker/config.json.\n",
"Configure a credential helper to remove this warning. See\n",
"https://docs.docker.com/engine/reference/commandline/login/#credentials-store\n",
"\n",
"\n"
]
}
],
"source": [
"import getpass\n",
"import subprocess\n",
"import os\n",
"\n",
"# Prompt for NGC API key\n",
"ngc_api_key = getpass.getpass(\"NGC API Key: \")\n",
"\n",
"# Log in to the Docker registry\n",
"login_command = f\"echo {ngc_api_key} | docker login nvcr.io -u '$oauthtoken' --password-stdin\"\n",
"login_result = subprocess.run(login_command, shell=True, capture_output=True, text=True)\n",
"print(login_result.stdout)\n",
"print(login_result.stderr)\n",
"\n",
"# Check if login was successful\n",
"if login_result.returncode != 0:\n",
" raise Exception(\"Docker login failed\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "3990ed25-a430-40b1-bd3b-d0e6c4532709",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Set environment variables\n",
"os.environ[\"CONTAINER_NAME\"] = \"llama3-8b-instruct\"\n",
"os.environ[\"IMG_NAME\"] = f\"nvcr.io/nim/meta/{os.environ['CONTAINER_NAME']}:1.0.0\"\n",
"os.environ[\"NGC_API_KEY\"]=\"<Your NGC API key>\"\n",
"os.environ[\"LOCAL_NIM_CACHE\"] = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\"\n",
"\n",
"# Create the cache directory\n",
"os.makedirs(os.environ[\"LOCAL_NIM_CACHE\"], exist_ok=True)\n",
"\n",
"# Define the docker run command without -it and with -d\n",
"docker_command = [\n",
" \"docker\", \"run\", \"-d\", \"--rm\",\n",
" f\"--name={os.environ['CONTAINER_NAME']}\",\n",
" \"--gpus\", \"all\",\n",
" \"-e\", f\"{os.environ['NGC_API_KEY']}\",\n",
" \"-v\", f\"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache\",\n",
" \"-u\", str(os.getuid()),\n",
" \"-p\", \"8000:8000\",\n",
" os.environ[\"IMG_NAME\"]\n",
"]\n",
"\n",
"# Execute the docker run command\n",
"result = subprocess.run(docker_command, capture_output=True, text=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "da55dd96-0d73-4039-a561-aa40f10fcaef",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"stdout: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
"\n",
"stderr: \n",
"Container started successfully with ID: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
"Container logs:\n",
"\n",
"===========================================\n",
"== NVIDIA Inference Microservice LLM NIM ==\n",
"===========================================\n",
"\n",
"NVIDIA Inference Microservice LLM NIM Version 1.0.0\n",
"Model: nim/meta/llama3-8b-instruct\n",
"\n",
"Container image Copyright (c) 2016-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n",
"\n",
"This NIM container is governed by the NVIDIA AI Product Agreement here:\n",
"https://www.nvidia.com/en-us/data-center/products/nvidia-ai-enterprise/eula/.\n",
"A copy of this license can be found under /opt/nim/LICENSE.\n",
"\n",
"The use of this model is governed by the AI Foundation Models Community License\n",
"here: https://docs.nvidia.com/ai-foundation-models-community-license.pdf.\n",
"\n",
"ADDITIONAL INFORMATION: Meta Llama 3 Community License, Built with Meta Llama 3. \n",
"A copy of the Llama 3 license can be found under /opt/nim/MODEL_LICENSE.\n",
"\n",
"\n"
]
}
],
"source": [
"import subprocess\n",
"import os\n",
"import getpass\n",
"\n",
"# Prompt for NGC API key if not set\n",
"if \"NGC_API_KEY\" not in os.environ:\n",
" os.environ[\"NGC_API_KEY\"] = getpass.getpass(\"NGC API Key: \")\n",
"\n",
"# Set environment variables\n",
"container_name = \"llama3-8b-instruct\"\n",
"img_name = f\"nvcr.io/nim/meta/{container_name}:1.0.0\"\n",
"local_nim_cache = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\" # this should be the path where you want to store the cache\n",
"\n",
"# Create the cache directory\n",
"os.makedirs(local_nim_cache, exist_ok=True)\n",
"\n",
"# Define the docker run command without -it and with -d\n",
"docker_command = [\n",
" \"docker\", \"run\", \"-d\", \"--rm\",\n",
" f\"--name={container_name}\",\n",
" \"--gpus\", \"all\",\n",
" \"-e\", f\"NGC_API_KEY={os.environ['NGC_API_KEY']}\",\n",
" \"-v\", f\"{local_nim_cache}:/opt/nim/.cache\",\n",
" \"-u\", str(os.getuid()),\n",
" \"-p\", \"8000:8000\",\n",
" img_name\n",
"]\n",
"\n",
"# Execute the docker run command\n",
"result = subprocess.run(docker_command, capture_output=True, text=True)\n",
"print(\"stdout:\", result.stdout)\n",
"print(\"stderr:\", result.stderr)\n",
"\n",
"# Check if the container started successfully\n",
"if result.returncode == 0:\n",
" container_id = result.stdout.strip()\n",
" print(f\"Container started successfully with ID: {container_id}\")\n",
"\n",
" # Optionally, check the logs of the container\n",
" logs_command = [\"docker\", \"logs\", container_id]\n",
" logs_result = subprocess.run(logs_command, capture_output=True, text=True)\n",
" print(\"Container logs:\")\n",
" print(logs_result.stdout)\n",
" print(logs_result.stderr)\n",
"else:\n",
" print(\"Failed to start the container\")"
]
},
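{
"cell_type": "code",
"execution_count": null,
"id": "nim-health-check-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Added sketch, not part of the original notebook: the model can take a few\n",
"# minutes to download and load, so poll the NIM readiness endpoint\n",
"# (GET /v1/health/ready) before sending requests. The 10-minute timeout is\n",
"# an assumption.\n",
"import time\n",
"import requests\n",
"\n",
"deadline = time.time() + 600\n",
"while time.time() < deadline:\n",
"    try:\n",
"        if requests.get(\"http://0.0.0.0:8000/v1/health/ready\", timeout=5).status_code == 200:\n",
"            print(\"NIM is ready to serve requests\")\n",
"            break\n",
"    except requests.exceptions.ConnectionError:\n",
"        pass  # server is not accepting connections yet\n",
"    time.sleep(10)\n",
"else:\n",
"    print(\"Timed out waiting for NIM to become ready\")\n"
]
},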
{
"cell_type": "code",
"execution_count": 24,
"id": "ed95bad4-d477-4c58-a0b8-fbf5732575eb",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\n",
"32fdd3fe65a4 localhost/c3:latest \"/usr/local/bin/ice\" About an hour ago Up About an hour c3-progenitor\n"
]
}
],
"source": [
"!docker container ps -a"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ee094db9-c986-4e84-a023-56782d6f8837",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Status Code: 200\n",
"Response Body: {'id': 'cmpl-9f5372bd74444ce09a68b6335c6f9905', 'object': 'text_completion', 'created': 1720736060, 'model': 'meta/llama3-8b-instruct', 'choices': [{'index': 0, 'text': ', there was a lovely little girl name Sophie. She was eight years old and lived in a small town in the countryside. Sophie had a big heart and always tried to do the right thing, even if it meant going against the crowd.\\nOne day, Sophie discovered that her cat, Mr. Whiskers, was', 'logprobs': None, 'finish_reason': 'length', 'stop_reason': None}], 'usage': {'prompt_tokens': 5, 'total_tokens': 69, 'completion_tokens': 64}}\n"
]
}
],
"source": [
"import requests\n",
"import json\n",
"\n",
"# Define the URL and headers\n",
"url = 'http://0.0.0.0:8000/v1/completions'\n",
"headers = {\n",
" 'accept': 'application/json',\n",
" 'Content-Type': 'application/json'\n",
"}\n",
"\n",
"# Define the payload\n",
"payload = {\n",
" \"model\": \"meta/llama3-8b-instruct\",\n",
" \"prompt\": \"Once upon a time\",\n",
" \"max_tokens\": 64\n",
"}\n",
"\n",
"# Make the POST request\n",
"response = requests.post(url, headers=headers, data=json.dumps(payload))\n",
"\n",
"# Print the response\n",
"print(\"Status Code:\", response.status_code)\n",
"print(\"Response Body:\", response.json())"
]
},
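{
"cell_type": "code",
"execution_count": null,
"id": "nim-chat-completions-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Added sketch, not part of the original notebook: NIM also exposes the\n",
"# OpenAI-compatible chat endpoint at /v1/chat/completions on the same local\n",
"# server. The prompt is illustrative.\n",
"import requests\n",
"\n",
"chat_payload = {\n",
"    \"model\": \"meta/llama3-8b-instruct\",\n",
"    \"messages\": [{\"role\": \"user\", \"content\": \"Write a haiku about GPUs.\"}],\n",
"    \"max_tokens\": 64\n",
"}\n",
"\n",
"response = requests.post(\n",
"    \"http://0.0.0.0:8000/v1/chat/completions\",\n",
"    headers={\"accept\": \"application/json\", \"Content-Type\": \"application/json\"},\n",
"    json=chat_payload,\n",
")\n",
"print(\"Status Code:\", response.status_code)\n",
"print(\"Response Body:\", response.json())\n"
]
},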
{
"cell_type": "code",
"execution_count": null,
"id": "b3437882-6834-4ffa-a959-8b1a4cbc4786",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}