adding python sdk for NIM on AzureML #39

Merged · 1 commit · Jul 12, 2024
21 changes: 21 additions & 0 deletions cloud-service-providers/azure/azureml/python_sdk/README.md
@@ -0,0 +1,21 @@
# Instructions for deploying NIM models on AzureML using Python SDK

In this example, we will deploy the Llama 3 8B Instruct model on AzureML using the Python SDK.

**Prerequisites:**
- [NGC API Key](https://catalog.ngc.nvidia.com/)
- [AzureML workspace](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace?view=azureml-api-2&tabs=python)

1. Provision the compute instance using the Jupyter notebook `provision aml-compute.ipynb`. This sets up a 1xA100 GPU compute instance on AzureML (a minimal SDK sketch follows this list). You can run this Jupyter notebook from your local machine.

2. Once this notebook completes successfully, you will get the URL of the Jupyter server running on the AzureML compute instance, as shown below (_note: your URL will differ_). You can then paste the URL into your local machine's browser.
```bash

{'display_name': 'Jupyter Lab', 'endpoint_uri': 'https://mayani-gpu-ci.swedencentral.instances.azureml.ms/lab'}].....

```

3. Run the notebook `nim-azureml-compute.ipynb` from this repository on the Jupyter server running on the AzureML compute node, as shown in the image below.
![image](imgs/browser.png)
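
For reference, here is a minimal sketch of what the provisioning step does, using the `azure-ai-ml` (Python SDK v2) package. The subscription/workspace identifiers, the instance name, and the VM size are placeholders and assumptions; `provision aml-compute.ipynb` remains the authoritative version.

```python
# Minimal provisioning sketch with the AzureML Python SDK v2.
# All <...> values and the instance name are placeholders.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ComputeInstance
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)

# Standard_NC24ads_A100_v4 provides a single A100 80GB GPU.
instance = ComputeInstance(name="nim-gpu-ci", size="Standard_NC24ads_A100_v4")
ml_client.compute.begin_create_or_update(instance).result()

# The compute's services list includes the Jupyter Lab endpoint URI
# shown in step 2.
print(ml_client.compute.get("nim-gpu-ci").services)
```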


Binary file added: cloud-service-providers/azure/azureml/python_sdk/imgs/browser.png (not displayed)
309 changes: 309 additions & 0 deletions cloud-service-providers/azure/azureml/python_sdk/nim-azureml-compute.ipynb
@@ -0,0 +1,309 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "be9cbb03-afe2-4c0a-96c2-bcfa2dfb7e65",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thu Jul 11 21:57:03 2024 \n",
"+---------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 535.171.04 Driver Version: 535.171.04 CUDA Version: 12.2 |\n",
"|-----------------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+======================+======================|\n",
"| 0 NVIDIA A100 80GB PCIe On | 00000001:00:00.0 Off | 0 |\n",
"| N/A 32C P0 41W / 300W | 0MiB / 81920MiB | 0% Default |\n",
"| | | Disabled |\n",
"+-----------------------------------------+----------------------+----------------------+\n",
" \n",
"+---------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=======================================================================================|\n",
"| No running processes found |\n",
"+---------------------------------------------------------------------------------------+\n"
]
}
],
"source": [
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "247b5c45-c8f3-4cb4-8eea-823a04d3c3ea",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"NGC API Key: ········\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Login Succeeded\n",
"\n",
"WARNING! Your password will be stored unencrypted in /home/azureuser/.docker/config.json.\n",
"Configure a credential helper to remove this warning. See\n",
"https://docs.docker.com/engine/reference/commandline/login/#credentials-store\n",
"\n",
"\n"
]
}
],
"source": [
"import getpass\n",
"import subprocess\n",
"import os\n",
"\n",
"# Prompt for NGC API key\n",
"ngc_api_key = getpass.getpass(\"NGC API Key: \")\n",
"\n",
"# Log in to the Docker registry\n",
"login_command = f\"echo {ngc_api_key} | docker login nvcr.io -u '$oauthtoken' --password-stdin\"\n",
"login_result = subprocess.run(login_command, shell=True, capture_output=True, text=True)\n",
"print(login_result.stdout)\n",
"print(login_result.stderr)\n",
"\n",
"# Check if login was successful\n",
"if login_result.returncode != 0:\n",
" raise Exception(\"Docker login failed\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "3990ed25-a430-40b1-bd3b-d0e6c4532709",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Set environment variables\n",
"os.environ[\"CONTAINER_NAME\"] = \"llama3-8b-instruct\"\n",
"os.environ[\"IMG_NAME\"] = f\"nvcr.io/nim/meta/{os.environ['CONTAINER_NAME']}:1.0.0\"\n",
"os.environ[\"NGC_API_KEY\"]=\"<Your NGC API key>\"\n",
"os.environ[\"LOCAL_NIM_CACHE\"] = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\"\n",
"\n",
"# Create the cache directory\n",
"os.makedirs(os.environ[\"LOCAL_NIM_CACHE\"], exist_ok=True)\n",
"\n",
"# Define the docker run command without -it and with -d\n",
"docker_command = [\n",
" \"docker\", \"run\", \"-d\", \"--rm\",\n",
" f\"--name={os.environ['CONTAINER_NAME']}\",\n",
" \"--gpus\", \"all\",\n",
" \"-e\", f\"{os.environ['NGC_API_KEY']}\",\n",
" \"-v\", f\"{os.environ['LOCAL_NIM_CACHE']}:/opt/nim/.cache\",\n",
" \"-u\", str(os.getuid()),\n",
" \"-p\", \"8000:8000\",\n",
" os.environ[\"IMG_NAME\"]\n",
"]\n",
"\n",
"# Execute the docker run command\n",
"result = subprocess.run(docker_command, capture_output=True, text=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "da55dd96-0d73-4039-a561-aa40f10fcaef",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"stdout: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
"\n",
"stderr: \n",
"Container started successfully with ID: 9a7d3f7cc0fceeb3a6dc5a80dad6e859c8bef94f9963f2e251f374102601d436\n",
"Container logs:\n",
"\n",
"===========================================\n",
"== NVIDIA Inference Microservice LLM NIM ==\n",
"===========================================\n",
"\n",
"NVIDIA Inference Microservice LLM NIM Version 1.0.0\n",
"Model: nim/meta/llama3-8b-instruct\n",
"\n",
"Container image Copyright (c) 2016-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n",
"\n",
"This NIM container is governed by the NVIDIA AI Product Agreement here:\n",
"https://www.nvidia.com/en-us/data-center/products/nvidia-ai-enterprise/eula/.\n",
"A copy of this license can be found under /opt/nim/LICENSE.\n",
"\n",
"The use of this model is governed by the AI Foundation Models Community License\n",
"here: https://docs.nvidia.com/ai-foundation-models-community-license.pdf.\n",
"\n",
"ADDITIONAL INFORMATION: Meta Llama 3 Community License, Built with Meta Llama 3. \n",
"A copy of the Llama 3 license can be found under /opt/nim/MODEL_LICENSE.\n",
"\n",
"\n"
]
}
],
"source": [
"import subprocess\n",
"import os\n",
"import getpass\n",
"\n",
"# Prompt for NGC API key if not set\n",
"if \"NGC_API_KEY\" not in os.environ:\n",
" os.environ[\"NGC_API_KEY\"] = getpass.getpass(\"NGC API Key: \")\n",
"\n",
"# Set environment variables\n",
"container_name = \"llama3-8b-instruct\"\n",
"img_name = f\"nvcr.io/nim/meta/{container_name}:1.0.0\"\n",
"local_nim_cache = \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mayani-gpu-ci/code/.cache/nim\" # this should be the path where you want to store the cache\n",
"\n",
"# Create the cache directory\n",
"os.makedirs(local_nim_cache, exist_ok=True)\n",
"\n",
"# Define the docker run command without -it and with -d\n",
"docker_command = [\n",
" \"docker\", \"run\", \"-d\", \"--rm\",\n",
" f\"--name={container_name}\",\n",
" \"--gpus\", \"all\",\n",
" \"-e\", f\"NGC_API_KEY={os.environ['NGC_API_KEY']}\",\n",
" \"-v\", f\"{local_nim_cache}:/opt/nim/.cache\",\n",
" \"-u\", str(os.getuid()),\n",
" \"-p\", \"8000:8000\",\n",
" img_name\n",
"]\n",
"\n",
"# Execute the docker run command\n",
"result = subprocess.run(docker_command, capture_output=True, text=True)\n",
"print(\"stdout:\", result.stdout)\n",
"print(\"stderr:\", result.stderr)\n",
"\n",
"# Check if the container started successfully\n",
"if result.returncode == 0:\n",
" container_id = result.stdout.strip()\n",
" print(f\"Container started successfully with ID: {container_id}\")\n",
"\n",
" # Optionally, check the logs of the container\n",
" logs_command = [\"docker\", \"logs\", container_id]\n",
" logs_result = subprocess.run(logs_command, capture_output=True, text=True)\n",
" print(\"Container logs:\")\n",
" print(logs_result.stdout)\n",
" print(logs_result.stderr)\n",
"else:\n",
" print(\"Failed to start the container\")"
]
},
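{
"cell_type": "markdown",
"id": "nim-health-check-md",
"metadata": {},
"source": [
"Added sketch (not part of the original run): poll the standard NIM readiness endpoint `/v1/health/ready` so requests are only sent once the model has finished downloading and loading. The 10-minute polling budget is an assumption."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "nim-health-check",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: wait until the NIM server reports ready before querying it.\n",
"# Assumes the container above is publishing port 8000 on this host.\n",
"import time\n",
"import requests\n",
"\n",
"for _ in range(60):  # poll for up to ~10 minutes\n",
"    try:\n",
"        r = requests.get(\"http://localhost:8000/v1/health/ready\", timeout=5)\n",
"        if r.status_code == 200:\n",
"            print(\"NIM is ready\")\n",
"            break\n",
"    except requests.exceptions.ConnectionError:\n",
"        pass  # server not accepting connections yet\n",
"    time.sleep(10)\n",
"else:\n",
"    print(\"NIM did not become ready in time; check docker logs\")"
]
},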
{
"cell_type": "code",
"execution_count": 24,
"id": "ed95bad4-d477-4c58-a0b8-fbf5732575eb",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\n",
"32fdd3fe65a4 localhost/c3:latest \"/usr/local/bin/ice\" About an hour ago Up About an hour c3-progenitor\n"
]
}
],
"source": [
"!docker container ps -a"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ee094db9-c986-4e84-a023-56782d6f8837",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Status Code: 200\n",
"Response Body: {'id': 'cmpl-9f5372bd74444ce09a68b6335c6f9905', 'object': 'text_completion', 'created': 1720736060, 'model': 'meta/llama3-8b-instruct', 'choices': [{'index': 0, 'text': ', there was a lovely little girl name Sophie. She was eight years old and lived in a small town in the countryside. Sophie had a big heart and always tried to do the right thing, even if it meant going against the crowd.\\nOne day, Sophie discovered that her cat, Mr. Whiskers, was', 'logprobs': None, 'finish_reason': 'length', 'stop_reason': None}], 'usage': {'prompt_tokens': 5, 'total_tokens': 69, 'completion_tokens': 64}}\n"
]
}
],
"source": [
"import requests\n",
"import json\n",
"\n",
"# Define the URL and headers\n",
"url = 'http://0.0.0.0:8000/v1/completions'\n",
"headers = {\n",
" 'accept': 'application/json',\n",
" 'Content-Type': 'application/json'\n",
"}\n",
"\n",
"# Define the payload\n",
"payload = {\n",
" \"model\": \"meta/llama3-8b-instruct\",\n",
" \"prompt\": \"Once upon a time\",\n",
" \"max_tokens\": 64\n",
"}\n",
"\n",
"# Make the POST request\n",
"response = requests.post(url, headers=headers, data=json.dumps(payload))\n",
"\n",
"# Print the response\n",
"print(\"Status Code:\", response.status_code)\n",
"print(\"Response Body:\", response.json())"
]
},
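{
"cell_type": "markdown",
"id": "nim-chat-completions-md",
"metadata": {},
"source": [
"Added sketch: NIM also serves the OpenAI-compatible `/v1/chat/completions` route, which is typically the better fit for an instruct-tuned model like this one. The prompt below is illustrative."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "nim-chat-completions",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: query the OpenAI-compatible chat completions endpoint.\n",
"import requests\n",
"\n",
"payload = {\n",
"    \"model\": \"meta/llama3-8b-instruct\",\n",
"    \"messages\": [{\"role\": \"user\", \"content\": \"Tell me a one-sentence story.\"}],\n",
"    \"max_tokens\": 64\n",
"}\n",
"\n",
"response = requests.post(\n",
"    \"http://localhost:8000/v1/chat/completions\",\n",
"    headers={\"accept\": \"application/json\", \"Content-Type\": \"application/json\"},\n",
"    json=payload,\n",
")\n",
"print(\"Status Code:\", response.status_code)\n",
"print(\"Response Body:\", response.json())"
]
},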
{
"cell_type": "code",
"execution_count": null,
"id": "b3437882-6834-4ffa-a959-8b1a4cbc4786",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}