-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from CaptainVincent/k8s-llama-server
Add k8s with llama server example
- Loading branch information
Showing
4 changed files
with
206 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# CI workflow: bring up containerd + crun (wasmedge-plugin branch) + a local
# Kubernetes cluster, deploy the WasmEdge llama API server demo, and probe
# its HTTP endpoints. Runs on dispatch, main pushes/PRs (ignoring READMEs),
# and a daily cron.
name: k8s containerd LLAMA service test

# Cancel an in-flight run of the same workflow for the same branch/ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

on:
  workflow_dispatch:
    inputs:
      logLevel:
        description: 'Log level'
        required: true
        default: 'info'
  push:
    branches: [ main ]
    paths-ignore:
      - '**/README.md'
  pull_request:
    branches: [ main ]
    paths-ignore:
      - '**/README.md'
  schedule:
    # Once a day at midnight UTC.
    - cron: "0 0 */1 * *"

jobs:
  run:
    runs-on: ubuntu-20.04
    name: Run ggml plugin example

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Install apt-get packages
        run: |
          sudo ACCEPT_EULA=Y apt-get update
          sudo ACCEPT_EULA=Y apt-get upgrade
          sudo ACCEPT_EULA=Y apt-get install git wget jq

      # Reuse the generic containerd install script, but redirect the crun
      # checkout to second-state's enable-wasmedge-plugin branch via sed.
      - name: Install containerd, WasmEdge, and crun with support of plugins and nn-preload
        run: |
          sed 's|https://github.com/containers/crun|-b enable-wasmedge-plugin https://github.com/second-state/crun|g' containerd/install.sh | bash

      - name: Installing and starting k8s
        run: |
          bash kubernetes_containerd/install.sh > k8s.log 2>&1

      # The plugin's shared-library dependencies are copied next to it so the
      # container environment can resolve them from the same hostPath mount.
      - name: Installing wasi_nn-ggml plugin and copy sys's dependencies into same path for container environment
        run: |
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- --plugins wasi_nn-ggml
          wget -qO- https://raw.githubusercontent.com/second-state/runwasi/main/release/utils/copy_sys_dependencies.sh | bash -s $HOME/.wasmedge/plugin/libwasmedgePluginWasiNN.so $HOME/.wasmedge/plugin/

      - name: Download llm model
        run: |
          curl -LO https://huggingface.co/second-state/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf

      # NOTE(review): fixed 20-minute wait, presumably to let the cluster
      # settle before deploying — confirm whether a readiness poll could
      # replace it.
      - name: Sleep for 1200s
        run: sleep 1200s
        shell: bash

      - name: Dump the log of k8s setup
        run: |
          cat k8s.log

      - name: Run llm api service in k8s
        continue-on-error: true
        run: |
          bash k8s_containerd_llama/llama_server_application.sh >> dump.log 2>&1

      # Smoke-test the API server pod started by the kubectl run command.
      - name: Test the API server pod created by the kubectl run command
        continue-on-error: true
        run: |
          curl -X POST http://localhost:8080/v1/models -H 'accept:application/json'
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "Who is Robert Oppenheimer?"}], "model":"llama-2-chat"}' | jq .
          curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "What new discoveries from the James Webb Space Telescope can I tell my nine-year-old about?"}], "model":"llama-2-chat"}' | jq .

      - name: Display crun and wasmedge version
        run: |
          crun --version
          wasmedge --version

      - name: Dump the log of execution
        run: |
          cat dump.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Run a WasmEdge LLAMA chat server app with Containerd over Kubernetes | ||
|
||
## Environment | ||
|
||
We use `Ubuntu 20.04 x86_64` in the following example. | ||
|
||
## Install containerd, customized crun, and WasmEdge | ||
|
||
Reuse the install script from the other example, but use the experimental crun branch. | ||
|
||
```bash | ||
sed 's|https://github.com/containers/crun|-b enable-wasmedge-plugin https://github.com/second-state/crun|g' containerd/install.sh | bash | ||
``` | ||
|
||
## Install k8s | ||
|
||
Reuse the install script from the other example. | ||
|
||
```bash | ||
bash kubernetes_containerd/install.sh | ||
``` | ||
|
||
## Run LLAMA chat server app | ||
The [llama_server_application.sh](./llama_server_application.sh) script shows how to pull a WASM container image with WASI-NN-GGML plugin support from the Docker Hub, and then run it as a containerized application in Kubernetes. | ||
|
||
```bash | ||
bash k8s_containerd_llama/llama_server_application.sh | ||
``` | ||
|
||
Test the API service from another session | ||
|
||
```bash | ||
curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "Who is Robert Oppenheimer?"}], "model":"llama-2-chat"}' | jq . | ||
``` | ||
|
||
Check output | ||
|
||
```bash | ||
``` | ||
|
||
[Learn more](https://wasmedge.org/book/en/kubernetes/kubernetes/kubernetes-containerd.html) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/bin/bash
# Deploy the WasmEdge llama-api-server demo as a pod ("testggml") on a local
# Kubernetes cluster brought up by the kubernetes/cluster scripts, then print
# pod status. Expects the GGUF model file to be present in $PWD and the
# WasmEdge plugins under $HOME/.wasmedge/plugin/.
set -x # Enable verbose tracing for debug information

export KUBERNETES_PROVIDER=local
# NOTE(review): WASM_IMAGE, WASM_IMAGE_TAG, and VARIANT are exported but never
# referenced below — the pod spec hardcodes its own image and annotation.
# They may be read by a sourced/child script; confirm before removing.
export WASM_IMAGE=ghcr.io/second-state/runwasi-demo
export WASM_IMAGE_TAG=llama-simple
export VARIANT=compat-smart
export CLUS_NAME=local
export CRED_NAME=myself
export SERVER=https://localhost:6443
export CERT_AUTH=/var/run/kubernetes/server-ca.crt
export CLIENT_KEY=/var/run/kubernetes/client-admin.key
export CLIENT_CERT=/var/run/kubernetes/client-admin.crt

# Point kubectl at the locally started cluster using the admin client certs.
# (Bug fix: $CRED_NAME is now quoted, consistent with every other expansion.)
sudo ./kubernetes/cluster/kubectl.sh config set-cluster "$CLUS_NAME" --server="$SERVER" --certificate-authority="$CERT_AUTH"
sudo ./kubernetes/cluster/kubectl.sh config set-credentials "$CRED_NAME" --client-key="$CLIENT_KEY" --client-certificate="$CLIENT_CERT"
sudo ./kubernetes/cluster/kubectl.sh config set-context "$CLUS_NAME" --cluster="$CLUS_NAME" --user="$CRED_NAME"
sudo ./kubernetes/cluster/kubectl.sh config use-context "$CLUS_NAME"
sudo ./kubernetes/cluster/kubectl.sh cluster-info

# Run the llama-api-server pod. The --overrides JSON gives it host networking
# (so port 8080 is reachable from the node) and two hostPath mounts:
#   - $HOME/.wasmedge/plugin/ -> /opt/containerd/lib  (WasmEdge plugins)
#   - $PWD                     -> /resource           (the GGUF model file)
# The '"$VAR"' splices inject shell values into the single-quoted JSON.
sudo ./kubernetes/cluster/kubectl.sh run -i --restart=Never testggml --image=ghcr.io/captainvincent/runwasi-demo:llama-api-server --annotations="module.wasm.image/variant=compat-smart" --overrides='
{
  "apiVersion": "v1",
  "kind": "Pod",
  "metadata": {
    "name": "testggml"
  },
  "spec": {
    "hostNetwork": true,
    "containers": [
      {
        "name": "simple",
        "image": "ghcr.io/captainvincent/runwasi-demo:llama-api-server",
        "command": ["/app.wasm", "-p", "llama-2-chat"],
        "stdin": true,
        "tty": true,
        "env": [
          {
            "name": "WASMEDGE_PLUGIN_PATH",
            "value": "/opt/containerd/lib"
          },
          {
            "name": "WASMEDGE_WASINN_PRELOAD",
            "value": "default:GGML:CPU:/resource/llama-2-7b-chat.Q5_K_M.gguf"
          }
        ],
        "volumeMounts": [
          {
            "name": "plugins",
            "mountPath": "/opt/containerd/lib"
          },
          {
            "name": "model",
            "mountPath": "/resource"
          }
        ]
      }
    ],
    "volumes": [
      {
        "name": "plugins",
        "hostPath": {
          "path": "'"$HOME"'/.wasmedge/plugin/"
        }
      },
      {
        "name": "model",
        "hostPath": {
          "path": "'"$PWD"'"
        }
      }
    ]
  }
}'

# Give the pod time to pull the image and load the model before reporting.
# (Fix: dropped the pointless, non-portable 'echo -e' — no escapes in string.)
echo "Wait 60s"
sleep 60

sudo ./kubernetes/cluster/kubectl.sh get pod --all-namespaces -o wide