Skip to content

Commit

Permalink
added with models
Browse files Browse the repository at this point in the history
  • Loading branch information
Chameleon Cloud User committed Jan 2, 2024
1 parent 6e64c42 commit 4a56132
Show file tree
Hide file tree
Showing 113 changed files with 4,054 additions and 242 deletions.
8 changes: 5 additions & 3 deletions pipelines/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Pipelines presented in the IPA paper

<img src="../doc-figs/pipelines-paper.png" alt="pipelines" width="400">
1. Video Monitoring (1 Chameleon Cascadelake_r node)
2. Audio Question Answering (2 Chameleon Cascadelake_r nodes)
3. Audio Sentiment Analysis (2 Chameleon Cascadelake_r nodes)
4. Summarization Question Answering (3 Chameleon Cascadelake_r nodes)
5. Natural Language Processing (6 Chameleon Cascadelake_r nodes)
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ spec:
env:
- name: MODEL_VARIANT
value: facebook-s2t-large-librispeech-asr
- name: TRANSFORMERS_CACHE
value: /opt/mlserver/.cache
# - name: TRANSFORMERS_CACHE
# value: /opt/mlserver/.cache
- name: TASK
value: automatic-speech-recognition
- name: MLSERVER_PARALLEL_WORKERS
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
- /bin/sh
- -c
- /bin/sleep 10
replicas: 30
replicas: 1
graph:
name: audio
type: MODEL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Build and push the audio-qa "centralized with model" MLServer image.
# The HuggingFace question-answering model folders are copied into ./models
# so they are baked into the image at build time, the generated Dockerfile is
# rewritten to use the custom base image, and the result is pushed to every
# repository in REPOS.

set -euo pipefail  # fail fast on errors, unset variables, and pipe failures

# Model folders to bake into the image.
folders=(
  "/mnt/myshareddir/huggingface/question-answering/deepset-roberta-base-squad2"
  "/mnt/myshareddir/huggingface/question-answering/deepset-xlm-roberta-large-squad2"
  "/mnt/myshareddir/huggingface/question-answering/deepset-xlm-roberta-base-squad2"
  "/mnt/myshareddir/huggingface/question-answering/distilbert-base-cased-distilled-squad"
)

# Docker Hub repositories to push the image to.
REPOS=(
  sdghafouri
)

dockerfile="Dockerfile"
IMAGE_NAME=audio-qa-centralized-with-model:nlpqa

# Create a clean models directory; a bare `mkdir models` would fail if a
# stale copy was left behind by an aborted previous run.
rm -rf models
mkdir -p models

# Copy the model folders into the build context.
for folder in "${folders[@]}"; do
  cp -r "$folder" models/
done

# Generate the Dockerfile, then point it at the custom base image.
# (Use "$dockerfile" consistently instead of repeating the literal name.)
mlserver dockerfile --include-dockerignore .
sed -i 's/seldonio/sdghafouri/g' "$dockerfile"
sed -i 's/1.3.0.dev4-slim/custom-2-slim/g' "$dockerfile"

# Add the copy commands to the Dockerfile using the contents of the models directory
# copy_commands=""
# for folder in "${folders[@]}"; do
#     target_path="/mnt/models/$(basename "$folder")"
#     copy_commands+="COPY models/$(basename "$folder") $target_path\n"
# done
# awk -v copy_commands="$copy_commands" '!done && /^COPY \\$/ {print copy_commands; done=1} 1' "$dockerfile" > Dockerfile.tmp
# mv Dockerfile.tmp "$dockerfile"

# Build once, then tag and push to each configured repository.
DOCKER_BUILDKIT=1 docker build . --tag="$IMAGE_NAME"
for REPO in "${REPOS[@]}"; do
  docker tag "$IMAGE_NAME" "$REPO/$IMAGE_NAME"
  docker push "$REPO/$IMAGE_NAME"
done

# Clean up the copied models from the build context.
rm -r models
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@
f"LOGS_ENABLED env variable not set, using default value: {LOGS_ENABLED}"
)

# Whether model weights are baked into the serving image under ./models
# (WITH_MODELS truthy) instead of being read from the /mnt/models mount.
# NOTE: os.getenv with a default never raises KeyError, so the original
# try/except KeyError wrapper was dead code — and its fallback log message
# referred to USE_THREADING instead of WITH_MODELS (copy-paste bug).
WITH_MODELS = os.getenv("WITH_MODELS", "False").lower() in ("true", "1", "t")
logger.info(f"WITH_MODELS set to: {WITH_MODELS}")

if not LOGS_ENABLED:
logger.disabled = True

Expand Down Expand Up @@ -98,9 +107,13 @@ async def load(self):
logger.info("Loading the ML models")
logger.info(f"max_batch_size: {self._settings.max_batch_size}")
logger.info(f"max_batch_time: {self._settings.max_batch_time}")
if WITH_MODELS:
model_path = os.path.join(".", "models", self.MODEL_VARIANT)
else:
model_path = os.path.join("/", "mnt", "models", self.MODEL_VARIANT)
self.model = pipeline(
task=self.TASK,
model=os.path.join("/", "mnt", "models", self.MODEL_VARIANT),
model=model_path,
batch_size=self._settings.max_batch_size,
)
self.loaded = True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Jinja template for a SeldonDeployment serving a question-answering model
# from the "with-model" image: weights are baked into the image
# (WITH_MODELS=True) and read from ./models, so the rclone init-container
# provisioning below is commented out.
# NOTE(review): "distrpution_time" is a typo for "distribution_time", but the
# name is part of the render context supplied by callers — renaming it here
# would break rendering; fix template and callers together.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  # Templated scalars are quoted so boolean-/number-looking values and empty
  # expansions cannot change the parsed type.
  name: "{{ name }}"
spec:
  protocol: v2
  name: "{{ name }}"
  # Let the Istio sidecar drain in-flight requests before shutdown.
  annotations:
    proxy.istio.io/config: |
      terminationDrainDuration: {{ distrpution_time }}s
  predictors:
    - name: "{{ name }}"
      annotations:
        seldon.io/no-engine: "true"  # run without the Seldon orchestrator
      componentSpecs:
        - spec:
            # volumes:
            #   - name: classifier-provision-location
            #     emptyDir: {}
            # initContainers:
            #   - name: classifier-model-initializer
            #     image: seldonio/rclone-storage-initializer:1.16.0-dev
            #     imagePullPolicy: IfNotPresent
            #     args:
            #       - "s3://huggingface/question-answering/{{ model_variant }}"
            #       - "/mnt/models/{{ model_variant }}"
            #     volumeMounts:
            #       - mountPath: /mnt/models
            #         name: classifier-provision-location
            #     envFrom:
            #       - secretRef:
            #           name: seldon-rclone-secret
            # Must stay an unquoted integer for the Kubernetes schema.
            terminationGracePeriodSeconds: {{ distrpution_time }}
            containers:
              - image: sdghafouri/audio-qa-centralized-with-model:nlpqa
                name: nlp-qa
                imagePullPolicy: Always
                resources:
                  requests:
                    cpu: '{{ cpu_request }}'
                    memory: '{{ memory_request }}'
                  limits:
                    cpu: '{{ cpu_limit }}'
                    memory: '{{ memory_limit }}'
                # volumeMounts:
                #   - mountPath: /mnt/models
                #     name: classifier-provision-location
                env:
                  - name: MODEL_VARIANT
                    # value: /mnt/models/{{ model_variant }}
                    value: "{{ model_variant }}"
                  - name: TRANSFORMERS_CACHE
                    value: /opt/mlserver/.cache
                  - name: TASK
                    value: question-answering
                  - name: CONTEXT
                    value: default
                  - name: MLSERVER_PARALLEL_WORKERS
                    value: "1"
                  - name: USE_THREADING
                    value: '{{ use_threading }}'
                  - name: NUM_INTEROP_THREADS
                    value: '{{ num_interop_threads }}'
                  - name: NUM_THREADS
                    value: '{{ num_threads }}'
                  - name: LOGS_ENABLED
                    value: '{{ logs_enabled }}'
                  - name: WITH_MODELS
                    value: "True"  # models are baked into this image
                readinessProbe:
                  failureThreshold: 3
                  initialDelaySeconds: 0
                  periodSeconds: 1
                  successThreshold: 1
                  tcpSocket:
                    port: 9000
                  timeoutSeconds: 1
                lifecycle:
                  preStop:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - /bin/sleep {{ distrpution_time }}
      # Unquoted: Kubernetes requires an integer here.
      replicas: {{ replicas }}
      graph:
        name: nlp-qa
        type: MODEL
        children: []
      labels:
        sidecar.istio.io/inject: "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# SeldonDeployment serving the distilbert-base-cased-distilled-squad
# question-answering model from the "with-model" image: WITH_MODELS=True
# tells the server to load weights baked into the image rather than from a
# /mnt/models volume, which is why the rclone init-container provisioning
# below is commented out.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: nlp-qa
spec:
  protocol: v2
  name: nlp-qa
  # Let the Istio sidecar drain in-flight requests before shutdown.
  annotations:
    proxy.istio.io/config: |
      terminationDrainDuration: 10s
  predictors:
    - name: nlp-qa
      annotations:
        seldon.io/no-engine: "true"  # run without the Seldon orchestrator
      componentSpecs:
        - spec:
            # volumes:
            #   - name: classifier-provision-location
            #     emptyDir: {}
            # initContainers:
            #   - name: classifier-model-initializer
            #     image: seldonio/rclone-storage-initializer:1.16.0-dev
            #     imagePullPolicy: IfNotPresent
            #     args:
            #       - "s3://huggingface/question-answering/distilbert-base-cased-distilled-squad"
            #       - "/mnt/models/distilbert-base-cased-distilled-squad"
            #     volumeMounts:
            #       - mountPath: /mnt/models
            #         name: classifier-provision-location
            #     envFrom:
            #       - secretRef:
            #           name: seldon-rclone-secret
            terminationGracePeriodSeconds: 10
            containers:
              - image: sdghafouri/audio-qa-centralized-with-model:nlpqa
                name: nlp-qa
                imagePullPolicy: Always
                resources:
                  requests:
                    cpu: '4'
                    memory: 4Gi
                  limits:
                    cpu: '4'
                    memory: 4Gi
                # volumeMounts:
                #   - mountPath: /mnt/models
                #     name: classifier-provision-location
                env:
                  - name: MODEL_VARIANT
                    value: distilbert-base-cased-distilled-squad
                  # - name: TRANSFORMERS_CACHE
                  #   value: /opt/mlserver/.cache
                  - name: TASK
                    value: question-answering
                  - name: CONTEXT
                    value: default
                  - name: MLSERVER_PARALLEL_WORKERS
                    value: "1"
                  - name: USE_THREADING
                    value: "True"
                  - name: NUM_INTEROP_THREADS
                    value: "4"
                  - name: NUM_THREADS
                    value: "4"
                  - name: LOGS_ENABLED
                    value: "False"
                  - name: WITH_MODELS
                    value: "True"  # models are baked into this image
                readinessProbe:
                  failureThreshold: 3
                  initialDelaySeconds: 0
                  periodSeconds: 1
                  successThreshold: 1
                  tcpSocket:
                    port: 9000
                  timeoutSeconds: 1
                lifecycle:
                  preStop:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - /bin/sleep 10
      replicas: 1
      graph:
        name: nlp-qa
        type: MODEL
        children: []
      labels:
        sidecar.istio.io/inject: "true"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
transformers==4.21.1
transformers==4.35.0
sentencepiece==0.1.97
torch==1.11.0
torch==2.1.0
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ spec:
tcpSocket:
port: 9000
timeoutSeconds: 1
replicas: 6
replicas: 20
graph:
name: nlp-qa
type: MODEL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@
"from transformers import pipeline\n",
"from datasets import load_dataset, Audio, Dataset\n",
"\n",
"path = \"/home/cc/infernece-pipeline-joint-optimization/pipelines/21-pipelines-prototype/audio-pipeline/seldon-core-version/sample-dataset.mp3\"\n",
"path = \"/home/cc/ipa-private/pipelines/21-pipelines-prototype/audio-pipeline/seldon-core-version/sample-dataset.mp3\"\n",
"translator = pipeline(\n",
" task=\"automatic-speech-recognition\", model=\"facebook/s2t-small-librispeech-asr\"\n",
")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ spec:
env:
- name: MODEL_VARIANT
value: facebook-s2t-large-librispeech-asr
- name: TRANSFORMERS_CACHE
value: /opt/mlserver/.cache
# - name: TRANSFORMERS_CACHE
# value: /opt/mlserver/.cache
- name: TASK
value: automatic-speech-recognition
- name: MLSERVER_PARALLEL_WORKERS
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
- /bin/sh
- -c
- /bin/sleep 10
replicas: 30
replicas: 1
graph:
name: audio
type: MODEL
Expand Down
Loading

0 comments on commit 4a56132

Please sign in to comment.