Skip to content

Commit

Permalink
added with models
Browse files Browse the repository at this point in the history
  • Loading branch information
Chameleon Cloud User committed Jan 2, 2024
1 parent 6e64c42 commit 4a56132
Show file tree
Hide file tree
Showing 113 changed files with 4,054 additions and 242 deletions.
8 changes: 5 additions & 3 deletions pipelines/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Pipelines presented in the IPA paper

<img src="../doc-figs/pipelines-paper.png" alt="pipelines" width="400">
1. Video Monitoring (1 Chameleon Cascadelake_r node)
2. Audio Question Answering (2 Chameleon Cascadelake_r nodes)
3. Audio Sentiment Analysis (2 Chameleon Cascadelake_r nodes)
4. Summarization Question Answering (3 Chameleon Cascadelake_r nodes)
5. Natural Language Processing (6 Chameleon Cascadelake_r nodes)
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ spec:
env:
- name: MODEL_VARIANT
value: facebook-s2t-large-librispeech-asr
- name: TRANSFORMERS_CACHE
value: /opt/mlserver/.cache
# - name: TRANSFORMERS_CACHE
# value: /opt/mlserver/.cache
- name: TASK
value: automatic-speech-recognition
- name: MLSERVER_PARALLEL_WORKERS
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
- /bin/sh
- -c
- /bin/sleep 10
replicas: 30
replicas: 1
graph:
name: audio
type: MODEL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Build and push the audio-qa "centralized with model" MLServer image.
# The HuggingFace question-answering model folders are copied into ./models
# so they are baked into the image at build time, the generated Dockerfile is
# rewritten to use the custom base image, and the result is pushed to every
# repository in REPOS.

set -euo pipefail  # fail fast on errors, unset variables, and pipe failures

# Model folders to bake into the image.
folders=(
  "/mnt/myshareddir/huggingface/question-answering/deepset-roberta-base-squad2"
  "/mnt/myshareddir/huggingface/question-answering/deepset-xlm-roberta-large-squad2"
  "/mnt/myshareddir/huggingface/question-answering/deepset-xlm-roberta-base-squad2"
  "/mnt/myshareddir/huggingface/question-answering/distilbert-base-cased-distilled-squad"
)

# Docker Hub repositories to push the image to.
REPOS=(
  sdghafouri
)

dockerfile="Dockerfile"
IMAGE_NAME=audio-qa-centralized-with-model:nlpqa

# Create a clean models directory; a bare `mkdir models` would fail if a
# stale copy was left behind by an aborted previous run.
rm -rf models
mkdir -p models

# Copy the model folders into the build context.
for folder in "${folders[@]}"; do
  cp -r "$folder" models/
done

# Generate the Dockerfile, then point it at the custom base image.
# (Use "$dockerfile" consistently instead of repeating the literal name.)
mlserver dockerfile --include-dockerignore .
sed -i 's/seldonio/sdghafouri/g' "$dockerfile"
sed -i 's/1.3.0.dev4-slim/custom-2-slim/g' "$dockerfile"

# Add the copy commands to the Dockerfile using the contents of the models directory
# copy_commands=""
# for folder in "${folders[@]}"; do
#     target_path="/mnt/models/$(basename "$folder")"
#     copy_commands+="COPY models/$(basename "$folder") $target_path\n"
# done
# awk -v copy_commands="$copy_commands" '!done && /^COPY \\$/ {print copy_commands; done=1} 1' "$dockerfile" > Dockerfile.tmp
# mv Dockerfile.tmp "$dockerfile"

# Build once, then tag and push to each configured repository.
DOCKER_BUILDKIT=1 docker build . --tag="$IMAGE_NAME"
for REPO in "${REPOS[@]}"; do
  docker tag "$IMAGE_NAME" "$REPO/$IMAGE_NAME"
  docker push "$REPO/$IMAGE_NAME"
done

# Clean up the copied models from the build context.
rm -r models
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@
f"LOGS_ENABLED env variable not set, using default value: {LOGS_ENABLED}"
)

# Whether model weights are baked into the serving image under ./models
# (WITH_MODELS truthy) instead of being read from the /mnt/models mount.
# NOTE: os.getenv with a default never raises KeyError, so the original
# try/except KeyError wrapper was dead code — and its fallback log message
# referred to USE_THREADING instead of WITH_MODELS (copy-paste bug).
WITH_MODELS = os.getenv("WITH_MODELS", "False").lower() in ("true", "1", "t")
logger.info(f"WITH_MODELS set to: {WITH_MODELS}")

if not LOGS_ENABLED:
logger.disabled = True

Expand Down Expand Up @@ -98,9 +107,13 @@ async def load(self):
logger.info("Loading the ML models")
logger.info(f"max_batch_size: {self._settings.max_batch_size}")
logger.info(f"max_batch_time: {self._settings.max_batch_time}")
if WITH_MODELS:
model_path = os.path.join(".", "models", self.MODEL_VARIANT)
else:
model_path = os.path.join("/", "mnt", "models", self.MODEL_VARIANT)
self.model = pipeline(
task=self.TASK,
model=os.path.join("/", "mnt", "models", self.MODEL_VARIANT),
model=model_path,
batch_size=self._settings.max_batch_size,
)
self.loaded = True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Jinja template for a SeldonDeployment serving a question-answering model
# from the "with-model" image: weights are baked into the image
# (WITH_MODELS=True) and read from ./models, so the rclone init-container
# provisioning below is commented out.
# NOTE(review): "distrpution_time" is a typo for "distribution_time", but the
# name is part of the render context supplied by callers — renaming it here
# would break rendering; fix template and callers together.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  # Templated scalars are quoted so boolean-/number-looking values and empty
  # expansions cannot change the parsed type.
  name: "{{ name }}"
spec:
  protocol: v2
  name: "{{ name }}"
  # Let the Istio sidecar drain in-flight requests before shutdown.
  annotations:
    proxy.istio.io/config: |
      terminationDrainDuration: {{ distrpution_time }}s
  predictors:
    - name: "{{ name }}"
      annotations:
        seldon.io/no-engine: "true"  # run without the Seldon orchestrator
      componentSpecs:
        - spec:
            # volumes:
            #   - name: classifier-provision-location
            #     emptyDir: {}
            # initContainers:
            #   - name: classifier-model-initializer
            #     image: seldonio/rclone-storage-initializer:1.16.0-dev
            #     imagePullPolicy: IfNotPresent
            #     args:
            #       - "s3://huggingface/question-answering/{{ model_variant }}"
            #       - "/mnt/models/{{ model_variant }}"
            #     volumeMounts:
            #       - mountPath: /mnt/models
            #         name: classifier-provision-location
            #     envFrom:
            #       - secretRef:
            #           name: seldon-rclone-secret
            # Must stay an unquoted integer for the Kubernetes schema.
            terminationGracePeriodSeconds: {{ distrpution_time }}
            containers:
              - image: sdghafouri/audio-qa-centralized-with-model:nlpqa
                name: nlp-qa
                imagePullPolicy: Always
                resources:
                  requests:
                    cpu: '{{ cpu_request }}'
                    memory: '{{ memory_request }}'
                  limits:
                    cpu: '{{ cpu_limit }}'
                    memory: '{{ memory_limit }}'
                # volumeMounts:
                #   - mountPath: /mnt/models
                #     name: classifier-provision-location
                env:
                  - name: MODEL_VARIANT
                    # value: /mnt/models/{{ model_variant }}
                    value: "{{ model_variant }}"
                  - name: TRANSFORMERS_CACHE
                    value: /opt/mlserver/.cache
                  - name: TASK
                    value: question-answering
                  - name: CONTEXT
                    value: default
                  - name: MLSERVER_PARALLEL_WORKERS
                    value: "1"
                  - name: USE_THREADING
                    value: '{{ use_threading }}'
                  - name: NUM_INTEROP_THREADS
                    value: '{{ num_interop_threads }}'
                  - name: NUM_THREADS
                    value: '{{ num_threads }}'
                  - name: LOGS_ENABLED
                    value: '{{ logs_enabled }}'
                  - name: WITH_MODELS
                    value: "True"  # models are baked into this image
                readinessProbe:
                  failureThreshold: 3
                  initialDelaySeconds: 0
                  periodSeconds: 1
                  successThreshold: 1
                  tcpSocket:
                    port: 9000
                  timeoutSeconds: 1
                lifecycle:
                  preStop:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - /bin/sleep {{ distrpution_time }}
      # Unquoted: Kubernetes requires an integer here.
      replicas: {{ replicas }}
      graph:
        name: nlp-qa
        type: MODEL
        children: []
      labels:
        sidecar.istio.io/inject: "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# SeldonDeployment serving the distilbert-base-cased-distilled-squad
# question-answering model from the "with-model" image: WITH_MODELS=True
# tells the server to load weights baked into the image rather than from a
# /mnt/models volume, which is why the rclone init-container provisioning
# below is commented out.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: nlp-qa
spec:
  protocol: v2
  name: nlp-qa
  # Let the Istio sidecar drain in-flight requests before shutdown.
  annotations:
    proxy.istio.io/config: |
      terminationDrainDuration: 10s
  predictors:
    - name: nlp-qa
      annotations:
        seldon.io/no-engine: "true"  # run without the Seldon orchestrator
      componentSpecs:
        - spec:
            # volumes:
            #   - name: classifier-provision-location
            #     emptyDir: {}
            # initContainers:
            #   - name: classifier-model-initializer
            #     image: seldonio/rclone-storage-initializer:1.16.0-dev
            #     imagePullPolicy: IfNotPresent
            #     args:
            #       - "s3://huggingface/question-answering/distilbert-base-cased-distilled-squad"
            #       - "/mnt/models/distilbert-base-cased-distilled-squad"
            #     volumeMounts:
            #       - mountPath: /mnt/models
            #         name: classifier-provision-location
            #     envFrom:
            #       - secretRef:
            #           name: seldon-rclone-secret
            terminationGracePeriodSeconds: 10
            containers:
              - image: sdghafouri/audio-qa-centralized-with-model:nlpqa
                name: nlp-qa
                imagePullPolicy: Always
                resources:
                  requests:
                    cpu: '4'
                    memory: 4Gi
                  limits:
                    cpu: '4'
                    memory: 4Gi
                # volumeMounts:
                #   - mountPath: /mnt/models
                #     name: classifier-provision-location
                env:
                  - name: MODEL_VARIANT
                    value: distilbert-base-cased-distilled-squad
                  # - name: TRANSFORMERS_CACHE
                  #   value: /opt/mlserver/.cache
                  - name: TASK
                    value: question-answering
                  - name: CONTEXT
                    value: default
                  - name: MLSERVER_PARALLEL_WORKERS
                    value: "1"
                  - name: USE_THREADING
                    value: "True"
                  - name: NUM_INTEROP_THREADS
                    value: "4"
                  - name: NUM_THREADS
                    value: "4"
                  - name: LOGS_ENABLED
                    value: "False"
                  - name: WITH_MODELS
                    value: "True"  # models are baked into this image
                readinessProbe:
                  failureThreshold: 3
                  initialDelaySeconds: 0
                  periodSeconds: 1
                  successThreshold: 1
                  tcpSocket:
                    port: 9000
                  timeoutSeconds: 1
                lifecycle:
                  preStop:
                    exec:
                      command:
                        - /bin/sh
                        - -c
                        - /bin/sleep 10
      replicas: 1
      graph:
        name: nlp-qa
        type: MODEL
        children: []
      labels:
        sidecar.istio.io/inject: "true"
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
transformers==4.21.1
transformers==4.35.0
sentencepiece==0.1.97
torch==1.11.0
torch==2.1.0
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ spec:
tcpSocket:
port: 9000
timeoutSeconds: 1
replicas: 6
replicas: 20
graph:
name: nlp-qa
type: MODEL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@
"from transformers import pipeline\n",
"from datasets import load_dataset, Audio, Dataset\n",
"\n",
"path = \"/home/cc/infernece-pipeline-joint-optimization/pipelines/21-pipelines-prototype/audio-pipeline/seldon-core-version/sample-dataset.mp3\"\n",
"path = \"/home/cc/ipa-private/pipelines/21-pipelines-prototype/audio-pipeline/seldon-core-version/sample-dataset.mp3\"\n",
"translator = pipeline(\n",
" task=\"automatic-speech-recognition\", model=\"facebook/s2t-small-librispeech-asr\"\n",
")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ spec:
env:
- name: MODEL_VARIANT
value: facebook-s2t-large-librispeech-asr
- name: TRANSFORMERS_CACHE
value: /opt/mlserver/.cache
# - name: TRANSFORMERS_CACHE
# value: /opt/mlserver/.cache
- name: TASK
value: automatic-speech-recognition
- name: MLSERVER_PARALLEL_WORKERS
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
- /bin/sh
- -c
- /bin/sleep 10
replicas: 30
replicas: 1
graph:
name: audio
type: MODEL
Expand Down
Loading

0 comments on commit 4a56132

Please sign in to comment.