guardrails-ai · AlejandroEsquivel · Aug 20, 2024 · Aug 20, 2024 · Aug 22, 2024
diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
@@ -0,0 +1,90 @@
+# Builds the validator image and pushes it to the SageMaker ECR repository.
+# Pushes to main always publish the "-rc" tag; a manual run can publish the production tag.
+name: Sagemaker ECR Publish (RC)
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+    inputs:
+      is_release_candidate:
+        description: 'Is this a release candidate?'
+        required: true
+        default: 'true'
+
+# Needed for OIDC / assume role
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  publish_image:
+    name: Publish Sagemaker Image (Release Candidate)
+    runs-on: ubuntu-latest
+    env:
+      VALIDATOR_TAG_NAME: toxiclanguage
+      AWS_REGION: us-east-1
+      WORKING_DIR: "./"
+      AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
+      # The inputs context is empty on push events, so pushes fall back to 'true'.
+      AWS_ECR_RELEASE_CANDIDATE: ${{ inputs.is_release_candidate || 'true' }}
+    steps:
+
+      - name: Check out head
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Set ECR Tag
+        id: set-ecr-tag
+        # The flag is read from the job-level environment variable rather than
+        # interpolated into the script, so a free-form dispatch input is only
+        # ever compared as a string and never parsed as shell code.
+        run: |
+          if [ "$AWS_ECR_RELEASE_CANDIDATE" = 'true' ]; then
+            echo "This is a release candidate."
+            echo "Setting tag to -rc"
+            ECR_TAG=$VALIDATOR_TAG_NAME-rc
+          else
+            echo "This is a production image."
+            ECR_TAG=$VALIDATOR_TAG_NAME
+          fi
+          echo "Setting ECR tag to $ECR_TAG"
+          echo "ECR_TAG=$ECR_TAG" >> "$GITHUB_OUTPUT"
+
+      # Docker setup actions are pinned to a release tag instead of the mutable master branch.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: linux/amd64
+
+      - name: Set up Docker Buildx
+        # This id is what the build step's steps.buildx.outputs.name refers to.
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: ${{ env.AWS_REGION }}
+          role-to-assume: ${{ env.AWS_CI_ROLE__PROD }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+        with:
+          mask-password: 'true'
+
+      - name: Build & Push ECR Image
+        uses: docker/build-push-action@v2
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: ${{ env.WORKING_DIR }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          push: true
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:${{ steps.set-ecr-tag.outputs.ECR_TAG }}
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,35 @@
+# Use an official PyTorch image with CUDA support
+FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the packaging metadata before the rest of the source so the install layers below can be cached
+COPY pyproject.toml .
+COPY README.md .
+COPY LICENSE .
+
+# Install the project from pyproject.toml; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+RUN pip install --no-cache-dir .
+
+# Install the necessary packages for the FastAPI app
+RUN pip install --no-cache-dir fastapi "uvicorn[standard]" gunicorn
+
+# Copy the entire project code into the container
+COPY . /app
+
+# Copy the serve script to /usr/local/bin so the ENTRYPOINT below can run it by name
+COPY serve /usr/local/bin/serve
+
+# Make the serve script executable
+RUN chmod +x /usr/local/bin/serve
+
+# env=prod makes app.py select the "cuda" torch device (any other value selects "cpu")
+ENV env=prod
+
+# Expose the port that the serve script binds Gunicorn to
+EXPOSE 8080
+
+# Set the entrypoint for SageMaker to the serve script
+ENTRYPOINT ["serve"]
diff --git a/app.py b/app.py
@@ -9,6 +9,9 @@
 # Initialize the detoxify model once
 env = os.environ.get("env", "dev")
 torch_device = "cuda" if env == "prod" else "cpu"
+# Log the torch device chosen above at import time: "cuda" when env == "prod", otherwise "cpu".
+print(f"Using torch device: {torch_device}")
+
 model = detoxify.Detoxify("unbiased-small", device=torch.device(torch_device))
 
 
@@ -81,6 +84,15 @@ def infer(text_vals, threshold) -> OutputResponse:
 
         return output_data
 
+# SageMaker-specific endpoints: GET /ping (health check) and POST /invocations (inference)
+@app.get("/ping")
+async def healtchcheck():  # NOTE(review): name looks like a typo of "healthcheck"; left unchanged
+    return {"status": "ok"}  # fixed payload; no model or device check is performed here
+
+@app.post("/invocations", response_model=OutputResponse)
+async def check_toxicity_sagemaker(input_request: InputRequest):  # /invocations alias for check_toxicity
+    return await check_toxicity(input_request)  # request is passed through unchanged; check_toxicity is defined outside this hunk
+
 
 # Run the app with uvicorn
 # Save this script as app.py and run with: uvicorn app:app --reload
diff --git a/serve b/serve
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+"""Container entrypoint: runs the FastAPI app (``app:app``) under Gunicorn.
+
+Environment variables:
+    MODEL_SERVER_TIMEOUT: value for Gunicorn's ``--timeout`` (default 60).
+    MODEL_SERVER_WORKERS: number of Gunicorn workers (default: CPU count).
+"""
+
+import multiprocessing
+import os
+import signal
+import subprocess
+import sys
+
+cpu_count = multiprocessing.cpu_count()
+
+# Both settings are parsed as integers so a malformed value fails here at
+# startup instead of being handed to the Gunicorn command line.
+model_server_timeout = int(os.environ.get('MODEL_SERVER_TIMEOUT', 60))
+model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count))
+
+
+def sigterm_handler(gunicorn_pid):
+    """Forward SIGTERM to Gunicorn, then exit this script with status 0.
+
+    Args:
+        gunicorn_pid: PID of the Gunicorn process to signal.
+    """
+    try:
+        os.kill(gunicorn_pid, signal.SIGTERM)
+    except OSError:
+        # Best effort: signalling can fail if the process has already exited.
+        pass
+
+    sys.exit(0)
+
+
+def start_server():
+    """Launch Gunicorn with Uvicorn workers on 0.0.0.0:8080 and wait for it to exit."""
+    print(f'Starting the inference server with {model_server_workers} workers.')
+
+    # Start Gunicorn to serve the FastAPI app
+    gunicorn = subprocess.Popen(['gunicorn',
+                                 '--timeout', str(model_server_timeout),
+                                 '-k', 'uvicorn.workers.UvicornWorker',
+                                 '-b', '0.0.0.0:8080',
+                                 '-w', str(model_server_workers),
+                                 'app:app'])
+
+    # From here on, a SIGTERM sent to this script is relayed to Gunicorn.
+    signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(gunicorn.pid))
+
+    # Wait for the Gunicorn process to exit
+    gunicorn.wait()
+
+    print('Inference server exiting')
+
+
+# The main routine just invokes the start function.
+if __name__ == '__main__':
+    start_server()