From 937a8eb54514c47124af72d55678063fd29a81aa Mon Sep 17 00:00:00 2001
From: Alejandro
Date: Tue, 20 Aug 2024 12:33:42 -0700
Subject: [PATCH 1/3] Updated for sagemaker endpoints compatibility

---
Review note: the /ping handler is spelled `healthcheck` below (the draft read
`healtchcheck`). The blob ids on the `index` lines predate that edit.

 Dockerfile | 35 +++++++++++++++++++++++++++++++++++
 app.py     | 12 ++++++++++++
 serve      | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 serve

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..bd1e015
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+# Use an official PyTorch image with CUDA support
+FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the pyproject.toml and any other necessary files (e.g., README, LICENSE)
+COPY pyproject.toml .
+COPY README.md .
+COPY LICENSE .
+
+# Install dependencies from the pyproject.toml file
+RUN pip install --upgrade pip setuptools wheel
+RUN pip install .
+
+# Install the necessary packages for the FastAPI app
+RUN pip install fastapi "uvicorn[standard]" gunicorn
+
+# Copy the entire project code into the container
+COPY . /app
+
+# Copy the serve script into the container
+COPY serve /usr/local/bin/serve
+
+# Make the serve script executable
+RUN chmod +x /usr/local/bin/serve
+
+# Set environment variable to determine the device (cuda or cpu)
+ENV env=prod
+
+# Expose the port that the FastAPI app will run on
+EXPOSE 8080
+
+# Set the entrypoint for SageMaker to the serve script
+ENTRYPOINT ["serve"]
diff --git a/app.py b/app.py
index 5350b76..375acf1 100644
--- a/app.py
+++ b/app.py
@@ -9,6 +9,9 @@
 # Initialize the detoxify model once
 env = os.environ.get("env", "dev")
 torch_device = "cuda" if env == "prod" else "cpu"
+
+print(f"Using torch device: {torch_device}")
+
 model = detoxify.Detoxify("unbiased-small", device=torch.device(torch_device))
 
 
@@ -81,6 +84,15 @@ def infer(text_vals, threshold) -> OutputResponse:
     return output_data
 
 
+# Sagemaker specific endpoints
+@app.get("/ping")
+async def healthcheck():
+    return {"status": "ok"}
+
+@app.post("/invocations", response_model=OutputResponse)
+async def check_toxicity_sagemaker(input_request: InputRequest):
+    return check_toxicity(input_request)
+
 
 # Run the app with uvicorn
 # Save this script as app.py and run with: uvicorn app:app --reload
diff --git a/serve b/serve
new file mode 100644
index 0000000..f024fda
--- /dev/null
+++ b/serve
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+import multiprocessing
+import os
+import signal
+import subprocess
+import sys
+
+cpu_count = multiprocessing.cpu_count()
+
+model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60)
+model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count))
+
+def sigterm_handler(gunicorn_pid):
+    try:
+        os.kill(gunicorn_pid, signal.SIGTERM)
+    except OSError:
+        pass
+
+    sys.exit(0)
+
+def start_server():
+    print(f'Starting the inference server with {model_server_workers} workers.')
+
+    # Start Gunicorn to serve the FastAPI app
+    gunicorn = subprocess.Popen(['gunicorn',
+                                 '--timeout', str(model_server_timeout),
+                                 '-k', 'uvicorn.workers.UvicornWorker',
+                                 '-b', '0.0.0.0:8080',
+                                 '-w', str(model_server_workers),
+                                 'app:app'])
+
+    signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(gunicorn.pid))
+
+    # Wait for the Gunicorn process to exit
+    gunicorn.wait()
+
+    print('Inference server exiting')
+
+# The main routine just invokes the start function.
+
+if __name__ == '__main__':
+    start_server()

From db2fa50700d4f5d6b979aca10c58b14038b4593c Mon Sep 17 00:00:00 2001
From: Alejandro
Date: Tue, 20 Aug 2024 16:17:44 -0700
Subject: [PATCH 2/3] minor fix

---
 app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app.py b/app.py
index 375acf1..51f7738 100644
--- a/app.py
+++ b/app.py
@@ -91,7 +91,7 @@ async def healthcheck():
 
 @app.post("/invocations", response_model=OutputResponse)
 async def check_toxicity_sagemaker(input_request: InputRequest):
-    return check_toxicity(input_request)
+    return await check_toxicity(input_request)
 
 
 # Run the app with uvicorn

From cac0a4dd2338bba45fce2f9d016a7e82af7bbd53 Mon Sep 17 00:00:00 2001
From: Alejandro
Date: Thu, 22 Aug 2024 16:49:41 -0700
Subject: [PATCH 3/3] added ci for publishing sagemaker images

---
Review notes:
- The Buildx step now carries `id: buildx`; without it the later
  `steps.buildx.outputs.name` expression has no step to resolve against.
- The tag-selection script reads the job-level AWS_ECR_RELEASE_CANDIDATE
  variable (quoted) instead of splicing the `${{ }}` expression into the shell
  text, because the value originates from a free-text dispatch input.
- Line counts are 82 (was 81); the blob id on the `index` line predates these
  edits.

 .github/workflows/ecr_sagemaker_publish.yml | 82 +++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 .github/workflows/ecr_sagemaker_publish.yml

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
new file mode 100644
index 0000000..73cffa5
--- /dev/null
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -0,0 +1,82 @@
+name: Sagemaker ECR Publish (RC)
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+    inputs:
+      is_release_candidate:
+        description: 'Is this a release candidate?'
+        required: true
+        default: 'true'
+
+# Needed for OIDC / assume role
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  publish_image:
+    name: Publish Sagemaker Image (Release Candidate)
+    runs-on: ubuntu-latest
+    env:
+      VALIDATOR_TAG_NAME: toxiclanguage
+      AWS_REGION: us-east-1
+      WORKING_DIR: "./"
+      AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
+      AWS_ECR_RELEASE_CANDIDATE: ${{ inputs.is_release_candidate || 'true' }}
+    steps:
+
+      - name: Check out head
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Set ECR Tag
+        id: set-ecr-tag
+        run: |
+          if [ "$AWS_ECR_RELEASE_CANDIDATE" == 'true' ]; then
+            echo "This is a release candidate."
+            echo "Setting tag to -rc"
+            ECR_TAG=$VALIDATOR_TAG_NAME-rc
+          else
+            echo "This is a production image."
+            ECR_TAG=$VALIDATOR_TAG_NAME
+          fi
+          echo "Setting ECR tag to $ECR_TAG"
+          echo "ECR_TAG=$ECR_TAG" >> "$GITHUB_OUTPUT"
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@master
+        with:
+          platforms: linux/amd64
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@master
+        with:
+          platforms: linux/amd64
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: ${{ env.AWS_REGION }}
+          role-to-assume: ${{ env.AWS_CI_ROLE__PROD}}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+        with:
+          mask-password: 'true'
+
+      - name: Build & Push ECR Image
+        uses: docker/build-push-action@v2
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: ${{ env.WORKING_DIR }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          push: true
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:${{ steps.set-ecr-tag.outputs.ECR_TAG }}