Unattended operation outside of a Docker Composition
nutjob4life committed Feb 1, 2023
1 parent 8922eae commit 7dce3e9
Showing 10 changed files with 210 additions and 18 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ build
dist
docker-compose.yaml
outputs
python3
README.md
received_data
TODOS.py
5 changes: 4 additions & 1 deletion .gitignore
@@ -8,14 +8,17 @@
.RData
.Rhistory
.venv
__pycache__
_build
bin
build
dist
nginx.conf
output.zip
python3
received_data
self.cert
self.key
typescript
var
venv
20 changes: 16 additions & 4 deletions README.md
@@ -83,15 +83,27 @@ The following table lists the environment variables used by the LabCAS ML Servic

## 🚀 Deployment at JPL

-To deploy this into production at the NASA Jet Propulsion Laboratory, first publish an official version-tagged image to the Docker Hub (described above). Then copy the `docker-compose.yaml` file to the appropriate location on `edrn-docker`. Add a `@reboot` entry in the system's crontab to run
+To deploy this into production at the NASA Jet Propulsion Laboratory, we can't use any of the conveniences afforded by Docker because of absolutely inane security requirements. Here's how to get it going in production at JPL:

-    env EDRN_ML_SERVE_VERSION=X.Y.Z EDRN_HTTP_PORT=9080 docker compose up --quiet-pull --remove-orphans --detach
+    python3.9 -m venv python3
+    python3/bin/pip install --upgrade --quiet setuptools pip wheel build
+    python3/bin/pip install --requirement requirements.txt
+    mkdir -p var/sockets

-replacing `X.Y.Z` with the blessed version.
+Set these two environment variables:
+
+- `ML_SERVE_HOME` to the directory containing the ML Serve software (tarball-extracted or git-cloned)
+- `NGINX_ETC` to the directory where Nginx's `mime.types` file (amongst others) may be found
+
+Then, at boot, arrange to have this run:
+
+    env ML_SERVE_HOME=… NGINX_ETC=… $ML_SERVE_HOME/python3/bin/supervisord --configuration $ML_SERVE_HOME/etc/supervisord.conf
+
+Note that Supervisor also sets `ML_SERVE_IP` and `ML_SERVE_PORT` for you. You only need to set these manually (to 127.0.0.1 and 8081, respectively) if you're debugging.

Next, inform the system administrators to set up a reverse proxy so that

-    https://edrn-labcas.jpl.nasa.gov/mlserve/ → https://edrn-docker:9443/
+    https://edrn-labcas.jpl.nasa.gov/mlserve/ → https://localhost:9443/

This endpoint should be behind an HTTP Basic auth challenge that uses `ldaps://edrn-ds.jpl.nasa.gov/dc=edrn,dc=jpl,dc=nasa,dc=gov?uid?one?(objectClass=edrnPerson)` as the `AuthLDAPURL`.
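For reference, the requested proxy-plus-challenge might look like the following Apache httpd fragment. This is a hypothetical sketch only — the real configuration lives with the JPL system administrators — and it assumes `mod_proxy`, `mod_ssl`, and `mod_authnz_ldap` are loaded:

```apache
<Location "/mlserve/">
    # Forward to the local Nginx front-end for ML Serve
    ProxyPass        "https://localhost:9443/"
    ProxyPassReverse "https://localhost:9443/"
    SSLProxyEngine on
    # The back-end certificate is self-signed, so skip verification
    SSLProxyVerify none
    SSLProxyCheckPeerName off

    # HTTP Basic auth against the EDRN directory service
    AuthType Basic
    AuthName "LabCAS ML Serve"
    AuthBasicProvider ldap
    AuthLDAPURL "ldaps://edrn-ds.jpl.nasa.gov/dc=edrn,dc=jpl,dc=nasa,dc=gov?uid?one?(objectClass=edrnPerson)"
    Require valid-user
</Location>
```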

30 changes: 18 additions & 12 deletions configs/basic.py
@@ -1,24 +1,30 @@
-environments_config={
+import os, os.path
+
+_base_dir = os.getenv('ML_SERVE_HOME', '/usr/src/app')
+_port = os.getenv('ML_SERVE_PORT', '8080')
+_ip = os.getenv('ML_SERVE_IP', '127.0.0.2')
+
+environments_config = {
    # "environment_A":
    #     {'ip': '127.0.0.2',
    #      'port': '6378',
    #      'namespace': 'serve',
-   #      'serve_port': '8080',
+   #      'serve_port': '8081',
    #      'deployments': ['/usr/src/app/configs/environments/environment_A/deployer.py'],
    #      # 'deployments': ['/Users/asitangmishra/PycharmProjects/labcas-ml-serve/configs/environments/environment_A/deployer.py'],
    #      # 'pyenv': '/Users/asitangmishra/PycharmProjects/alpha_n/venv/bin',
    #      'object_store_memory': '500000000', # 500 MB
    #      'num_cpus': '8',
    #      'dashboard-port': '8265'
    #      },
-   "environment_B":
-       {'ip': '127.0.0.2',
-        'port': '6378',
-        'namespace': 'serve',
-        'serve_port': '8080',
-        'deployments': ['/usr/src/app/configs/environments/environment_B/deployer.py'],
-        'object_store_memory': '500000000', # 500 MB
-        'num_cpus': '8',
-        'dashboard-port': '8265'
-       }
+   "environment_B": {
+       'ip': _ip,
+       'port': '6378',
+       'namespace': 'serve',
+       'serve_port': _port,
+       'deployments': [os.path.join(_base_dir, 'configs/environments/environment_B/deployer.py')],
+       'object_store_memory': '500000000', # 500 MB
+       'num_cpus': '8',
+       'dashboard-port': '8265'
+   }
}
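The point of this rewrite is that the hard-coded Docker paths and addresses become environment-driven defaults. A small illustration of the lookup pattern (the `resolve_config` helper is hypothetical, for demonstration only):

```python
import os

def resolve_config(env=None):
    """Mirror configs/basic.py: environment variables win; the old
    hard-coded Docker values remain the defaults."""
    env = os.environ if env is None else env
    return {
        'base_dir': env.get('ML_SERVE_HOME', '/usr/src/app'),
        'port': env.get('ML_SERVE_PORT', '8080'),
        'ip': env.get('ML_SERVE_IP', '127.0.0.2'),
    }

# Inside the Docker composition (nothing set) the old defaults apply;
# under supervisord, ML_SERVE_PORT=8081 and ML_SERVE_IP=127.0.0.1 win.
print(resolve_config(env={}))
print(resolve_config(env={'ML_SERVE_PORT': '8081', 'ML_SERVE_IP': '127.0.0.1'}))
```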
42 changes: 42 additions & 0 deletions etc/nginx.conf.in
@@ -0,0 +1,42 @@
worker_processes auto;
error_log ${ML_SERVE_HOME}/var/log/nginx/error.log notice;
pid ${ML_SERVE_HOME}/var/run/nginx.pid;

events {
worker_connections 128;
}

http {
include ${NGINX_ETC}/mime.types;
default_type application/octet-stream;
sendfile on;
keepalive_timeout 65;
server_tokens off;
log_format main 'MLServe-HTTPS $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent" "$http_x_forwarded_for"';
access_log ${ML_SERVE_HOME}/var/log/nginx/access.log main;

server {
listen 9443 ssl;
listen [::]:9443 ssl;
server_name localhost;
ssl_certificate ${ML_SERVE_HOME}/self.cert;
ssl_certificate_key ${ML_SERVE_HOME}/self.key;
ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_session_cache shared:SSL:128k;
ssl_session_timeout 5m;

location / {
proxy_pass http://localhost:8081;
proxy_redirect default;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Real-IP $remote_addr;
proxy_ssl_verify off;
}
}
}

# -*- mode: nginx; tab-width: 4; -*-
25 changes: 25 additions & 0 deletions etc/supervisord.conf
@@ -0,0 +1,25 @@
[supervisord]
logfile = %(ENV_ML_SERVE_HOME)s/var/log/supervisor.log
logfile_backups = 3
loglevel = info
pidfile = %(ENV_ML_SERVE_HOME)s/var/supervisor.pid
directory = %(ENV_ML_SERVE_HOME)s
environment = PATH="%(ENV_ML_SERVE_HOME)s/python3/bin:%(ENV_PATH)s",ML_SERVE_PORT="8081",ML_SERVE_IP="127.0.0.1"

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[unix_http_server]
file = %(ENV_ML_SERVE_HOME)s/var/supervisor

[supervisorctl]
serverurl = unix://%(ENV_ML_SERVE_HOME)s/var/supervisor

[program:ml-serve]
command = %(ENV_ML_SERVE_HOME)s/sbin/launch.sh
autorestart = false
redirect_stderr = true

[program:nginx]
command = %(ENV_ML_SERVE_HOME)s/sbin/nginx.sh
autorestart = true
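Supervisor's `%(ENV_X)s` syntax is plain Python `%`-interpolation over the process environment (each variable `X` is exposed under the key `ENV_X`). A quick sketch of how the paths above expand, using a made-up install location:

```python
# supervisord substitutes %(ENV_NAME)s from os.environ['NAME'];
# this mimics that expansion with a hypothetical ML_SERVE_HOME.
template = '%(ENV_ML_SERVE_HOME)s/var/log/supervisor.log'
expanded = template % {'ENV_ML_SERVE_HOME': '/opt/mlserve'}
print(expanded)  # /opt/mlserve/var/log/supervisor.log
```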
4 changes: 3 additions & 1 deletion labcas-ml-serve.sublime-project
@@ -13,7 +13,9 @@
"*.egg-info",
"var",
"__pycache__",
-"venv"
+"venv",
+".venv",
+"python3"
],
"path" : "."
}
26 changes: 26 additions & 0 deletions requirements.txt
@@ -0,0 +1,26 @@
# python 3.9.0

# framework
fastapi==0.79.0
ray[serve]==1.13.0
redis==4.3.4
aiofiles==22.1.0
python-multipart==0.0.5

# for alphan's code
tensorflow==2.9.1

# ray[serve]==1.13.0 installs numpy==1.23.4 as a dependency, so make this match:
numpy==1.23.4

pandas==1.4.3
scipy==1.8.1
scikit-learn==1.1.1
scikit-image==0.19.3
matplotlib==3.5.2
glob2==0.7
imagecodecs==2022.2.22

# For running outside of Docker

supervisor~=4.2.5
45 changes: 45 additions & 0 deletions sbin/launch.sh
@@ -0,0 +1,45 @@
#!/bin/sh
#
# Non-containerized entrypoint for LabCAS ML Serve
#
# This just executes `ray_start.py` and, if it exits successfully, goes into a spin loop.
# A better approach would be for us to get Ray Serve started without the extra baggage and
# have it stay in the foreground. Yeah, this is ugly.
#
# We assume `python3` on the PATH is the correct Python 3.9 virtual environment with all
# dependencies to support running.


: ${ML_SERVE_HOME:?✋ The environment variable ML_SERVE_HOME is required}

PATH=${ML_SERVE_HOME}/python3/bin:${PATH}
export PATH

cd "$ML_SERVE_HOME"
if [ \! -f src/ray_start.py ]; then
echo "‼️ src/ray_start.py is not found; is your ML_SERVE_HOME set correctly?" 1>&2
exit 2
fi

# This should get called when supervisor interrupts us, but for some reason
# it never is. Thankfully ray_start does a stop on startup.
killit() {
python3 src/ray_stop.py
exit 0
}
trap killit 1 2 3 6 15

# Start up
python3 src/ray_start.py </dev/null
rc=$?

if [ $rc -ne 0 ]; then
echo "🤒 ray_start failed with $rc; exiting" 1>&2
exit 1
else
# ray_start.py exits whether successful or not, but we want to stay in the
# foreground because that's what supervisord expects, so start spinning.
while :; do
sleep 999999
done
fi
30 changes: 30 additions & 0 deletions sbin/nginx.sh
@@ -0,0 +1,30 @@
#!/bin/sh
#
# Non-containerized entrypoint for Nginx
#
# We need Nginx because we aren't allowed to reverse-proxy to http:, only https:, and
# including certificate support in Ray Serve is an unknown.

PATH=/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin:${PATH}
export PATH

: ${ML_SERVE_HOME:?✋ The environment variable ML_SERVE_HOME is required}
: ${NGINX_ETC:?✋ The environment variable NGINX_ETC is required}

if [ \! -f "${ML_SERVE_HOME}/etc/nginx.conf.in" ]; then
echo "‼️ nginx.conf.in is not found; is ML_SERVE_HOME set correctly?" 1>&2
exit 2
fi

CERT_CN=${CERT_CN:-localhost}
CERT_DAYS=${CERT_DAYS:-365}

echo "💁‍♀️ CERT_CN is ${CERT_CN}" 1>&2

rm -f ${ML_SERVE_HOME}/self.key ${ML_SERVE_HOME}/self.cert
openssl req -nodes -x509 -days ${CERT_DAYS} -newkey rsa:2048 -keyout ${ML_SERVE_HOME}/self.key \
-out ${ML_SERVE_HOME}/self.cert -subj "/C=US/ST=California/L=Pasadena/O=Caltech/CN=${CERT_CN}"
rm -f ${ML_SERVE_HOME}/nginx.conf
install -d ${ML_SERVE_HOME}/var ${ML_SERVE_HOME}/var/log ${ML_SERVE_HOME}/var/log/nginx ${ML_SERVE_HOME}/var/run
envsubst '$ML_SERVE_HOME $NGINX_ETC' < ${ML_SERVE_HOME}/etc/nginx.conf.in > ${ML_SERVE_HOME}/nginx.conf
exec nginx -g "daemon off;" -c ${ML_SERVE_HOME}/nginx.conf
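Note that `envsubst` is given an explicit whitelist (`'$ML_SERVE_HOME $NGINX_ETC'`) so that Nginx's own `$`-variables (`$host`, `$remote_addr`, and friends) in the template survive untouched. A rough Python equivalent of that whitelisting behavior (illustrative only; the `envsubst` function here is a stand-in, not the GNU gettext tool):

```python
import re

def envsubst(template, allowed):
    """Replace $NAME or ${NAME} only when NAME is in the allowed mapping;
    leave every other $-variable exactly as written."""
    return re.sub(r'\$\{?(\w+)\}?',
                  lambda m: allowed.get(m.group(1), m.group(0)),
                  template)

conf = 'pid ${ML_SERVE_HOME}/var/run/nginx.pid;\nproxy_set_header Host $host;'
print(envsubst(conf, {'ML_SERVE_HOME': '/opt/mlserve'}))
# $host is untouched; only the whitelisted variable is expanded.
```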
