diff --git a/.dockerignore b/.dockerignore index 3b940a9..9eba8a3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,7 @@ build dist docker-compose.yaml outputs +python3 README.md received_data TODOS.py diff --git a/.gitignore b/.gitignore index 7aa45f3..30d1550 100644 --- a/.gitignore +++ b/.gitignore @@ -8,14 +8,17 @@ .RData .Rhistory .venv -.venv __pycache__ _build bin build dist +nginx.conf output.zip +python3 received_data +self.cert +self.key typescript var venv diff --git a/README.md b/README.md index 0441098..2527f8b 100644 --- a/README.md +++ b/README.md @@ -83,15 +83,27 @@ The following table lists the environment variables used by the LabCAS ML Servic ## 🚀 Deployment at JPL -To deploy this into production at the NASA Jet Propulsion Laboratory, first publish an official version-tagged image to the Docker Hub (described above). Then copy the `docker-compose.yaml` file to the appropriate location on `edrn-docker`. Add a `@reboot` entry in the system's crontab to run +To deploy this into production at the NASA Jet Propulsion Laboratory, we can't use any of the conveniences afforded by Docker because of absolutely inane security requirements. Here's how to get it going in production at JPL: - env EDRN_ML_SERVE_VERSION=X.Y.Z EDRN_HTTP_PORT=9080 docker compose up --quiet-pull --remove-orphans --detach + python3.9 -m venv python3 + python3/bin/pip install --upgrade --quiet setuptools pip wheel build + python3/bin/pip install --requirement requirements.txt + mkdir -p var/sockets -replacing `X.Y.Z` with the blessed version. +Set the two environment variables: + +- `ML_SERVE_HOME` to the directory containing the ML Serve software (tarball extracted or git-cloned) +- `NGINX_ETC` to the directory where Nginx's `mime.types` file (amongst others) may be found. 
+ +Then at boot up, arrange to have run: + +    env ML_SERVE_HOME=… NGINX_ETC=… $ML_SERVE_HOME/python3/bin/supervisord --configuration $ML_SERVE_HOME/etc/supervisord.conf + +Note that the Supervisor also sets `ML_SERVE_IP` and `ML_SERVE_PORT` for you. You only need to set these manually (to 127.0.0.1 and 8081 respectively) if you're debugging. Next, inform the system administrators to set up a reverse-proxy so that - https://edrn-labcas.jpl.nasa.gov/mlserve/ → https://edrn-docker:9443/ + https://edrn-labcas.jpl.nasa.gov/mlserve/ → https://localhost:9443/ This endpoint should be behind an HTTP Basic auth challenge that uses `ldaps://edrn-ds.jpl.nasa.gov/dc=edrn,dc=jpl,dc=nasa,dc=gov?uid?one?(objectClass=edrnPerson)` as the AuthLDAPURL diff --git a/configs/basic.py b/configs/basic.py index 656f655..e8a3cf6 100644 --- a/configs/basic.py +++ b/configs/basic.py @@ -1,9 +1,15 @@ -environments_config={ +import os, os.path + +_base_dir = os.getenv('ML_SERVE_HOME', '/usr/src/app') +_port = os.getenv('ML_SERVE_PORT', '8080') +_ip = os.getenv('ML_SERVE_IP', '127.0.0.2') + +environments_config = { # "environment_A": # {'ip': '127.0.0.2', # 'port': '6378', # 'namespace': 'serve', - # 'serve_port': '8080', + # 'serve_port': '8081', # 'deployments': ['/usr/src/app/configs/environments/environment_A/deployer.py'], # # 'deployments': ['/Users/asitangmishra/PycharmProjects/labcas-ml-serve/configs/environments/environment_A/deployer.py'], # # 'pyenv': '/Users/asitangmishra/PycharmProjects/alpha_n/venv/bin', @@ -11,14 +17,14 @@ # 'num_cpus': '8', # 'dashboard-port': '8265' # }, -"environment_B": - {'ip': '127.0.0.2', - 'port': '6378', - 'namespace': 'serve', - 'serve_port': '8080', - 'deployments': ['/usr/src/app/configs/environments/environment_B/deployer.py'], - 'object_store_memory': '500000000', # 500 MB - 'num_cpus': '8', - 'dashboard-port': '8265' - } + "environment_B": { + 'ip': _ip, + 'port': '6378', + 'namespace': 'serve', + 'serve_port': _port, + 'deployments': 
[os.path.join(_base_dir, 'configs/environments/environment_B/deployer.py')], + 'object_store_memory': '500000000', # 500 MB + 'num_cpus': '8', + 'dashboard-port': '8265' + } } diff --git a/etc/nginx.conf.in b/etc/nginx.conf.in new file mode 100644 index 0000000..354ee9b --- /dev/null +++ b/etc/nginx.conf.in @@ -0,0 +1,42 @@ +worker_processes auto; +error_log ${ML_SERVE_HOME}/var/log/nginx/error.log notice; +pid ${ML_SERVE_HOME}/var/run/nginx.pid; + +events { + worker_connections 128; +} + +http { + include ${NGINX_ETC}/mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + server_tokens off; + log_format main 'MLServe-HTTPS $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent" "$http_x_forwarded_for"'; + access_log ${ML_SERVE_HOME}/var/log/nginx/access.log main; + + server { + listen 9443 ssl; + listen [::]:9443 ssl; + server_name localhost; + ssl_certificate ${ML_SERVE_HOME}/self.cert; + ssl_certificate_key ${ML_SERVE_HOME}/self.key; + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; + ssl_ciphers HIGH:!aNULL:!MD5; + ssl_session_cache shared:SSL:128k; + ssl_session_timeout 5m; + + location / { + proxy_pass http://localhost:8081; + proxy_redirect default; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Real-IP $remote_addr; + proxy_ssl_verify off; + } + } +} + +# -*- mode: nginx; tab-width: 4; -*- diff --git a/etc/supervisord.conf b/etc/supervisord.conf new file mode 100644 index 0000000..8cbf22b --- /dev/null +++ b/etc/supervisord.conf @@ -0,0 +1,25 @@ +[supervisord] +logfile = %(ENV_ML_SERVE_HOME)s/var/log/supervisor.log +logfile_backups = 3 +loglevel = info +pidfile = %(ENV_ML_SERVE_HOME)s/var/supervisor.pid +directory = %(ENV_ML_SERVE_HOME)s +environment = PATH="%(ENV_ML_SERVE_HOME)s/python3/bin:%(ENV_PATH)s",ML_SERVE_PORT="8081",ML_SERVE_IP="127.0.0.1" + 
+ +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[unix_http_server] +file = %(ENV_ML_SERVE_HOME)s/var/supervisor + +[supervisorctl] +serverurl = unix://%(ENV_ML_SERVE_HOME)s/var/supervisor + +[program:ml-serve] +command = %(ENV_ML_SERVE_HOME)s/sbin/launch.sh +autorestart = false +redirect_stderr = true + +[program:nginx] +command = %(ENV_ML_SERVE_HOME)s/sbin/nginx.sh +autorestart = true diff --git a/labcas-ml-serve.sublime-project b/labcas-ml-serve.sublime-project index 13d5842..652d7e8 100644 --- a/labcas-ml-serve.sublime-project +++ b/labcas-ml-serve.sublime-project @@ -13,7 +13,9 @@ "*.egg-info", "var", "__pycache__", - "venv" + "venv", + ".venv", + "python3" ], "path" : "." } diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f489daa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,26 @@ +# python 3.9.0 + +# framework +fastapi==0.79.0 +ray[serve]==1.13.0 +redis==4.3.4 +aiofiles==22.1.0 +python-multipart==0.0.5 + +# for alphan's code +tensorflow==2.9.1 + +# ray[serve]==1.13.0 installs numpy==1.23.4 as a dependency, so making this match: +numpy==1.23.4 + +pandas==1.4.3 +scipy==1.8.1 +scikit-learn==1.1.1 +scikit-image==0.19.3 +matplotlib==3.5.2 +glob2==0.7 +imagecodecs==2022.2.22 + +# For running outside of Docker + +supervisor~=4.2.5 diff --git a/sbin/launch.sh b/sbin/launch.sh new file mode 100755 index 0000000..9872ef2 --- /dev/null +++ b/sbin/launch.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# +# Non-containerized entrypoint for LabCAS ML Serve +# +# This just executes `ray_start.py` and if it exits successfully, goes into a spin loop. +# A better approach would be for us to get Ray Serve started without the extra baggage and +# have it stay in the foreground. Yeah, this is ugly. +# +# We assume `python3` on the PATH is the correct Python 3.9 virtual environment with all +# dependencies to support running. 
+ + +: ${ML_SERVE_HOME:?✋ The environment variable ML_SERVE_HOME is required} + +PATH=${ML_SERVE_HOME}/python3/bin:${PATH} +export PATH + +cd "$ML_SERVE_HOME" +if [ \! -f src/ray_start.py ]; then +    echo "‼️ src/ray_start.py is not found; is your ML_SERVE_HOME set correctly?" 1>&2 +    exit -2 +fi + +# This should get called when supervisor interrupts us, but for some reason +# it never is. Thankfully ray_start does a stop on startup. +killit() { +    python3 src/ray_stop.py +    exit 0 +} +trap killit 1 2 3 6 15 + +# Start up +python3 src/ray_start.py +if [ $? -ne 0 ]; then +    echo "‼️ src/ray_start.py failed; see output above" 1>&2 +    exit -1 +else +    # ray_start.py exits whether successful or not, but we want to stay in the +    # foreground because that's what supervisord expects, so start spinning. +    while :; do +        sleep 999999 +    done +fi diff --git a/sbin/nginx.sh b/sbin/nginx.sh new file mode 100755 index 0000000..04d4320 --- /dev/null +++ b/sbin/nginx.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# +# Non-containerized entrypoint for Nginx +# +# We need Nginx because we aren't allowed to reverse-proxy to http:, only https:, and +# including certificate support in Ray Serve is an unknown. + +PATH=/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:/bin:${PATH} +export PATH + +: ${ML_SERVE_HOME:?✋ The environment variable ML_SERVE_HOME is required} +: ${NGINX_ETC:?✋ The environment variable NGINX_ETC is required} + +if [ \! -f "${ML_SERVE_HOME}/etc/nginx.conf.in" ]; then +    echo "‼️ nginx.conf.in is not found; is ML_SERVE_HOME set correctly?" 
1>&2 + exit -2 +fi + +CERT_CN=${CERT_CN:-localhost} +CERT_DAYS=${CERT_DAYS:-365} + +echo "💁‍♀️ CERT_CN is ${CERT_CN}" 1>&2 + +rm -f ${ML_SERVE_HOME}/self.key ${ML_SERVE_HOME}/self.cert +openssl req -nodes -x509 -days ${CERT_DAYS} -newkey rsa:2048 -keyout ${ML_SERVE_HOME}/self.key \ + -out ${ML_SERVE_HOME}/self.cert -subj "/C=US/ST=California/L=Pasadena/O=Caltech/CN=${CERT_CN}" +rm -f ${ML_SERVE_HOME}/nginx.conf +install -d ${ML_SERVE_HOME}/var ${ML_SERVE_HOME}/var/log ${ML_SERVE_HOME}/var/log/nginx ${ML_SERVE_HOME}/var/run +envsubst '$ML_SERVE_HOME $NGINX_ETC' < ${ML_SERVE_HOME}/etc/nginx.conf.in > ${ML_SERVE_HOME}/nginx.conf +exec nginx -g "daemon off;" -c ${ML_SERVE_HOME}/nginx.conf