Skip to content

Commit

Permalink
dockerfile overhaul, remove scripts, configutils update
Browse files Browse the repository at this point in the history
  • Loading branch information
kennethjmyers committed Aug 11, 2023
1 parent 807ad76 commit 186b969
Show file tree
Hide file tree
Showing 23 changed files with 109 additions and 343 deletions.
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python 3.8.15
34 changes: 23 additions & 11 deletions model/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
# Intended to be run from the model/ directory
# docker build -t predict-etl:latest -f ./Dockerfile .
# docker run [-idt] --name predictETL --rm predict-etl
# Intended to be run from the top-level package directory
# This image is meant to be an all-purpose container for this project.
# You should be able to develop, test, and debug in it, and use it as a base layer for other images.
#
# docker build -t predict-etl-base:latest -f ./model/Dockerfile .
# docker run [-idt] --name predict-etl-base --rm predict-etl-base:latest
# test locally with:
# docker run -it --name predictETL \
# docker run -it --name predict-etl-base \
# -e AWS_ACCESS_KEY_ID=[access key] \
# -e AWS_SECRET_ACCESS_KEY=[secret key] \
# --rm predict-etl
# --rm predict-etl-base:latest

FROM predict-etl-packages:latest
# Base image uses a specific Bitnami Spark release tag rather than a floating one.
# NOTE(review): the platform is forced to linux/amd64, presumably so builds on Apple silicon match the deployment target - confirm
FROM --platform=linux/amd64 bitnami/spark:3.3.0-debian-11-r44 AS pyspark-build

COPY . .
# root is needed for the apt-get installs that follow
USER root

# allows us to call executables directly on the path
ENV PATH=$PATH:/tmp/
# git and vim are installed for working inside the container.
# update and install share one layer so a cached, stale package index is never reused,
# and the apt lists are removed in the same layer so they do not persist in the image.
RUN apt-get -y update \
    && apt-get -y install \
        git \
        vim \
    && rm -rf /var/lib/apt/lists/*

# execute python script
CMD PredictETL.py
COPY . ./app

# install the package and its requirements
# -e (editable) is used in case you want to use the container to develop locally (necessary on Apple silicon Macs)
# pip is pointed at ./app directly instead of cd-ing into it, and --no-cache-dir keeps
# pip's download cache out of the image layer
RUN pip install --no-cache-dir -e ./app

# safer to switch back to nobody, but you need root if you want to develop from VS Code
# with the USER line below commented out, the container keeps running as root
# USER nobody

# default command is a bash shell
CMD ["bash"]
15 changes: 15 additions & 0 deletions model/Dockerfile.execute
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Intended to be run from the top-level package directory
# This file is meant for running the predict ETL pipeline.
#
# docker build -t predict-etl-execute:latest -f ./model/Dockerfile.execute .
# docker run [-idt] --name predict-etl-execute --rm predict-etl-execute:latest
# test locally with:
# docker run -it --name predict-etl-execute \
# -e AWS_ACCESS_KEY_ID=[access key] \
# -e AWS_SECRET_ACCESS_KEY=[secret key] \
# --rm predict-etl-execute:latest

FROM --platform=linux/amd64 predict-etl-base:latest

# execute predict python script
# exec (JSON array) form runs the script directly instead of under `/bin/sh -c`,
# so the script is PID 1 and receives the stop signal from `docker stop`.
# The path stays relative to the working directory inherited from the base image.
CMD ["./app/model/PredictETL.py"]
28 changes: 0 additions & 28 deletions model/Dockerfile.packages

This file was deleted.

2 changes: 1 addition & 1 deletion model/ModelETL.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import os
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../'))
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu


# Forcing Timezone keeps things consistent with running on aws and without it timestamps get additional
Expand Down
6 changes: 3 additions & 3 deletions model/Monitoring.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion model/PredictETL.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../'))
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu


os.environ['TZ'] = 'UTC'
Expand Down
22 changes: 11 additions & 11 deletions model/model-GBM.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion model/modelUtils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
from configparser import ConfigParser
from datetime import datetime, timedelta
from pyspark.sql import DataFrame
from schema import fromDynamoConversion, toSparkSchema
Expand Down
2 changes: 1 addition & 1 deletion model/test_discordUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../'))
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu
import responses


Expand Down
2 changes: 1 addition & 1 deletion model/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../lambdaFunctions/getRedditDataFunction/'))
sys.path.append(os.path.join(THIS_DIR, '../'))
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu
import tableDefinition
import pandas as pd
import json
Expand Down
2 changes: 1 addition & 1 deletion patches/rds/newColumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(THIS_DIR, '../../'))
sys.path.append(os.path.join(THIS_DIR, '../../model/'))
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu
import sqlUtils as su


Expand Down
16 changes: 0 additions & 16 deletions requirements.txt

This file was deleted.

22 changes: 8 additions & 14 deletions scripts/buildAndPushDockerImage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,22 @@ done
: ${account_number:?Missing -a} # checks if these have been set https://unix.stackexchange.com/questions/621004/bash-getopts-mandatory-arguments
echo "account_number: $account_number";

cd ../model
cd ..

# make the predict script executable
chmod +x PredictETL.py
chmod +x model/PredictETL.py
# make it so we can write the latest model from S3 to the pickledModels directory
# the working directory is now the parent of model/, so the path needs the model/ prefix,
# matching the chmod on model/PredictETL.py
# NOTE(review): assumes pickledModels still lives under model/ - confirm
chmod -R +w model/pickledModels/

# build the environment image
echo "Building predict-etl-packages image"
docker build -t predict-etl-packages:latest -f ./Dockerfile.packages .
# build the base image
# the build context is the current directory (.), which the Dockerfile copies into the image
echo "Building predict-etl-base image"
docker build -t predict-etl-base:latest -f ./model/Dockerfile .

# copy configUtils, wasn't needed for the environment image
cp ../configUtils.py .

# build the predict-etl image
echo "Building predict-etl image"
docker build -t predict-etl:latest -f ./Dockerfile .
# build the predict-etl-execute image
# Dockerfile.execute starts FROM predict-etl-base:latest, so the base image must already be built
echo "Building predict-etl-execute image"
docker build -t predict-etl-execute:latest -f ./model/Dockerfile.execute .

# Push to ECR
aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin "${account_number}.dkr.ecr.us-east-2.amazonaws.com"
# the image this script builds is predict-etl-execute:latest, so that is the local tag to publish
# (tagging predict-etl:latest would fail or push a stale image, since it is no longer built here)
# NOTE(review): the ECR repository name is left as predict-etl - confirm whether it should be renamed too
docker tag predict-etl-execute:latest "${account_number}.dkr.ecr.us-east-2.amazonaws.com/predict-etl:latest"
docker push "${account_number}.dkr.ecr.us-east-2.amazonaws.com/predict-etl:latest"

# remove configUtils
rm ./configUtils.py
99 changes: 0 additions & 99 deletions scripts/conda_env_export.py

This file was deleted.

1 change: 0 additions & 1 deletion scripts/copyConfigToS3.sh

This file was deleted.

30 changes: 0 additions & 30 deletions scripts/createDockerEnvironmentYAML.sh

This file was deleted.

41 changes: 0 additions & 41 deletions scripts/createLambdaFunction.sh

This file was deleted.

Loading

0 comments on commit 186b969

Please sign in to comment.