Swap sqlite for postgres, update Airflow to 2.9.0, Python to 3.12 #66

Merged 7 commits on Apr 29, 2024
4 changes: 2 additions & 2 deletions .env.template
@@ -19,7 +19,7 @@ AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS=False
# CSRF key https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#secret-key
AIRFLOW__WEBSERVER__SECRET_KEY=sample-secret-key=
# Executor to use
AIRFLOW__CORE__EXECUTOR=SequentialExecutor
AIRFLOW__CORE__EXECUTOR=LocalExecutor
# Environment this instance is being run in
AIRFLOW_VAR_ENVIRONMENT=dev

@@ -28,7 +28,7 @@ AIRFLOW_VAR_ENVIRONMENT=dev
########################################################################################
# Airflow primary metadata database
# Change the following line in prod to use the appropriate DB
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=sqlite:////opt/airflow/db/airflow.db
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
# Remote logging connection ID
# Replace "access_key" and "secret+key" with the real values. Secret key must be URL-encoded
AIRFLOW_CONN_AWS_DEFAULT=aws://test_key:test_secret@?region_name=us-east-1&endpoint_url=http://s3:5000
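The new connection string follows SQLAlchemy's dialect+driver://user:password@host:port/database form and points at the postgres service added in docker-compose.yml; the switch from SequentialExecutor to LocalExecutor depends on it, since LocalExecutor runs tasks in parallel and needs a database backend that handles concurrent connections, which the old SQLite file could not. A minimal connectivity check against that service might look like this (the command is illustrative, not part of the PR, and assumes the compose stack is running with the default airflow/airflow credentials):

# Hypothetical smoke test: confirm the Postgres metadata database is reachable
# with the credentials from .env.template.
docker compose exec postgres psql -U airflow -d airflow -c "SELECT 1;"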
16 changes: 7 additions & 9 deletions Dockerfile
@@ -14,9 +14,8 @@ ENV DAGS_FOLDER=${AIRFLOW_HOME}/techbloc_airflow/dags
ENV PYTHONPATH=${DAGS_FOLDER}

# Container optimizations
ENV PIPNOCACHEDIR=1
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1
ENV PIP_NO_COLOR=1

# Airflow/workflow configuration
@@ -32,13 +31,12 @@ ENV AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID=aws_default
ENV AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER=s3://techbloc-airflow-logs


#RUN apt-get update && apt-get -yqq upgrade && apt-get -yqq install \
# build-essential \
# libpq-dev \
# libffi-dev \
# && apt-get autoremove -y \
# && rm -rf /var/lib/apt/lists/*
USER root
RUN apt-get update && apt-get -yqq install \
build-essential \
libpq-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p ${DATABASE_DIR} /home/airflow/.ipython/ /opt/ssh/ && \
chown -R airflow ${DATABASE_DIR} /home/airflow/.ipython/ /opt/ssh/
USER airflow
@@ -54,7 +52,7 @@ ARG PROJECT_AIRFLOW_VERSION
# https://airflow.apache.org/docs/apache-airflow/stable/installation/installing-from-pypi.html#constraints-files
ARG CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-${PROJECT_AIRFLOW_VERSION}/constraints-${PROJECT_PY_VERSION}.txt"

RUN pip install --user -r ${REQUIREMENTS_FILE} -c ${CONSTRAINTS_FILE}
RUN pip install -r ${REQUIREMENTS_FILE} -c ${CONSTRAINTS_FILE}

COPY entrypoint.sh /opt/airflow/entrypoint.sh

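The apt layer now installs build-essential and libpq-dev, which are needed if psycopg2 has to be compiled from source for the Postgres backend, and the pip install drops --user while still resolving against the official Airflow constraints file. With the versions targeted by this PR, the build arguments would presumably expand to something like the sketch below (illustrative values only; the real ones come from PROJECT_AIRFLOW_VERSION and PROJECT_PY_VERSION at build time, and REQUIREMENTS_FILE is assumed here to point at requirements_prod.txt):

# Sketch of the pip install with the constraints URL expanded for Airflow 2.9.0 on Python 3.12.
CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.12.txt"
pip install -r requirements_prod.txt -c "${CONSTRAINTS_FILE}"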
4 changes: 2 additions & 2 deletions docker-compose.dev.yml
@@ -7,6 +7,7 @@ x-airflow-common:
&airflow-common
restart: on-failure
depends_on:
- postgres
- s3
build:
context: .
@@ -56,8 +57,6 @@ services:
# Dev changes for the scheduler
scheduler:
<<: *airflow-common
depends_on:
- s3
environment:
_AIRFLOW_WWW_USER_CREATE: "true"
_AIRFLOW_WWW_USER_USERNAME: airflow
@@ -72,6 +71,7 @@
<<: *airflow-common
depends_on:
- s3
- postgres
- scheduler

volumes:
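Both Airflow services in the dev override now depend on postgres, so the database container is started before the scheduler and webserver. Note that the short form of depends_on only orders startup; the healthcheck defined on the postgres service in docker-compose.yml could also be used with a condition: service_healthy form to wait for readiness, though this PR keeps the simpler form. A quick manual readiness check might look like the following (illustrative, assuming the service name and user from this PR):

# pg_isready exits 0 once the server accepts connections.
docker compose exec postgres pg_isready -U airflow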
23 changes: 19 additions & 4 deletions docker-compose.yml
@@ -1,25 +1,41 @@
version: '3'

services:
postgres:
image: postgres:13
environment:
POSTGRES_USER: airflow
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-airflow}
POSTGRES_DB: airflow
volumes:
- airflow-db:/var/lib/postgresql/data
healthcheck:
test: ["CMD", "pg_isready", "-U", "airflow"]
interval: 10s
retries: 5
restart: always

scheduler:
image: ghcr.io/orcacollective/techbloc-airflow:${DOCKER_IMAGE_TAG:-latest}
depends_on:
- postgres
env_file: .env
restart: always
# Only necessary for the entrypoint, services run as "airflow"
user: root
environment:
# Upgrade the DB on startup
_AIRFLOW_DB_UPGRADE: "true"
_AIRFLOW_DB_MIGRATE: "true"
command: scheduler
expose:
- "8793" # Used for fetching logs
volumes:
- ./techbloc_airflow/dags:/opt/airflow/techbloc_airflow/dags
- db:/opt/airflow/db

webserver:
image: ghcr.io/orcacollective/techbloc-airflow:${DOCKER_IMAGE_TAG:-latest}
depends_on:
- postgres
env_file: .env
restart: always
# Only necessary for the entrypoint, services run as "airflow"
@@ -29,7 +45,6 @@ services:
- "${AIRFLOW_PORT}:8080"
volumes:
- ./techbloc_airflow/dags:/opt/airflow/techbloc_airflow/dags
- db:/opt/airflow/db

volumes:
db:
airflow-db:
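The scheduler's bootstrap environment swaps _AIRFLOW_DB_UPGRADE for _AIRFLOW_DB_MIGRATE, which matches Airflow 2.7+ renaming the CLI command airflow db upgrade to airflow db migrate; how the variable is consumed is presumably handled by the repository's entrypoint.sh, which is not shown in this diff. Run by hand, the migration step would look roughly like this (the service name comes from this compose file, but the invocation itself is only a sketch):

# One-off migration of the Postgres metadata database (illustrative).
docker compose run --rm scheduler airflow db migrate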
20 changes: 10 additions & 10 deletions justfile
@@ -81,12 +81,8 @@ deploy:
lint:
pre-commit run --all-files

# Load any dependencies necessary for actions on the stack without running the webserver
_deps:
@just up "s3"

# Mount the tests directory and run a particular command
@_mount-tests command: _deps
@_mount-tests command:
# The test directory is mounted into the container only during testing
@just _dc run \
-v {{ justfile_directory() }}/tests:/opt/airflow/tests/ \
@@ -99,26 +95,30 @@ _deps:
test-session:
@just _mount-tests bash

# Run pytest using the webserver image
# Run pytest using the scheduler image
test *pytestargs:
@just _mount-tests "bash -c \'pytest {{ pytestargs }}\'"

# Open a shell into the webserver container
# Open a shell into the scheduler container
shell user="airflow": up
@just _dc exec -u {{ user }} {{ SERVICE }} /bin/bash

# Launch an IPython REPL using the webserver image
ipython: _deps
# Launch an IPython REPL using the scheduler image
ipython:
@just _dc run \
--rm \
-w /opt/airflow/techbloc_airflow/dags \
{{ SERVICE }} \
bash -c \'ipython\'

# Run a given command in bash using the scheduler image
run *args: _deps
run *args:
@just _dc run --rm {{ SERVICE }} bash -c \'{{ args }}\'

# Initialize the database
init:
@just run airflow db init

# Launch a pgcli shell on the postgres container (defaults to openledger) use "airflow" for airflow metastore
db-shell:
@just run bash -c 'sqlite3 /opt/airflow/db/airflow.db'
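Note that the db-shell recipe shown above still opens the old SQLite file, and its comment mentions pgcli and an "openledger" default that do not apply to this repository. With the metadata database now in Postgres, an equivalent recipe would presumably go through the postgres container instead; a sketch, reusing the _dc helper and the airflow/airflow credentials from this PR (the recipe below is not part of the change):

# Launch a psql shell on the postgres container (illustrative sketch)
db-shell:
    @just _dc exec postgres psql -U airflow -d airflow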
2 changes: 1 addition & 1 deletion requirements_dev.txt
@@ -2,7 +2,7 @@

flaky==3.7.0
ipython
pytest-mock==3.11.1
pytest-mock
pytest-raises==0.11
pytest-sugar==0.9.7
pytest-xdist
4 changes: 2 additions & 2 deletions requirements_prod.txt
@@ -1,2 +1,2 @@
# PYTHON=3.10
apache-airflow[amazon,sqlite,http,ssh]==2.4.3
# PYTHON=3.12
apache-airflow[amazon,postgres,http,ssh]==2.9.0