diff --git a/.env.template b/.env.template index 7bb1dd5..9c9078d 100644 --- a/.env.template +++ b/.env.template @@ -19,7 +19,7 @@ AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS=False # CSRF key https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#secret-key AIRFLOW__WEBSERVER__SECRET_KEY=sample-secret-key= # Executor to use -AIRFLOW__CORE__EXECUTOR=SequentialExecutor +AIRFLOW__CORE__EXECUTOR=LocalExecutor # Environment this instance is being run in AIRFLOW_VAR_ENVIRONMENT=dev @@ -28,7 +28,7 @@ AIRFLOW_VAR_ENVIRONMENT=dev ######################################################################################## # Airflow primary metadata database # Change the following line in prod to use the appropriate DB -AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=sqlite:////opt/airflow/db/airflow.db +AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow # Remote logging connection ID # Replace "access_key" and "secret+key" with the real values. 
Secret key must be URL-encoded AIRFLOW_CONN_AWS_DEFAULT=aws://test_key:test_secret@?region_name=us-east-1&endpoint_url=http://s3:5000 diff --git a/Dockerfile b/Dockerfile index bc3b2a9..d819595 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,8 @@ ENV DAGS_FOLDER=${AIRFLOW_HOME}/techbloc_airflow/dags ENV PYTHONPATH=${DAGS_FOLDER} # Container optimizations -ENV PIPNOCACHEDIR=1 -ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 +ENV PIP_NO_CACHE_DIR=1 ENV PIP_NO_COLOR=1 # Airflow/workflow configuration @@ -32,13 +31,12 @@ ENV AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID=aws_default ENV AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER=s3://techbloc-airflow-logs -#RUN apt-get update && apt-get -yqq upgrade && apt-get -yqq install \ -# build-essential \ -# libpq-dev \ -# libffi-dev \ -# && apt-get autoremove -y \ -# && rm -rf /var/lib/apt/lists/* USER root +RUN apt-get update && apt-get -yqq install \ + build-essential \ + libpq-dev \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* RUN mkdir -p ${DATABASE_DIR} /home/airflow/.ipython/ /opt/ssh/ && \ chown -R airflow ${DATABASE_DIR} /home/airflow/.ipython/ /opt/ssh/ USER airflow @@ -54,7 +52,7 @@ ARG PROJECT_AIRFLOW_VERSION # https://airflow.apache.org/docs/apache-airflow/stable/installation/installing-from-pypi.html#constraints-files ARG CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-${PROJECT_AIRFLOW_VERSION}/constraints-${PROJECT_PY_VERSION}.txt" -RUN pip install --user -r ${REQUIREMENTS_FILE} -c ${CONSTRAINTS_FILE} +RUN pip install -r ${REQUIREMENTS_FILE} -c ${CONSTRAINTS_FILE} COPY entrypoint.sh /opt/airflow/entrypoint.sh diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 6961cbf..c428081 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -7,6 +7,7 @@ x-airflow-common: &airflow-common restart: on-failure depends_on: + - postgres - s3 build: context: . 
@@ -56,8 +57,6 @@ services: # Dev changes for the scheduler scheduler: <<: *airflow-common - depends_on: - - s3 environment: _AIRFLOW_WWW_USER_CREATE: "true" _AIRFLOW_WWW_USER_USERNAME: airflow @@ -72,6 +71,7 @@ services: <<: *airflow-common depends_on: - s3 + - postgres - scheduler volumes: diff --git a/docker-compose.yml b/docker-compose.yml index 7e5220c..1f31ae2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,25 +1,41 @@ version: '3' services: + postgres: + image: postgres:13 + environment: + POSTGRES_USER: airflow + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-airflow} + POSTGRES_DB: airflow + volumes: + - airflow-db:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "airflow"] + interval: 10s + retries: 5 + restart: always scheduler: image: ghcr.io/orcacollective/techbloc-airflow:${DOCKER_IMAGE_TAG:-latest} + depends_on: + - postgres env_file: .env restart: always # Only necessary for the entrypoint, services run as "airflow" user: root environment: # Upgrade the DB on startup - _AIRFLOW_DB_UPGRADE: "true" + _AIRFLOW_DB_MIGRATE: "true" command: scheduler expose: - "8793" # Used for fetching logs volumes: - ./techbloc_airflow/dags:/opt/airflow/techbloc_airflow/dags - - db:/opt/airflow/db webserver: image: ghcr.io/orcacollective/techbloc-airflow:${DOCKER_IMAGE_TAG:-latest} + depends_on: + - postgres env_file: .env restart: always # Only necessary for the entrypoint, services run as "airflow" @@ -29,7 +45,6 @@ services: - "${AIRFLOW_PORT}:8080" volumes: - ./techbloc_airflow/dags:/opt/airflow/techbloc_airflow/dags - - db:/opt/airflow/db volumes: - db: + airflow-db: diff --git a/justfile b/justfile index 8b078d6..acf7751 100644 --- a/justfile +++ b/justfile @@ -81,12 +81,8 @@ deploy: lint: pre-commit run --all-files -# Load any dependencies necessary for actions on the stack without running the webserver -_deps: - @just up "s3" - # Mount the tests directory and run a particular command -@_mount-tests command: _deps +@_mount-tests 
command: # The test directory is mounted into the container only during testing @just _dc run \ -v {{ justfile_directory() }}/tests:/opt/airflow/tests/ \ @@ -99,16 +95,16 @@ _deps: test-session: @just _mount-tests bash -# Run pytest using the webserver image +# Run pytest using the scheduler image test *pytestargs: @just _mount-tests "bash -c \'pytest {{ pytestargs }}\'" -# Open a shell into the webserver container +# Open a shell into the scheduler container shell user="airflow": up @just _dc exec -u {{ user }} {{ SERVICE }} /bin/bash -# Launch an IPython REPL using the webserver image -ipython: _deps +# Launch an IPython REPL using the scheduler image +ipython: @just _dc run \ --rm \ -w /opt/airflow/techbloc_airflow/dags \ @@ -116,9 +112,13 @@ ipython: _deps bash -c \'ipython\' # Run a given command in bash using the scheduler image -run *args: _deps +run *args: @just _dc run --rm {{ SERVICE }} bash -c \'{{ args }}\' +# Initialize the database +init: + @just run airflow db migrate + # Launch a pgcli shell on the postgres container (defaults to openledger) use "airflow" for airflow metastore db-shell: @just run bash -c 'sqlite3 /opt/airflow/db/airflow.db' diff --git a/requirements_dev.txt b/requirements_dev.txt index cd00e03..a34f2e6 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,7 +2,7 @@ flaky==3.7.0 ipython -pytest-mock==3.11.1 +pytest-mock pytest-raises==0.11 pytest-sugar==0.9.7 pytest-xdist diff --git a/requirements_prod.txt b/requirements_prod.txt index df87ad6..f0d550f 100644 --- a/requirements_prod.txt +++ b/requirements_prod.txt @@ -1,2 +1,2 @@ -# PYTHON=3.10 -apache-airflow[amazon,sqlite,http,ssh]==2.4.3 +# PYTHON=3.12 +apache-airflow[amazon,postgres,http,ssh]==2.9.0