From e8fa01b77b5e20462fd8a465494b1091a1e8a3fe Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Apr 2024 17:43:53 -0400 Subject: [PATCH 1/6] simplify dockerfile, eliminate references to adapter repos as they will be handled in those repos --- docker/Dockerfile | 125 +++++++--------------------------------------- 1 file changed, 18 insertions(+), 107 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0466c05aa93..da9ed7aa763 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,133 +1,44 @@ -## -# Generic dockerfile for dbt image building. -# See README for operational details -## +# this image gets published to GHCR for production use +ARG py_version=3.10.7 -# Top level build args -ARG build_for=linux/amd64 +FROM python:$py_version-slim-bullseye as base -## -# base image (abstract) -## -# Please do not upgrade beyond python3.10.7 currently as dbt-spark does not support -# 3.11py and images do not get made properly -FROM --platform=$build_for python:3.10.7-slim-bullseye as base - -# N.B. 
The refs updated automagically every release via bumpversion -ARG dbt_core_ref=dbt-core@v1.8.0b2 -ARG dbt_postgres_ref=dbt-postgres@v1.8.0b2 -ARG dbt_redshift_ref=dbt-redshift@v1.8.0b2 -ARG dbt_bigquery_ref=dbt-bigquery@v1.8.0b2 -ARG dbt_snowflake_ref=dbt-snowflake@v1.8.0b2 -ARG dbt_spark_ref=dbt-spark@v1.8.0b2 -# special case args -ARG dbt_spark_version=all -ARG dbt_third_party - -# System setup RUN apt-get update \ && apt-get dist-upgrade -y \ && apt-get install -y --no-install-recommends \ - git \ - ssh-client \ - software-properties-common \ - make \ - build-essential \ - ca-certificates \ - libpq-dev \ + build-essential=12.9 \ + ca-certificates=20210119 \ + git=1:2.30.2-1+deb11u2 \ + libpq-dev=13.14-0+deb11u1 \ + make=4.3-4.1 \ + openssh-client=1:8.4p1-5+deb11u3 \ + software-properties-common=0.96.20.2-2.1 \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ /tmp/* \ /var/tmp/* -# Env vars ENV PYTHONIOENCODING=utf-8 ENV LANG=C.UTF-8 -# Update python -RUN python -m pip install --upgrade pip setuptools wheel --no-cache-dir +RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir -# Set docker basics -WORKDIR /usr/app/dbt/ -ENTRYPOINT ["dbt"] -## -# dbt-core -## FROM base as dbt-core -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_core_ref}#egg=dbt-core&subdirectory=core" - -## -# dbt-postgres -## -FROM base as dbt-postgres -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" +ARG commit_ref=main -## -# dbt-redshift -## -FROM base as dbt-redshift -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" +HEALTHCHECK CMD dbt --version || exit 1 +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] -## -# dbt-bigquery -## -FROM base as dbt-bigquery -RUN python -m pip install --no-cache-dir 
"git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" - +RUN python -m pip install --no-cache-dir "dbt-core @ git+https://github.com/dbt-labs/dbt-core@${commit_ref}#subdirectory=core" -## -# dbt-snowflake -## -FROM base as dbt-snowflake -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" -## -# dbt-spark -## -FROM base as dbt-spark -RUN apt-get update \ - && apt-get dist-upgrade -y \ - && apt-get install -y --no-install-recommends \ - python-dev \ - libsasl2-dev \ - gcc \ - unixodbc-dev \ - && apt-get clean \ - && rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* -RUN python -m pip install --no-cache-dir "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" +FROM dbt-core as dbt-third-party +ARG dbt_third_party -## -# dbt-third-party -## -FROM dbt-core as dbt-third-party RUN python -m pip install --no-cache-dir "${dbt_third_party}" - -## -# dbt-all -## -FROM base as dbt-all -RUN apt-get update \ - && apt-get dist-upgrade -y \ - && apt-get install -y --no-install-recommends \ - python-dev \ - libsasl2-dev \ - gcc \ - unixodbc-dev \ - && apt-get clean \ - && rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_redshift_ref}#egg=dbt-redshift" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_bigquery_ref}#egg=dbt-bigquery" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_snowflake_ref}#egg=dbt-snowflake" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_spark_ref}#egg=dbt-spark[${dbt_spark_version}]" - RUN python -m pip install --no-cache "git+https://github.com/dbt-labs/${dbt_postgres_ref}#egg=dbt-postgres&subdirectory=plugins/postgres" From 078848de2323e9051a8c92dac7b750ba69b1d1d2 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Apr 2024 17:44:08 -0400 Subject: [PATCH 2/6] 
eliminate references to adapter repos as they will be handled in those repos --- docker/README.md | 69 +++++++----------------------------------------- 1 file changed, 9 insertions(+), 60 deletions(-) diff --git a/docker/README.md b/docker/README.md index e4af582a29a..d05184146ed 100644 --- a/docker/README.md +++ b/docker/README.md @@ -5,13 +5,9 @@ This docker file is suitable for building dbt Docker images locally or using wit ## Building an image: This Dockerfile can create images for the following targets, each named after the database they support: * `dbt-core` _(no db-adapter support)_ -* `dbt-postgres` -* `dbt-redshift` -* `dbt-bigquery` -* `dbt-snowflake` -* `dbt-spark` * `dbt-third-party` _(requires additional build-arg)_ -* `dbt-all` _(installs all of the above in a single image)_ + +For platform-specific images, please refer to that platform's repository (e.g. `dbt-labs/dbt-postgres`). In order to build a new image, run the following docker command. ``` @@ -22,53 +18,27 @@ docker build --tag --target --- -By default the images will be populated with the most recent release of `dbt-core` and whatever database adapter you select. If you need to use a different version you can specify it by git ref using the `--build-arg` flag: +By default the images will be populated with `dbt-core` on `main`. +If you need to use a different version, you can specify it by git ref (tag, branch, SHA) using the `--build-arg` flag: ``` docker build --tag \ --target \ - --build-arg = \ + --build-arg commit_ref= \ ``` -valid arg names for versioning are: -* `dbt_core_ref` -* `dbt_postgres_ref` -* `dbt_redshift_ref` -* `dbt_bigquery_ref` -* `dbt_snowflake_ref` -* `dbt_spark_ref` - ---- ->**NOTE:** Only override a _single_ build arg for each build. Using multiple overrides may lead to a non-functioning image. ---- - -If you wish to build an image with a third-party adapter you can use the `dbt-third-party` target.
This target requires you provide a path to the adapter that can be processed by `pip` by using the `dbt_third_party` build arg: +If you wish to build an image with a third-party adapter, you can use the `dbt-third-party` target. +This target requires that you provide a path to the adapter that `pip` can process, using the `dbt_third_party` build arg: ``` docker build --tag \ --target dbt-third-party \ --build-arg dbt_third_party= \ ``` +This can also be combined with the `commit_ref` build arg to specify a version of `dbt-core`. ### Examples: -To build an image named "my-dbt" that supports redshift using the latest releases: -``` -cd dbt-core/docker -docker build --tag my-dbt --target dbt-redshift . -``` - -To build an image named "my-other-dbt" that supports bigquery using `dbt-core` version 0.21.latest and the bigquery adapter version 1.0.0b1: -``` -cd dbt-core/docker -docker build \ - --tag my-other-dbt \ - --target dbt-bigquery \ - --build-arg dbt_bigquery_ref=dbt-bigquery@v1.0.0b1 \ - --build-arg dbt_core_ref=dbt-core@0.21.latest \ - . -``` - -To build an image named "my-third-party-dbt" that uses [Materilize third party adapter](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) and the latest release of `dbt-core`: +To build an image named "my-third-party-dbt" that uses the latest release of the [Materialize third-party adapter](https://github.com/MaterializeInc/materialize/tree/main/misc/dbt-materialize) and the latest dev version of `dbt-core`: ``` cd dbt-core/docker docker build --tag my-third-party-dbt \ @@ -78,27 +48,6 @@ docker build --tag my-third-party-dbt \ ``` -## Special cases -There are a few special cases worth noting: -* The `dbt-spark` database adapter comes in three different versions named `PyHive`, `ODBC`, and the default `all`. If you wish to overide this you can use the `--build-arg` flag with the value of `dbt_spark_version=`.
See the [docs](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) for more information. - -``` -docker build --tag my_dbt \ - --target dbt-postgres \ - --build-arg dbt_postgres_ref=dbt-core@1.0.0b1 \ - -``` - -* If you need to build against another architecture (linux/arm64 in this example) you can overide the `build_for` build arg: -``` -docker build --tag my_dbt \ - --target dbt-postgres \ - --build-arg build_for=linux/arm64 \ - -``` - -Supported architectures can be found in the python docker [dockerhub page](https://hub.docker.com/_/python). - ## Running an image in a container: The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: ``` From 70a4f75eabcc18778df4cac58e89a7fe66b3def3 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Apr 2024 17:48:33 -0400 Subject: [PATCH 3/6] changie entry --- .changes/unreleased/Under the Hood-20240412-174824.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20240412-174824.yaml diff --git a/.changes/unreleased/Under the Hood-20240412-174824.yaml b/.changes/unreleased/Under the Hood-20240412-174824.yaml new file mode 100644 index 00000000000..c001cb218c5 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240412-174824.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update Docker release process to reflect distributed release workflows +time: 2024-04-12T17:48:24.29846-04:00 +custom: + Author: mikealfare + Issue: "9928" From 38b5f0175189ce3890536cbc43a7ebf9a3e36ccf Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Apr 2024 17:54:51 -0400 Subject: [PATCH 4/6] add dbt-postgres target for historical releases of dbt-postgres --- docker/Dockerfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index da9ed7aa763..7d7b0527da7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -37,6 +37,18 @@ ENTRYPOINT ["dbt"] 
RUN python -m pip install --no-cache-dir "dbt-core @ git+https://github.com/dbt-labs/dbt-core@${commit_ref}#subdirectory=core" +FROM base as dbt-postgres + +ARG commit_ref=main + +HEALTHCHECK CMD dbt --version || exit 1 + +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] + +RUN python -m pip install --no-cache-dir "dbt-postgres @ git+https://github.com/dbt-labs/dbt-core@${commit_ref}#subdirectory=plugins/postgres" + + FROM dbt-core as dbt-third-party ARG dbt_third_party From 12f2fbe60526fe0a9239f2b7aea61b492fe03579 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Mon, 15 Apr 2024 16:51:50 -0400 Subject: [PATCH 5/6] update third party image to pip install conditionally --- docker/Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7d7b0527da7..10e63d3ec27 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -53,4 +53,8 @@ FROM dbt-core as dbt-third-party ARG dbt_third_party -RUN python -m pip install --no-cache-dir "${dbt_third_party}" +RUN if [ "$dbt_third_party" ]; then \ + python -m pip install --no-cache-dir "${dbt_third_party}"; \ + else \ + echo "No third party adapter provided"; \ + fi \ From 5d4ed80332eeaa950b4966eafd9c7f068beb7d41 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Mon, 15 Apr 2024 17:01:56 -0400 Subject: [PATCH 6/6] remove changelog --- .changes/unreleased/Under the Hood-20240412-174824.yaml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .changes/unreleased/Under the Hood-20240412-174824.yaml diff --git a/.changes/unreleased/Under the Hood-20240412-174824.yaml b/.changes/unreleased/Under the Hood-20240412-174824.yaml deleted file mode 100644 index c001cb218c5..00000000000 --- a/.changes/unreleased/Under the Hood-20240412-174824.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Update Docker release process to reflect distributed release workflows -time: 2024-04-12T17:48:24.29846-04:00 -custom: - Author: mikealfare - Issue: "9928"