Skip to content

Commit

Permalink
Merge pull request #42 from gestaogovbr/clean-libs
Browse files (browse the repository at this point in the history)
clean libs and add requirements files
vitorbellini authored Nov 24, 2023
2 parents 5bc4691 + 03e931e commit ee4260d
Showing 3 changed files with 168 additions and 33 deletions.
53 changes: 20 additions & 33 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,28 +1,6 @@
FROM apache/airflow:2.7.1-python3.10
# for dev: docker build -t ghcr.io/gestaogovbr/airflow2-docker:latest-dev --build-arg dev_build=true .

ARG PYTHON_DEPS=" \
ctds==1.12.0 \
tqdm==4.60.0 \
ijson==3.0.4 \
pysmb==1.2.6 \
xlrd==1.2.0 \
pygsheets==2.0.5 \
ipdb==0.13.3 \
py-trello==0.17.1 \
PyPDF2==1.26.0 \
frictionless==5.11.1 \
great-expectations==0.17.2 \
unidecode==1.2.0 \
odfpy==1.4.1 \
openpyxl==3.0.7 \
pytest==6.2.5 \
ckanapi==4.6 \
sharepy==1.3.0 \
Office365-REST-Python-Client==2.3.14 \
GeoAlchemy2==0.10.2 \
acryl-datahub-airflow-plugin==0.10.4 \
geopandas==0.12.2 \
"
FROM apache/airflow:2.7.3-python3.10

USER root
RUN apt-get update \
@@ -38,7 +16,7 @@ RUN apt-get update \
&& curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add --no-tty - \
&& curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list \
&& apt-get update -yqq \
&& ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 \
&& ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 mssql-tools \
&& sed -i 's,^\(MinProtocol[ ]*=\).*,\1'TLSv1.0',g' /etc/ssl/openssl.cnf \
&& sed -i 's,^\(CipherString[ ]*=\).*,\1'DEFAULT@SECLEVEL=1',g' /etc/ssl/openssl.cnf \
&& curl -O http://acraiz.icpbrasil.gov.br/credenciadas/CertificadosAC-ICP-Brasil/ACcompactado.zip \
@@ -63,24 +41,33 @@ RUN curl https://ssltools.digicert.com/chainTester/webservice/validatecerts/cert

USER airflow

RUN if [ -n "${PYTHON_DEPS}" ]; \
then pip install --no-cache-dir --user ${PYTHON_DEPS}; \
fi \
&& mkdir /opt/airflow/export-data
WORKDIR /opt/airflow

RUN pip install --no-cache-dir --user \
apache-airflow[jdbc,microsoft.mssql,samba,google_auth,odbc,sentry] \
COPY requirements-uninstall.txt .
COPY requirements-cdata-dags.txt .

RUN pip uninstall -y -r requirements-uninstall.txt && \
pip install --no-cache-dir --user -r requirements-cdata-dags.txt && \
pip install --no-cache-dir --user -r \
https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/requirements.txt && \
pip install --no-cache-dir --user \
apache-airflow[jdbc,microsoft.mssql,samba,odbc,sentry] \
apache-airflow-providers-docker \
apache-airflow-providers-common-sql \
apache-airflow-providers-telegram
apache-airflow-providers-telegram \
acryl-datahub-airflow-plugin==0.10.4

ARG dev_build="false"
RUN \
if [[ "${dev_build}" == "false" ]] ; \
then pip install --no-cache-dir --user apache-airflow-providers-fastetl; \
else \
echo ***apache-airflow-providers-fastetl not installed*** ; \
echo ***apache-airflow-providers-fastetl not installed*** && \
pip install --no-cache-dir --user -r https://raw.githubusercontent.com/gestaogovbr/FastETL/main/requirements.txt ; \
fi

# Download the Thawte RSA CA 2018 intermediate certificate, retrying once per
# second until the endpoint answers HTTP 200. The URL is single-quoted so the
# shell does not treat its `&` separators as background operators (which would
# silently drop the fileName/fileExtension query parameters) or glob on `?`.
# NOTE(review): the loop has no retry limit, so the build blocks for as long as
# the endpoint keeps returning a non-200 status.
RUN while [[ "$(curl -s -o /tmp/thawte.pem -w '%{http_code}' 'https://ssltools.digicert.com/chainTester/webservice/validatecerts/certificate?certKey=issuer.intermediate.cert.98&fileName=Thawte%20RSA%20CA%202018&fileExtension=txt')" != "200" ]]; do sleep 1; done

# Append the downloaded certificate to the CA bundle shipped with the certifi
# package in the airflow user's site-packages.
RUN cat /tmp/thawte.pem >> /home/airflow/.local/lib/python3.10/site-packages/certifi/cacert.pem

# Put the mssql-tools binaries on PATH. ENV persists into later build steps and
# into running containers; `source ~/.bashrc` inside a RUN only affected that
# single build step, so it was dropped. The ~/.bashrc export is kept for shells
# that read that file.
ENV PATH="${PATH}:/opt/mssql-tools/bin"
RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc

# Delete build-time leftovers from the working directory.
# NOTE(review): these files were written by earlier layers, so removing them
# here hides them from the final filesystem but does not shrink the image.
RUN rm ACcompactado.zip requirements-cdata-dags.txt requirements-uninstall.txt
19 changes: 19 additions & 0 deletions requirements-cdata-dags.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Pinned Python dependencies installed into the image with
# `pip install --no-cache-dir --user -r requirements-cdata-dags.txt`.
# Groups are labelled with what appear to be the DAG repositories that need
# them (TODO confirm). Entries left commented out are duplicates of a pin that
# already appears in an earlier group.

# airflow-dags
ctds==1.12.0
tqdm==4.60.0
py-trello==0.17.1
frictionless==5.11.1
great-expectations==0.17.2
openpyxl==3.0.7
Office365-REST-Python-Client==2.3.14
geopandas==0.12.2
# Exact pin. The former trailing ",<2" clause was redundant: ==1.5.2 already
# satisfies it, so the resolved version is unchanged.
pandas==1.5.2

# airflow-dags-delog
# tqdm==4.60.0  (already pinned under airflow-dags)
unidecode==1.2.0

# airflow-dags-detru
# (no packages listed)

# airflow-commons
# great-expectations==0.17.2  (already pinned under airflow-dags)
129 changes: 129 additions & 0 deletions requirements-uninstall.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Packages removed from the base image with
# `pip uninstall -y -r requirements-uninstall.txt`.
# Each group starts with a provider package; the remaining entries are, per the
# group header, packages that came from that provider.
# Keep every package listed only once.

# from apache-airflow-providers-amazon
apache-airflow-providers-amazon
aiobotocore
jsonpath-ng
boto3
mypy-boto3-appflow
mypy-boto3-rds
mypy-boto3-redshift-data
mypy-boto3-s3
redshift-connector
scramp
sqlalchemy-redshift
watchtower

# from apache-airflow-providers-elasticsearch
apache-airflow-providers-elasticsearch
elasticsearch
elastic-transport

# from apache-airflow-providers-sendgrid
apache-airflow-providers-sendgrid
sendgrid
starkbank-ecdsa

# from apache-airflow-providers-snowflake
apache-airflow-providers-snowflake
snowflake-connector-python
snowflake-sqlalchemy
pycryptodomex
tomlkit
asn1crypto
oscrypto

# from apache-airflow-providers-microsoft-azure
apache-airflow-providers-microsoft-azure
adal
azure-batch
azure-cosmos
azure-datalake-store
azure-identity
azure-keyvault-secrets
azure-kusto-data
azure-mgmt-containerinstance
azure-mgmt-cosmosdb
azure-mgmt-datafactory
azure-mgmt-datalake-store
azure-mgmt-resource
azure-servicebus
azure-storage-blob
azure-storage-common
azure-storage-file
azure-storage-file-datalake
azure-synapse-spark
azure-common
azure-core
msal-extensions
msrest
azure-mgmt-datalake-nspkg
azure-mgmt-nspkg
azure-mgmt-containerregistry
azure-mgmt-core
azure-mgmt-storage
azure-nspkg
azure-storage-file-share

# from apache-airflow-providers-google
apache-airflow-providers-google
gcloud-aio-auth
gcloud-aio-bigquery
gcloud-aio-storage
google-ads
google-api-core
google-api-python-client
google-cloud-aiplatform
google-cloud-automl
google-cloud-batch
google-cloud-bigquery-datatransfer
google-cloud-bigtable
google-cloud-build
google-cloud-compute
google-cloud-container
google-cloud-datacatalog
google-cloud-dataflow-client
google-cloud-dataform
google-cloud-dataplex
google-cloud-dataproc
google-cloud-dataproc-metastore
google-cloud-dlp
google-cloud-kms
google-cloud-language
google-cloud-logging
google-cloud-memcache
google-cloud-monitoring
google-cloud-orchestration-airflow
google-cloud-os-login
google-cloud-pubsub
google-cloud-redis
google-cloud-run
google-cloud-secret-manager
google-cloud-spanner
google-cloud-speech
google-cloud-storage
google-cloud-storage-transfer
google-cloud-tasks
google-cloud-texttospeech
google-cloud-translate
google-cloud-videointelligence
google-cloud-vision
google-cloud-workflows
grpcio-gcp
json-merge-patch
looker-sdk
pandas-gbq
proto-plus
PyOpenSSL
sqlalchemy-bigquery
sqlalchemy-spanner
google-auth-oauthlib
google-cloud-appengine-logging
google-cloud-audit-log
google-cloud-bigquery
google-cloud-bigquery-storage
google-cloud-core
google-cloud-resource-manager
google-crc32c
google-resumable-media
grpc-google-iam-v1
pydata-google-auth

0 comments on commit ee4260d

Please sign in to comment.