From 03e931eb8cdf651340c0cb060ae0626a2ff49028 Mon Sep 17 00:00:00 2001
From: Vitor Bellini
Date: Fri, 24 Nov 2023 12:20:22 +0400
Subject: [PATCH] clean libs and add requirements files

---
Reviewer notes. This text sits between the three-dash separator and the
diffstat, so it is neither part of the commit message nor of the applied
change.

What the patch does
  * Bumps the base image from apache/airflow:2.7.1-python3.10 to
    apache/airflow:2.7.3-python3.10.
  * Removes the inline PYTHON_DEPS build argument. The pinned DAG
    dependencies that are kept now live in requirements-cdata-dags.txt.
  * Adds requirements-uninstall.txt and runs `pip uninstall -y -r` on it as
    the first command of the pip RUN step. The file lists the amazon,
    elasticsearch, sendgrid, snowflake, microsoft-azure and google provider
    packages plus the dependencies grouped under each of them.
  * Installs mssql-tools together with msodbcsql17.
  * Installs the Ro-dou requirements.txt straight from its `main` branch,
    drops the `google_auth` extra, and moves
    acryl-datahub-airflow-plugin==0.10.4 into the apache-airflow install.
  * When dev_build is not "false", apache-airflow-providers-fastetl is
    skipped and the FastETL requirements.txt (also from `main`) is
    installed instead.

Change made during review (last Dockerfile hunk)
  * The patch originally added
        RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc && \
            source ~/.bashrc
    `source` only alters the environment of the shell that runs that single
    build step; it does not carry over to later steps or to containers, so
    the directory was on PATH only for shells that read ~/.bashrc.
    The hunk now adds `ENV PATH="${PATH}:/opt/mssql-tools/bin"`, which is
    stored in the image configuration, and keeps the ~/.bashrc append so
    shells that rebuild PATH from their startup files still get it.
  * Two added lines were swapped for two added lines, so the hunk header
    and the diffstat are still correct. The post-image blob id on the
    Dockerfile "index" line (00c6849) was not recomputed; re-export the
    patch after amending the commit to refresh it.

Follow-ups left untouched
  * `RUN rm ACcompactado.zip requirements-cdata-dags.txt
    requirements-uninstall.txt` is its own layer, so the files are still
    stored in the earlier layers that created them. The archive is
    downloaded in a RUN step that is only partly visible in this patch, so
    the cleanup was not moved.
  * Both raw.githubusercontent.com requirement URLs track `main`, so the
    build result can change without any change to this repository.
  * `adal` appears twice in requirements-uninstall.txt, and the `<2` in
    `pandas==1.5.2,<2` is redundant next to the exact pin.

Formatting caveat
  * The copy under review had its whitespace collapsed. Leading indentation
    of context/removed lines and the position of blank context lines were
    reconstructed to match the hunk-header counts; confirm them against the
    repository before applying.

 Dockerfile                  |  53 ++++++---------
 requirements-cdata-dags.txt |  19 ++++++
 requirements-uninstall.txt  | 129 ++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+), 33 deletions(-)
 create mode 100644 requirements-cdata-dags.txt
 create mode 100644 requirements-uninstall.txt

diff --git a/Dockerfile b/Dockerfile
index db10a31..00c6849 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,6 @@
-FROM apache/airflow:2.7.1-python3.10
+# for dev: docker build -t ghcr.io/gestaogovbr/airflow2-docker:latest-dev --build-arg dev_build=true .
 
-ARG PYTHON_DEPS=" \
-    ctds==1.12.0 \
-    tqdm==4.60.0 \
-    ijson==3.0.4 \
-    pysmb==1.2.6 \
-    xlrd==1.2.0 \
-    pygsheets==2.0.5 \
-    ipdb==0.13.3 \
-    py-trello==0.17.1 \
-    PyPDF2==1.26.0 \
-    frictionless==5.11.1 \
-    great-expectations==0.17.2 \
-    unidecode==1.2.0 \
-    odfpy==1.4.1 \
-    openpyxl==3.0.7 \
-    pytest==6.2.5 \
-    ckanapi==4.6 \
-    sharepy==1.3.0 \
-    Office365-REST-Python-Client==2.3.14 \
-    GeoAlchemy2==0.10.2 \
-    acryl-datahub-airflow-plugin==0.10.4 \
-    geopandas==0.12.2 \
-    "
+FROM apache/airflow:2.7.3-python3.10
 
 USER root
 RUN apt-get update \
@@ -38,7 +16,7 @@ RUN apt-get update \
     && curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add --no-tty - \
     && curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list \
     && apt-get update -yqq \
-    && ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 \
+    && ACCEPT_EULA=Y apt-get install -yqq msodbcsql17 mssql-tools \
     && sed -i 's,^\(MinProtocol[ ]*=\).*,\1'TLSv1.0',g' /etc/ssl/openssl.cnf \
     && sed -i 's,^\(CipherString[ ]*=\).*,\1'DEFAULT@SECLEVEL=1',g' /etc/ssl/openssl.cnf \
     && curl -O http://acraiz.icpbrasil.gov.br/credenciadas/CertificadosAC-ICP-Brasil/ACcompactado.zip \
@@ -63,24 +41,33 @@ RUN curl https://ssltools.digicert.com/chainTester/webservice/validatecerts/cert
 
 USER airflow
 
-RUN if [ -n "${PYTHON_DEPS}" ]; \
-    then pip install --no-cache-dir --user ${PYTHON_DEPS}; \
-    fi \
-    && mkdir /opt/airflow/export-data
+WORKDIR /opt/airflow
 
-RUN pip install --no-cache-dir --user \
-    apache-airflow[jdbc,microsoft.mssql,samba,google_auth,odbc,sentry] \
+COPY requirements-uninstall.txt .
+COPY requirements-cdata-dags.txt .
+
+RUN pip uninstall -y -r requirements-uninstall.txt && \
+    pip install --no-cache-dir --user -r requirements-cdata-dags.txt && \
+    pip install --no-cache-dir --user -r \
+    https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/requirements.txt && \
+    pip install --no-cache-dir --user \
+    apache-airflow[jdbc,microsoft.mssql,samba,odbc,sentry] \
     apache-airflow-providers-docker \
     apache-airflow-providers-common-sql \
-    apache-airflow-providers-telegram
+    apache-airflow-providers-telegram \
+    acryl-datahub-airflow-plugin==0.10.4
 
 ARG dev_build="false"
 RUN \
     if [[ "${dev_build}" == "false" ]] ; \
     then pip install --no-cache-dir --user apache-airflow-providers-fastetl; \
     else \
-    echo ***apache-airflow-providers-fastetl not installed*** ; \
+    echo ***apache-airflow-providers-fastetl not installed*** && \
+    pip install --no-cache-dir --user -r https://raw.githubusercontent.com/gestaogovbr/FastETL/main/requirements.txt ; \
     fi
 
 RUN while [[ "$(curl -s -o /tmp/thawte.pem -w ''%{http_code}'' https://ssltools.digicert.com/chainTester/webservice/validatecerts/certificate?certKey=issuer.intermediate.cert.98&fileName=Thawte%20RSA%20CA%202018&fileExtension=txt)" != "200" ]]; do sleep 1; done
 RUN cat /tmp/thawte.pem >> /home/airflow/.local/lib/python3.10/site-packages/certifi/cacert.pem
+ENV PATH="${PATH}:/opt/mssql-tools/bin"
+RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc
+RUN rm ACcompactado.zip requirements-cdata-dags.txt requirements-uninstall.txt
diff --git a/requirements-cdata-dags.txt b/requirements-cdata-dags.txt
new file mode 100644
index 0000000..19312a6
--- /dev/null
+++ b/requirements-cdata-dags.txt
@@ -0,0 +1,19 @@
+# airflow-dags
+ctds==1.12.0
+tqdm==4.60.0
+py-trello==0.17.1
+frictionless==5.11.1
+great-expectations==0.17.2
+openpyxl==3.0.7
+Office365-REST-Python-Client==2.3.14
+geopandas==0.12.2
+pandas==1.5.2,<2
+
+# airflow-dags-delog
+# tqdm==4.60.0
+unidecode==1.2.0
+
+# airflow-dags-detru
+
+# airflow-commons
+# great-expectations==0.17.2
\ No newline at end of file
diff --git a/requirements-uninstall.txt b/requirements-uninstall.txt
new file mode 100644
index 0000000..ecbd797
--- /dev/null
+++ b/requirements-uninstall.txt
@@ -0,0 +1,129 @@
+# from apache-airflow-providers-amazon
+apache-airflow-providers-amazon
+aiobotocore
+jsonpath-ng
+boto3
+mypy-boto3-appflow
+mypy-boto3-rds
+mypy-boto3-redshift-data
+mypy-boto3-s3
+redshift-connector
+scramp
+sqlalchemy-redshift
+watchtower
+
+# from apache-airflow-providers-elasticsearch
+apache-airflow-providers-elasticsearch
+elasticsearch
+elastic-transport
+
+# from apache-airflow-providers-sendgrid
+apache-airflow-providers-sendgrid
+sendgrid
+starkbank-ecdsa
+
+# from apache-airflow-providers-snowflake
+apache-airflow-providers-snowflake
+snowflake-connector-python
+snowflake-sqlalchemy
+pycryptodomex
+tomlkit
+asn1crypto
+oscrypto
+
+# from apache-airflow-providers-microsoft-azure
+apache-airflow-providers-microsoft-azure
+adal
+azure-batch
+azure-cosmos
+azure-datalake-store
+azure-identity
+azure-keyvault-secrets
+azure-kusto-data
+azure-mgmt-containerinstance
+azure-mgmt-cosmosdb
+azure-mgmt-datafactory
+azure-mgmt-datalake-store
+azure-mgmt-resource
+azure-servicebus
+azure-storage-blob
+azure-storage-common
+azure-storage-file
+azure-storage-file-datalake
+azure-synapse-spark
+azure-common
+azure-core
+msal-extensions
+msrest
+azure-mgmt-datalake-nspkg
+adal
+azure-mgmt-nspkg
+azure-mgmt-containerregistry
+azure-mgmt-core
+azure-mgmt-storage
+azure-nspkg
+azure-storage-file-share
+
+# from apache-airflow-providers-google
+apache-airflow-providers-google
+gcloud-aio-auth
+gcloud-aio-bigquery
+gcloud-aio-storage
+google-ads
+google-api-core
+google-api-python-client
+google-cloud-aiplatform
+google-cloud-automl
+google-cloud-batch
+google-cloud-bigquery-datatransfer
+google-cloud-bigtable
+google-cloud-build
+google-cloud-compute
+google-cloud-container
+google-cloud-datacatalog
+google-cloud-dataflow-client
+google-cloud-dataform
+google-cloud-dataplex
+google-cloud-dataproc
+google-cloud-dataproc-metastore
+google-cloud-dlp
+google-cloud-kms
+google-cloud-language
+google-cloud-logging
+google-cloud-memcache
+google-cloud-monitoring
+google-cloud-orchestration-airflow
+google-cloud-os-login
+google-cloud-pubsub
+google-cloud-redis
+google-cloud-run
+google-cloud-secret-manager
+google-cloud-spanner
+google-cloud-speech
+google-cloud-storage
+google-cloud-storage-transfer
+google-cloud-tasks
+google-cloud-texttospeech
+google-cloud-translate
+google-cloud-videointelligence
+google-cloud-vision
+google-cloud-workflows
+grpcio-gcp
+json-merge-patch
+looker-sdk
+pandas-gbq
+proto-plus
+PyOpenSSL
+sqlalchemy-bigquery
+sqlalchemy-spanner
+google-auth-oauthlib
+google-cloud-appengine-logging
+google-cloud-audit-log
+google-cloud-bigquery
+google-cloud-bigquery-storage
+google-cloud-core
+google-cloud-resource-manager
+google-crc32c
+google-resumable-media
+grpc-google-iam-v1
+pydata-google-auth