diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index 633885b..71901b1 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -7,11 +7,11 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.8] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 name: Set up Python ${{ matrix.python-version }} with: python-version: ${{ matrix.python-version }} @@ -20,6 +20,6 @@ jobs: python3 -m pip install --upgrade pip pip3 install flake8 - name: run flake8 ⚙️ - run: | - flake8 + run: | + flake8 . diff --git a/README.md b/README.md index fdf9567..57f5909 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ are automatically installed during installation. ```bash # setup virtualenv -python3 -m venv --system-site-packages woudc-api -cd woudc-api +python3 -m venv --system-site-packages woudc-api_env +cd woudc-api_env source bin/activate # setup local OGC schemas (i.e. WOUDC_API_OGC_SCHEMAS_LOCATION in default.env) @@ -38,13 +38,21 @@ curl -O http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip && unzip ./SCHEMAS_OP # clone pygeoapi codebase and install git clone https://github.com/geopython/pygeoapi.git cd pygeoapi -python setup.py install +git checkout 0.16.1 +pip3 install -r requirements.txt +python3 setup.py install +cd .. + +# clone woudc-extcsv and install +git clone https://github.com/woudc/woudc-extcsv.git +cd woudc-extcsv +python3 setup.py install cd .. # clone woudc-api codebase and install git clone https://github.com/woudc/woudc-api.git cd woudc-api -python setup.py install +python3 setup.py install # set system environment variables cp default.env local.env @@ -52,7 +60,10 @@ vi local.env # edit accordingly . local.env # generate openapi document -pygeoapi openapi generate -c $PYGEOAPI_CONFIG > $PYGEOAPI_OPENAPI +pygeoapi openapi generate ${PYGEOAPI_CONFIG} -f json --output-file ${PYGEOAPI_OPENAPI} + +# optional: validate openapi document +pygeoapi openapi validate ${PYGEOAPI_OPENAPI} # run the server woudc-api serve # server runs on http://localhost:5000 diff --git a/debian/control b/debian/control index da27fc0..f3577e3 100644 --- a/debian/control +++ b/debian/control @@ -4,12 +4,12 @@ Priority: optional Maintainer: Tom Kralidis Build-Depends: debhelper (>= 9), python3, python3-setuptools, curl, unzip Standards-Version: 3.9.5 -X-Python-Version: >= 3.6 +X-Python-Version: >= 3.8 Vcs-Git: https://github.com/woudc/woudc-api.git Package: woudc-api Architecture: all -Depends: elasticsearch (>=7), elasticsearch (<8), libapache2-mod-wsgi-py3, python3, python3-flask, python3-pygeoapi, woudc-extcsv -Suggests: python3-elasticsearch (>=7), python3-elasticsearch (<8) +Depends: elasticsearch (>=8), elasticsearch (<9), libapache2-mod-wsgi-py3, python3, python3-flask, python3-pygeoapi, woudc-extcsv +Suggests: python3-elasticsearch (>=8), python3-elasticsearch (<9) Homepage: https://woudc.org Description: WMO WOUDC REST API diff --git a/debian/postinst b/debian/postinst index 191810b..bbaea52 100644 --- a/debian/postinst +++ b/debian/postinst @@ -1,6 +1,6 @@ #!/bin/sh -set -e +set -e USERNAME="woudcuser" GROUP="www-data" diff --git a/default.env b/default.env index e38b110..af9b883 100644 --- a/default.env +++ b/default.env @@ -1,7 +1,10 @@ export WOUDC_API_BIND_HOST=0.0.0.0 export WOUDC_API_BIND_PORT=5000 export WOUDC_API_URL=http://localhost:5000/oapi -export WOUDC_API_ES_URL=http://localhost:9200 +export WOUDC_API_ES_USERNAME=elasticsearch +export WOUDC_API_ES_PASSWORD= +export WOUDC_API_ES_URL=http://${WOUDC_API_ES_USERNAME}:${WOUDC_API_ES_PASSWORD}@localhost:9200 +export WOUDC_API_ES_INDEX_PREFIX=woudc_data_registry export WOUDC_API_OGC_SCHEMAS_LOCATION=/opt/woudc-api/schemas.opengis.net export PYGEOAPI_CONFIG=/path/to/woudc-api-config.yml export PYGEOAPI_OPENAPI=/path/to/woudc-api-openapi.yml diff --git a/deploy/default/kevin-woudc-api-config.yml b/deploy/default/kevin-woudc-api-config.yml new file mode 100755 index 0000000..da8588c --- /dev/null +++ b/deploy/default/kevin-woudc-api-config.yml @@ -0,0 +1,338 @@ +server: + bind: + host: ${WOUDC_API_BIND_HOST} + port: ${WOUDC_API_BIND_PORT} + url: ${WOUDC_API_URL} + basepath: / + mimetype: application/json; charset=UTF-8 + encoding: utf-8 + language: en-CA + # cors: true + pretty_print: true + limit: 500 + # templates: /path/to/templates + map: + url: https://tile.openstreetmap.org/{z}/{x}/{y}.png + attribution: '© OpenStreetMap contributors' + ogc_schemas_location: ${WOUDC_API_OGC_SCHEMAS_LOCATION} + +logging: + level: DEBUG + logfile: /tmp/kevin-woudc-api.log + +metadata: + identification: + title: World Ozone and Ultraviolet Radiation Data Centre (WOUDC) + description: The World Ozone and Ultraviolet Radiation Data Centre (WOUDC) is one of six World Data Centres which are part of the Global Atmosphere Watch (GAW) programme of the World Meteorological Organization (WMO). WOUDC contains ozone and UV data measured by instruments located on ground-based, shipborne or airborne platforms. Data are subject to the WOUDC Data Policy (https://woudc.org/about/data-policy.php) + keywords: + - ozone + - ultraviolet + - uv + - totalozone + - ozonesonde + - umkehr + - gaw + - wmo + - spectral + - stations + - instruments + keywords_type: theme + terms_of_service: https://www.canada.ca/en/environment-climate-change/corporate/transparency.html + url: https://woudc.org + license: + name: WOUDC Data Use Policy + url: https://woudc.org/about/data-policy.php + provider: + name: Government of Canada, Environment and Climate Change Canada, Meteorological Service of Canada + url: https://www.canada.ca/en/services/environment/weather.html + contact: + name: World Ozone and Ultraviolet Radiation Data Centre + position: Data Centre Manager + phone: None + fax: None + address: 4905 Dufferin Street + city: Toronto + stateorprovince: Ontario + postalcode: M3H 5T4 + country: Canada + email: woudc@ec.gc.ca + url: https://woudc.org/contact.php + hours: 1400h - 2200h UTC + instructions: During hours of service + +resources: + projects: + type: collection + title: WOUDC Data Registry Projects + description: Connection to projects in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.project + id_field: identifier + discovery_metadata: + type: collection + title: WOUDC Data Registry Discovery Metadata + description: Connection to dataset metadata in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.discovery_metadata + id_field: identifier + datasets: + type: collection + title: WOUDC Data Registry Datasets + description: Connection to datasets in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.dataset + id_field: identifier + countries: + type: collection + title: WOUDC Data Registry Countries + description: Connection to countries in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.country + id_field: identifier + contributors: + type: collection + title: WOUDC Data Registry Contributors + description: Connection to contributors in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.contributor + id_field: identifier + stations: + type: collection + title: WOUDC Data Registry Stations + description: Connection to stations in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.station + id_field: woudc_id + instruments: + type: collection + title: WOUDC Data Registry Instruments + description: Connection to instruments in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.instrument + id_field: identifier + deployments: + type: collection + title: WOUDC Data Registry Deployments + description: Connection to contributor deployments in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.deployment + id_field: identifier + data_records: + type: collection + title: WOUDC Data Registry Data Records + description: Connection to data records in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18T00:00:00Z + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.data_record + id_field: identifier + time_field: timestamp_utc + notifications: + type: collection + title: WOUDC notifications + description: WOUDC notifications + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 2015-02-03 + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.notification + id_field: published_date + date_field: published_date + peer_data_records: + type: collection + title: WOUDC Peer Data Records + description: Connection to data from federated data centres in the WOUDC Data Registry Search Index + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18T00:00:00Z + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.peer_data_record + id_field: identifier + time_field: start_datetime + uv_index_hourly: + type: collection + title: WOUDC UV Index Hourly + description: Connection to uv_index_hourly in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18T00:00:00Z + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.uv_index_hourly + id_field: identifier + time_field: timestamp_utc + totalozone: + type: collection + title: WOUDC Total Ozone + description: Connection to totalozone in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18T00:00:00Z + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.totalozone + id_field: identifier + time_field: observation_date + ozonesonde: + type: collection + title: WOUDC OzoneSonde + description: Connection to ozonesonde in the WOUDC Data Registry Search Index. + keywords: [] + links: [] + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: 1928-08-18T00:00:00Z + end: null # or empty + providers: + - type: feature + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.ozonesonde + id_field: identifier + time_field: timestamp_date + + woudc-data-registry-metrics: + type: process + processor: + name: woudc_api.plugins.metrics.MetricsProcessor + woudc-data-registry-explore: + type: process + processor: + name: woudc_api.plugins.explore.SearchPageProcessor + woudc-data-registry-select-distinct: + type: process + processor: + name: woudc_api.plugins.distinct.GroupSearchProcessor + woudc-data-registry-validate: + type: process + processor: + name: woudc_api.plugins.validate.ExtendedCSVProcessor diff --git a/deploy/default/woudc-api-config.yml b/deploy/default/woudc-api-config.yml old mode 100644 new mode 100755 index 135a862..c3c24db --- a/deploy/default/woudc-api-config.yml +++ b/deploy/default/woudc-api-config.yml @@ -18,7 +18,7 @@ server: logging: level: ERROR - # logfile: /tmp/pygeoapi.log + # logfile: /tmp/woudc-api.log metadata: identification: @@ -76,8 +76,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.project + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.project id_field: identifier discovery_metadata: type: collection @@ -94,8 +94,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.discovery_metadata + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.discovery_metadata id_field: identifier datasets: type: collection @@ -112,8 +112,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.dataset + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.dataset id_field: identifier countries: type: collection @@ -130,8 +130,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.country + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.country id_field: identifier contributors: type: collection @@ -148,8 +148,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.contributor + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.contributor id_field: identifier stations: type: collection @@ -166,8 +166,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.station + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.station id_field: woudc_id instruments: type: collection @@ -184,8 +184,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.instrument + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.instrument id_field: identifier deployments: type: collection @@ -202,8 +202,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.deployment + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.deployment id_field: identifier data_records: type: collection @@ -220,8 +220,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.data_record + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.data_record id_field: identifier time_field: timestamp_utc notifications: @@ -239,8 +239,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.notification + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.notification id_field: published_date date_field: published_date peer_data_records: @@ -258,8 +258,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.peer_data_record + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.peer_data_record id_field: identifier time_field: start_datetime uv_index_hourly: @@ -277,8 +277,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.uv_index_hourly + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.uv_index_hourly id_field: identifier time_field: timestamp_utc totalozone: @@ -296,8 +296,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.totalozone + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.totalozone id_field: identifier time_field: observation_date ozonesonde: @@ -315,8 +315,8 @@ resources: end: null # or empty providers: - type: feature - name: Elasticsearch - data: ${WOUDC_API_ES_URL}/woudc_data_registry.ozonesonde + name: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + data: ${WOUDC_API_ES_URL}/${WOUDC_API_ES_INDEX_PREFIX}.ozonesonde id_field: identifier time_field: timestamp_date diff --git a/deploy/default/woudc-api-openapi.yml b/deploy/default/woudc-api-openapi.yml old mode 100644 new mode 100755 index 3f2a85e..d3b0835 --- a/deploy/default/woudc-api-openapi.yml +++ b/deploy/default/woudc-api-openapi.yml @@ -653,14 +653,14 @@ paths: name: content_form required: false schema: - type: byte + type: string style: form - explode: false in: query name: content_level required: false schema: - type: float + type: number style: form - explode: false in: query @@ -689,7 +689,7 @@ paths: name: data_generation_version required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2359,14 +2359,14 @@ paths: name: daily_columno3 required: false schema: - type: float + type: number style: form - explode: false in: query name: daily_columnso2 required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2388,7 +2388,7 @@ paths: name: daily_nobs required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2402,7 +2402,7 @@ paths: name: daily_stdevo3 required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2486,7 +2486,7 @@ paths: name: monthly_columno3 required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2508,7 +2508,7 @@ paths: name: monthly_stdevo3 required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2770,7 +2770,7 @@ paths: name: solar_zenith_angle required: false schema: - type: float + type: number style: form - explode: false in: query @@ -2812,14 +2812,14 @@ paths: name: uv_daily_max required: false schema: - type: float + type: number style: form - explode: false in: query name: uv_index required: false schema: - type: float + type: number style: form - explode: false in: query diff --git a/deploy/default/woudc-api.conf b/deploy/default/woudc-api.conf old mode 100644 new mode 100755 diff --git a/deploy/default/woudc-api.wsgi b/deploy/default/woudc-api.wsgi old mode 100644 new mode 100755 diff --git a/deploy/nightly/deploy-nightly.sh b/deploy/nightly/deploy-nightly.sh old mode 100644 new mode 100755 index 0c75bfc..c531f9a --- a/deploy/nightly/deploy-nightly.sh +++ b/deploy/nightly/deploy-nightly.sh @@ -1,8 +1,10 @@ +#!/bin/bash + # ================================================================= # -# Author: Tom Kralidis +# Author: Kevin Ngai # -# Copyright (c) 2021 Tom Kralidis +# Copyright (c) 2024 Kevin Ngai # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -36,60 +38,87 @@ DAYSTOKEEP=7 export WOUDC_API_URL=https://gods-geo.woudc-dev.cmc.ec.gc.ca/woudc-api/nightly/latest/oapi/ export WOUDC_API_BIND_HOST=0.0.0.0/ export WOUDC_API_BIND_PORT=5000 -export WOUDC_API_ES_URL=http://localhost:9200 +# WOUDC_API_ES_USERNAME and WOUDC_API_ES_PASSWORD loaded from ~/.profile +export WOUDC_API_ES_URL=https://${WOUDC_API_ES_USERNAME}:${WOUDC_API_ES_PASSWORD}@localhost:9200 export WOUDC_API_OGC_SCHEMAS_LOCATION=/data/web/woudc-api-nightly/latest/schemas.opengis.net +DATETIME=$(date +%Y%m%d) +TIMESTAMP=$(date +%Y%m%d.%H%M) +NIGHTLYDIR="woudc-api-$TIMESTAMP" + +log() { + echo "$(date +'%Y-%m-%d %H:%M:%S') - $*" +} + +cleanup_old_builds() { + log "Deleting nightly builds > $DAYSTOKEEP days old" + find . -type d -name "woudc-api-20*" | while read -r dir; do + DATETIME2=$(echo "$dir" | awk -F- '{print $3}' | awk -F. '{print $1}') + DIFF=$(( ( $(date +%s -d "$DATETIME") - $(date +%s -d "$DATETIME2") ) / 86400 )) + if [ "$DIFF" -gt "$DAYSTOKEEP" ]; then + rm -rf "$dir" + fi + done + rm -rf latest +} + +create_venv_and_install() { + log "Generating nightly build for $TIMESTAMP" + python3 -m venv --system-site-packages "$NIGHTLYDIR" && cd "$NIGHTLYDIR" || exit 1 + source bin/activate + + log "Cloning repositories..." + git clone "$WOUDC_API_GITREPO" + git clone "$WOUDC_EXTCSV_GITREPO" + git clone "$PYGEOAPI_GITREPO" + + log "Installing pygeoapi..." + cd pygeoapi || exit 1 + git checkout 0.16.1 + pip3 install -r requirements.txt + python3 setup.py install + + log "Installing woudc-extcsv..." + cd ../woudc-extcsv || exit 1 + python3 setup.py install + + log "Installing woudc-api..." + cd ../woudc-api || exit 1 + python3 setup.py install + cd .. +} + +configure_woudc_api() { + log "Generating schemas.opengis.net..." + mkdir schemas.opengis.net + curl -O http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip + unzip ./SCHEMAS_OPENGIS_NET.zip "ogcapi/*" -d schemas.opengis.net + rm -f ./SCHEMAS_OPENGIS_NET.zip + + log "Configuring woudc-api configurations..." + cp woudc-api/deploy/default/woudc-api-config.yml woudc-api/deploy/nightly + sed -i 's#basepath: /#basepath: /woudc-api/nightly/latest#' woudc-api/deploy/nightly/woudc-api-config.yml + sed -i 's^# cors: true^cors: true^' woudc-api/deploy/nightly/woudc-api-config.yml + + log "Generating woudc-api-openapi.yml..." + pygeoapi openapi generate woudc-api/deploy/nightly/woudc-api-config.yml > woudc-api/deploy/nightly/woudc-api-openapi.yml + sed -i "s#http://schemas.opengis.net#$WOUDC_API_URL/schemas#g" woudc-api/deploy/nightly/woudc-api-openapi.yml +} + +set_symlink_and_permissions() { + log "Creating 'latest' symlink and setting correct permissions..." + ln -s "$NIGHTLYDIR" latest + chgrp eccc-hpc-cmdx -R "$NIGHTLYDIR" + chmod -R 775 "$NIGHTLYDIR" + log "Done." +} + +main() { + cd "$BASEDIR" || exit 1 + cleanup_old_builds + create_venv_and_install + configure_woudc_api + set_symlink_and_permissions +} -# you should be okay from here - -DATETIME=`date +%Y%m%d` -TIMESTAMP=`date +%Y%m%d.%H%M` -NIGHTLYDIR=woudc-api-$TIMESTAMP - -echo "Deleting nightly builds > $DAYSTOKEEP days old" - -cd $BASEDIR - -for f in `find . -type d -name "woudc-api-20*"` -do - DATETIME2=`echo $f | awk -F- '{print $3}' | awk -F. '{print $1}'` - let DIFF=(`date +%s -d $DATETIME`-`date +%s -d $DATETIME2`)/86400 - if [ $DIFF -gt $DAYSTOKEEP ]; then - rm -fr $f - fi -done - -rm -fr latest -echo "Generating nightly build for $TIMESTAMP" -python3.8 -m venv --system-site-packages $NIGHTLYDIR && cd $NIGHTLYDIR -source bin/activate -git clone $WOUDC_API_GITREPO -git clone $WOUDC_EXTCSV_GITREPO -git clone $PYGEOAPI_GITREPO -cd pygeoapi -pip3.8 install --prefix $BASEDIR/$NIGHTLYDIR cython -pip3.8 install --prefix $BASEDIR/$NIGHTLYDIR "click >= 7.1" pyproj==1.9.6 -pip3.8 install --prefix $BASEDIR/$NIGHTLYDIR -r requirements.txt -pip3.8 install --prefix $BASEDIR/$NIGHTLYDIR flask_cors elasticsearch -python3.8 setup.py install -cd ../woudc-extcsv -python3.8 setup.py install -cd ../woudc-api -python3.8 setup.py install -cd .. - -mkdir schemas.opengis.net -curl -O http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip && unzip ./SCHEMAS_OPENGIS_NET.zip "ogcapi/*" -d schemas.opengis.net && rm -f ./SCHEMAS_OPENGIS_NET.zip - -cp woudc-api/deploy/default/woudc-api-config.yml woudc-api/deploy/nightly -sed -i 's#basepath: /#basepath: /woudc-api/nightly/latest#' woudc-api/deploy/nightly/woudc-api-config.yml -sed -i 's^# cors: true^cors: true^' woudc-api/deploy/nightly/woudc-api-config.yml - -pygeoapi openapi generate woudc-api/deploy/nightly/woudc-api-config.yml > woudc-api/deploy/nightly/woudc-api-openapi.yml -sed -i "s#http://schemas.opengis.net#$WOUDC_API_URL/schemas#g" woudc-api/deploy/nightly/woudc-api-openapi.yml - -cd .. - -ln -s $NIGHTLYDIR latest -chgrp dmsec -R $NIGHTLYDIR # ensure correct group permission -chmod -R 775 $NIGHTLYDIR # ensure group writable +main "$@" diff --git a/deploy/nightly/woudc-api.conf b/deploy/nightly/woudc-api.conf old mode 100644 new mode 100755 diff --git a/deploy/nightly/woudc-api.wsgi b/deploy/nightly/woudc-api.wsgi old mode 100644 new mode 100755 diff --git a/woudc_api/plugins/distinct.py b/woudc_api/plugins/distinct.py index d064658..69f58bc 100644 --- a/woudc_api/plugins/distinct.py +++ b/woudc_api/plugins/distinct.py @@ -46,7 +46,8 @@ import os import logging -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, RequestsHttpConnection +from urllib.parse import urlparse from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError @@ -278,14 +279,31 @@ def __init__(self, provider_def): BaseProcessor.__init__(self, provider_def, PROCESS_SETTINGS) LOGGER.debug('Setting Elasticsearch properties') - url_tokens = os.environ.get('WOUDC_API_ES_URL').split('/') - host = url_tokens[2] + es_url = os.environ.get('WOUDC_API_ES_URL', + 'http://elastic:password@localhost:9200') + + # Parse the URL to extract components + parsed_url = urlparse(es_url) + host = parsed_url.hostname + username = parsed_url.username + password = parsed_url.password + scheme = parsed_url.scheme + + self.index_prefix = os.environ.get('WOUDC_API_ES_INDEX_PREFIX', + 'woudc_data_registry') + '.' LOGGER.debug('Host: {}'.format(host)) - self.index_prefix = 'woudc_data_registry.' + LOGGER.debug('Index prefix name: {}'.format(self.index_prefix)) LOGGER.debug('Connecting to Elasticsearch') - self.es = Elasticsearch(host) + auth = (username, password) + self.es = Elasticsearch( + [es_url], + http_auth=auth, + use_ssl=(scheme == 'https'), + verify_certs=False, + connection_class=RequestsHttpConnection + ) if not self.es.ping(): msg = 'Cannot connect to Elasticsearch' diff --git a/woudc_api/plugins/explore.py b/woudc_api/plugins/explore.py index c9b3e01..7499260 100644 --- a/woudc_api/plugins/explore.py +++ b/woudc_api/plugins/explore.py @@ -46,7 +46,8 @@ import os import logging -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, RequestsHttpConnection +from urllib.parse import urlparse from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError @@ -128,16 +129,33 @@ def __init__(self, provider_def): BaseProcessor.__init__(self, provider_def, PROCESS_SETTINGS) LOGGER.debug('Setting Elasticsearch properties') - url_tokens = os.environ.get('WOUDC_API_ES_URL').split('/') - host = url_tokens[2] + es_url = os.environ.get('WOUDC_API_ES_URL', + 'http://elastic:password@localhost:9200') - self.index = 'woudc_data_registry.contribution' + # Parse the URL to extract components + parsed_url = urlparse(es_url) + host = parsed_url.hostname + username = parsed_url.username + password = parsed_url.password + scheme = parsed_url.scheme + + self.index_prefix = os.environ.get('WOUDC_API_ES_INDEX_PREFIX', + 'woudc_data_registry') + '.' + self.index = self.index_prefix + 'contribution' LOGGER.debug('Host: {}'.format(host)) + LOGGER.debug('Index prefix name: {}'.format(self.index_prefix)) LOGGER.debug('Index name: {}'.format(self.index)) LOGGER.debug('Connecting to Elasticsearch') - self.es = Elasticsearch(host) + auth = (username, password) + self.es = Elasticsearch( + [es_url], + http_auth=auth, + use_ssl=(scheme == 'https'), + verify_certs=False, + connection_class=RequestsHttpConnection + ) if not self.es.ping(): msg = 'Cannot connect to Elasticsearch' @@ -170,7 +188,7 @@ def execute(self, inputs): 'ndacc_total', 'ndacc_uv', 'ndacc_vertical']: - self.index = 'woudc_data_registry.peer_data_record' + self.index = self.index_prefix + 'peer_data_record' peer_records = True filters = { diff --git a/woudc_api/plugins/metrics.py b/woudc_api/plugins/metrics.py index 9802656..ec06e15 100644 --- a/woudc_api/plugins/metrics.py +++ b/woudc_api/plugins/metrics.py @@ -46,7 +46,8 @@ import os import logging -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, RequestsHttpConnection +from urllib.parse import urlparse from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError @@ -180,17 +181,32 @@ def __init__(self, provider_def): BaseProcessor.__init__(self, provider_def, PROCESS_SETTINGS) - url_tokens = os.environ.get('WOUDC_API_ES_URL').split('/') - LOGGER.debug('Setting Elasticsearch properties') - self.index = 'woudc_data_registry.data_record' - host = url_tokens[2] + es_url = os.environ.get('WOUDC_API_ES_URL', + 'http://elastic:password@localhost:9200') + + # Parse the URL to extract components + parsed_url = urlparse(es_url) + host = parsed_url.hostname + username = parsed_url.username + password = parsed_url.password + scheme = parsed_url.scheme + self.index_prefix = os.environ.get('WOUDC_API_ES_INDEX_PREFIX', + 'woudc_data_registry') + '.' + self.index = self.index_prefix + 'data_record' LOGGER.debug('Host: {}'.format(host)) LOGGER.debug('Index name: {}'.format(self.index)) LOGGER.debug('Connecting to Elasticsearch') - self.es = Elasticsearch(host) + auth = (username, password) + self.es = Elasticsearch( + [es_url], + http_auth=auth, + use_ssl=(scheme == 'https'), + verify_certs=False, + connection_class=RequestsHttpConnection + ) if not self.es.ping(): msg = 'Cannot connect to Elasticsearch' @@ -221,7 +237,7 @@ def execute(self, inputs): 'ndacc_total', 'ndacc_uv', 'ndacc_vertical']: - self.index = 'woudc_data_registry.peer_data_record' + self.index = self.index_prefix + 'peer_data_record' peer_records = True if domain == 'dataset': diff --git a/woudc_api/plugins/validate.py b/woudc_api/plugins/validate.py index 6240f9c..d2f8641 100644 --- a/woudc_api/plugins/validate.py +++ b/woudc_api/plugins/validate.py @@ -46,7 +46,8 @@ import os import logging -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, RequestsHttpConnection +from urllib.parse import urlparse from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError from woudc_extcsv import (ExtendedCSV, MetadataValidationError, @@ -126,16 +127,31 @@ def __init__(self, provider_def): BaseProcessor.__init__(self, provider_def, PROCESS_SETTINGS) LOGGER.debug('Setting Elasticsearch properties') - url_tokens = os.environ.get('WOUDC_API_ES_URL').split('/') - host = url_tokens[2] + es_url = os.environ.get('WOUDC_API_ES_URL', + 'http://elastic:password@localhost:9200') - self.index = 'woudc_data_registry' + # Parse the URL to extract components + parsed_url = urlparse(es_url) + host = parsed_url.hostname + username = parsed_url.username + password = parsed_url.password + scheme = parsed_url.scheme + + self.index_prefix = os.environ.get('WOUDC_API_ES_INDEX_PREFIX', + 'woudc_data_registry') + '.' LOGGER.debug('Host: {}'.format(host)) - LOGGER.debug('Index name: {}'.format(self.index)) + LOGGER.debug('Index prefix name: {}'.format(self.index_prefix)) LOGGER.debug('Connecting to Elasticsearch') - self.es = Elasticsearch(host) + auth = (username, password) + self.es = Elasticsearch( + [es_url], + http_auth=auth, + use_ssl=(scheme == 'https'), + verify_certs=False, + connection_class=RequestsHttpConnection + ) if not self.es.ping(): msg = 'Cannot connect to Elasticsearch' @@ -746,7 +762,6 @@ def execute(self, inputs, metadata_only=False): if self.success and check_metadata: # Perform metadata checks - self.index_prefix = 'woudc_data_registry.' self.project = self.ecsv.extcsv['CONTENT']['Class'] self.dataset = self.ecsv.extcsv['CONTENT']['Category'] self.level = self.ecsv.extcsv['CONTENT']['Level'] diff --git a/woudc_api/provider/__init__.py b/woudc_api/provider/__init__.py new file mode 100644 index 0000000..5c4a8ef --- /dev/null +++ b/woudc_api/provider/__init__.py @@ -0,0 +1,30 @@ +# ================================================================= +# +# Author: Kevin Ngai +# +# Copyright (c) 2024 Kevin Ngai +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +"""Provider module containing the plugins wrapping data sources""" diff --git a/woudc_api/provider/elasticsearch.py b/woudc_api/provider/elasticsearch.py new file mode 100644 index 0000000..dc908cd --- /dev/null +++ b/woudc_api/provider/elasticsearch.py @@ -0,0 +1,129 @@ +# ================================================================= +# +# Authors: Kevin Ngai +# +# Copyright (c) 2024 Kevin Ngai +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +import logging +import urllib.parse + +from elasticsearch import Elasticsearch, exceptions + +import warnings +from urllib3.exceptions import InsecureRequestWarning + +from pygeoapi.provider.elasticsearch_ import ElasticsearchProvider +from pygeoapi.provider.base import (ProviderConnectionError, + ProviderQueryError) + + +LOGGER = logging.getLogger(__name__) + +# Suppress Warnings for development using https://localhost +warnings.simplefilter('ignore', InsecureRequestWarning) + + +class ElasticsearchWOUDCProvider(ElasticsearchProvider): + """Custom Elasticsearch Provider for WOUDC""" + + def __init__(self, provider_def): + """ + Initialize object with custom behavior for WOUDC + + :param provider_def: provider definition + + :returns: woudc_api.provider.elasticsearch.ElasticsearchWOUDCProvider + """ + LOGGER.debug('Initializing ElasticsearchWOUDCProvider') + LOGGER.debug(f'provider_def: {provider_def}') + + # Ensure that 'properties' attribute is set up + self.properties = getattr(self, 'properties', {}) + self.select_properties = getattr(self, 'select_properties', []) + + # Extract from provider_def + # Redoing BaseProvider.__init__ because we are not + # using super().__init__() + try: + self.name = provider_def['name'] + self.type = provider_def['type'] + self.data = provider_def['data'] + except KeyError: + raise RuntimeError('name/type/data are required') + + self.editable = provider_def.get('editable', False) + self.options = provider_def.get('options') + self.id_field = provider_def.get('id_field') + self.uri_field = provider_def.get('uri_field') + self.x_field = provider_def.get('x_field') + self.y_field = provider_def.get('y_field') + self.time_field = provider_def.get('time_field') + self.title_field = provider_def.get('title_field') + self.properties = provider_def.get('properties', []) + self.file_types = provider_def.get('file_types', []) + self.fields = {} + self.filename = None + + # for coverage providers + self.axes = [] + self.crs = None + self.num_bands = None + # end of BaseProvider.__init__ + + LOGGER.debug('Setting Elasticsearch properties') + + # Extract URL information from self.data + self.es_host, self.index_name = self.data.rsplit('/', 1) + parsed_url = urllib.parse.urlparse(self.es_host) + auth = (parsed_url.username, parsed_url.password) + + LOGGER.debug(f'host: {self.es_host}') + LOGGER.debug(f'index: {self.index_name}') + + LOGGER.debug('Connecting to Elasticsearch') + self.es = Elasticsearch( + [self.es_host], + http_auth=auth, + verify_certs=False + ) + if not self.es.ping(): + msg = f'Cannot connect to Elasticsearch: {self.es_host}' + LOGGER.error(msg) + raise ProviderConnectionError(msg) + + LOGGER.debug('Determining ES version') + v = self.es.info()['version']['number'][:3] + if float(v) < 8: + msg = 'only ES 8+ supported' + LOGGER.error(msg) + raise ProviderConnectionError(msg) + + LOGGER.debug('Grabbing field information') + try: + self.fields = self.get_fields() + except exceptions.NotFoundError as err: + LOGGER.error(err) + raise ProviderQueryError(err)