From 8041a0f4177e035799e2233e50581106b12cb102 Mon Sep 17 00:00:00 2001 From: Jonathan Healy Date: Sun, 3 Mar 2024 19:19:00 +0800 Subject: [PATCH] Revert "Update makefile etc " --- .github/workflows/cicd.yml | 215 ++-- CHANGELOG.md | 115 +- Dockerfile.deploy.es | 20 + ...rfile.deploy.mongo => Dockerfile.deploy.os | 5 +- Dockerfile.dev.es | 19 + .../Dockerfile.dev.mongo => Dockerfile.dev.os | 3 +- LICENSE | 2 +- Makefile | 100 +- README.md | 159 ++- data_loader/data_loader.py | 12 +- docker-compose.yml | 92 +- elasticsearch/config/elasticsearch.yml | 18 + .../elasticsearch/config/elasticsearch.yml | 18 + opensearch/config/opensearch.yml | 19 + scripts/publish | 73 ++ stac_fastapi/core/README.md | 1 + stac_fastapi/core/setup.cfg | 2 + stac_fastapi/core/setup.py | 44 + .../core/stac_fastapi/core/__init__.py | 1 + .../stac_fastapi/core/base_database_logic.py | 54 + .../core/stac_fastapi/core/base_settings.py | 12 + stac_fastapi/core/stac_fastapi/core/core.py | 950 +++++++++++++++++ .../core/stac_fastapi/core/datetime_utils.py | 14 + .../stac_fastapi/core/extensions/__init__.py | 5 + .../stac_fastapi/core/extensions/filter.py | 267 +++++ .../stac_fastapi/core/extensions/query.py | 81 ++ .../core/stac_fastapi/core/models/__init__.py | 1 + .../core/stac_fastapi/core/models/links.py | 138 +++ .../core/stac_fastapi/core/models/search.py | 1 + .../core/stac_fastapi/core/serializers.py | 156 +++ .../core/stac_fastapi/core/session.py | 25 + .../core/stac_fastapi/core/types/core.py | 306 ++++++ .../core/stac_fastapi/core/utilities.py | 21 + .../stac_fastapi/core}/version.py | 0 stac_fastapi/elasticsearch/README.md | 3 + .../{mongo => elasticsearch}/pytest.ini | 0 stac_fastapi/elasticsearch/setup.cfg | 2 + stac_fastapi/elasticsearch/setup.py | 58 + .../stac_fastapi/elasticsearch/__init__.py | 1 + .../stac_fastapi/elasticsearch}/app.py | 18 +- .../stac_fastapi/elasticsearch/config.py | 80 ++ .../elasticsearch/database_logic.py | 894 ++++++++++++++++ .../stac_fastapi/elasticsearch/version.py | 2 + stac_fastapi/mongo/README.md | 1 - stac_fastapi/mongo/setup.cfg | 2 - .../mongo/stac_fastapi/mongo/config.py | 76 -- .../stac_fastapi/mongo/database_logic.py | 995 ------------------ .../mongo/stac_fastapi/mongo/utilities.py | 30 - stac_fastapi/opensearch/README.md | 1 + stac_fastapi/opensearch/pytest.ini | 4 + stac_fastapi/opensearch/setup.cfg | 2 + stac_fastapi/{mongo => opensearch}/setup.py | 14 +- .../stac_fastapi/opensearch}/__init__.py | 0 .../opensearch/stac_fastapi/opensearch/app.py | 109 ++ .../stac_fastapi/opensearch/config.py | 81 ++ .../stac_fastapi/opensearch/database_logic.py | 922 ++++++++++++++++ .../stac_fastapi/opensearch/version.py | 2 + stac_fastapi/tests/conftest.py | 4 - stac_fastapi/tests/resources/test_item.py | 2 - 59 files changed, 4937 insertions(+), 1315 deletions(-) create mode 100644 Dockerfile.deploy.es rename dockerfiles/Dockerfile.deploy.mongo => Dockerfile.deploy.os (55%) create mode 100644 Dockerfile.dev.es rename dockerfiles/Dockerfile.dev.mongo => Dockerfile.dev.os (73%) create mode 100644 elasticsearch/config/elasticsearch.yml create mode 100644 examples/pip_docker/elasticsearch/config/elasticsearch.yml create mode 100644 opensearch/config/opensearch.yml create mode 100755 scripts/publish create mode 100644 stac_fastapi/core/README.md create mode 100644 stac_fastapi/core/setup.cfg create mode 100644 stac_fastapi/core/setup.py create mode 100644 stac_fastapi/core/stac_fastapi/core/__init__.py create mode 100644 stac_fastapi/core/stac_fastapi/core/base_database_logic.py create 
mode 100644 stac_fastapi/core/stac_fastapi/core/base_settings.py create mode 100644 stac_fastapi/core/stac_fastapi/core/core.py create mode 100644 stac_fastapi/core/stac_fastapi/core/datetime_utils.py create mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/__init__.py create mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/filter.py create mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/query.py create mode 100644 stac_fastapi/core/stac_fastapi/core/models/__init__.py create mode 100644 stac_fastapi/core/stac_fastapi/core/models/links.py create mode 100644 stac_fastapi/core/stac_fastapi/core/models/search.py create mode 100644 stac_fastapi/core/stac_fastapi/core/serializers.py create mode 100644 stac_fastapi/core/stac_fastapi/core/session.py create mode 100644 stac_fastapi/core/stac_fastapi/core/types/core.py create mode 100644 stac_fastapi/core/stac_fastapi/core/utilities.py rename stac_fastapi/{mongo/stac_fastapi/mongo => core/stac_fastapi/core}/version.py (100%) create mode 100644 stac_fastapi/elasticsearch/README.md rename stac_fastapi/{mongo => elasticsearch}/pytest.ini (100%) create mode 100644 stac_fastapi/elasticsearch/setup.cfg create mode 100644 stac_fastapi/elasticsearch/setup.py create mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py rename stac_fastapi/{mongo/stac_fastapi/mongo => elasticsearch/stac_fastapi/elasticsearch}/app.py (89%) create mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py create mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py create mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py delete mode 100644 stac_fastapi/mongo/README.md delete mode 100644 stac_fastapi/mongo/setup.cfg delete mode 100644 stac_fastapi/mongo/stac_fastapi/mongo/config.py delete mode 100644 stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py delete mode 100644 stac_fastapi/mongo/stac_fastapi/mongo/utilities.py create mode 100644 stac_fastapi/opensearch/README.md create mode 100644 stac_fastapi/opensearch/pytest.ini create mode 100644 stac_fastapi/opensearch/setup.cfg rename stac_fastapi/{mongo => opensearch}/setup.py (80%) rename stac_fastapi/{mongo/stac_fastapi/mongo => opensearch/stac_fastapi/opensearch}/__init__.py (100%) create mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/app.py create mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/config.py create mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py create mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/version.py diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 2a7968bc..915929cd 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -1,4 +1,5 @@ -name: stac-fastapi-mongo +name: stac-fastapi-elasticsearch +on: push: branches: - main @@ -7,123 +8,123 @@ name: stac-fastapi-mongo - main - features/** -# jobs: -# test: -# runs-on: ubuntu-latest -# timeout-minutes: 10 +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 10 -# services: + services: -# elasticsearch_8_svc: -# image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0 -# env: -# cluster.name: stac-cluster -# node.name: es01 -# network.host: 0.0.0.0 -# transport.host: 0.0.0.0 -# discovery.type: single-node -# http.port: 9200 -# xpack.license.self_generated.type: basic -# xpack.security.enabled: false -# xpack.security.transport.ssl.enabled: false -# ES_JAVA_OPTS: -Xms512m -Xmx1g -# ports: -# - 9200:9200 + 
elasticsearch_8_svc: + image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0 + env: + cluster.name: stac-cluster + node.name: es01 + network.host: 0.0.0.0 + transport.host: 0.0.0.0 + discovery.type: single-node + http.port: 9200 + xpack.license.self_generated.type: basic + xpack.security.enabled: false + xpack.security.transport.ssl.enabled: false + ES_JAVA_OPTS: -Xms512m -Xmx1g + ports: + - 9200:9200 -# elasticsearch_7_svc: -# image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1 -# env: -# cluster.name: stac-cluster -# node.name: es01 -# network.host: 0.0.0.0 -# transport.host: 0.0.0.0 -# discovery.type: single-node -# http.port: 9400 -# xpack.license.self_generated.type: basic -# xpack.security.enabled: false -# xpack.security.transport.ssl.enabled: false -# ES_JAVA_OPTS: -Xms512m -Xmx1g -# ports: -# - 9400:9400 + elasticsearch_7_svc: + image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1 + env: + cluster.name: stac-cluster + node.name: es01 + network.host: 0.0.0.0 + transport.host: 0.0.0.0 + discovery.type: single-node + http.port: 9400 + xpack.license.self_generated.type: basic + xpack.security.enabled: false + xpack.security.transport.ssl.enabled: false + ES_JAVA_OPTS: -Xms512m -Xmx1g + ports: + - 9400:9400 -# opensearch_2_11: -# image: opensearchproject/opensearch:2.11.1 -# env: -# cluster.name: stac-cluster -# node.name: os01 -# network.host: 0.0.0.0 -# transport.host: 0.0.0.0 -# discovery.type: single-node -# http.port: 9202 -# http.cors.enabled: true -# plugins.security.disabled: true -# plugins.security.ssl.http.enabled: true -# OPENSEARCH_JAVA_OPTS: -Xms512m -Xmx512m -# ports: -# - 9202:9202 -# strategy: -# matrix: -# python-version: [ "3.8", "3.9", "3.10", "3.11"] + opensearch_2_11: + image: opensearchproject/opensearch:2.11.1 + env: + cluster.name: stac-cluster + node.name: os01 + network.host: 0.0.0.0 + transport.host: 0.0.0.0 + discovery.type: single-node + http.port: 9202 + http.cors.enabled: true + plugins.security.disabled: true + plugins.security.ssl.http.enabled: true + OPENSEARCH_JAVA_OPTS: -Xms512m -Xmx512m + ports: + - 9202:9202 + strategy: + matrix: + python-version: [ "3.8", "3.9", "3.10", "3.11"] -# name: Python ${{ matrix.python-version }} testing + name: Python ${{ matrix.python-version }} testing -# steps: -# - name: Check out repository code -# uses: actions/checkout@v4 + steps: + - name: Check out repository code + uses: actions/checkout@v4 -# # Setup Python (faster than using Python container) -# - name: Setup Python -# uses: actions/setup-python@v5 -# with: -# python-version: ${{ matrix.python-version }} -# - name: Lint code -# uses: pre-commit/action@v3.0.1 + # Setup Python (faster than using Python container) + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Lint code + uses: pre-commit/action@v3.0.1 -# - name: Install pipenv -# run: | -# python -m pip install --upgrade pipenv wheel + - name: Install pipenv + run: | + python -m pip install --upgrade pipenv wheel -# - name: Install elasticsearch stac-fastapi -# run: | -# pip install ./stac_fastapi/elasticsearch[dev,server] + - name: Install elasticsearch stac-fastapi + run: | + pip install ./stac_fastapi/elasticsearch[dev,server] -# - name: Install opensearch stac-fastapi -# run: | -# pip install ./stac_fastapi/opensearch[dev,server] + - name: Install opensearch stac-fastapi + run: | + pip install ./stac_fastapi/opensearch[dev,server] -# - name: Install core library stac-fastapi -# run: | -# pip install 
./stac_fastapi/core + - name: Install core library stac-fastapi + run: | + pip install ./stac_fastapi/core -# - name: Run test suite against Elasticsearch 7.x -# run: | -# pipenv run pytest -svvv -# env: -# ENVIRONMENT: testing -# ES_PORT: 9200 -# ES_HOST: 172.17.0.1 -# ES_USE_SSL: false -# ES_VERIFY_CERTS: false -# BACKEND: elasticsearch + - name: Run test suite against Elasticsearch 7.x + run: | + pipenv run pytest -svvv + env: + ENVIRONMENT: testing + ES_PORT: 9200 + ES_HOST: 172.17.0.1 + ES_USE_SSL: false + ES_VERIFY_CERTS: false + BACKEND: elasticsearch -# - name: Run test suite against Elasticsearch 8.x -# run: | -# pipenv run pytest -svvv -# env: -# ENVIRONMENT: testing -# ES_PORT: 9400 -# ES_HOST: 172.17.0.1 -# ES_USE_SSL: false -# ES_VERIFY_CERTS: false -# BACKEND: elasticsearch + - name: Run test suite against Elasticsearch 8.x + run: | + pipenv run pytest -svvv + env: + ENVIRONMENT: testing + ES_PORT: 9400 + ES_HOST: 172.17.0.1 + ES_USE_SSL: false + ES_VERIFY_CERTS: false + BACKEND: elasticsearch -# - name: Run test suite against OpenSearch 2.11.1 -# run: | -# pipenv run pytest -svvv -# env: -# ENVIRONMENT: testing -# ES_PORT: 9202 -# ES_HOST: 172.17.0.1 -# ES_USE_SSL: false -# ES_VERIFY_CERTS: false -# BACKEND: opensearch \ No newline at end of file + - name: Run test suite against OpenSearch 2.11.1 + run: | + pipenv run pytest -svvv + env: + ENVIRONMENT: testing + ES_PORT: 9202 + ES_HOST: 172.17.0.1 + ES_USE_SSL: false + ES_VERIFY_CERTS: false + BACKEND: opensearch \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d6880575..67882e2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,24 +5,133 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
- ## [Unreleased] ### Added +- Added explicit mapping for ID in `ES_COLLECTIONS_MAPPINGS` [#198](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/198) + ### Changed -### Fixed +- Removed database logic from core.py all_collections [#196](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/196) +- Changed OpenSearch config ssl_version to SSLv23 [#200](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/200) +### Fixed ## [v2.0.0] ### Added +- Added core library package for common logic [#186](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/186) + +### Changed + +- Moved Elasticsearch and Opensearch backends into separate packages [#186](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/186) + +### Fixed + +- Allow additional top-level properties on collections [#191](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/191) + +## [v1.1.0] + +### Added + +- Advanced comparison (LIKE, IN, BETWEEN) operators to the Filter extension [#178](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/178) +- Collection update endpoint no longer delete all sub items [#177](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/177) +- OpenSearch 2.11.1 support [#188](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/188) + ### Changed +- Elasticsearch drivers from 7.17.9 to 8.11.0 [#169](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/169) +- Collection update endpoint no longer delete all sub items [#177](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/177) + +### Fixed + +- Exclude unset fields in search response [#166](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/166) +- Upgrade stac-fastapi to v2.4.9 [#172](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/172) +- Set correct default filter-lang for GET /search requests [#179](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/179) + +## [v1.0.0] + +### Added + +- Collection-level Assets to the CollectionSerializer [#148](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/148) +- Pagination for /collections - GET all collections - route [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164) +- Examples folder with example docker setup for running sfes from pip [#147](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/147) +- GET /search filter extension queries [#163](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/163) +- Added support for GET /search intersection queries [#158](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/158) + +### Changed + +- Update elasticsearch version from 8.1.3 to 8.10.4 in cicd, gh actions [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164) +- Updated core stac-fastapi libraries to 2.4.8 from 2.4.3 [#151](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/151) +- Use aliases on Elasticsearch indices, add number suffix in index name. 
[#152](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/152) + ### Fixed +- Corrected the closing of client connections in ES index management functions [#132](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/132) +- Corrected the automatic converstion of float values to int when building Filter Clauses [#135](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/135) +- Do not index `proj:geometry` field as geo_shape [#154](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/154) +- Remove unsupported characters from Elasticsearch index names [#153](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/153) +- Fixed GET /search sortby requests [#25](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/25) + + +## [v0.3.0] + +### Added + +- Added bbox and datetime parameters and functionality to item_collection [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) +- Added collection_id parameter to create_item function [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) +- Added item_id and collection_id to update_item [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) +- The default Collection objects index can be overridden by the `STAC_COLLECTIONS_INDEX` environment variable [#128](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/128) +- The default Item objects index prefix can be overridden by the `STAC_ITEMS_INDEX_PREFIX` environment variable [#128](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/128) +- Fields Extension [#129](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/129) +- Support for Python 3.11 [#131](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/131) + +### Changed + +- Updated core stac-fastapi libraries to 2.4.3 from 2.3.0 [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) + + +## [v0.2.0] + +### Added + +- Filter Extension as GET with CQL2-Text and POST with CQL2-JSON, + supporting the Basic CQL2 and Basic Spatial Operators conformance classes. +- Added Elasticsearch local config to support snapshot/restore to local filesystem + +### Fixed + +- Fixed search intersects query. +- Corrected the Sort and Query conformance class URIs. + +### Changed + +- Default to Python 3.10 +- Default to Elasticsearch 8.x +- Collection objects are now stored in `collections` index rather than `stac_collections` index +- Item objects are no longer stored in `stac_items`, but in indices per collection named `items_{collection_id}` +- When using bulk ingest, items will continue to be ingested if any of them fail. Previously, the call would fail + immediately if any items failed. + + +## [v0.1.0] + +### Changed + +- Elasticsearch index mappings updated to be more thorough. +- Endpoints that return items (e.g., /search) now sort the results by 'properties.datetime,id,collection'. + Previously, there was no sort order defined. +- Db_to_stac serializer moved to core.py for consistency as it existed in both core and database_logic previously. +- Use genexp in execute_search and get_all_collections to return results. +- Added db_to_stac serializer to item_collection method in core.py. 
+ [Unreleased]: -[v2.0.0]: +[v2.0.0]: +[v1.1.0]: +[v1.0.0]: +[v0.3.0]: +[v0.2.0]: +[v0.1.0]: \ No newline at end of file diff --git a/Dockerfile.deploy.es b/Dockerfile.deploy.es new file mode 100644 index 00000000..2eab7b9d --- /dev/null +++ b/Dockerfile.deploy.es @@ -0,0 +1,20 @@ +FROM python:3.10-slim + +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get -y install gcc && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + +WORKDIR /app + +COPY . /app + +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir ./stac_fastapi/elasticsearch[server] + +EXPOSE 8080 + +CMD ["uvicorn", "stac_fastapi.elasticsearch.app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/dockerfiles/Dockerfile.deploy.mongo b/Dockerfile.deploy.os similarity index 55% rename from dockerfiles/Dockerfile.deploy.mongo rename to Dockerfile.deploy.os index d0d95df2..035b181e 100644 --- a/dockerfiles/Dockerfile.deploy.mongo +++ b/Dockerfile.deploy.os @@ -12,8 +12,9 @@ WORKDIR /app COPY . /app -RUN pip install --no-cache-dir ./stac_fastapi/mongo[server] +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir ./stac_fastapi/opensearch[server] EXPOSE 8080 -CMD ["uvicorn", "stac_fastapi.mongo.app:app", "--host", "0.0.0.0", "--port", "8080"] +CMD ["uvicorn", "stac_fastapi.opensearch.app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/Dockerfile.dev.es b/Dockerfile.dev.es new file mode 100644 index 00000000..a4248d39 --- /dev/null +++ b/Dockerfile.dev.es @@ -0,0 +1,19 @@ +FROM python:3.10-slim + + +# update apt pkgs, and install build-essential for ciso8601 +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get install -y build-essential && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# update certs used by Requests +ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + +WORKDIR /app + +COPY . /app + +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/elasticsearch[dev,server] diff --git a/dockerfiles/Dockerfile.dev.mongo b/Dockerfile.dev.os similarity index 73% rename from dockerfiles/Dockerfile.dev.mongo rename to Dockerfile.dev.os index 4f43abac..d9dc8b0a 100644 --- a/dockerfiles/Dockerfile.dev.mongo +++ b/Dockerfile.dev.os @@ -15,4 +15,5 @@ WORKDIR /app COPY . 
/app -RUN pip install --no-cache-dir -e ./stac_fastapi/mongo[dev,server] +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/opensearch[dev,server] diff --git a/LICENSE b/LICENSE index 5a60713e..998f791a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Jonathan Healy +Copyright (c) 2022 Jonathan Healy Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 7467eb8e..545d2311 100644 --- a/Makefile +++ b/Makefile @@ -2,58 +2,92 @@ APP_HOST ?= 0.0.0.0 EXTERNAL_APP_PORT ?= ${APP_PORT} -MONGO_APP_PORT ?= 8084 -MONGO_HOST ?= docker.for.mac.localhost -MONGO_PORT ?= 27017 +ES_APP_PORT ?= 8080 +ES_HOST ?= docker.for.mac.localhost +ES_PORT ?= 9200 -run_mongo = docker-compose \ +OS_APP_PORT ?= 8082 +OS_HOST ?= docker.for.mac.localhost +OS_PORT ?= 9202 + +run_es = docker-compose \ run \ - -p ${EXTERNAL_APP_PORT}:${MONGO_APP_PORT} \ + -p ${EXTERNAL_APP_PORT}:${ES_APP_PORT} \ -e PY_IGNORE_IMPORTMISMATCH=1 \ -e APP_HOST=${APP_HOST} \ - -e APP_PORT=${MONGO_APP_PORT} \ - app-mongo + -e APP_PORT=${ES_APP_PORT} \ + app-elasticsearch -.PHONY: image-deploy-mongo -image-deploy-mongo: - docker build -f dockerfiles/Dockerfile.dev.mongo -t stac-fastapi-mongo:latest . +run_os = docker-compose \ + run \ + -p ${EXTERNAL_APP_PORT}:${OS_APP_PORT} \ + -e PY_IGNORE_IMPORTMISMATCH=1 \ + -e APP_HOST=${APP_HOST} \ + -e APP_PORT=${OS_APP_PORT} \ + app-opensearch +.PHONY: image-deploy-es +image-deploy-es: + docker build -f Dockerfile.dev.es -t stac-fastapi-elasticsearch:latest . + +.PHONY: image-deploy-os +image-deploy-os: + docker build -f Dockerfile.dev.os -t stac-fastapi-opensearch:latest . 
.PHONY: run-deploy-locally run-deploy-locally: - docker run -it -p 8084:8084 \ - -e ES_HOST=${MONGO_HOST} \ - -e ES_PORT=${MONGO_PORT} \ - -e ES_USER=${MONGO_USER} \ - -e ES_PASS=${MONGO_PASS} \ - stac-fastapi-mongo:latest + docker run -it -p 8080:8080 \ + -e ES_HOST=${ES_HOST} \ + -e ES_PORT=${ES_PORT} \ + -e ES_USER=${ES_USER} \ + -e ES_PASS=${ES_PASS} \ + stac-fastapi-elasticsearch:latest .PHONY: image-dev image-dev: docker-compose build -.PHONY: docker-run-mongo -docker-run-mongo: image-dev - $(run_mongo) +.PHONY: docker-run-es +docker-run-es: image-dev + $(run_es) + +.PHONY: docker-run-os +docker-run-os: image-dev + $(run_os) -.PHONY: docker-shell-mongo -docker-shell-mongo: - $(run_mongo) /bin/bash +.PHONY: docker-shell-es +docker-shell-es: + $(run_es) /bin/bash +.PHONY: docker-shell-os +docker-shell-os: + $(run_os) /bin/bash -.PHONY: test-mongo -test-mongo: - -$(run_mongo) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh mongo:27017 && cd stac_fastapi/tests/ && pytest' +.PHONY: test-elasticsearch +test-elasticsearch: + -$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest' + docker-compose down + +.PHONY: test-opensearch +test-opensearch: + -$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest' docker-compose down .PHONY: test test: - -$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh mongo:27017 && cd stac_fastapi/tests/ && pytest' + -$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest' + docker-compose down + + -$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest' docker-compose down -.PHONY: run-database-mongo -run-database-mongo: - docker-compose run --rm mongo +.PHONY: run-database-es +run-database-es: + docker-compose run --rm elasticsearch + +.PHONY: run-database-os +run-database-os: + docker-compose run --rm opensearch .PHONY: pybase-install pybase-install: @@ -63,9 +97,13 @@ pybase-install: pip install -e ./stac_fastapi/extensions[dev] && \ pip install -e ./stac_fastapi/core -.PHONY: install-mongo +.PHONY: install-es install-es: pybase-install - pip install -e ./stac_fastapi/mongo[dev,server] + pip install -e ./stac_fastapi/elasticsearch[dev,server] + +.PHONY: install-os +install-os: pybase-install + pip install -e ./stac_fastapi/opensearch[dev,server] .PHONY: ingest ingest: diff --git a/README.md b/README.md index 8815d393..7c662480 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,17 @@ -# stac-fastapi-mongo +# stac-fastapi-elasticsearch-opensearch (sfeos) -## Mongo backend for the stac-fastapi project built on top of the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library. - -- Note: This is presently in development and, for now, is a proof of concept project showing that other databases can be plugged into the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library, besides for elasticsearch and opensearch. 
+## Elasticsearch and Opensearch backends for the stac-fastapi project +[![PyPI version](https://badge.fury.io/py/stac-fastapi.elasticsearch.svg)](https://badge.fury.io/py/stac-fastapi.elasticsearch) To install from PyPI: ```shell -pip install stac_fastapi.mongo +pip install stac_fastapi.elasticsearch +``` +or +``` +pip install stac_fastapi.opensearch ``` #### For changes, see the [Changelog](CHANGELOG.md) @@ -19,7 +22,13 @@ pip install stac_fastapi.mongo To install the classes in your local Python env, run: ```shell -pip install -e 'stac_fastapi/mongo[dev]' +pip install -e 'stac_fastapi/elasticsearch[dev]' +``` + +or + +```shell +pip install -e 'stac_fastapi/opensearch[dev]' ``` @@ -36,20 +45,30 @@ pre-commit run --all-files ## Build Elasticsearh API backend ```shell -docker-compose up mongo -docker-compose build app-mongo +docker-compose up elasticsearch +docker-compose build app-elasticsearch ``` -## Running Mongo API on localhost:8084 +## Running Elasticsearh API on localhost:8080 + +```shell +docker-compose up app-elasticsearch +``` + +By default, docker-compose uses Elasticsearch 8.x and OpenSearch 2.11.1. +If you wish to use a different version, put the following in a +file named `.env` in the same directory you run docker-compose from: ```shell -docker-compose up app-mongo +ELASTICSEARCH_VERSION=7.17.1 +OPENSEARCH_VERSION=2.11.0 ``` +The most recent Elasticsearch 7.x versions should also work. See the [opensearch-py docs](https://github.com/opensearch-project/opensearch-py/blob/main/COMPATIBILITY.md) for compatibility information. To create a new Collection: ```shell -curl -X "POST" "http://localhost:8084/collections" \ +curl -X "POST" "http://localhost:8080/collections" \ -H 'Content-Type: application/json; charset=utf-8' \ -d $'{ "id": "my_collection" @@ -66,7 +85,7 @@ returned from the `/collections` route contains a `next` link with the token tha get the next page of results. ```shell -curl -X "GET" "http://localhost:8084/collections?limit=1&token=example_token" +curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" ``` ## Testing @@ -74,10 +93,126 @@ curl -X "GET" "http://localhost:8084/collections?limit=1&token=example_token" ```shell make test ``` +Test against OpenSearch only + +```shell +make test-opensearch +``` +Test against Elasticsearch only + +```shell +make test-elasticsearch +``` ## Ingest sample data ```shell make ingest ``` + +## Elasticsearch Mappings + +Mappings apply to search index, not source. + + +## Managing Elasticsearch Indices + +This section covers how to create a snapshot repository and then create and restore snapshots with this. + +Create a snapshot repository. This puts the files in the `elasticsearch/snapshots` in this git repo clone, as +the elasticsearch.yml and docker-compose files create a mapping from that directory to +`/usr/share/elasticsearch/snapshots` within the Elasticsearch container and grant permissions on using it. + +```shell +curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "type": "fs", + "settings": { + "location": "/usr/share/elasticsearch/snapshots/my_fs_backup" + } +}' +``` + +The next step is to create a snapshot of one or more indices into this snapshot repository. This command creates +a snapshot named `my_snapshot_2` and waits for the action to be completed before returning. This can also be done +asynchronously, and queried for status. 
The `indices` parameter determines which indices are snapshotted, and +can include wildcards. + +```shell +curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "metadata": { + "taken_because": "dump of all items", + "taken_by": "pvarner" + }, + "include_global_state": false, + "ignore_unavailable": false, + "indices": "items_my-collection" +}' +``` + +To see the status of this snapshot: + +```shell +curl http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2 +``` + +To see all the snapshots: + +```shell +curl http://localhost:9200/_snapshot/my_fs_backup/_all +``` + +To restore a snapshot, run something similar to the following. This specific command will restore any indices that +match `items_*` and rename them so that the new index name will be suffixed with `-copy`. + +```shell +curl -X "POST" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2/_restore?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "include_aliases": false, + "include_global_state": false, + "ignore_unavailable": true, + "rename_replacement": "items_$1-copy", + "indices": "items_*", + "rename_pattern": "items_(.+)" +}' +``` + +Now the item documents have been restored in to the new index (e.g., `my-collection-copy`), but the value of the +`collection` field in those documents is still the original value of `my-collection`. To update these to match the +new collection name, run the following Elasticsearch Update By Query command, substituting the old collection name +into the term filter and the new collection name into the script parameter: + +```shell +curl -X "POST" "http://localhost:9200/items_my-collection-copy/_update_by_query" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "query": { + "match_all": {} +}, + "script": { + "lang": "painless", + "params": { + "collection": "my-collection-copy" + }, + "source": "ctx._source.collection = params.collection" + } +}' +``` + +Then, create a new collection through the api with the new name for each of the restored indices: + +```shell +curl -X "POST" "http://localhost:8080/collections" \ + -H 'Content-Type: application/json' \ + -d $'{ + "id": "my-collection-copy" +}' +``` + +Voila! You have a copy of the collection now that has a resource URI (`/collections/my-collection-copy`) and can be +correctly queried by collection name. diff --git a/data_loader/data_loader.py b/data_loader/data_loader.py index 38ca5666..c438811d 100644 --- a/data_loader/data_loader.py +++ b/data_loader/data_loader.py @@ -6,13 +6,19 @@ import click import requests -if len(sys.argv) != 1: - print("Usage: python data_loader.py") +if len(sys.argv) != 2: + print("Usage: python data_loader.py ") sys.exit(1) DATA_DIR = os.path.join(os.path.dirname(__file__), "setup_data/") -STAC_API_BASE_URL = "http://localhost:8084" +backend = sys.argv[1].lower() +if backend == "opensearch": + STAC_API_BASE_URL = "http://localhost:8082" +elif backend == "elasticsearch": + STAC_API_BASE_URL = "http://localhost:8080" +else: + print("Invalid backend tag. 
Enter either 'opensearch' or 'elasticsearch'.") def load_data(filename): diff --git a/docker-compose.yml b/docker-compose.yml index d531a85b..9d665bce 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,50 +1,86 @@ version: '3.9' services: - app-mongo: - container_name: stac-fastapi-mongo - image: stac-utils/stac-fastapi-mongo + app-elasticsearch: + container_name: stac-fastapi-es + image: stac-utils/stac-fastapi-es restart: always build: context: . - dockerfile: dockerfiles/Dockerfile.dev.mongo + dockerfile: Dockerfile.dev.es environment: - APP_HOST=0.0.0.0 - - APP_PORT=8084 + - APP_PORT=8080 - RELOAD=true - ENVIRONMENT=local - - BACKEND=mongo - - MONGO_DB=stac - - MONGO_HOST=mongo - - MONGO_USER=root - - MONGO_PASS=example - - MONGO_PORT=27017 + - WEB_CONCURRENCY=10 + - ES_HOST=elasticsearch + - ES_PORT=9200 + - ES_USE_SSL=false + - ES_VERIFY_CERTS=false + - BACKEND=elasticsearch ports: - - "8084:8084" + - "8080:8080" volumes: - ./stac_fastapi:/app/stac_fastapi - ./scripts:/app/scripts + - ./esdata:/usr/share/elasticsearch/data depends_on: - - mongo + - elasticsearch command: - bash -c "./scripts/wait-for-it-es.sh mongo-container:27017 && python -m stac_fastapi.mongo.app" + bash -c "./scripts/wait-for-it-es.sh es-container:9200 && python -m stac_fastapi.elasticsearch.app" - mongo: - container_name: mongo-container - image: mongo:7.0.5 - hostname: mongo + app-opensearch: + container_name: stac-fastapi-os + image: stac-utils/stac-fastapi-os + restart: always + build: + context: . + dockerfile: Dockerfile.dev.os environment: - - MONGO_INITDB_ROOT_USERNAME=root - - MONGO_INITDB_ROOT_PASSWORD=example + - APP_HOST=0.0.0.0 + - APP_PORT=8082 + - RELOAD=true + - ENVIRONMENT=local + - WEB_CONCURRENCY=10 + - ES_HOST=opensearch + - ES_PORT=9202 + - ES_USE_SSL=false + - ES_VERIFY_CERTS=false + - BACKEND=opensearch ports: - - "27017:27017" + - "8082:8082" + volumes: + - ./stac_fastapi:/app/stac_fastapi + - ./scripts:/app/scripts + - ./osdata:/usr/share/opensearch/data + depends_on: + - opensearch + command: + bash -c "./scripts/wait-for-it-es.sh os-container:9202 && python -m stac_fastapi.opensearch.app" - mongo-express: - image: mongo-express - restart: always + elasticsearch: + container_name: es-container + image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.11.0} + hostname: elasticsearch + environment: + ES_JAVA_OPTS: -Xms512m -Xmx1g + volumes: + - ./elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + - ./elasticsearch/snapshots:/usr/share/elasticsearch/snapshots ports: - - "8081:8081" + - "9200:9200" + + opensearch: + container_name: os-container + image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-2.11.1} + hostname: opensearch environment: - - ME_CONFIG_MONGODB_ADMINUSERNAME=root - - ME_CONFIG_MONGODB_ADMINPASSWORD=example - - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/ + - discovery.type=single-node + - plugins.security.disabled=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m + volumes: + - ./opensearch/config/opensearch.yml:/usr/share/opensearch/config/opensearch.yml + - ./opensearch/snapshots:/usr/share/opensearch/snapshots + ports: + - "9202:9202" diff --git a/elasticsearch/config/elasticsearch.yml b/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..0bf5b680 --- /dev/null +++ b/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,18 @@ +## Cluster Settings +cluster.name: stac-cluster +node.name: es01 +network.host: 0.0.0.0 +transport.host: 0.0.0.0 +discovery.type: 
single-node +http.port: 9200 + +path: + repo: + - /usr/share/elasticsearch/snapshots + +## License +xpack.license.self_generated.type: basic + +# Security +xpack.security.enabled: false +xpack.security.transport.ssl.enabled: false \ No newline at end of file diff --git a/examples/pip_docker/elasticsearch/config/elasticsearch.yml b/examples/pip_docker/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..0bf5b680 --- /dev/null +++ b/examples/pip_docker/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,18 @@ +## Cluster Settings +cluster.name: stac-cluster +node.name: es01 +network.host: 0.0.0.0 +transport.host: 0.0.0.0 +discovery.type: single-node +http.port: 9200 + +path: + repo: + - /usr/share/elasticsearch/snapshots + +## License +xpack.license.self_generated.type: basic + +# Security +xpack.security.enabled: false +xpack.security.transport.ssl.enabled: false \ No newline at end of file diff --git a/opensearch/config/opensearch.yml b/opensearch/config/opensearch.yml new file mode 100644 index 00000000..5e44b259 --- /dev/null +++ b/opensearch/config/opensearch.yml @@ -0,0 +1,19 @@ +## Cluster Settings +cluster.name: stac-cluster +node.name: os01 +network.host: 0.0.0.0 +transport.host: 0.0.0.0 +discovery.type: single-node +http.port: 9202 +http.cors.enabled: true +http.cors.allow-headers: X-Requested-With,Content-Type,Content-Length,Accept,Authorization + +path: + repo: + - /usr/share/opensearch/snapshots + +# Security +plugins.security.disabled: true +plugins.security.ssl.http.enabled: true + +node.max_local_storage_nodes: 3 diff --git a/scripts/publish b/scripts/publish new file mode 100755 index 00000000..464bbe05 --- /dev/null +++ b/scripts/publish @@ -0,0 +1,73 @@ +#!/bin/bash + +set -e + +if [[ -n "${CI}" ]]; then + set -x +fi + +# Import shared variables +SUBPACKAGE_DIRS=( + "stac_fastapi/types" + "stac_fastapi/extensions" + "stac_fastapi/api" + "stac_fastapi/sqlalchemy" + "stac_fastapi/pgstac" + "stac_fastapi/elasticsearch" +) + +function usage() { + echo -n \ + "Usage: $(basename "$0") +Publish all stac-fastapi packages. + +Options: +--test Publish to test pypi. 
Requires a 'testpypi' repository + be defined in your .pypirc; + See https://packaging.python.org/guides/using-testpypi/#using-testpypi-with-pip +" +} + +POSITIONAL=() +while [[ $# -gt 0 ]] +do + key="$1" + case $key in + + --help) + usage + exit 0 + shift + ;; + + --test) + TEST_PYPI="--repository testpypi" + shift + ;; + + *) # unknown option + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; + esac +done +set -- "${POSITIONAL[@]}" # restore positional parameters + +# Fail if this isn't CI and we aren't publishing to test pypi +if [ -z "${TEST_PYPI}" ] && [ -z "${CI}" ]; then + echo "Only CI can publish to pypi" + exit 1 +fi + +if [ "${BASH_SOURCE[0]}" = "${0}" ]; then + for PACKAGE_DIR in "${SUBPACKAGE_DIRS[@]}" + do + echo ${PACKAGE_DIR} + pushd ./${PACKAGE_DIR} + rm -rf dist + python setup.py sdist bdist_wheel + twine upload ${TEST_PYPI} dist/* + popd + + done +fi \ No newline at end of file diff --git a/stac_fastapi/core/README.md b/stac_fastapi/core/README.md new file mode 100644 index 00000000..02f4e35a --- /dev/null +++ b/stac_fastapi/core/README.md @@ -0,0 +1 @@ +# stac-fastapi core library for Elasticsearch and Opensearch backends \ No newline at end of file diff --git a/stac_fastapi/core/setup.cfg b/stac_fastapi/core/setup.cfg new file mode 100644 index 00000000..1eb3fa49 --- /dev/null +++ b/stac_fastapi/core/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +version = attr: stac_fastapi.core.version.__version__ diff --git a/stac_fastapi/core/setup.py b/stac_fastapi/core/setup.py new file mode 100644 index 00000000..68ba8f70 --- /dev/null +++ b/stac_fastapi/core/setup.py @@ -0,0 +1,44 @@ +"""stac_fastapi: core elasticsearch/ opensearch module.""" + +from setuptools import find_namespace_packages, setup + +with open("README.md") as f: + desc = f.read() + +install_requires = [ + "fastapi", + "attrs", + "pydantic[dotenv]<2", + "stac_pydantic==2.0.*", + "stac-fastapi.types==2.4.9", + "stac-fastapi.api==2.4.9", + "stac-fastapi.extensions==2.4.9", + "pystac[validation]", + "orjson", + "overrides", + "geojson-pydantic", + "pygeofilter==0.2.1", +] + +setup( + name="stac-fastapi.core", + description="Core library for the Elasticsearch and Opensearch stac-fastapi backends.", + long_description=desc, + long_description_content_type="text/markdown", + python_requires=">=3.8", + classifiers=[ + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + ], + url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", + license="MIT", + packages=find_namespace_packages(), + zip_safe=False, + install_requires=install_requires, +) diff --git a/stac_fastapi/core/stac_fastapi/core/__init__.py b/stac_fastapi/core/stac_fastapi/core/__init__.py new file mode 100644 index 00000000..32b338eb --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/__init__.py @@ -0,0 +1 @@ +"""Core library.""" diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py new file mode 100644 index 00000000..0043cfb8 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -0,0 +1,54 @@ +"""Base database logic.""" + +import abc +from typing import Any, Dict, Iterable, Optional + + +class 
BaseDatabaseLogic(abc.ABC): + """ + Abstract base class for database logic. + + This class defines the basic structure and operations for database interactions. + Subclasses must provide implementations for these methods. + """ + + @abc.abstractmethod + async def get_all_collections( + self, token: Optional[str], limit: int + ) -> Iterable[Dict[str, Any]]: + """Retrieve a list of all collections from the database.""" + pass + + @abc.abstractmethod + async def get_one_item(self, collection_id: str, item_id: str) -> Dict: + """Retrieve a single item from the database.""" + pass + + @abc.abstractmethod + async def create_item(self, item: Dict, refresh: bool = False) -> None: + """Create an item in the database.""" + pass + + @abc.abstractmethod + async def delete_item( + self, item_id: str, collection_id: str, refresh: bool = False + ) -> None: + """Delete an item from the database.""" + pass + + @abc.abstractmethod + async def create_collection(self, collection: Dict, refresh: bool = False) -> None: + """Create a collection in the database.""" + pass + + @abc.abstractmethod + async def find_collection(self, collection_id: str) -> Dict: + """Find a collection in the database.""" + pass + + @abc.abstractmethod + async def delete_collection( + self, collection_id: str, refresh: bool = False + ) -> None: + """Delete a collection from the database.""" + pass diff --git a/stac_fastapi/core/stac_fastapi/core/base_settings.py b/stac_fastapi/core/stac_fastapi/core/base_settings.py new file mode 100644 index 00000000..f30d07a4 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/base_settings.py @@ -0,0 +1,12 @@ +"""Base settings.""" + +from abc import ABC, abstractmethod + + +class ApiBaseSettings(ABC): + """Abstract base class for API settings.""" + + @abstractmethod + def create_client(self): + """Create a database client.""" + pass diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py new file mode 100644 index 00000000..99b58e16 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -0,0 +1,950 @@ +"""Item crud client.""" +import logging +import re +from datetime import datetime as datetime_type +from datetime import timezone +from typing import Any, Dict, List, Optional, Set, Type, Union +from urllib.parse import unquote_plus, urljoin + +import attr +import orjson +import stac_pydantic +from fastapi import HTTPException, Request +from overrides import overrides +from pydantic import ValidationError +from pygeofilter.backends.cql2_json import to_cql2 +from pygeofilter.parsers.cql2_text import parse as parse_cql2_text +from stac_pydantic.links import Relations +from stac_pydantic.shared import MimeTypes +from stac_pydantic.version import STAC_VERSION + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.core.base_settings import ApiBaseSettings +from stac_fastapi.core.models.links import PagingLinks +from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer +from stac_fastapi.core.session import Session +from stac_fastapi.core.types.core import ( + AsyncBaseCoreClient, + AsyncBaseFiltersClient, + AsyncBaseTransactionsClient, +) +from stac_fastapi.extensions.third_party.bulk_transactions import ( + BaseBulkTransactionsClient, + BulkTransactionMethod, + Items, +) +from stac_fastapi.types import stac as stac_types +from stac_fastapi.types.config import Settings +from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES +from stac_fastapi.types.extension import 
ApiExtension +from stac_fastapi.types.links import CollectionLinks +from stac_fastapi.types.requests import get_base_url +from stac_fastapi.types.search import BaseSearchPostRequest +from stac_fastapi.types.stac import Collection, Collections, Item, ItemCollection + +logger = logging.getLogger(__name__) + +NumType = Union[float, int] + + +@attr.s +class CoreClient(AsyncBaseCoreClient): + """Client for core endpoints defined by the STAC specification. + + This class is a implementation of `AsyncBaseCoreClient` that implements the core endpoints + defined by the STAC specification. It uses the `DatabaseLogic` class to interact with the + database, and `ItemSerializer` and `CollectionSerializer` to convert between STAC objects and + database records. + + Attributes: + session (Session): A requests session instance to be used for all HTTP requests. + item_serializer (Type[serializers.ItemSerializer]): A serializer class to be used to convert + between STAC items and database records. + collection_serializer (Type[serializers.CollectionSerializer]): A serializer class to be + used to convert between STAC collections and database records. + database (DatabaseLogic): An instance of the `DatabaseLogic` class that is used to interact + with the database. + """ + + database: BaseDatabaseLogic = attr.ib() + base_conformance_classes: List[str] = attr.ib( + factory=lambda: BASE_CONFORMANCE_CLASSES + ) + extensions: List[ApiExtension] = attr.ib(default=attr.Factory(list)) + + session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) + item_serializer: Type[ItemSerializer] = attr.ib(default=ItemSerializer) + collection_serializer: Type[CollectionSerializer] = attr.ib( + default=CollectionSerializer + ) + post_request_model = attr.ib(default=BaseSearchPostRequest) + stac_version: str = attr.ib(default=STAC_VERSION) + landing_page_id: str = attr.ib(default="stac-fastapi") + title: str = attr.ib(default="stac-fastapi") + description: str = attr.ib(default="stac-fastapi") + + def _landing_page( + self, + base_url: str, + conformance_classes: List[str], + extension_schemas: List[str], + ) -> stac_types.LandingPage: + landing_page = stac_types.LandingPage( + type="Catalog", + id=self.landing_page_id, + title=self.title, + description=self.description, + stac_version=self.stac_version, + conformsTo=conformance_classes, + links=[ + { + "rel": Relations.self.value, + "type": MimeTypes.json, + "href": base_url, + }, + { + "rel": Relations.root.value, + "type": MimeTypes.json, + "href": base_url, + }, + { + "rel": "data", + "type": MimeTypes.json, + "href": urljoin(base_url, "collections"), + }, + { + "rel": Relations.conformance.value, + "type": MimeTypes.json, + "title": "STAC/WFS3 conformance classes implemented by this server", + "href": urljoin(base_url, "conformance"), + }, + { + "rel": Relations.search.value, + "type": MimeTypes.geojson, + "title": "STAC search", + "href": urljoin(base_url, "search"), + "method": "GET", + }, + { + "rel": Relations.search.value, + "type": MimeTypes.geojson, + "title": "STAC search", + "href": urljoin(base_url, "search"), + "method": "POST", + }, + ], + stac_extensions=extension_schemas, + ) + return landing_page + + async def landing_page(self, **kwargs) -> stac_types.LandingPage: + """Landing page. + + Called with `GET /`. + + Returns: + API landing page, serving as an entry point to the API. 
+ """ + request: Request = kwargs["request"] + base_url = get_base_url(request) + landing_page = self._landing_page( + base_url=base_url, + conformance_classes=self.conformance_classes(), + extension_schemas=[], + ) + collections = await self.all_collections(request=kwargs["request"]) + for collection in collections["collections"]: + landing_page["links"].append( + { + "rel": Relations.child.value, + "type": MimeTypes.json.value, + "title": collection.get("title") or collection.get("id"), + "href": urljoin(base_url, f"collections/{collection['id']}"), + } + ) + + # Add OpenAPI URL + landing_page["links"].append( + { + "rel": "service-desc", + "type": "application/vnd.oai.openapi+json;version=3.0", + "title": "OpenAPI service description", + "href": urljoin( + str(request.base_url), request.app.openapi_url.lstrip("/") + ), + } + ) + + # Add human readable service-doc + landing_page["links"].append( + { + "rel": "service-doc", + "type": "text/html", + "title": "OpenAPI service documentation", + "href": urljoin( + str(request.base_url), request.app.docs_url.lstrip("/") + ), + } + ) + + return landing_page + + async def all_collections(self, **kwargs) -> Collections: + """Read all collections from the database. + + Args: + **kwargs: Keyword arguments from the request. + + Returns: + A Collections object containing all the collections in the database and links to various resources. + """ + request = kwargs["request"] + base_url = str(request.base_url) + limit = int(request.query_params.get("limit", 10)) + token = request.query_params.get("token") + + collections, next_token = await self.database.get_all_collections( + token=token, limit=limit, base_url=base_url + ) + + links = [ + {"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url}, + {"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url}, + { + "rel": Relations.self.value, + "type": MimeTypes.json, + "href": urljoin(base_url, "collections"), + }, + ] + + if next_token: + next_link = PagingLinks(next=next_token, request=request).link_next() + links.append(next_link) + + return Collections(collections=collections, links=links) + + async def get_collection(self, collection_id: str, **kwargs) -> Collection: + """Get a collection from the database by its id. + + Args: + collection_id (str): The id of the collection to retrieve. + kwargs: Additional keyword arguments passed to the API call. + + Returns: + Collection: A `Collection` object representing the requested collection. + + Raises: + NotFoundError: If the collection with the given id cannot be found in the database. + """ + base_url = str(kwargs["request"].base_url) + collection = await self.database.find_collection(collection_id=collection_id) + return self.collection_serializer.db_to_stac( + collection=collection, base_url=base_url + ) + + async def item_collection( + self, + collection_id: str, + bbox: Optional[List[NumType]] = None, + datetime: Union[str, datetime_type, None] = None, + limit: int = 10, + token: str = None, + **kwargs, + ) -> ItemCollection: + """Read items from a specific collection in the database. + + Args: + collection_id (str): The identifier of the collection to read items from. + bbox (Optional[List[NumType]]): The bounding box to filter items by. + datetime (Union[str, datetime_type, None]): The datetime range to filter items by. + limit (int): The maximum number of items to return. The default value is 10. + token (str): A token used for pagination. + request (Request): The incoming request. 
+ + Returns: + ItemCollection: An `ItemCollection` object containing the items from the specified collection that meet + the filter criteria and links to various resources. + + Raises: + HTTPException: If the specified collection is not found. + Exception: If any error occurs while reading the items from the database. + """ + request: Request = kwargs["request"] + base_url = str(request.base_url) + + collection = await self.get_collection( + collection_id=collection_id, request=request + ) + collection_id = collection.get("id") + if collection_id is None: + raise HTTPException(status_code=404, detail="Collection not found") + + search = self.database.make_search() + search = self.database.apply_collections_filter( + search=search, collection_ids=[collection_id] + ) + + if datetime: + datetime_search = self._return_date(datetime) + search = self.database.apply_datetime_filter( + search=search, datetime_search=datetime_search + ) + + if bbox: + bbox = [float(x) for x in bbox] + if len(bbox) == 6: + bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] + + search = self.database.apply_bbox_filter(search=search, bbox=bbox) + + items, maybe_count, next_token = await self.database.execute_search( + search=search, + limit=limit, + sort=None, + token=token, # type: ignore + collection_ids=[collection_id], + ) + + items = [ + self.item_serializer.db_to_stac(item, base_url=base_url) for item in items + ] + + context_obj = None + if self.extension_is_enabled("ContextExtension"): + context_obj = { + "returned": len(items), + "limit": limit, + } + if maybe_count is not None: + context_obj["matched"] = maybe_count + + links = [] + if next_token: + links = await PagingLinks(request=request, next=next_token).get_links() + + return ItemCollection( + type="FeatureCollection", + features=items, + links=links, + context=context_obj, + ) + + async def get_item(self, item_id: str, collection_id: str, **kwargs) -> Item: + """Get an item from the database based on its id and collection id. + + Args: + collection_id (str): The ID of the collection the item belongs to. + item_id (str): The ID of the item to be retrieved. + + Returns: + Item: An `Item` object representing the requested item. + + Raises: + Exception: If any error occurs while getting the item from the database. + NotFoundError: If the item does not exist in the specified collection. + """ + base_url = str(kwargs["request"].base_url) + item = await self.database.get_one_item( + item_id=item_id, collection_id=collection_id + ) + return self.item_serializer.db_to_stac(item, base_url) + + @staticmethod + def _return_date(interval_str): + """ + Convert a date interval string into a dictionary for filtering search results. + + The date interval string should be formatted as either a single date or a range of dates separated + by "/". The date format should be ISO-8601 (YYYY-MM-DDTHH:MM:SSZ). If the interval string is a + single date, it will be converted to a dictionary with a single "eq" key whose value is the date in + the ISO-8601 format. If the interval string is a range of dates, it will be converted to a + dictionary with "gte" (greater than or equal to) and "lte" (less than or equal to) keys. If the + interval string is a range of dates with ".." instead of "/", the start and end dates will be + assigned default values to encompass the entire possible date range. + + Args: + interval_str (str): The date interval string to be converted. + + Returns: + dict: A dictionary representing the date interval for use in filtering search results. 
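A short worked illustration of this mapping, following the parsing logic implemented below; the interval values are hypothetical:

```python
from stac_fastapi.core.core import CoreClient

# Single datetime -> exact-match filter
assert CoreClient._return_date("2020-02-12T12:30:22Z") == {"eq": "2020-02-12T12:30:22Z"}

# Closed range -> inclusive lower and upper bounds
assert CoreClient._return_date("2020-02-12T00:00:00Z/2020-03-18T12:31:12Z") == {
    "gte": "2020-02-12T00:00:00Z",
    "lte": "2020-03-18T12:31:12Z",
}

# Open-ended start ("..") -> a default lower bound is substituted
assert CoreClient._return_date("../2020-03-18T12:31:12Z") == {
    "gte": "1900-10-01T00:00:00Z",
    "lte": "2020-03-18T12:31:12Z",
}
```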
+ """ + intervals = interval_str.split("/") + if len(intervals) == 1: + datetime = f"{intervals[0][0:19]}Z" + return {"eq": datetime} + else: + start_date = intervals[0] + end_date = intervals[1] + if ".." not in intervals: + start_date = f"{start_date[0:19]}Z" + end_date = f"{end_date[0:19]}Z" + elif start_date != "..": + start_date = f"{start_date[0:19]}Z" + end_date = "2200-12-01T12:31:12Z" + elif end_date != "..": + start_date = "1900-10-01T00:00:00Z" + end_date = f"{end_date[0:19]}Z" + else: + start_date = "1900-10-01T00:00:00Z" + end_date = "2200-12-01T12:31:12Z" + + return {"lte": end_date, "gte": start_date} + + async def get_search( + self, + request: Request, + collections: Optional[List[str]] = None, + ids: Optional[List[str]] = None, + bbox: Optional[List[NumType]] = None, + datetime: Optional[Union[str, datetime_type]] = None, + limit: Optional[int] = 10, + query: Optional[str] = None, + token: Optional[str] = None, + fields: Optional[List[str]] = None, + sortby: Optional[str] = None, + intersects: Optional[str] = None, + filter: Optional[str] = None, + filter_lang: Optional[str] = None, + **kwargs, + ) -> ItemCollection: + """Get search results from the database. + + Args: + collections (Optional[List[str]]): List of collection IDs to search in. + ids (Optional[List[str]]): List of item IDs to search for. + bbox (Optional[List[NumType]]): Bounding box to search in. + datetime (Optional[Union[str, datetime_type]]): Filter items based on the datetime field. + limit (Optional[int]): Maximum number of results to return. + query (Optional[str]): Query string to filter the results. + token (Optional[str]): Access token to use when searching the catalog. + fields (Optional[List[str]]): Fields to include or exclude from the results. + sortby (Optional[str]): Sorting options for the results. + intersects (Optional[str]): GeoJSON geometry to search in. + kwargs: Additional parameters to be passed to the API. + + Returns: + ItemCollection: Collection of `Item` objects representing the search results. + + Raises: + HTTPException: If any error occurs while searching the catalog. 
+ """ + base_args = { + "collections": collections, + "ids": ids, + "bbox": bbox, + "limit": limit, + "token": token, + "query": orjson.loads(query) if query else query, + } + + # this is borrowed from stac-fastapi-pgstac + # Kludgy fix because using factory does not allow alias for filter-lan + query_params = str(request.query_params) + if filter_lang is None: + match = re.search(r"filter-lang=([a-z0-9-]+)", query_params, re.IGNORECASE) + if match: + filter_lang = match.group(1) + + if datetime: + base_args["datetime"] = datetime + + if intersects: + base_args["intersects"] = orjson.loads(unquote_plus(intersects)) + + if sortby: + sort_param = [] + for sort in sortby: + sort_param.append( + { + "field": sort[1:], + "direction": "desc" if sort[0] == "-" else "asc", + } + ) + print(sort_param) + base_args["sortby"] = sort_param + + if filter: + if filter_lang == "cql2-json": + base_args["filter-lang"] = "cql2-json" + base_args["filter"] = orjson.loads(unquote_plus(filter)) + else: + base_args["filter-lang"] = "cql2-json" + base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter))) + + if fields: + includes = set() + excludes = set() + for field in fields: + if field[0] == "-": + excludes.add(field[1:]) + elif field[0] == "+": + includes.add(field[1:]) + else: + includes.add(field) + base_args["fields"] = {"include": includes, "exclude": excludes} + + # Do the request + try: + search_request = self.post_request_model(**base_args) + except ValidationError: + raise HTTPException(status_code=400, detail="Invalid parameters provided") + resp = await self.post_search(search_request=search_request, request=request) + + return resp + + async def post_search( + self, search_request: BaseSearchPostRequest, request: Request + ) -> ItemCollection: + """ + Perform a POST search on the catalog. + + Args: + search_request (BaseSearchPostRequest): Request object that includes the parameters for the search. + kwargs: Keyword arguments passed to the function. + + Returns: + ItemCollection: A collection of items matching the search criteria. + + Raises: + HTTPException: If there is an error with the cql2_json filter. 
+ """ + base_url = str(request.base_url) + + search = self.database.make_search() + + if search_request.ids: + search = self.database.apply_ids_filter( + search=search, item_ids=search_request.ids + ) + + if search_request.collections: + search = self.database.apply_collections_filter( + search=search, collection_ids=search_request.collections + ) + + if search_request.datetime: + datetime_search = self._return_date(search_request.datetime) + search = self.database.apply_datetime_filter( + search=search, datetime_search=datetime_search + ) + + if search_request.bbox: + bbox = search_request.bbox + if len(bbox) == 6: + bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] + + search = self.database.apply_bbox_filter(search=search, bbox=bbox) + + if search_request.intersects: + search = self.database.apply_intersects_filter( + search=search, intersects=search_request.intersects + ) + + if search_request.query: + for field_name, expr in search_request.query.items(): + field = "properties__" + field_name + for op, value in expr.items(): + search = self.database.apply_stacql_filter( + search=search, op=op, field=field, value=value + ) + + # only cql2_json is supported here + if hasattr(search_request, "filter"): + cql2_filter = getattr(search_request, "filter", None) + try: + search = self.database.apply_cql2_filter(search, cql2_filter) + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Error with cql2_json filter: {e}" + ) + + sort = None + if search_request.sortby: + sort = self.database.populate_sort(search_request.sortby) + + limit = 10 + if search_request.limit: + limit = search_request.limit + + items, maybe_count, next_token = await self.database.execute_search( + search=search, + limit=limit, + token=search_request.token, # type: ignore + sort=sort, + collection_ids=search_request.collections, + ) + + items = [ + self.item_serializer.db_to_stac(item, base_url=base_url) for item in items + ] + + if self.extension_is_enabled("FieldsExtension"): + if search_request.query is not None: + query_include: Set[str] = set( + [ + k if k in Settings.get().indexed_fields else f"properties.{k}" + for k in search_request.query.keys() + ] + ) + if not search_request.fields.include: + search_request.fields.include = query_include + else: + search_request.fields.include.union(query_include) + + filter_kwargs = search_request.fields.filter_fields + + items = [ + orjson.loads( + stac_pydantic.Item(**feat).json(**filter_kwargs, exclude_unset=True) + ) + for feat in items + ] + + context_obj = None + if self.extension_is_enabled("ContextExtension"): + context_obj = { + "returned": len(items), + "limit": limit, + } + if maybe_count is not None: + context_obj["matched"] = maybe_count + + links = [] + if next_token: + links = await PagingLinks(request=request, next=next_token).get_links() + + return ItemCollection( + type="FeatureCollection", + features=items, + links=links, + context=context_obj, + ) + + +@attr.s +class TransactionsClient(AsyncBaseTransactionsClient): + """Transactions extension specific CRUD operations.""" + + database: BaseDatabaseLogic = attr.ib() + settings: ApiBaseSettings = attr.ib() + session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) + + @overrides + async def create_item( + self, collection_id: str, item: stac_types.Item, **kwargs + ) -> Optional[stac_types.Item]: + """Create an item in the collection. + + Args: + collection_id (str): The id of the collection to add the item to. + item (stac_types.Item): The item to be added to the collection. 
+ kwargs: Additional keyword arguments. + + Returns: + stac_types.Item: The created item. + + Raises: + NotFound: If the specified collection is not found in the database. + ConflictError: If the item in the specified collection already exists. + + """ + base_url = str(kwargs["request"].base_url) + + # If a feature collection is posted + if item["type"] == "FeatureCollection": + bulk_client = BulkTransactionsClient( + database=self.database, settings=self.settings + ) + processed_items = [ + bulk_client.preprocess_item(item, base_url, BulkTransactionMethod.INSERT) for item in item["features"] # type: ignore + ] + + await self.database.bulk_async( + collection_id, processed_items, refresh=kwargs.get("refresh", False) + ) + + return None + else: + item = await self.database.prep_create_item(item=item, base_url=base_url) + await self.database.create_item(item, refresh=kwargs.get("refresh", False)) + return item + + @overrides + async def update_item( + self, collection_id: str, item_id: str, item: stac_types.Item, **kwargs + ) -> stac_types.Item: + """Update an item in the collection. + + Args: + collection_id (str): The ID of the collection the item belongs to. + item_id (str): The ID of the item to be updated. + item (stac_types.Item): The new item data. + kwargs: Other optional arguments, including the request object. + + Returns: + stac_types.Item: The updated item object. + + Raises: + NotFound: If the specified collection is not found in the database. + + """ + base_url = str(kwargs["request"].base_url) + now = datetime_type.now(timezone.utc).isoformat().replace("+00:00", "Z") + item["properties"]["updated"] = now + + await self.database.check_collection_exists(collection_id) + await self.delete_item(item_id=item_id, collection_id=collection_id) + await self.create_item(collection_id=collection_id, item=item, **kwargs) + + return ItemSerializer.db_to_stac(item, base_url) + + @overrides + async def delete_item( + self, item_id: str, collection_id: str, **kwargs + ) -> Optional[stac_types.Item]: + """Delete an item from a collection. + + Args: + item_id (str): The identifier of the item to delete. + collection_id (str): The identifier of the collection that contains the item. + + Returns: + Optional[stac_types.Item]: The deleted item, or `None` if the item was successfully deleted. + """ + await self.database.delete_item(item_id=item_id, collection_id=collection_id) + return None + + @overrides + async def create_collection( + self, collection: stac_types.Collection, **kwargs + ) -> stac_types.Collection: + """Create a new collection in the database. + + Args: + collection (stac_types.Collection): The collection to be created. + kwargs: Additional keyword arguments. + + Returns: + stac_types.Collection: The created collection object. + + Raises: + ConflictError: If the collection already exists. + """ + base_url = str(kwargs["request"].base_url) + collection_links = CollectionLinks( + collection_id=collection["id"], base_url=base_url + ).create_links() + collection["links"] = collection_links + await self.database.create_collection(collection=collection) + + return CollectionSerializer.db_to_stac(collection, base_url) + + @overrides + async def update_collection( + self, collection: stac_types.Collection, **kwargs + ) -> stac_types.Collection: + """ + Update a collection. + + This method updates an existing collection in the database by first finding + the collection by the id given in the keyword argument `collection_id`. 
+ If no `collection_id` is given the id of the given collection object is used. + If the object and keyword collection ids don't match the sub items + collection id is updated else the items are left unchanged. + The updated collection is then returned. + + Args: + collection: A STAC collection that needs to be updated. + kwargs: Additional keyword arguments. + + Returns: + A STAC collection that has been updated in the database. + + """ + base_url = str(kwargs["request"].base_url) + + collection_id = kwargs["request"].query_params.get( + "collection_id", collection["id"] + ) + + collection_links = CollectionLinks( + collection_id=collection["id"], base_url=base_url + ).create_links() + collection["links"] = collection_links + + await self.database.update_collection( + collection_id=collection_id, collection=collection + ) + + return CollectionSerializer.db_to_stac(collection, base_url) + + @overrides + async def delete_collection( + self, collection_id: str, **kwargs + ) -> Optional[stac_types.Collection]: + """ + Delete a collection. + + This method deletes an existing collection in the database. + + Args: + collection_id (str): The identifier of the collection that contains the item. + kwargs: Additional keyword arguments. + + Returns: + None. + + Raises: + NotFoundError: If the collection doesn't exist. + """ + await self.database.delete_collection(collection_id=collection_id) + return None + + +@attr.s +class BulkTransactionsClient(BaseBulkTransactionsClient): + """A client for posting bulk transactions to a Postgres database. + + Attributes: + session: An instance of `Session` to use for database connection. + database: An instance of `DatabaseLogic` to perform database operations. + """ + + database: BaseDatabaseLogic = attr.ib() + settings: ApiBaseSettings = attr.ib() + session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) + + def __attrs_post_init__(self): + """Create es engine.""" + self.client = self.settings.create_client + + def preprocess_item( + self, item: stac_types.Item, base_url, method: BulkTransactionMethod + ) -> stac_types.Item: + """Preprocess an item to match the data model. + + Args: + item: The item to preprocess. + base_url: The base URL of the request. + method: The bulk transaction method. + + Returns: + The preprocessed item. + """ + exist_ok = method == BulkTransactionMethod.UPSERT + return self.database.sync_prep_create_item( + item=item, base_url=base_url, exist_ok=exist_ok + ) + + @overrides + def bulk_item_insert( + self, items: Items, chunk_size: Optional[int] = None, **kwargs + ) -> str: + """Perform a bulk insertion of items into the database using Elasticsearch. + + Args: + items: The items to insert. + chunk_size: The size of each chunk for bulk processing. + **kwargs: Additional keyword arguments, such as `request` and `refresh`. + + Returns: + A string indicating the number of items successfully added. + """ + request = kwargs.get("request") + if request: + base_url = str(request.base_url) + else: + base_url = "" + + processed_items = [ + self.preprocess_item(item, base_url, items.method) + for item in items.items.values() + ] + + # not a great way to get the collection_id-- should be part of the method signature + collection_id = processed_items[0]["collection"] + + self.database.bulk_sync( + collection_id, processed_items, refresh=kwargs.get("refresh", False) + ) + + return f"Successfully added {len(processed_items)} Items." 
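Editorial note: a minimal sketch of driving the bulk path above directly from Python. The `Items` payload model and `BulkTransactionMethod` are assumed to come from stac-fastapi's third-party bulk-transaction extension, and `Items` is assumed to accept a `method` field (as implied by `items.method` in `bulk_item_insert`). The settings, collection id and example item are invented, and a reachable Elasticsearch/OpenSearch backend with the target collection already created is required.

from stac_fastapi.core.core import BulkTransactionsClient
from stac_fastapi.elasticsearch.config import ElasticsearchSettings
from stac_fastapi.elasticsearch.database_logic import DatabaseLogic
from stac_fastapi.extensions.third_party.bulk_transactions import (
    BulkTransactionMethod,
    Items,
)

settings = ElasticsearchSettings()
bulk_client = BulkTransactionsClient(database=DatabaseLogic(), settings=settings)

# One complete STAC Item keyed by its id; INSERT rejects duplicates, UPSERT overwrites.
item = {
    "type": "Feature",
    "stac_version": "1.0.0",
    "id": "item-1",
    "collection": "demo-collection",
    "geometry": {"type": "Point", "coordinates": [0.0, 0.0]},
    "bbox": [0.0, 0.0, 0.0, 0.0],
    "properties": {"datetime": "2024-01-01T00:00:00Z"},
    "assets": {},
    "links": [],
}

message = bulk_client.bulk_item_insert(
    Items(items={"item-1": item}, method=BulkTransactionMethod.INSERT),
    refresh=True,
)
print(message)  # "Successfully added 1 Items."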
+ + +@attr.s +class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): + """Defines a pattern for implementing the STAC filter extension.""" + + # todo: use the ES _mapping endpoint to dynamically find what fields exist + async def get_queryables( + self, collection_id: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: + """Get the queryables available for the given collection_id. + + If collection_id is None, returns the intersection of all + queryables over all collections. + + This base implementation returns a blank queryable schema. This is not allowed + under OGC CQL but it is allowed by the STAC API Filter Extension + + https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables + + Args: + collection_id (str, optional): The id of the collection to get queryables for. + **kwargs: additional keyword arguments + + Returns: + Dict[str, Any]: A dictionary containing the queryables for the given collection. + """ + return { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://stac-api.example.com/queryables", + "type": "object", + "title": "Queryables for Example STAC API", + "description": "Queryable names for the example STAC API Item Search filter.", + "properties": { + "id": { + "description": "ID", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", + }, + "collection": { + "description": "Collection", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", + }, + "geometry": { + "description": "Geometry", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", + }, + "datetime": { + "description": "Acquisition Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", + }, + "created": { + "description": "Creation Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", + }, + "updated": { + "description": "Creation Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", + }, + "cloud_cover": { + "description": "Cloud Cover", + "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", + }, + "cloud_shadow_percentage": { + "description": "Cloud Shadow Percentage", + "title": "Cloud Shadow Percentage", + "type": "number", + "minimum": 0, + "maximum": 100, + }, + "nodata_pixel_percentage": { + "description": "No Data Pixel Percentage", + "title": "No Data Pixel Percentage", + "type": "number", + "minimum": 0, + "maximum": 100, + }, + }, + "additionalProperties": True, + } diff --git a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py new file mode 100644 index 00000000..2b7a3017 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py @@ -0,0 +1,14 @@ +"""A few datetime methods.""" +from datetime import datetime, timezone + +from pystac.utils import datetime_to_str + + +def now_in_utc() -> datetime: + """Return a datetime value of now with the UTC timezone applied.""" + return datetime.now(timezone.utc) + + +def now_to_rfc3339_str() -> str: + """Return an RFC 3339 string representing now.""" + return datetime_to_str(now_in_utc()) diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py 
b/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py new file mode 100644 index 00000000..7ee6eea5 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py @@ -0,0 +1,5 @@ +"""elasticsearch extensions modifications.""" + +from .query import Operator, QueryableTypes, QueryExtension + +__all__ = ["Operator", "QueryableTypes", "QueryExtension"] diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py new file mode 100644 index 00000000..fe691ddf --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py @@ -0,0 +1,267 @@ +""" +Implements Filter Extension. + +Basic CQL2 (AND, OR, NOT), comparison operators (=, <>, <, <=, >, >=), and IS NULL. +The comparison operators are allowed against string, numeric, boolean, date, and datetime types. + +Advanced comparison operators (http://www.opengis.net/spec/cql2/1.0/req/advanced-comparison-operators) +defines the LIKE, IN, and BETWEEN operators. + +Basic Spatial Operators (http://www.opengis.net/spec/cql2/1.0/conf/basic-spatial-operators) +defines the intersects operator (S_INTERSECTS). +""" +from __future__ import annotations + +import datetime +import re +from enum import Enum +from typing import List, Union + +from geojson_pydantic import ( + GeometryCollection, + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, +) +from pydantic import BaseModel + +queryables_mapping = { + "id": "id", + "collection": "collection", + "geometry": "geometry", + "datetime": "properties.datetime", + "created": "properties.created", + "updated": "properties.updated", + "cloud_cover": "properties.eo:cloud_cover", + "cloud_shadow_percentage": "properties.s2:cloud_shadow_percentage", + "nodata_pixel_percentage": "properties.s2:nodata_pixel_percentage", +} + + +class LogicalOp(str, Enum): + """Logical operator. + + CQL2 logical operators and, or, and not. + """ + + _and = "and" + _or = "or" + _not = "not" + + +class ComparisonOp(str, Enum): + """Comparison operator. + + CQL2 comparison operators =, <>, <, <=, >, >=, and isNull. + """ + + eq = "=" + neq = "<>" + lt = "<" + lte = "<=" + gt = ">" + gte = ">=" + is_null = "isNull" + + def to_es(self): + """Generate an Elasticsearch term operator.""" + if self == ComparisonOp.lt: + return "lt" + elif self == ComparisonOp.lte: + return "lte" + elif self == ComparisonOp.gt: + return "gt" + elif self == ComparisonOp.gte: + return "gte" + else: + raise RuntimeError( + f"Comparison op {self.value} does not have an Elasticsearch term operator equivalent." + ) + + +class AdvancedComparisonOp(str, Enum): + """Advanced Comparison operator. + + CQL2 advanced comparison operators like (~), between, and in. 
+ """ + + like = "like" + between = "between" + _in = "in" + + +class SpatialIntersectsOp(str, Enum): + """Spatial intersections operator s_intersects.""" + + s_intersects = "s_intersects" + + +class PropertyReference(BaseModel): + """Property reference.""" + + property: str + + def to_es(self): + """Produce a term value for this, possibly mapped by a queryable.""" + return queryables_mapping.get(self.property, self.property) + + +class Timestamp(BaseModel): + """Representation of an RFC 3339 datetime value object.""" + + timestamp: datetime.datetime + + def to_es(self): + """Produce an RFC 3339 datetime string.""" + return self.timestamp.isoformat() + + +class Date(BaseModel): + """Representation of an ISO 8601 date value object.""" + + date: datetime.date + + def to_es(self): + """Produce an ISO 8601 date string.""" + return self.date.isoformat() + + +class FloatInt(float): + """Representation of Float/Int.""" + + @classmethod + def __get_validators__(cls): + """Return validator to use.""" + yield cls.validate + + @classmethod + def validate(cls, v): + """Validate input value.""" + if isinstance(v, float): + return v + else: + return int(v) + + +Arg = Union[ + "Clause", + PropertyReference, + Timestamp, + Date, + Point, + MultiPoint, + LineString, + MultiLineString, + Polygon, + MultiPolygon, + GeometryCollection, + FloatInt, + str, + bool, +] + + +class Clause(BaseModel): + """Filter extension clause.""" + + op: Union[LogicalOp, ComparisonOp, AdvancedComparisonOp, SpatialIntersectsOp] + args: List[Union[Arg, List[Arg]]] + + def to_es(self): + """Generate an Elasticsearch expression for this Clause.""" + if self.op == LogicalOp._and: + return {"bool": {"filter": [to_es(arg) for arg in self.args]}} + elif self.op == LogicalOp._or: + return {"bool": {"should": [to_es(arg) for arg in self.args]}} + elif self.op == LogicalOp._not: + return {"bool": {"must_not": [to_es(arg) for arg in self.args]}} + elif self.op == ComparisonOp.eq: + return {"term": {to_es(self.args[0]): to_es(self.args[1])}} + elif self.op == ComparisonOp.neq: + return { + "bool": { + "must_not": [{"term": {to_es(self.args[0]): to_es(self.args[1])}}] + } + } + elif self.op == AdvancedComparisonOp.like: + return { + "wildcard": { + to_es(self.args[0]): { + "value": cql2_like_to_es(str(to_es(self.args[1]))), + "case_insensitive": "false", + } + } + } + elif self.op == AdvancedComparisonOp.between: + return { + "range": { + to_es(self.args[0]): { + "gte": to_es(self.args[1]), + "lte": to_es(self.args[2]), + } + } + } + elif self.op == AdvancedComparisonOp._in: + if not isinstance(self.args[1], List): + raise RuntimeError(f"Arg {self.args[1]} is not a list") + return { + "terms": {to_es(self.args[0]): [to_es(arg) for arg in self.args[1]]} + } + elif ( + self.op == ComparisonOp.lt + or self.op == ComparisonOp.lte + or self.op == ComparisonOp.gt + or self.op == ComparisonOp.gte + ): + return { + "range": {to_es(self.args[0]): {to_es(self.op): to_es(self.args[1])}} + } + elif self.op == ComparisonOp.is_null: + return {"bool": {"must_not": {"exists": {"field": to_es(self.args[0])}}}} + elif self.op == SpatialIntersectsOp.s_intersects: + return { + "geo_shape": { + to_es(self.args[0]): { + "shape": to_es(self.args[1]), + "relation": "intersects", + } + } + } + + +def to_es(arg: Arg): + """Generate an Elasticsearch expression for this Arg.""" + if (to_es_method := getattr(arg, "to_es", None)) and callable(to_es_method): + return to_es_method() + elif gi := getattr(arg, "__geo_interface__", None): + return gi + elif isinstance(arg, 
GeometryCollection): + return arg.dict() + elif ( + isinstance(arg, int) + or isinstance(arg, float) + or isinstance(arg, str) + or isinstance(arg, bool) + ): + return arg + else: + raise RuntimeError(f"unknown arg {repr(arg)}") + + +def cql2_like_to_es(string): + """Convert wildcard characters in CQL2 ('_' and '%') to Elasticsearch wildcard characters ('?' and '*', respectively). Handle escape characters and pass through Elasticsearch wildcards.""" + percent_pattern = r"(? Callable[[Any, Any], bool]: + """Return python operator.""" + return getattr(operator, self._value_) + + +class Queryables(str, AutoValueEnum): + """Queryable fields.""" + + ... + + +@dataclass +class QueryableTypes: + """Defines a set of queryable fields.""" + + ... + + +class QueryExtensionPostRequest(BaseModel): + """Queryable validation. + + Add queryables validation to the POST request + to raise errors for unsupported querys. + """ + + query: Optional[Dict[Queryables, Dict[Operator, Any]]] + + @root_validator(pre=True) + def validate_query_fields(cls, values: Dict) -> Dict: + """Validate query fields.""" + ... + + +class QueryExtension(QueryExtensionBase): + """Query Extenson. + + Override the POST request model to add validation against + supported fields + """ + + ... diff --git a/stac_fastapi/core/stac_fastapi/core/models/__init__.py b/stac_fastapi/core/stac_fastapi/core/models/__init__.py new file mode 100644 index 00000000..d0748bcc --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/models/__init__.py @@ -0,0 +1 @@ +"""stac_fastapi.elasticsearch.models module.""" diff --git a/stac_fastapi/core/stac_fastapi/core/models/links.py b/stac_fastapi/core/stac_fastapi/core/models/links.py new file mode 100644 index 00000000..3941a149 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/models/links.py @@ -0,0 +1,138 @@ +"""link helpers.""" + +from typing import Any, Dict, List, Optional +from urllib.parse import ParseResult, parse_qs, unquote, urlencode, urljoin, urlparse + +import attr +from stac_pydantic.links import Relations +from stac_pydantic.shared import MimeTypes +from starlette.requests import Request + +# Copied from pgstac links + +# These can be inferred from the item/collection, so they aren't included in the database +# Instead they are dynamically generated when querying the database using the classes defined below +INFERRED_LINK_RELS = ["self", "item", "parent", "collection", "root"] + + +def merge_params(url: str, newparams: Dict) -> str: + """Merge url parameters.""" + u = urlparse(url) + params = parse_qs(u.query) + params.update(newparams) + param_string = unquote(urlencode(params, True)) + + href = ParseResult( + scheme=u.scheme, + netloc=u.netloc, + path=u.path, + params=u.params, + query=param_string, + fragment=u.fragment, + ).geturl() + return href + + +@attr.s +class BaseLinks: + """Create inferred links common to collections and items.""" + + request: Request = attr.ib() + + @property + def base_url(self): + """Get the base url.""" + return str(self.request.base_url) + + @property + def url(self): + """Get the current request url.""" + return str(self.request.url) + + def resolve(self, url): + """Resolve url to the current request url.""" + return urljoin(str(self.base_url), str(url)) + + def link_self(self) -> Dict: + """Return the self link.""" + return dict(rel=Relations.self.value, type=MimeTypes.json.value, href=self.url) + + def link_root(self) -> Dict: + """Return the catalog root.""" + return dict( + rel=Relations.root.value, type=MimeTypes.json.value, href=self.base_url + ) 
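Editorial note: `merge_params` above is a pure helper and easy to sanity-check in isolation; the URL and token value below are made up.

from stac_fastapi.core.models.links import merge_params

# Existing query parameters survive the parse_qs/urlencode round-trip,
# and the new "token" parameter is appended (this is how GET next-page links are built).
print(merge_params("https://stac.example.com/search?limit=10", {"token": "next-abc"}))
# https://stac.example.com/search?limit=10&token=next-abc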
+ + def create_links(self) -> List[Dict[str, Any]]: + """Return all inferred links.""" + links = [] + for name in dir(self): + if name.startswith("link_") and callable(getattr(self, name)): + link = getattr(self, name)() + if link is not None: + links.append(link) + return links + + async def get_links( + self, extra_links: Optional[List[Dict[str, Any]]] = None + ) -> List[Dict[str, Any]]: + """ + Generate all the links. + + Get the links object for a stac resource by iterating through + available methods on this class that start with link_. + """ + # TODO: Pass request.json() into function so this doesn't need to be coroutine + if self.request.method == "POST": + self.request.postbody = await self.request.json() + # join passed in links with generated links + # and update relative paths + links = self.create_links() + + if extra_links: + # For extra links passed in, + # add links modified with a resolved href. + # Drop any links that are dynamically + # determined by the server (e.g. self, parent, etc.) + # Resolving the href allows for relative paths + # to be stored in pgstac and for the hrefs in the + # links of response STAC objects to be resolved + # to the request url. + links += [ + {**link, "href": self.resolve(link["href"])} + for link in extra_links + if link["rel"] not in INFERRED_LINK_RELS + ] + + return links + + +@attr.s +class PagingLinks(BaseLinks): + """Create links for paging.""" + + next: Optional[str] = attr.ib(kw_only=True, default=None) + + def link_next(self) -> Optional[Dict[str, Any]]: + """Create link for next page.""" + if self.next is not None: + method = self.request.method + if method == "GET": + href = merge_params(self.url, {"token": self.next}) + link = dict( + rel=Relations.next.value, + type=MimeTypes.json.value, + method=method, + href=href, + ) + return link + if method == "POST": + return { + "rel": Relations.next, + "type": MimeTypes.json, + "method": method, + "href": f"{self.request.url}", + "body": {**self.request.postbody, "token": self.next}, + } + + return None diff --git a/stac_fastapi/core/stac_fastapi/core/models/search.py b/stac_fastapi/core/stac_fastapi/core/models/search.py new file mode 100644 index 00000000..33b73b68 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/models/search.py @@ -0,0 +1 @@ +"""Unused search model.""" diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py new file mode 100644 index 00000000..8e83ef7c --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -0,0 +1,156 @@ +"""Serializers.""" +import abc +from copy import deepcopy +from typing import Any + +import attr + +from stac_fastapi.core.datetime_utils import now_to_rfc3339_str +from stac_fastapi.types import stac as stac_types +from stac_fastapi.types.links import CollectionLinks, ItemLinks, resolve_links + + +@attr.s +class Serializer(abc.ABC): + """Defines serialization methods between the API and the data model. + + This class is meant to be subclassed and implemented by specific serializers for different STAC objects (e.g. Item, Collection). + """ + + @classmethod + @abc.abstractmethod + def db_to_stac(cls, item: dict, base_url: str) -> Any: + """Transform database model to STAC object. + + Arguments: + item (dict): A dictionary representing the database model. + base_url (str): The base URL of the STAC API. + + Returns: + Any: A STAC object, e.g. an `Item` or `Collection`, representing the input `item`. + """ + ... 
+ + @classmethod + @abc.abstractmethod + def stac_to_db(cls, stac_object: Any, base_url: str) -> dict: + """Transform STAC object to database model. + + Arguments: + stac_object (Any): A STAC object, e.g. an `Item` or `Collection`. + base_url (str): The base URL of the STAC API. + + Returns: + dict: A dictionary representing the database model. + """ + ... + + +class ItemSerializer(Serializer): + """Serialization methods for STAC items.""" + + @classmethod + def stac_to_db(cls, stac_data: stac_types.Item, base_url: str) -> stac_types.Item: + """Transform STAC item to database-ready STAC item. + + Args: + stac_data (stac_types.Item): The STAC item object to be transformed. + base_url (str): The base URL for the STAC API. + + Returns: + stac_types.Item: The database-ready STAC item object. + """ + item_links = ItemLinks( + collection_id=stac_data["collection"], + item_id=stac_data["id"], + base_url=base_url, + ).create_links() + stac_data["links"] = item_links + + now = now_to_rfc3339_str() + if "created" not in stac_data["properties"]: + stac_data["properties"]["created"] = now + stac_data["properties"]["updated"] = now + return stac_data + + @classmethod + def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item: + """Transform database-ready STAC item to STAC item. + + Args: + item (dict): The database-ready STAC item to be transformed. + base_url (str): The base URL for the STAC API. + + Returns: + stac_types.Item: The STAC item object. + """ + item_id = item["id"] + collection_id = item["collection"] + item_links = ItemLinks( + collection_id=collection_id, item_id=item_id, base_url=base_url + ).create_links() + + original_links = item.get("links", []) + if original_links: + item_links += resolve_links(original_links, base_url) + + return stac_types.Item( + type="Feature", + stac_version=item.get("stac_version", ""), + stac_extensions=item.get("stac_extensions", []), + id=item_id, + collection=item.get("collection", ""), + geometry=item.get("geometry", {}), + bbox=item.get("bbox", []), + properties=item.get("properties", {}), + links=item_links, + assets=item.get("assets", {}), + ) + + +class CollectionSerializer(Serializer): + """Serialization methods for STAC collections.""" + + @classmethod + def db_to_stac(cls, collection: dict, base_url: str) -> stac_types.Collection: + """Transform database model to STAC collection. + + Args: + collection (dict): The collection data in dictionary form, extracted from the database. + base_url (str): The base URL for the collection. + + Returns: + stac_types.Collection: The STAC collection object. + """ + # Avoid modifying the input dict in-place ... 
doing so breaks some tests + collection = deepcopy(collection) + + # Set defaults + collection_id = collection.get("id") + collection.setdefault("type", "Collection") + collection.setdefault("stac_extensions", []) + collection.setdefault("stac_version", "") + collection.setdefault("title", "") + collection.setdefault("description", "") + collection.setdefault("keywords", []) + collection.setdefault("license", "") + collection.setdefault("providers", []) + collection.setdefault("summaries", {}) + collection.setdefault( + "extent", {"spatial": {"bbox": []}, "temporal": {"interval": []}} + ) + collection.setdefault("assets", {}) + + # Create the collection links using CollectionLinks + collection_links = CollectionLinks( + collection_id=collection_id, base_url=base_url + ).create_links() + + # Add any additional links from the collection dictionary + original_links = collection.get("links") + if original_links: + collection_links += resolve_links(original_links, base_url) + collection["links"] = collection_links + + # Return the stac_types.Collection object + return stac_types.Collection(**collection) diff --git a/stac_fastapi/core/stac_fastapi/core/session.py b/stac_fastapi/core/stac_fastapi/core/session.py new file mode 100644 index 00000000..d5a7aa3c --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/session.py @@ -0,0 +1,25 @@ +"""database session management.""" +import logging + +import attr + +logger = logging.getLogger(__name__) + + +@attr.s +class Session: + """Database session management.""" + + @classmethod + def create_from_env(cls): + """Create from environment.""" + ... + + @classmethod + def create_from_settings(cls, settings): + """Create a Session object from settings.""" + ... + + def __attrs_post_init__(self): + """Post init handler.""" + ... diff --git a/stac_fastapi/core/stac_fastapi/core/types/core.py b/stac_fastapi/core/stac_fastapi/core/types/core.py new file mode 100644 index 00000000..1212619c --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/types/core.py @@ -0,0 +1,306 @@ +"""Base clients. Takef from stac-fastapi.types.core v2.4.9.""" +import abc +from datetime import datetime +from typing import Any, Dict, List, Optional, Union + +import attr +from starlette.responses import Response + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.types import stac as stac_types +from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES +from stac_fastapi.types.extension import ApiExtension +from stac_fastapi.types.search import BaseSearchPostRequest +from stac_fastapi.types.stac import Conformance + +NumType = Union[float, int] +StacType = Dict[str, Any] + + +@attr.s +class AsyncBaseTransactionsClient(abc.ABC): + """Defines a pattern for implementing the STAC transaction extension.""" + + database = attr.ib(default=BaseDatabaseLogic) + + @abc.abstractmethod + async def create_item( + self, + collection_id: str, + item: Union[stac_types.Item, stac_types.ItemCollection], + **kwargs, + ) -> Optional[Union[stac_types.Item, Response, None]]: + """Create a new item. + + Called with `POST /collections/{collection_id}/items`. + + Args: + item: the item or item collection + collection_id: the id of the collection from the resource path + + Returns: + The item that was created or None if item collection. + """ + ... 
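Editorial note, circling back to the `CollectionSerializer` added earlier in this file set: a small, self-contained illustration of its defaulting behaviour. The record and base URL are invented; only the stac_fastapi packages are needed, no running database.

from stac_fastapi.core.serializers import CollectionSerializer

record = {"id": "demo", "description": "Demo collection", "license": "CC-BY-4.0"}
collection = CollectionSerializer.db_to_stac(record, base_url="https://stac.example.com/")

print(collection["type"])    # Collection
print(collection["extent"])  # {'spatial': {'bbox': []}, 'temporal': {'interval': []}}
# Links are regenerated from the collection id and base URL, since the record had none.
print(all("rel" in link and "href" in link for link in collection["links"]))  # True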
+ + @abc.abstractmethod + async def update_item( + self, collection_id: str, item_id: str, item: stac_types.Item, **kwargs + ) -> Optional[Union[stac_types.Item, Response]]: + """Perform a complete update on an existing item. + + Called with `PUT /collections/{collection_id}/items`. It is expected + that this item already exists. The update should do a diff against the + saved item and perform any necessary updates. Partial updates are not + supported by the transactions extension. + + Args: + item: the item (must be complete) + + Returns: + The updated item. + """ + ... + + @abc.abstractmethod + async def delete_item( + self, item_id: str, collection_id: str, **kwargs + ) -> Optional[Union[stac_types.Item, Response]]: + """Delete an item from a collection. + + Called with `DELETE /collections/{collection_id}/items/{item_id}` + + Args: + item_id: id of the item. + collection_id: id of the collection. + + Returns: + The deleted item. + """ + ... + + @abc.abstractmethod + async def create_collection( + self, collection: stac_types.Collection, **kwargs + ) -> Optional[Union[stac_types.Collection, Response]]: + """Create a new collection. + + Called with `POST /collections`. + + Args: + collection: the collection + + Returns: + The collection that was created. + """ + ... + + @abc.abstractmethod + async def update_collection( + self, collection: stac_types.Collection, **kwargs + ) -> Optional[Union[stac_types.Collection, Response]]: + """Perform a complete update on an existing collection. + + Called with `PUT /collections`. It is expected that this item already + exists. The update should do a diff against the saved collection and + perform any necessary updates. Partial updates are not supported by the + transactions extension. + + Args: + collection: the collection (must be complete) + + Returns: + The updated collection. + """ + ... + + @abc.abstractmethod + async def delete_collection( + self, collection_id: str, **kwargs + ) -> Optional[Union[stac_types.Collection, Response]]: + """Delete a collection. + + Called with `DELETE /collections/{collection_id}` + + Args: + collection_id: id of the collection. + + Returns: + The deleted collection. + """ + ... + + +@attr.s # type:ignore +class AsyncBaseCoreClient(abc.ABC): + """Defines a pattern for implementing STAC api core endpoints. + + Attributes: + extensions: list of registered api extensions. + """ + + database = attr.ib(default=BaseDatabaseLogic) + + base_conformance_classes: List[str] = attr.ib( + factory=lambda: BASE_CONFORMANCE_CLASSES + ) + extensions: List[ApiExtension] = attr.ib(default=attr.Factory(list)) + post_request_model = attr.ib(default=BaseSearchPostRequest) + + def conformance_classes(self) -> List[str]: + """Generate conformance classes.""" + conformance_classes = self.base_conformance_classes.copy() + + for extension in self.extensions: + extension_classes = getattr(extension, "conformance_classes", []) + conformance_classes.extend(extension_classes) + + return list(set(conformance_classes)) + + def extension_is_enabled(self, extension: str) -> bool: + """Check if an api extension is enabled.""" + return any([type(ext).__name__ == extension for ext in self.extensions]) + + async def conformance(self, **kwargs) -> stac_types.Conformance: + """Conformance classes. + + Called with `GET /conformance`. + + Returns: + Conformance classes which the server conforms to. 
+ """ + return Conformance(conformsTo=self.conformance_classes()) + + @abc.abstractmethod + async def post_search( + self, search_request: BaseSearchPostRequest, **kwargs + ) -> stac_types.ItemCollection: + """Cross catalog search (POST). + + Called with `POST /search`. + + Args: + search_request: search request parameters. + + Returns: + ItemCollection containing items which match the search criteria. + """ + ... + + @abc.abstractmethod + async def get_search( + self, + collections: Optional[List[str]] = None, + ids: Optional[List[str]] = None, + bbox: Optional[List[NumType]] = None, + datetime: Optional[Union[str, datetime]] = None, + limit: Optional[int] = 10, + query: Optional[str] = None, + token: Optional[str] = None, + fields: Optional[List[str]] = None, + sortby: Optional[str] = None, + intersects: Optional[str] = None, + **kwargs, + ) -> stac_types.ItemCollection: + """Cross catalog search (GET). + + Called with `GET /search`. + + Returns: + ItemCollection containing items which match the search criteria. + """ + ... + + @abc.abstractmethod + async def get_item( + self, item_id: str, collection_id: str, **kwargs + ) -> stac_types.Item: + """Get item by id. + + Called with `GET /collections/{collection_id}/items/{item_id}`. + + Args: + item_id: Id of the item. + collection_id: Id of the collection. + + Returns: + Item. + """ + ... + + @abc.abstractmethod + async def all_collections(self, **kwargs) -> stac_types.Collections: + """Get all available collections. + + Called with `GET /collections`. + + Returns: + A list of collections. + """ + ... + + @abc.abstractmethod + async def get_collection( + self, collection_id: str, **kwargs + ) -> stac_types.Collection: + """Get collection by id. + + Called with `GET /collections/{collection_id}`. + + Args: + collection_id: Id of the collection. + + Returns: + Collection. + """ + ... + + @abc.abstractmethod + async def item_collection( + self, + collection_id: str, + bbox: Optional[List[NumType]] = None, + datetime: Optional[Union[str, datetime]] = None, + limit: int = 10, + token: str = None, + **kwargs, + ) -> stac_types.ItemCollection: + """Get all items from a specific collection. + + Called with `GET /collections/{collection_id}/items` + + Args: + collection_id: id of the collection. + limit: number of items to return. + token: pagination token. + + Returns: + An ItemCollection. + """ + ... + + +@attr.s +class AsyncBaseFiltersClient(abc.ABC): + """Defines a pattern for implementing the STAC filter extension.""" + + async def get_queryables( + self, collection_id: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: + """Get the queryables available for the given collection_id. + + If collection_id is None, returns the intersection of all queryables over all + collections. + + This base implementation returns a blank queryable schema. 
This is not allowed + under OGC CQL but it is allowed by the STAC API Filter Extension + https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables + """ + return { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://example.org/queryables", + "type": "object", + "title": "Queryables for Example STAC API", + "description": "Queryable names for the example STAC API Item Search filter.", + "properties": {}, + } diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py b/stac_fastapi/core/stac_fastapi/core/utilities.py new file mode 100644 index 00000000..b5dac390 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/utilities.py @@ -0,0 +1,21 @@ +"""Module for geospatial processing functions. + +This module contains functions for transforming geospatial coordinates, +such as converting bounding boxes to polygon representations. +""" +from typing import List + + +def bbox2polygon(b0: float, b1: float, b2: float, b3: float) -> List[List[List[float]]]: + """Transform a bounding box represented by its four coordinates `b0`, `b1`, `b2`, and `b3` into a polygon. + + Args: + b0 (float): The x-coordinate of the lower-left corner of the bounding box. + b1 (float): The y-coordinate of the lower-left corner of the bounding box. + b2 (float): The x-coordinate of the upper-right corner of the bounding box. + b3 (float): The y-coordinate of the upper-right corner of the bounding box. + + Returns: + List[List[List[float]]]: A polygon represented as a list of lists of coordinates. + """ + return [[[b0, b1], [b2, b1], [b2, b3], [b0, b3], [b0, b1]]] diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/version.py b/stac_fastapi/core/stac_fastapi/core/version.py similarity index 100% rename from stac_fastapi/mongo/stac_fastapi/mongo/version.py rename to stac_fastapi/core/stac_fastapi/core/version.py diff --git a/stac_fastapi/elasticsearch/README.md b/stac_fastapi/elasticsearch/README.md new file mode 100644 index 00000000..becdb4d7 --- /dev/null +++ b/stac_fastapi/elasticsearch/README.md @@ -0,0 +1,3 @@ +# Requirements + +The Elasticsearch backend requires **elasticsearch**. 
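Editorial note: the `bbox2polygon` helper added in stac_fastapi/core/utilities.py above is pure and easy to verify; the coordinates below are arbitrary.

from stac_fastapi.core.utilities import bbox2polygon

# A 2D bbox (west, south, east, north) becomes one closed ring, ready to be used
# as the "coordinates" of a GeoJSON-style polygon in a geo_shape filter.
print(bbox2polygon(-10.0, -5.0, 10.0, 5.0))
# [[[-10.0, -5.0], [10.0, -5.0], [10.0, 5.0], [-10.0, 5.0], [-10.0, -5.0]]]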
diff --git a/stac_fastapi/mongo/pytest.ini b/stac_fastapi/elasticsearch/pytest.ini similarity index 100% rename from stac_fastapi/mongo/pytest.ini rename to stac_fastapi/elasticsearch/pytest.ini diff --git a/stac_fastapi/elasticsearch/setup.cfg b/stac_fastapi/elasticsearch/setup.cfg new file mode 100644 index 00000000..7a42432c --- /dev/null +++ b/stac_fastapi/elasticsearch/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +version = attr: stac_fastapi.elasticsearch.version.__version__ diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py new file mode 100644 index 00000000..278cc356 --- /dev/null +++ b/stac_fastapi/elasticsearch/setup.py @@ -0,0 +1,58 @@ +"""stac_fastapi: elasticsearch module.""" + +from setuptools import find_namespace_packages, setup + +with open("README.md") as f: + desc = f.read() + +install_requires = [ + "stac-fastapi.core==2.0.0", + "elasticsearch[async]==8.11.0", + "elasticsearch-dsl==8.11.0", + "uvicorn", + "starlette", +] + +extra_reqs = { + "dev": [ + "pytest", + "pytest-cov", + "pytest-asyncio", + "pre-commit", + "requests", + "ciso8601", + "httpx", + ], + "docs": ["mkdocs", "mkdocs-material", "pdocs"], + "server": ["uvicorn[standard]==0.19.0"], +} + +setup( + name="stac-fastapi.elasticsearch", + description="An implementation of STAC API based on the FastAPI framework with both Elasticsearch and Opensearch.", + long_description=desc, + long_description_content_type="text/markdown", + python_requires=">=3.8", + classifiers=[ + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + ], + url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", + license="MIT", + packages=find_namespace_packages(exclude=["alembic", "tests", "scripts"]), + zip_safe=False, + install_requires=install_requires, + tests_require=extra_reqs["dev"], + extras_require=extra_reqs, + entry_points={ + "console_scripts": [ + "stac-fastapi-elasticsearch=stac_fastapi.elasticsearch.app:run" + ] + }, +) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py new file mode 100644 index 00000000..dbb6116a --- /dev/null +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py @@ -0,0 +1 @@ +"""elasticsearch submodule.""" diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py similarity index 89% rename from stac_fastapi/mongo/stac_fastapi/mongo/app.py rename to stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 59823985..0d896534 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -10,6 +10,11 @@ ) from stac_fastapi.core.extensions import QueryExtension from stac_fastapi.core.session import Session +from stac_fastapi.elasticsearch.config import ElasticsearchSettings +from stac_fastapi.elasticsearch.database_logic import ( + DatabaseLogic, + create_collection_index, +) from stac_fastapi.extensions.core import ( ContextExtension, FieldsExtension, @@ -19,14 +24,8 @@ TransactionExtension, ) from stac_fastapi.extensions.third_party import BulkTransactionExtension -from stac_fastapi.mongo.config import AsyncMongoDBSettings 
-from stac_fastapi.mongo.database_logic import ( - DatabaseLogic, - create_collection_index, - create_item_index, -) -settings = AsyncMongoDBSettings() +settings = ElasticsearchSettings() session = Session.create_from_settings(settings) filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) @@ -75,7 +74,6 @@ @app.on_event("startup") async def _startup_event() -> None: await create_collection_index() - await create_item_index() def run() -> None: @@ -83,10 +81,8 @@ def run() -> None: try: import uvicorn - print("host: ", settings.app_host) - print("port: ", settings.app_port) uvicorn.run( - "stac_fastapi.mongo.app:app", + "stac_fastapi.elasticsearch.app:app", host=settings.app_host, port=settings.app_port, log_level="info", diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py new file mode 100644 index 00000000..10cf95e9 --- /dev/null +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py @@ -0,0 +1,80 @@ +"""API configuration.""" +import os +import ssl +from typing import Any, Dict, Set + +from elasticsearch import AsyncElasticsearch, Elasticsearch # type: ignore +from stac_fastapi.types.config import ApiSettings + + +def _es_config() -> Dict[str, Any]: + # Determine the scheme (http or https) + use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true" + scheme = "https" if use_ssl else "http" + + # Configure the hosts parameter with the correct scheme + hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"] + + # Initialize the configuration dictionary + config = { + "hosts": hosts, + "headers": {"accept": "application/vnd.elasticsearch+json; compatible-with=7"}, + } + + # Explicitly exclude SSL settings when not using SSL + if not use_ssl: + return config + + # Include SSL settings if using https + config["ssl_version"] = ssl.TLSVersion.TLSv1_3 # type: ignore + config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false" # type: ignore + + # Include CA Certificates if verifying certs + if config["verify_certs"]: + config["ca_certs"] = os.getenv( + "CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt" + ) + + # Handle authentication + if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")): + config["http_auth"] = (u, p) + + if api_key := os.getenv("ES_API_KEY"): + if isinstance(config["headers"], dict): + headers = {**config["headers"], "x-api-key": api_key} + + else: + config["headers"] = {"x-api-key": api_key} + + config["headers"] = headers + + return config + + +_forbidden_fields: Set[str] = {"type"} + + +class ElasticsearchSettings(ApiSettings): + """API settings.""" + + # Fields which are defined by STAC but not included in the database model + forbidden_fields: Set[str] = _forbidden_fields + indexed_fields: Set[str] = {"datetime"} + + @property + def create_client(self): + """Create es client.""" + return Elasticsearch(**_es_config()) + + +class AsyncElasticsearchSettings(ApiSettings): + """API settings.""" + + # Fields which are defined by STAC but not included in the database model + forbidden_fields: Set[str] = _forbidden_fields + indexed_fields: Set[str] = {"datetime"} + + @property + def create_client(self): + """Create async elasticsearch client.""" + return AsyncElasticsearch(**_es_config()) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py new file mode 100644 index 00000000..87ca8916 --- /dev/null +++ 
b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -0,0 +1,894 @@ +"""Database logic.""" +import asyncio +import logging +import os +from base64 import urlsafe_b64decode, urlsafe_b64encode +from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union + +import attr +from elasticsearch_dsl import Q, Search + +from elasticsearch import exceptions, helpers # type: ignore +from stac_fastapi.core.extensions import filter +from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer +from stac_fastapi.core.utilities import bbox2polygon +from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings +from stac_fastapi.elasticsearch.config import ( + ElasticsearchSettings as SyncElasticsearchSettings, +) +from stac_fastapi.types.errors import ConflictError, NotFoundError +from stac_fastapi.types.stac import Collection, Item + +logger = logging.getLogger(__name__) + +NumType = Union[float, int] + +COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") +ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") +ES_INDEX_NAME_UNSUPPORTED_CHARS = { + "\\", + "/", + "*", + "?", + '"', + "<", + ">", + "|", + " ", + ",", + "#", + ":", +} + +ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" + +DEFAULT_SORT = { + "properties.datetime": {"order": "desc"}, + "id": {"order": "desc"}, + "collection": {"order": "desc"}, +} + +ES_ITEMS_SETTINGS = { + "index": { + "sort.field": list(DEFAULT_SORT.keys()), + "sort.order": [v["order"] for v in DEFAULT_SORT.values()], + } +} + +ES_MAPPINGS_DYNAMIC_TEMPLATES = [ + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + { + "descriptions": { + "match_mapping_type": "string", + "match": "description", + "mapping": {"type": "text"}, + } + }, + { + "titles": { + "match_mapping_type": "string", + "match": "title", + "mapping": {"type": "text"}, + } + }, + # Projection Extension https://github.com/stac-extensions/projection + {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, + { + "proj_projjson": { + "match": "proj:projjson", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "proj_centroid": { + "match": "proj:centroid", + "mapping": {"type": "geo_point"}, + } + }, + { + "proj_geometry": { + "match": "proj:geometry", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "no_index_href": { + "match": "href", + "mapping": {"type": "text", "index": False}, + } + }, + # Default all other strings not otherwise specified to keyword + {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, + {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, +] + +ES_ITEMS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "geometry": {"type": "geo_shape"}, + "assets": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "properties": { + "type": "object", + "properties": { + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + "datetime": {"type": "date"}, + "start_datetime": {"type": "date"}, + "end_datetime": {"type": "date"}, + "created": {"type": "date"}, + "updated": {"type": "date"}, + # Satellite Extension https://github.com/stac-extensions/sat + "sat:absolute_orbit": {"type": "integer"}, + "sat:relative_orbit": {"type": 
"integer"}, + }, + }, + }, +} + +ES_COLLECTIONS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "extent.spatial.bbox": {"type": "long"}, + "extent.temporal.interval": {"type": "date"}, + "providers": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "item_assets": {"type": "object", "enabled": False}, + }, +} + + +def index_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index name. + + Args: + collection_id (str): The collection id to translate into an index name. + + Returns: + str: The index name derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + + +def indices(collection_ids: Optional[List[str]]) -> str: + """ + Get a comma-separated string of index names for a given list of collection ids. + + Args: + collection_ids: A list of collection ids. + + Returns: + A string of comma-separated index names. If `collection_ids` is None, returns the default indices. + """ + if collection_ids is None: + return ITEM_INDICES + else: + return ",".join([index_by_collection_id(c) for c in collection_ids]) + + +async def create_collection_index() -> None: + """ + Create the index for a Collection. + + Returns: + None + + """ + client = AsyncElasticsearchSettings().create_client + + await client.options(ignore_status=400).indices.create( + index=f"{COLLECTIONS_INDEX}-000001", + aliases={COLLECTIONS_INDEX: {}}, + mappings=ES_COLLECTIONS_MAPPINGS, + ) + await client.close() + + +async def create_item_index(collection_id: str): + """ + Create the index for Items. + + Args: + collection_id (str): Collection identifier. + + Returns: + None + + """ + client = AsyncElasticsearchSettings().create_client + index_name = index_by_collection_id(collection_id) + + await client.options(ignore_status=400).indices.create( + index=f"{index_by_collection_id(collection_id)}-000001", + aliases={index_name: {}}, + mappings=ES_ITEMS_MAPPINGS, + settings=ES_ITEMS_SETTINGS, + ) + await client.close() + + +async def delete_item_index(collection_id: str): + """Delete the index for items in a collection. + + Args: + collection_id (str): The ID of the collection whose items index will be deleted. + """ + client = AsyncElasticsearchSettings().create_client + + name = index_by_collection_id(collection_id) + resolved = await client.indices.resolve_index(name=name) + if "aliases" in resolved and resolved["aliases"]: + [alias] = resolved["aliases"] + await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) + await client.indices.delete(index=alias["indices"]) + else: + await client.indices.delete(index=name) + await client.close() + + +def mk_item_id(item_id: str, collection_id: str): + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +def mk_actions(collection_id: str, processed_items: List[Item]): + """Create Elasticsearch bulk actions for a list of processed items. + + Args: + collection_id (str): The identifier for the collection the items belong to. 
+ processed_items (List[Item]): The list of processed items to be bulk indexed. + + Returns: + List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, + each action being a dictionary with the following keys: + - `_index`: the index to store the document in. + - `_id`: the document's identifier. + - `_source`: the source of the document. + """ + return [ + { + "_index": index_by_collection_id(collection_id), + "_id": mk_item_id(item["id"], item["collection"]), + "_source": item, + } + for item in processed_items + ] + + +# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, +# So create our own Protocol for typing +# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] +class Geometry(Protocol): # noqa + type: str + coordinates: Any + + +@attr.s +class DatabaseLogic: + """Database logic.""" + + client = AsyncElasticsearchSettings().create_client + sync_client = SyncElasticsearchSettings().create_client + + item_serializer: Type[ItemSerializer] = attr.ib(default=ItemSerializer) + collection_serializer: Type[CollectionSerializer] = attr.ib( + default=CollectionSerializer + ) + + """CORE LOGIC""" + + async def get_all_collections( + self, token: Optional[str], limit: int, base_url: str + ) -> Tuple[List[Dict[str, Any]], Optional[str]]: + """Retrieve a list of all collections from Elasticsearch, supporting pagination. + + Args: + token (Optional[str]): The pagination token. + limit (int): The number of results to return. + + Returns: + A tuple of (collections, next pagination token if any). + """ + search_after = None + if token: + search_after = [token] + + response = await self.client.search( + index=COLLECTIONS_INDEX, + body={ + "sort": [{"id": {"order": "asc"}}], + "size": limit, + "search_after": search_after, + }, + ) + + hits = response["hits"]["hits"] + collections = [ + self.collection_serializer.db_to_stac( + collection=hit["_source"], base_url=base_url + ) + for hit in hits + ] + + next_token = None + if len(hits) == limit: + next_token = hits[-1]["sort"][0] + + return collections, next_token + + async def get_one_item(self, collection_id: str, item_id: str) -> Dict: + """Retrieve a single item from the database. + + Args: + collection_id (str): The id of the Collection that the Item belongs to. + item_id (str): The id of the Item. + + Returns: + item (Dict): A dictionary containing the source data for the Item. + + Raises: + NotFoundError: If the specified Item does not exist in the Collection. + + Notes: + The Item is retrieved from the Elasticsearch database using the `client.get` method, + with the index for the Collection as the target index and the combined `mk_item_id` as the document id. 
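# Illustrative pagination flow for get_all_collections() above (hypothetical ids):
#   page 1: token=None   -> search sorted by "id" asc with size=limit; if a full page comes
#                           back, next_token is the last hit's sort value, e.g. "c10"
#   page 2: token="c10"  -> the query resumes with search_after=["c10"]
# No next_token is returned once a page comes back shorter than the requested limit.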
+ """ + try: + item = await self.client.get( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} does not exist in Collection {collection_id}" + ) + return item["_source"] + + @staticmethod + def make_search(): + """Database logic to create a Search instance.""" + return Search().sort(*DEFAULT_SORT) + + @staticmethod + def apply_ids_filter(search: Search, item_ids: List[str]): + """Database logic to search a list of STAC item ids.""" + return search.filter("terms", id=item_ids) + + @staticmethod + def apply_collections_filter(search: Search, collection_ids: List[str]): + """Database logic to search a list of STAC collection ids.""" + return search.filter("terms", collection=collection_ids) + + @staticmethod + def apply_datetime_filter(search: Search, datetime_search): + """Apply a filter to search based on datetime field. + + Args: + search (Search): The search object to filter. + datetime_search (dict): The datetime filter criteria. + + Returns: + Search: The filtered search object. + """ + if "eq" in datetime_search: + search = search.filter( + "term", **{"properties__datetime": datetime_search["eq"]} + ) + else: + search = search.filter( + "range", properties__datetime={"lte": datetime_search["lte"]} + ) + search = search.filter( + "range", properties__datetime={"gte": datetime_search["gte"]} + ) + return search + + @staticmethod + def apply_bbox_filter(search: Search, bbox: List): + """Filter search results based on bounding box. + + Args: + search (Search): The search object to apply the filter to. + bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. + + Returns: + search (Search): The search object with the bounding box filter applied. + + Notes: + The bounding box is transformed into a polygon using the `bbox2polygon` function and + a geo_shape filter is added to the search object, set to intersect with the specified polygon. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": "polygon", + "coordinates": bbox2polygon(*bbox), + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_intersects_filter( + search: Search, + intersects: Geometry, + ): + """Filter search results based on intersecting geometry. + + Args: + search (Search): The search object to apply the filter to. + intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. + + Returns: + search (Search): The search object with the intersecting geometry filter applied. + + Notes: + A geo_shape filter is added to the search object, set to intersect with the specified geometry. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": intersects.type.lower(), + "coordinates": intersects.coordinates, + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_stacql_filter(search: Search, op: str, field: str, value: float): + """Filter search results based on a comparison between a field and a value. + + Args: + search (Search): The search object to apply the filter to. + op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), + 'lt' (less than), or 'lte' (less than or equal). + field (str): The field to perform the comparison on. + value (float): The value to compare the field against. 
+ + Returns: + search (Search): The search object with the specified filter applied. + """ + if op != "eq": + key_filter = {field: {f"{op}": value}} + search = search.filter(Q("range", **key_filter)) + else: + search = search.filter("term", **{field: value}) + + return search + + @staticmethod + def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): + """Database logic to perform query for search endpoint.""" + if _filter is not None: + search = search.filter(filter.Clause.parse_obj(_filter).to_es()) + return search + + @staticmethod + def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: + """Database logic to sort search instance.""" + if sortby: + return {s.field: {"order": s.direction} for s in sortby} + else: + return None + + async def execute_search( + self, + search: Search, + limit: int, + token: Optional[str], + sort: Optional[Dict[str, Dict[str, str]]], + collection_ids: Optional[List[str]], + ignore_unavailable: bool = True, + ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: + """Execute a search query with limit and other optional parameters. + + Args: + search (Search): The search query to be executed. + limit (int): The maximum number of results to be returned. + token (Optional[str]): The token used to return the next set of results. + sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. + collection_ids (Optional[List[str]]): The collection ids to search. + ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. + + Returns: + Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: + - An iterable of search results, where each result is a dictionary with keys and values representing the + fields and values of each document. + - The total number of results (if the count could be computed), or None if the count could not be + computed. + - The token to be used to retrieve the next set of results, or None if there are no more results. + + Raises: + NotFoundError: If the collections specified in `collection_ids` do not exist. 
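# Illustrative outputs of the two helpers above (field names are hypothetical):
#   apply_stacql_filter(search, "gte", "properties.eo:cloud_cover", 10)
#       adds a range query: {"range": {"properties.eo:cloud_cover": {"gte": 10}}}
#   populate_sort([<sortby entry with field="properties.datetime", direction="desc">])
#       -> {"properties.datetime": {"order": "desc"}}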
+ """ + search_after = None + if token: + search_after = urlsafe_b64decode(token.encode()).decode().split(",") + + query = search.query.to_dict() if search.query else None + + index_param = indices(collection_ids) + + search_task = asyncio.create_task( + self.client.search( + index=index_param, + ignore_unavailable=ignore_unavailable, + query=query, + sort=sort or DEFAULT_SORT, + search_after=search_after, + size=limit, + ) + ) + + count_task = asyncio.create_task( + self.client.count( + index=index_param, + ignore_unavailable=ignore_unavailable, + body=search.to_dict(count=True), + ) + ) + + try: + es_response = await search_task + except exceptions.NotFoundError: + raise NotFoundError(f"Collections '{collection_ids}' do not exist") + + hits = es_response["hits"]["hits"] + items = (hit["_source"] for hit in hits) + + next_token = None + if hits and (sort_array := hits[-1].get("sort")): + next_token = urlsafe_b64encode( + ",".join([str(x) for x in sort_array]).encode() + ).decode() + + # (1) count should not block returning results, so don't wait for it to be done + # (2) don't cancel the task so that it will populate the ES cache for subsequent counts + maybe_count = None + if count_task.done(): + try: + maybe_count = count_task.result().get("count") + except Exception as e: + logger.error(f"Count task failed: {e}") + + return items, maybe_count, next_token + + """ TRANSACTION LOGIC """ + + async def check_collection_exists(self, collection_id: str): + """Database logic to check if a collection exists.""" + if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + async def prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Preps an item for insertion into the database. + + Args: + item (Item): The item to be prepped for insertion. + base_url (str): The base URL used to create the item's self URL. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The prepped item. + + Raises: + ConflictError: If the item already exists in the database. + + """ + await self.check_collection_exists(collection_id=item["collection"]) + + if not exist_ok and await self.client.exists( + index=index_by_collection_id(item["collection"]), + id=mk_item_id(item["id"], item["collection"]), + ): + raise ConflictError( + f"Item {item['id']} in collection {item['collection']} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + def sync_prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Prepare an item for insertion into the database. + + This method performs pre-insertion preparation on the given `item`, + such as checking if the collection the item belongs to exists, + and optionally verifying that an item with the same ID does not already exist in the database. + + Args: + item (Item): The item to be inserted into the database. + base_url (str): The base URL used for constructing URLs for the item. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The item after preparation is done. + + Raises: + NotFoundError: If the collection that the item belongs to does not exist in the database. + ConflictError: If an item with the same ID already exists in the collection. 
+ """ + item_id = item["id"] + collection_id = item["collection"] + if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + if not exist_ok and self.sync_client.exists( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ): + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + async def create_item(self, item: Item, refresh: bool = False): + """Database logic for creating one item. + + Args: + item (Item): The item to be created. + refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. + + Raises: + ConflictError: If the item already exists in the database. + + Returns: + None + """ + # todo: check if collection exists, but cache + item_id = item["id"] + collection_id = item["collection"] + es_resp = await self.client.index( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + document=item, + refresh=refresh, + ) + + if (meta := es_resp.get("meta")) and meta.get("status") == 409: + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + async def delete_item( + self, item_id: str, collection_id: str, refresh: bool = False + ): + """Delete a single item from the database. + + Args: + item_id (str): The id of the Item to be deleted. + collection_id (str): The id of the Collection that the Item belongs to. + refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. + + Raises: + NotFoundError: If the Item does not exist in the database. + """ + try: + await self.client.delete( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + refresh=refresh, + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} in collection {collection_id} not found" + ) + + async def create_collection(self, collection: Collection, refresh: bool = False): + """Create a single collection in the database. + + Args: + collection (Collection): The Collection object to be created. + refresh (bool, optional): Whether to refresh the index after the creation. Default is False. + + Raises: + ConflictError: If a Collection with the same id already exists in the database. + + Notes: + A new index is created for the items in the Collection using the `create_item_index` function. + """ + collection_id = collection["id"] + + if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise ConflictError(f"Collection {collection_id} already exists") + + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + document=collection, + refresh=refresh, + ) + + await create_item_index(collection_id) + + async def find_collection(self, collection_id: str) -> Collection: + """Find and return a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be found. + + Returns: + Collection: The found collection, represented as a `Collection` object. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function searches for a collection in the database using the specified `collection_id` and returns the found + collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised. 
+ """ + try: + collection = await self.client.get( + index=COLLECTIONS_INDEX, id=collection_id + ) + except exceptions.NotFoundError: + raise NotFoundError(f"Collection {collection_id} not found") + + return collection["_source"] + + async def update_collection( + self, collection_id: str, collection: Collection, refresh: bool = False + ): + """Update a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be updated. + collection (Collection): The Collection object to be used for the update. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not + found in the database. + + Notes: + This function updates the collection in the database using the specified + `collection_id` and with the collection specified in the `Collection` object. + If the collection is not found, a `NotFoundError` is raised. + """ + await self.find_collection(collection_id=collection_id) + + if collection_id != collection["id"]: + await self.create_collection(collection, refresh=refresh) + + await self.client.reindex( + body={ + "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, + "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, + "script": { + "lang": "painless", + "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", + }, + }, + wait_for_completion=True, + refresh=refresh, + ) + + await self.delete_collection(collection_id) + + else: + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + document=collection, + refresh=refresh, + ) + + async def delete_collection(self, collection_id: str, refresh: bool = False): + """Delete a collection from the database. + + Parameters: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be deleted. + refresh (bool): Whether to refresh the index after the deletion (default: False). + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function first verifies that the collection with the specified `collection_id` exists in the database, and then + deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this + function also calls `delete_item_index` to delete the index for the items in the collection. + """ + await self.find_collection(collection_id=collection_id) + await self.client.delete( + index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh + ) + await delete_item_index(collection_id) + + async def bulk_async( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database asynchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The + `mk_actions` function is called to generate a list of actions for the bulk insert. 
If `refresh` is set to True, the + index is refreshed after the bulk insert. The function does not return any value. + """ + await helpers.async_bulk( + self.client, + mk_actions(collection_id, processed_items), + refresh=refresh, + raise_on_error=False, + ) + + def bulk_sync( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database synchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed synchronously and blocking, meaning that the function does not return until the insert has + completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to + True, the index is refreshed after the bulk insert. The function does not return any value. + """ + helpers.bulk( + self.sync_client, + mk_actions(collection_id, processed_items), + refresh=refresh, + raise_on_error=False, + ) + + # DANGER + async def delete_items(self) -> None: + """Danger. this is only for tests.""" + await self.client.delete_by_query( + index=ITEM_INDICES, + body={"query": {"match_all": {}}}, + wait_for_completion=True, + ) + + # DANGER + async def delete_collections(self) -> None: + """Danger. this is only for tests.""" + await self.client.delete_by_query( + index=COLLECTIONS_INDEX, + body={"query": {"match_all": {}}}, + wait_for_completion=True, + ) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py new file mode 100644 index 00000000..6b648e2b --- /dev/null +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py @@ -0,0 +1,2 @@ +"""library version.""" +__version__ = "2.0.0" diff --git a/stac_fastapi/mongo/README.md b/stac_fastapi/mongo/README.md deleted file mode 100644 index ef8b06d4..00000000 --- a/stac_fastapi/mongo/README.md +++ /dev/null @@ -1 +0,0 @@ -# stac-fastapi-mongo \ No newline at end of file diff --git a/stac_fastapi/mongo/setup.cfg b/stac_fastapi/mongo/setup.cfg deleted file mode 100644 index a234c94b..00000000 --- a/stac_fastapi/mongo/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -version = attr: stac_fastapi.mongo.version.__version__ diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/config.py b/stac_fastapi/mongo/stac_fastapi/mongo/config.py deleted file mode 100644 index 8dbc4f58..00000000 --- a/stac_fastapi/mongo/stac_fastapi/mongo/config.py +++ /dev/null @@ -1,76 +0,0 @@ -"""API configuration.""" -import os -import ssl -from typing import Any, Dict, Set - -from motor.motor_asyncio import AsyncIOMotorClient -from pymongo import MongoClient - -from stac_fastapi.types.config import ApiSettings - - -def _mongodb_config() -> Dict[str, Any]: - # MongoDB connection URI and client options - user = os.getenv("MONGO_USER") - password = os.getenv("MONGO_PASS") - host = os.getenv("MONGO_HOST", "localhost") - port = os.getenv("MONGO_PORT", "27017") - # database = os.getenv("MONGO_DB", "stac") # Default to 'stac' database - use_ssl = os.getenv("MONGO_USE_SSL", "false").lower() == "true" - verify_certs = os.getenv("MONGO_VERIFY_CERTS", 
"true").lower() == "true" - - ssl_cert_reqs = ssl.CERT_REQUIRED if verify_certs else ssl.CERT_NONE - - # Adjust URI based on whether using SRV record or not - # if "mongodb+srv" in os.getenv("MONGO_CONNECTION_STRING", ""): - # uri = f"mongodb+srv://{user}:{password}@{host}/{database}?retryWrites=true&w=majority" - # else: - # uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?retryWrites=true" - - if "mongodb+srv" in os.getenv("MONGO_CONNECTION_STRING", ""): - uri = f"mongodb+srv://{user}:{password}@{host}?retryWrites=true&w=majority" - else: - uri = f"mongodb://{user}:{password}@{host}:{port}?retryWrites=true" - - if use_ssl: - uri += "&ssl=true&ssl_cert_reqs={}".format(ssl_cert_reqs) - - # Initialize the configuration dictionary - config = { - "uri": uri, - # "database": database, - # MongoDB does not use headers, but added here for structure alignment - "headers": {}, # Placeholder for consistency - } - - return config - - -_forbidden_fields: Set[str] = {"type"} - - -class MongoDBSettings(ApiSettings): - """MongoDB specific API settings.""" - - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self) -> MongoClient: - """Create a synchronous MongoDB client.""" - config = _mongodb_config() - return MongoClient(config["uri"]) - - -class AsyncMongoDBSettings(ApiSettings): - """Async MongoDB specific API settings.""" - - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self) -> AsyncIOMotorClient: - """Create an asynchronous MongoDB client.""" - config = _mongodb_config() - print(config) - return AsyncIOMotorClient(config["uri"]) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py deleted file mode 100644 index a6242784..00000000 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ /dev/null @@ -1,995 +0,0 @@ -"""Database logic.""" -import base64 -import logging -import os -import re -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union - -import attr -from bson import ObjectId -from pymongo.errors import BulkWriteError, PyMongoError - -from stac_fastapi.core import serializers -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.utilities import bbox2polygon -from stac_fastapi.extensions.core import SortExtension -from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSearchSettings -from stac_fastapi.mongo.config import MongoDBSettings as SyncSearchSettings -from stac_fastapi.mongo.utilities import decode_token, encode_token, serialize_doc -from stac_fastapi.types.errors import ConflictError, NotFoundError -from stac_fastapi.types.stac import Collection, Item - -logger = logging.getLogger(__name__) - -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX = os.getenv("STAC_ITEMS_INDEX", "items") -DATABASE = os.getenv("MONGO_DB", "admin") - - -async def create_collection_index(): - """ - Ensure indexes for the collections collection in MongoDB using the asynchronous client. 
- - Returns: - None - """ - client = AsyncSearchSettings().create_client - if client: - try: - db = client[DATABASE] - await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) - print(f"Index created successfully for collection: {COLLECTIONS_INDEX}.") - except Exception as e: - # Handle exceptions, which could be due to existing index conflicts, etc. - print( - f"An error occurred while creating indexe for collection {COLLECTIONS_INDEX}: {e}" - ) - finally: - print(f"Closing client: {client}") - client.close() - else: - print("Failed to create MongoDB client.") - - -async def create_item_index(): - """ - Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. - - Args: - collection_id (str): Collection identifier used to derive the MongoDB collection name for items. - - Returns: - None - """ - client = AsyncSearchSettings().create_client - - if client: - db = client[DATABASE] - collection = db[ITEMS_INDEX] - try: - await collection.create_index([("properties.datetime", -1)]) - await collection.create_index([("id", 1)], unique=True) - await collection.create_index([("geometry", "2dsphere")]) - print(f"Indexes created successfully for collection: {ITEMS_INDEX}.") - except Exception as e: - # Handle exceptions, which could be due to existing index conflicts, etc. - print( - f"An error occurred while creating indexes for collection {ITEMS_INDEX}: {e}" - ) - finally: - client.close() - - -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - -class MongoSearchAdapter: - """ - Adapter class to manage search filters and sorting for MongoDB queries. - - Attributes: - filters (list): A list of filter conditions to be applied to the MongoDB query. - sort (list): A list of tuples specifying field names and their corresponding sort directions - for MongoDB sorting. - - Methods: - add_filter(filter_condition): Adds a new filter condition to the filters list. - set_sort(sort_conditions): Sets the sorting criteria based on a dictionary of field names - and sort directions. - """ - - def __init__(self): - """ - Initialize the MongoSearchAdapter with default sorting criteria. - - The default sort order is by 'properties.datetime' in descending order, followed by 'id' in descending order, - and finally by 'collection' in descending order. This matches typical STAC item queries where the most recent items - are retrieved first. - """ - self.filters = [] - # self.sort = [("properties.datetime", -1), ("id", -1), ("collection", -1)] - - def add_filter(self, filter_condition): - """ - Add a filter condition to the query. - - This method appends a new filter condition to the list of existing filters. Each filter condition - should be a dictionary representing a MongoDB query condition. - - Args: - filter_condition (dict): A dictionary representing a MongoDB filter condition. 
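# Illustrative use of MongoSearchAdapter above (hypothetical values): each apply_* helper calls
# add_filter(), and execute_search() later combines the accumulated conditions with "$and":
#   search = MongoSearchAdapter()
#   search.add_filter({"collection": {"$in": ["c1"]}})
#   search.add_filter({"properties.datetime": {"$gte": "2020-01-01T00:00:00Z"}})
#   # resulting query: {"$and": [<condition 1>, <condition 2>]}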
- """ - self.filters.append(filter_condition) - - -@attr.s -class DatabaseLogic: - """Database logic.""" - - client = AsyncSearchSettings().create_client - sync_client = SyncSearchSettings().create_client - - item_serializer: Type[serializers.ItemSerializer] = attr.ib( - default=serializers.ItemSerializer - ) - collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( - default=serializers.CollectionSerializer - ) - - """CORE LOGIC""" - - async def get_all_collections( - self, token: Optional[str], limit: int, base_url: str - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - """ - Retrieve a list of all collections from the MongoDB database, supporting pagination. - - Args: - token (Optional[str]): The pagination token, which is the ID of the last collection seen. - limit (int): The maximum number of results to return. - base_url (str): The base URL for constructing fully qualified links. - - Returns: - Tuple[List[Dict[str, Any]], Optional[str]]: A tuple containing a list of collections - and an optional next token for pagination. - """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - query: Dict[str, Any] = {} - if token: - last_seen_id = decode_token(token) - print(f"Decoded token (Last seen ID): {last_seen_id}") - query = {"id": {"$gt": last_seen_id}} - - print(f"Query: {query}, Limit: {limit}") - cursor = collections_collection.find(query).sort("id", 1).limit(limit) - collections = await cursor.to_list(length=limit) - - next_token = None - if len(collections) == limit: - # Assumes collections are sorted by 'id' in ascending order. - next_token = encode_token(collections[-1]["id"]) - print(f"Next token (for next page): {next_token}") - - serialized_collections = [ - self.collection_serializer.db_to_stac(serialize_doc(collection), base_url) - for collection in collections - ] - - print( - f"Serialized Collections: {serialized_collections}, Next Token: {next_token}" - ) - return serialized_collections, next_token - - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: - """Retrieve a single item from the database. - - Args: - collection_id (str): The id of the Collection that the Item belongs to. - item_id (str): The id of the Item. - - Returns: - item (Dict): A dictionary containing the source data for the Item. - - Raises: - NotFoundError: If the specified Item does not exist in the Collection. - """ - db = self.client[DATABASE] - collection = db[ITEMS_INDEX] - - # Adjusted to include collection_id in the query to fetch items within a specific collection - item = await collection.find_one({"id": item_id, "collection": collection_id}) - if not item: - # If the item is not found, raise NotFoundError - raise NotFoundError( - f"Item {item_id} in collection {collection_id} does not exist." 
- ) - - # Serialize the MongoDB document to make it JSON serializable - serialized_item = serialize_doc(item) - return serialized_item - - @staticmethod - def make_search(): - """Database logic to create a Search instance.""" - return MongoSearchAdapter() - - @staticmethod - def apply_ids_filter(search: MongoSearchAdapter, item_ids: List[str]): - """Database logic to search a list of STAC item ids.""" - search.add_filter({"id": {"$in": item_ids}}) - return search - - @staticmethod - def apply_collections_filter(search: MongoSearchAdapter, collection_ids: List[str]): - """Database logic to search a list of STAC collection ids.""" - search.add_filter({"collection": {"$in": collection_ids}}) - return search - - @staticmethod - def apply_datetime_filter(search: MongoSearchAdapter, datetime_search): - """Apply a filter to search based on datetime field. - - Args: - search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. - - Returns: - Search: The filtered search object. - """ - if "eq" in datetime_search: - search.add_filter({"properties.datetime": datetime_search["eq"]}) - else: - if "gte" in datetime_search: - search.add_filter( - {"properties.datetime": {"$gte": datetime_search["gte"]}} - ) - if "lte" in datetime_search: - search.add_filter( - {"properties.datetime": {"$lte": datetime_search["lte"]}} - ) - return search - - @staticmethod - def apply_bbox_filter(search: MongoSearchAdapter, bbox: List): - """Filter search results based on bounding box. - - Args: - search (Search): The search object to apply the filter to. - bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. - - Returns: - search (Search): The search object with the bounding box filter applied. - - Notes: - The bounding box is transformed into a polygon using the `bbox2polygon` function and - a geo_shape filter is added to the search object, set to intersect with the specified polygon. - """ - geojson_polygon = {"type": "Polygon", "coordinates": bbox2polygon(*bbox)} - search.add_filter( - { - "geometry": { - "$geoIntersects": { - "$geometry": geojson_polygon, - } - } - } - ) - return search - - @staticmethod - def apply_intersects_filter( - search: MongoSearchAdapter, - intersects: Geometry, - ): - """Filter search results based on intersecting geometry. - - Args: - search (Search): The search object to apply the filter to. - intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. - - Returns: - search (Search): The search object with the intersecting geometry filter applied. - - Notes: - A geo_shape filter is added to the search object, set to intersect with the specified geometry. - """ - geometry_dict = {"type": intersects.type, "coordinates": intersects.coordinates} - search.add_filter( - {"geometry": {"$geoIntersects": {"$geometry": geometry_dict}}} - ) - return search - - @staticmethod - def apply_stacql_filter( - search: MongoSearchAdapter, op: str, field: str, value: float - ): - """Filter search results based on a comparison between a field and a value. - - Args: - search (Search): The search object to apply the filter to. - op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), - 'lt' (less than), or 'lte' (less than or equal). - field (str): The field to perform the comparison on. - value (float): The value to compare the field against. - - Returns: - search (Search): The search object with the specified filter applied. 
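# Illustrative MongoDB condition added by apply_bbox_filter() above for bbox [minx, miny, maxx, maxy],
# assuming bbox2polygon() returns the closed ring of the box:
#   {"geometry": {"$geoIntersects": {"$geometry": {
#       "type": "Polygon",
#       "coordinates": [[[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy], [minx, miny]]]}}}}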
- """ - # MongoDB comparison operators mapping - op_mapping = { - "eq": "$eq", - "gt": "$gt", - "gte": "$gte", - "lt": "$lt", - "lte": "$lte", - } - - # Replace double underscores with dots for nested field queries - field = field.replace("__", ".") - - # Construct the MongoDB filter - if op in op_mapping: - mongo_op = op_mapping[op] - filter_condition = {field: {mongo_op: value}} - else: - raise ValueError(f"Unsupported operation '{op}'") - - # Add the constructed filter to the search adapter's filters - search.add_filter(filter_condition) - return search - - @staticmethod - def translate_cql2_to_mongo(cql2_filter: Dict[str, Any]) -> Dict[str, Any]: - """ - Translate a CQL2 filter dictionary to a MongoDB query. - - This function translates a CQL2 JSON filter into a MongoDB query format. It supports - various comparison operators, logical operators, and a special handling for spatial - intersections and the 'in' operator. - - Args: - cql2_filter: A dictionary representing the CQL2 filter. - - Returns: - A MongoDB query as a dictionary. - """ - print("CQL2 filter:", cql2_filter) - op_mapping = { - ">": "$gt", - ">=": "$gte", - "<": "$lt", - "<=": "$lte", - "=": "$eq", - "!=": "$ne", - "like": "$regex", - "in": "$in", - } - - if cql2_filter["op"] in ["and", "or"]: - mongo_op = f"${cql2_filter['op']}" - return { - mongo_op: [ - DatabaseLogic.translate_cql2_to_mongo(arg) - for arg in cql2_filter["args"] - ] - } - - elif cql2_filter["op"] == "not": - translated_condition = DatabaseLogic.translate_cql2_to_mongo( - cql2_filter["args"][0] - ) - return {"$nor": [translated_condition]} - - elif cql2_filter["op"] == "s_intersects": - geometry = cql2_filter["args"][1] - return {"geometry": {"$geoIntersects": {"$geometry": geometry}}} - - elif cql2_filter["op"] == "between": - property_name = cql2_filter["args"][0]["property"] - - # Use the special mapping directly if available, or construct the path appropriately - if property_name in filter.queryables_mapping: - property_path = filter.queryables_mapping[property_name] - elif property_name not in [ - "id", - "collection", - ] and not property_name.startswith("properties."): - property_path = f"properties.{property_name}" - else: - property_path = property_name - - lower_bound = cql2_filter["args"][1] - upper_bound = cql2_filter["args"][2] - return {property_path: {"$gte": lower_bound, "$lte": upper_bound}} - - else: - property_name = cql2_filter["args"][0]["property"] - # Check if the property name is in the special mapping - if property_name in filter.queryables_mapping: - property_path = filter.queryables_mapping[property_name] - elif property_name not in [ - "id", - "collection", - ] and not property_name.startswith("properties."): - property_path = f"properties.{property_name}" - else: - property_path = property_name - - value = cql2_filter["args"][1] - # Attempt to convert numeric string to float or integer - try: - if "." in value: - value = float(value) - else: - value = int(value) - except (ValueError, TypeError): - pass # Keep value as is if conversion is not possible - mongo_op = op_mapping.get(cql2_filter["op"]) - - print("VALUE", type(value)) - - if mongo_op is None: - raise ValueError( - f"Unsupported operation '{cql2_filter['op']}' in CQL2 filter." - ) - - if mongo_op == "$regex": - # Replace SQL LIKE wildcards with regex equivalents, handling escaped characters - regex_pattern = re.sub( - r"(? List[Tuple[str, int]]: - """ - Transform a list of sort criteria into the format expected by MongoDB. 
- - Args: - sortby (List[SortExtension]): A list of SortExtension objects with 'field' - and 'direction' attributes. - - Returns: - List[Tuple[str, int]]: A list of tuples where each tuple is (fieldname, direction), - with direction being 1 for 'asc' and -1 for 'desc'. - Returns an empty list if no sort criteria are provided. - """ - if not sortby: - return [] - - mongo_sort = [] - for sort_extension in sortby: - field = sort_extension.field - # Convert the direction enum to a string, then to MongoDB's expected format - direction = 1 if sort_extension.direction.value == "asc" else -1 - mongo_sort.append((field, direction)) - - return mongo_sort - - async def execute_search( - self, - search: MongoSearchAdapter, - limit: int, - token: Optional[str], - sort: Optional[Dict[str, Dict[str, str]]], - collection_ids: Optional[List[str]], - ignore_unavailable: bool = True, - ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: - """Execute a search query with limit and other optional parameters. - - Args: - search (Search): The search query to be executed. - limit (int): The maximum number of results to be returned. - token (Optional[str]): The token used to return the next set of results. - sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. - collection_ids (Optional[List[str]]): The collection ids to search. - ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. - - Returns: - Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: - - An iterable of search results, where each result is a dictionary with keys and values representing the - fields and values of each document. - - The total number of results (if the count could be computed), or None if the count could not be - computed. - - The token to be used to retrieve the next set of results, or None if there are no more results. - - Raises: - NotFoundError: If the collections specified in `collection_ids` do not exist. - """ - db = self.client[DATABASE] - collection = db[ITEMS_INDEX] - - query = {"$and": search.filters} if search and search.filters else {} - - print("Query: ", query) - if collection_ids: - query["collection"] = {"$in": collection_ids} - - sort_criteria = sort if sort else [("_id", 1)] # Default sort - try: - if token: - last_id = ObjectId(base64.urlsafe_b64decode(token.encode()).decode()) - query["_id"] = {"$gt": last_id} - - cursor = collection.find(query).sort(sort_criteria).limit(limit + 1) - items = await cursor.to_list(length=limit + 1) - - next_token = None - if len(items) > limit: - next_token = base64.urlsafe_b64encode( - str(items[-1]["_id"]).encode() - ).decode() - items = items[:-1] - - maybe_count = None - if not token: - maybe_count = await collection.count_documents(query) - - return items, maybe_count, next_token - except PyMongoError as e: - print(f"Database operation failed: {e}") - raise - - """ TRANSACTION LOGIC """ - - async def check_collection_exists(self, collection_id: str): - """ - Check if a specific STAC collection exists within the MongoDB database. - - This method queries the MongoDB collection specified by COLLECTIONS_INDEX to determine - if a document with the specified collection_id exists. - - Args: - collection_id (str): The ID of the STAC collection to check for existence. - - Raises: - NotFoundError: If the STAC collection specified by `collection_id` does not exist - within the MongoDB collection defined by COLLECTIONS_INDEX. 
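# Illustrative round-trip of the continuation token used by the MongoDB execute_search() above:
# it is the string form of the last returned document's ObjectId, urlsafe-base64 encoded.
import base64
from bson import ObjectId  # ships with pymongo

last_id = ObjectId("65f0123456789abcdef01234")  # hypothetical document _id
token = base64.urlsafe_b64encode(str(last_id).encode()).decode()
assert ObjectId(base64.urlsafe_b64decode(token.encode()).decode()) == last_id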
- """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - # Query the collections collection to see if a document with the specified collection_id exists - collection_exists = await collections_collection.find_one({"id": collection_id}) - if not collection_exists: - raise NotFoundError(f"Collection {collection_id} does not exist") - - async def create_item(self, item: Item, refresh: bool = False): - """ - Asynchronously inserts a STAC item into MongoDB, ensuring the item does not already exist. - - Args: - item (Item): The STAC item to be created. - refresh (bool, optional): Not used for MongoDB, kept for compatibility with Elasticsearch interface. - - Raises: - ConflictError: If the item with the same ID already exists within the collection. - NotFoundError: If the specified collection does not exist in MongoDB. - """ - db = self.client[DATABASE] - items_collection = db[ITEMS_INDEX] - collections_collection = db[COLLECTIONS_INDEX] - - collection_exists = await collections_collection.count_documents( - {"id": item["collection"]}, limit=1 - ) - if not collection_exists: - raise NotFoundError(f"Collection {item['collection']} does not exist") - - new_item = item.copy() - new_item["_id"] = item.get("_id", ObjectId()) - - existing_item = await items_collection.find_one({"_id": new_item["_id"]}) - if existing_item: - raise ConflictError(f"Item with _id {item['_id']} already exists") - - await items_collection.insert_one(new_item) - item = serialize_doc(item) - - async def prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Preps an item for insertion into the MongoDB database. - - Args: - item (Item): The item to be prepped for insertion. - base_url (str): The base URL used to create the item's self URL. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The prepped item. - - Raises: - ConflictError: If the item already exists in the database and exist_ok is False. - NotFoundError: If the collection specified by the item does not exist. - """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - items_collection = db[ITEMS_INDEX] - - # Check if the collection exists - collection_exists = await collections_collection.count_documents( - {"id": item["collection"]}, limit=1 - ) - if not collection_exists: - raise NotFoundError(f"Collection {item['collection']} does not exist") - - # Transform item using item_serializer for MongoDB compatibility - mongo_item = self.item_serializer.stac_to_db(item, base_url) - - if not exist_ok: - existing_item = await items_collection.find_one({"id": mongo_item["id"]}) - if existing_item: - raise ConflictError( - f"Item {mongo_item['id']} in collection {mongo_item['collection']} already exists" - ) - - # Return the transformed item ready for insertion - return serialize_doc(mongo_item) - - def sync_prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Preps an item for insertion into the MongoDB database in a synchronous manner. - - Args: - item (Item): The item to be prepped for insertion. - base_url (str): The base URL used to create the item's self URL. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The prepped item. - - Raises: - ConflictError: If the item already exists in the database and exist_ok is False. - NotFoundError: If the collection specified by the item does not exist. 
- """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - items_collection = db[ITEMS_INDEX] - - # Check if the collection exists - collection_exists = collections_collection.count_documents( - {"id": item["collection"]}, limit=1 - ) - if not collection_exists: - raise NotFoundError(f"Collection {item['collection']} does not exist") - - # Transform item using item_serializer for MongoDB compatibility - mongo_item = self.item_serializer.stac_to_db(item, base_url) - print("mongo item id: ", mongo_item["id"]) - if not exist_ok: - existing_item = items_collection.find_one({"id": mongo_item["id"]}) - if existing_item: - raise ConflictError( - f"Item {mongo_item['id']} in collection {mongo_item['collection']} already exists" - ) - - # Return the transformed item ready for insertion - return serialize_doc(mongo_item) - - async def delete_item( - self, item_id: str, collection_id: str, refresh: bool = False - ): - """ - Delete a single item from the database. - - Args: - item_id (str): The id of the Item to be deleted. - collection_id (str): The id of the Collection that the Item belongs to. - refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. - - Raises: - NotFoundError: If the Item does not exist in the database. - """ - db = self.client[DATABASE] - items_collection = db[ITEMS_INDEX] - - try: - # Attempt to delete the item from the collection - result = await items_collection.delete_one({"id": item_id}) - if result.deleted_count == 0: - # If no items were deleted, it means the item did not exist - raise NotFoundError( - f"Item {item_id} in collection {collection_id} not found" - ) - except PyMongoError as e: - # Catch any MongoDB error and re-raise as NotFoundError for consistency with the original function's behavior - raise NotFoundError( - f"Error deleting item {item_id} in collection {collection_id}: {e}" - ) - - async def create_collection(self, collection: Collection, refresh: bool = False): - """Create a single collection document in the database. - - Args: - collection (Collection): The Collection object to be created. - refresh (bool, optional): Whether to refresh the index after the creation. Default is False. - - Raises: - ConflictError: If a Collection with the same id already exists in the database. - """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - # Check if the collection already exists - existing_collection = await collections_collection.find_one( - {"id": collection["id"]} - ) - if existing_collection: - raise ConflictError(f"Collection {collection['id']} already exists") - - try: - # Insert the new collection document into the collections collection - await collections_collection.insert_one(collection) - except PyMongoError as e: - # Catch any MongoDB error and raise an appropriate error - print(f"Failed to create collection {collection['id']}: {e}") - raise ConflictError(f"Failed to create collection {collection['id']}: {e}") - - collection = serialize_doc(collection) - - async def find_collection(self, collection_id: str) -> dict: - """ - Find and return a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be found. - - Returns: - dict: The found collection, represented as a dictionary. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. 
- """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - try: - collection = await collections_collection.find_one({"id": collection_id}) - if not collection: - raise NotFoundError(f"Collection {collection_id} not found") - serialized_collection = serialize_doc(collection) - return serialized_collection - except PyMongoError as e: - # This is a general catch-all for MongoDB errors; adjust as needed for more specific handling - print(f"Failed to find collection {collection_id}: {e}") - raise NotFoundError(f"Collection {collection_id} not found") - - async def update_collection( - self, collection_id: str, collection: Collection, refresh: bool = False - ): - """ - Update a collection in the MongoDB database. - - Args: - collection_id (str): The ID of the collection to be updated. - collection (Collection): The new collection data to update. - refresh (bool): Not applicable for MongoDB, kept for compatibility. - - Raises: - NotFoundError: If the collection with the specified ID does not exist. - ConflictError: If attempting to change the collection ID to one that already exists. - - Note: - This function handles both updating a collection's metadata and changing its ID. - It does not directly modify the `_id` field, which is immutable in MongoDB. - When changing a collection's ID, it creates a new document with the new ID and deletes the old document. - """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - # Ensure the existing collection exists - existing_collection = await self.find_collection(collection_id) - if not existing_collection: - raise NotFoundError(f"Collection {collection_id} not found") - - # Handle changing collection ID - if collection_id != collection["id"]: - new_id_exists = await collections_collection.find_one( - {"id": collection["id"]} - ) - if new_id_exists: - raise ConflictError( - f"Collection with ID {collection['id']} already exists" - ) - - items_collection = db[ITEMS_INDEX] - # Update only items related to the old collection ID to the new collection ID - await items_collection.update_many( - {"collection": collection_id}, - {"$set": {"collection": collection["id"]}}, - ) - - # Insert the new collection and delete the old one - await collections_collection.insert_one(collection) - await collections_collection.delete_one({"id": collection_id}) - else: - # Update the existing collection with new data, ensuring not to attempt to update `_id` - await collections_collection.update_one( - {"id": collection_id}, - {"$set": {k: v for k, v in collection.items() if k != "_id"}}, - ) - - async def delete_collection(self, collection_id: str): - """ - Delete a collection from the MongoDB database and all items associated with it. - - This function first attempts to delete the specified collection from the database. - If the collection exists and is successfully deleted, it then proceeds to delete - all items that are associated with this collection. If the collection does not exist, - a NotFoundError is raised to indicate the collection cannot be found in the database. - - Args: - collection_id (str): The ID of the collection to be deleted. - - Raises: - NotFoundError: If the collection with the specified ID does not exist in the database. - - This ensures that when a collection is deleted, all of its items are also cleaned up from the database, - maintaining data integrity and avoiding orphaned items without a parent collection. 
- """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - items_collection = db[ITEMS_INDEX] - - # Attempt to delete the collection document - collection_result = await collections_collection.delete_one( - {"id": collection_id} - ) - if collection_result.deleted_count == 0: - # Collection not found, raise an error - raise NotFoundError(f"Collection {collection_id} not found") - - # Successfully found and deleted the collection, now delete its items - await items_collection.delete_many({"collection": collection_id}) - - async def bulk_async( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database asynchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The - `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the - index is refreshed after the bulk insert. The function does not return any value. - """ - db = self.client[DATABASE] - items_collection = db[ITEMS_INDEX] - - # Prepare the documents for insertion - documents = [item.dict(by_alias=True) for item in processed_items] - - try: - await items_collection.insert_many(documents, ordered=False) - except BulkWriteError as e: - # Handle bulk write errors, e.g., due to duplicate keys - raise ConflictError(f"Bulk insert operation failed: {e.details}") - - def bulk_sync( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database synchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed synchronously and blocking, meaning that the function does not return until the insert has - completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to - True, the index is refreshed after the bulk insert. The function does not return any value. - """ - db = self.sync_client[DATABASE] - items_collection = db[ITEMS_INDEX] - - # Prepare the documents for insertion - documents = [item.dict(by_alias=True) for item in processed_items] - - try: - items_collection.insert_many(documents, ordered=False) - except BulkWriteError as e: - # Handle bulk write errors, e.g., due to duplicate keys - raise ConflictError(f"Bulk insert operation failed: {e.details}") - - async def delete_items(self) -> None: - """ - Danger. this is only for tests. - - Deletes all items from the 'items' collection in MongoDB. 
- """ - db = self.client[DATABASE] - items_collection = db[ITEMS_INDEX] - - try: - await items_collection.delete_many({}) - print("All items have been deleted.") - except Exception as e: - print(f"Error deleting items: {e}") - - async def delete_collections(self) -> None: - """ - Danger. this is only for tests. - - Deletes all collections from the 'collections' collection in MongoDB. - """ - db = self.client[DATABASE] - collections_collection = db[COLLECTIONS_INDEX] - - try: - await collections_collection.delete_many({}) - print("All collections have been deleted.") - except Exception as e: - print(f"Error deleting collections: {e}") diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py deleted file mode 100644 index 1c862440..00000000 --- a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py +++ /dev/null @@ -1,30 +0,0 @@ -"""utilities for stac-fastapi.mongo.""" - -from base64 import urlsafe_b64decode, urlsafe_b64encode - -from bson import ObjectId - - -def serialize_doc(doc): - """Recursively convert ObjectId to string in MongoDB documents.""" - if isinstance(doc, dict): - for k, v in doc.items(): - if isinstance(v, ObjectId): - doc[k] = str(v) # Convert ObjectId to string - elif isinstance(v, dict) or isinstance(v, list): - doc[k] = serialize_doc(v) # Recurse into sub-docs/lists - elif isinstance(doc, list): - doc = [serialize_doc(item) for item in doc] # Apply to each item in a list - return doc - - -def decode_token(encoded_token: str) -> str: - """Decode a base64 string back to its original token value.""" - token_value = urlsafe_b64decode(encoded_token.encode()).decode() - return token_value - - -def encode_token(token_value: str) -> str: - """Encode a token value (e.g., a UUID or cursor) as a base64 string.""" - encoded_token = urlsafe_b64encode(token_value.encode()).decode() - return encoded_token diff --git a/stac_fastapi/opensearch/README.md b/stac_fastapi/opensearch/README.md new file mode 100644 index 00000000..6b1f8391 --- /dev/null +++ b/stac_fastapi/opensearch/README.md @@ -0,0 +1 @@ +# stac-fastapi-opensearch \ No newline at end of file diff --git a/stac_fastapi/opensearch/pytest.ini b/stac_fastapi/opensearch/pytest.ini new file mode 100644 index 00000000..db0353ef --- /dev/null +++ b/stac_fastapi/opensearch/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = tests +addopts = -sv +asyncio_mode = auto \ No newline at end of file diff --git a/stac_fastapi/opensearch/setup.cfg b/stac_fastapi/opensearch/setup.cfg new file mode 100644 index 00000000..9f0be4b7 --- /dev/null +++ b/stac_fastapi/opensearch/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +version = attr: stac_fastapi.opensearch.version.__version__ diff --git a/stac_fastapi/mongo/setup.py b/stac_fastapi/opensearch/setup.py similarity index 80% rename from stac_fastapi/mongo/setup.py rename to stac_fastapi/opensearch/setup.py index 159c3803..224e733b 100644 --- a/stac_fastapi/mongo/setup.py +++ b/stac_fastapi/opensearch/setup.py @@ -1,4 +1,4 @@ -"""stac_fastapi: mongodb module.""" +"""stac_fastapi: opensearch module.""" from setuptools import find_namespace_packages, setup @@ -7,8 +7,8 @@ install_requires = [ "stac-fastapi.core==2.0.0", - "motor==3.3.2", - "pymongo==4.6.1", + "opensearch-py==2.4.2", + "opensearch-py[async]==2.4.2", "uvicorn", "starlette", ] @@ -28,8 +28,8 @@ } setup( - name="stac-fastapi.mongo", - description="Mongodb stac-fastapi backend.", + name="stac-fastapi.opensearch", + description="Opensearch stac-fastapi backend.", long_description=desc, 
long_description_content_type="text/markdown", python_requires=">=3.8", @@ -49,5 +49,7 @@ zip_safe=False, install_requires=install_requires, extras_require=extra_reqs, - entry_points={"console_scripts": ["stac-fastapi-mongo=stac_fastapi.mongo.app:run"]}, + entry_points={ + "console_scripts": ["stac-fastapi-opensearch=stac_fastapi.opensearch.app:run"] + }, ) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/__init__.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py similarity index 100% rename from stac_fastapi/mongo/stac_fastapi/mongo/__init__.py rename to stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py new file mode 100644 index 00000000..ebb2921e --- /dev/null +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -0,0 +1,109 @@ +"""FastAPI application.""" + +from stac_fastapi.api.app import StacApi +from stac_fastapi.api.models import create_get_request_model, create_post_request_model +from stac_fastapi.core.core import ( + BulkTransactionsClient, + CoreClient, + EsAsyncBaseFiltersClient, + TransactionsClient, +) +from stac_fastapi.core.extensions import QueryExtension +from stac_fastapi.core.session import Session +from stac_fastapi.extensions.core import ( + ContextExtension, + FieldsExtension, + FilterExtension, + SortExtension, + TokenPaginationExtension, + TransactionExtension, +) +from stac_fastapi.extensions.third_party import BulkTransactionExtension +from stac_fastapi.opensearch.config import OpensearchSettings +from stac_fastapi.opensearch.database_logic import ( + DatabaseLogic, + create_collection_index, +) + +settings = OpensearchSettings() +session = Session.create_from_settings(settings) + +filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +filter_extension.conformance_classes.append( + "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" +) + +database_logic = DatabaseLogic() + +extensions = [ + TransactionExtension( + client=TransactionsClient( + database=database_logic, session=session, settings=settings + ), + settings=settings, + ), + BulkTransactionExtension( + client=BulkTransactionsClient( + database=database_logic, + session=session, + settings=settings, + ) + ), + FieldsExtension(), + QueryExtension(), + SortExtension(), + TokenPaginationExtension(), + ContextExtension(), + filter_extension, +] + +post_request_model = create_post_request_model(extensions) + +api = StacApi( + settings=settings, + extensions=extensions, + client=CoreClient( + database=database_logic, session=session, post_request_model=post_request_model + ), + search_get_request_model=create_get_request_model(extensions), + search_post_request_model=post_request_model, +) +app = api.app + + +@app.on_event("startup") +async def _startup_event() -> None: + await create_collection_index() + + +def run() -> None: + """Run app from command line using uvicorn if available.""" + try: + import uvicorn + + uvicorn.run( + "stac_fastapi.opensearch.app:app", + host=settings.app_host, + port=settings.app_port, + log_level="info", + reload=settings.reload, + ) + except ImportError: + raise RuntimeError("Uvicorn must be installed in order to use command") + + +if __name__ == "__main__": + run() + + +def create_handler(app): + """Create a handler to use with AWS Lambda if mangum available.""" + try: + from mangum import Mangum + + return Mangum(app) + except ImportError: + return None + + +handler = 
create_handler(app)
diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py
new file mode 100644
index 00000000..cd34e318
--- /dev/null
+++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py
@@ -0,0 +1,81 @@
+"""API configuration."""
+import os
+import ssl
+from typing import Any, Dict, Set
+
+from opensearchpy import AsyncOpenSearch, OpenSearch
+
+from stac_fastapi.types.config import ApiSettings
+
+
+def _es_config() -> Dict[str, Any]:
+    # Determine the scheme (http or https)
+    use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true"
+    scheme = "https" if use_ssl else "http"
+
+    # Configure the hosts parameter with the correct scheme
+    hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"]
+
+    # Initialize the configuration dictionary
+    config = {
+        "hosts": hosts,
+        "headers": {"accept": "application/json", "Content-Type": "application/json"},
+    }
+
+    # Explicitly exclude SSL settings when not using SSL
+    if not use_ssl:
+        return config
+
+    # Include SSL settings if using https
+    config["ssl_version"] = ssl.PROTOCOL_SSLv23  # type: ignore
+    config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false"  # type: ignore
+
+    # Include CA Certificates if verifying certs
+    if config["verify_certs"]:
+        config["ca_certs"] = os.getenv(
+            "CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"
+        )
+
+    # Handle authentication
+    if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")):
+        config["http_auth"] = (u, p)
+
+    if api_key := os.getenv("ES_API_KEY"):
+        if isinstance(config["headers"], dict):
+            headers = {**config["headers"], "x-api-key": api_key}
+
+        else:
+            headers = {"x-api-key": api_key}
+
+        config["headers"] = headers
+
+    return config
+
+
+_forbidden_fields: Set[str] = {"type"}
+
+
+class OpensearchSettings(ApiSettings):
+    """API settings."""
+
+    # Fields which are defined by STAC but not included in the database model
+    forbidden_fields: Set[str] = _forbidden_fields
+    indexed_fields: Set[str] = {"datetime"}
+
+    @property
+    def create_client(self):
+        """Create OpenSearch client."""
+        return OpenSearch(**_es_config())
+
+
+class AsyncOpensearchSettings(ApiSettings):
+    """API settings."""
+
+    # Fields which are defined by STAC but not included in the database model
+    forbidden_fields: Set[str] = _forbidden_fields
+    indexed_fields: Set[str] = {"datetime"}
+
+    @property
+    def create_client(self):
+        """Create async OpenSearch client."""
+        return AsyncOpenSearch(**_es_config())
diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
new file mode 100644
index 00000000..0f4bf9cf
--- /dev/null
+++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
@@ -0,0 +1,922 @@
+"""Database logic."""
+import asyncio
+import logging
+import os
+from base64 import urlsafe_b64decode, urlsafe_b64encode
+from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union
+
+import attr
+from opensearchpy import exceptions, helpers
+from opensearchpy.exceptions import TransportError
+from opensearchpy.helpers.query import Q
+from opensearchpy.helpers.search import Search
+
+from stac_fastapi.core import serializers
+from stac_fastapi.core.extensions import filter
+from stac_fastapi.core.utilities import bbox2polygon
+from stac_fastapi.opensearch.config import (
+    AsyncOpensearchSettings as AsyncSearchSettings,
+)
+from stac_fastapi.opensearch.config import
OpensearchSettings as SyncSearchSettings +from stac_fastapi.types.errors import ConflictError, NotFoundError +from stac_fastapi.types.stac import Collection, Item + +logger = logging.getLogger(__name__) + +NumType = Union[float, int] + +COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") +ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") +ES_INDEX_NAME_UNSUPPORTED_CHARS = { + "\\", + "/", + "*", + "?", + '"', + "<", + ">", + "|", + " ", + ",", + "#", + ":", +} + +ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" + +DEFAULT_SORT = { + "properties.datetime": {"order": "desc"}, + "id": {"order": "desc"}, + "collection": {"order": "desc"}, +} + +ES_ITEMS_SETTINGS = { + "index": { + "sort.field": list(DEFAULT_SORT.keys()), + "sort.order": [v["order"] for v in DEFAULT_SORT.values()], + } +} + +ES_MAPPINGS_DYNAMIC_TEMPLATES = [ + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + { + "descriptions": { + "match_mapping_type": "string", + "match": "description", + "mapping": {"type": "text"}, + } + }, + { + "titles": { + "match_mapping_type": "string", + "match": "title", + "mapping": {"type": "text"}, + } + }, + # Projection Extension https://github.com/stac-extensions/projection + {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, + { + "proj_projjson": { + "match": "proj:projjson", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "proj_centroid": { + "match": "proj:centroid", + "mapping": {"type": "geo_point"}, + } + }, + { + "proj_geometry": { + "match": "proj:geometry", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "no_index_href": { + "match": "href", + "mapping": {"type": "text", "index": False}, + } + }, + # Default all other strings not otherwise specified to keyword + {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, + {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, +] + +ES_ITEMS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "geometry": {"type": "geo_shape"}, + "assets": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "properties": { + "type": "object", + "properties": { + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + "datetime": {"type": "date"}, + "start_datetime": {"type": "date"}, + "end_datetime": {"type": "date"}, + "created": {"type": "date"}, + "updated": {"type": "date"}, + # Satellite Extension https://github.com/stac-extensions/sat + "sat:absolute_orbit": {"type": "integer"}, + "sat:relative_orbit": {"type": "integer"}, + }, + }, + }, +} + +ES_COLLECTIONS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "extent.spatial.bbox": {"type": "long"}, + "extent.temporal.interval": {"type": "date"}, + "providers": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "item_assets": {"type": "object", "enabled": False}, + }, +} + + +def index_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index name. + + Args: + collection_id (str): The collection id to translate into an index name. + + Returns: + str: The index name derived from the collection id. 
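As an editorial aside (not part of the patch), the naming rule above can be illustrated with a small standalone sketch that mirrors `index_by_collection_id`; the sample collection id is invented purely for illustration.

```python
# Characters OpenSearch rejects in index names, as listed in the patch above.
UNSUPPORTED = {"\\", "/", "*", "?", '"', "<", ">", "|", " ", ",", "#", ":"}


def to_index_name(collection_id: str, prefix: str = "items_") -> str:
    # Lowercase the id and strip any unsupported character before prefixing.
    return prefix + "".join(c for c in collection_id.lower() if c not in UNSUPPORTED)


assert to_index_name("Sentinel-2 L2A") == "items_sentinel-2l2a"
```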
+ """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + + +def indices(collection_ids: Optional[List[str]]) -> str: + """ + Get a comma-separated string of index names for a given list of collection ids. + + Args: + collection_ids: A list of collection ids. + + Returns: + A string of comma-separated index names. If `collection_ids` is None, returns the default indices. + """ + if collection_ids is None: + return ITEM_INDICES + else: + return ",".join([index_by_collection_id(c) for c in collection_ids]) + + +async def create_collection_index() -> None: + """ + Create the index for a Collection. + + Returns: + None + + """ + client = AsyncSearchSettings().create_client + + search_body = { + "mappings": ES_COLLECTIONS_MAPPINGS, + "aliases": {COLLECTIONS_INDEX: {}}, + } + + index = f"{COLLECTIONS_INDEX}-000001" + + try: + await client.indices.create(index=index, body=search_body) + except TransportError as e: + if e.status_code == 400: + pass # Ignore 400 status codes + else: + raise e + + await client.close() + + +async def create_item_index(collection_id: str): + """ + Create the index for Items. + + Args: + collection_id (str): Collection identifier. + + Returns: + None + + """ + client = AsyncSearchSettings().create_client + index_name = index_by_collection_id(collection_id) + search_body = { + "aliases": {index_name: {}}, + "mappings": ES_ITEMS_MAPPINGS, + "settings": ES_ITEMS_SETTINGS, + } + + try: + await client.indices.create(index=f"{index_name}-000001", body=search_body) + except TransportError as e: + if e.status_code == 400: + pass # Ignore 400 status codes + else: + raise e + + await client.close() + + +async def delete_item_index(collection_id: str): + """Delete the index for items in a collection. + + Args: + collection_id (str): The ID of the collection whose items index will be deleted. + """ + client = AsyncSearchSettings().create_client + + name = index_by_collection_id(collection_id) + resolved = await client.indices.resolve_index(name=name) + if "aliases" in resolved and resolved["aliases"]: + [alias] = resolved["aliases"] + await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) + await client.indices.delete(index=alias["indices"]) + else: + await client.indices.delete(index=name) + await client.close() + + +def mk_item_id(item_id: str, collection_id: str): + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +def mk_actions(collection_id: str, processed_items: List[Item]): + """Create Elasticsearch bulk actions for a list of processed items. + + Args: + collection_id (str): The identifier for the collection the items belong to. + processed_items (List[Item]): The list of processed items to be bulk indexed. + + Returns: + List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, + each action being a dictionary with the following keys: + - `_index`: the index to store the document in. + - `_id`: the document's identifier. + - `_source`: the source of the document. 
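Purely for illustration, a single element of the action list described above might look like the following for a hypothetical item (ids and datetime are invented):

```python
item = {
    "id": "scene-001",
    "collection": "sentinel-2-l2a",
    "properties": {"datetime": "2024-01-01T00:00:00Z"},
}

action = {
    "_index": "items_sentinel-2-l2a",   # index_by_collection_id(collection_id)
    "_id": "scene-001|sentinel-2-l2a",  # mk_item_id(item["id"], item["collection"])
    "_source": item,                    # the item document itself
}
```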
+ """ + return [ + { + "_index": index_by_collection_id(collection_id), + "_id": mk_item_id(item["id"], item["collection"]), + "_source": item, + } + for item in processed_items + ] + + +# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, +# So create our own Protocol for typing +# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] +class Geometry(Protocol): # noqa + type: str + coordinates: Any + + +@attr.s +class DatabaseLogic: + """Database logic.""" + + client = AsyncSearchSettings().create_client + sync_client = SyncSearchSettings().create_client + + item_serializer: Type[serializers.ItemSerializer] = attr.ib( + default=serializers.ItemSerializer + ) + collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( + default=serializers.CollectionSerializer + ) + + """CORE LOGIC""" + + async def get_all_collections( + self, token: Optional[str], limit: int, base_url: str + ) -> Tuple[List[Dict[str, Any]], Optional[str]]: + """ + Retrieve a list of all collections from Opensearch, supporting pagination. + + Args: + token (Optional[str]): The pagination token. + limit (int): The number of results to return. + + Returns: + A tuple of (collections, next pagination token if any). + """ + search_body = { + "sort": [{"id": {"order": "asc"}}], + "size": limit, + } + + # Only add search_after to the query if token is not None and not empty + if token: + search_after = [token] + search_body["search_after"] = search_after + + response = await self.client.search( + index="collections", + body=search_body, + ) + + hits = response["hits"]["hits"] + collections = [ + self.collection_serializer.db_to_stac( + collection=hit["_source"], base_url=base_url + ) + for hit in hits + ] + + next_token = None + if len(hits) == limit: + # Ensure we have a valid sort value for next_token + next_token_values = hits[-1].get("sort") + if next_token_values: + next_token = next_token_values[0] + + return collections, next_token + + async def get_one_item(self, collection_id: str, item_id: str) -> Dict: + """Retrieve a single item from the database. + + Args: + collection_id (str): The id of the Collection that the Item belongs to. + item_id (str): The id of the Item. + + Returns: + item (Dict): A dictionary containing the source data for the Item. + + Raises: + NotFoundError: If the specified Item does not exist in the Collection. + + Notes: + The Item is retrieved from the Elasticsearch database using the `client.get` method, + with the index for the Collection as the target index and the combined `mk_item_id` as the document id. 
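A minimal sketch of the lookup described above, assuming an opensearch-py `AsyncOpenSearch` client and a hypothetical collection/item pair; the real method wraps this call and re-raises a miss as the stac-fastapi `NotFoundError`.

```python
from opensearchpy import AsyncOpenSearch, exceptions


async def fetch_item(client: AsyncOpenSearch) -> dict:
    # index_by_collection_id("sentinel-2-l2a") -> "items_sentinel-2-l2a"
    # mk_item_id("scene-001", "sentinel-2-l2a") -> "scene-001|sentinel-2-l2a"
    try:
        doc = await client.get(
            index="items_sentinel-2-l2a",
            id="scene-001|sentinel-2-l2a",
        )
    except exceptions.NotFoundError:
        # The backend raises stac_fastapi.types.errors.NotFoundError here.
        raise
    return doc["_source"]
```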
+ """ + try: + item = await self.client.get( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} does not exist in Collection {collection_id}" + ) + return item["_source"] + + @staticmethod + def make_search(): + """Database logic to create a Search instance.""" + return Search().sort(*DEFAULT_SORT) + + @staticmethod + def apply_ids_filter(search: Search, item_ids: List[str]): + """Database logic to search a list of STAC item ids.""" + return search.filter("terms", id=item_ids) + + @staticmethod + def apply_collections_filter(search: Search, collection_ids: List[str]): + """Database logic to search a list of STAC collection ids.""" + return search.filter("terms", collection=collection_ids) + + @staticmethod + def apply_datetime_filter(search: Search, datetime_search): + """Apply a filter to search based on datetime field. + + Args: + search (Search): The search object to filter. + datetime_search (dict): The datetime filter criteria. + + Returns: + Search: The filtered search object. + """ + if "eq" in datetime_search: + search = search.filter( + "term", **{"properties__datetime": datetime_search["eq"]} + ) + else: + search = search.filter( + "range", properties__datetime={"lte": datetime_search["lte"]} + ) + search = search.filter( + "range", properties__datetime={"gte": datetime_search["gte"]} + ) + return search + + @staticmethod + def apply_bbox_filter(search: Search, bbox: List): + """Filter search results based on bounding box. + + Args: + search (Search): The search object to apply the filter to. + bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. + + Returns: + search (Search): The search object with the bounding box filter applied. + + Notes: + The bounding box is transformed into a polygon using the `bbox2polygon` function and + a geo_shape filter is added to the search object, set to intersect with the specified polygon. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": "polygon", + "coordinates": bbox2polygon(*bbox), + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_intersects_filter( + search: Search, + intersects: Geometry, + ): + """Filter search results based on intersecting geometry. + + Args: + search (Search): The search object to apply the filter to. + intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. + + Returns: + search (Search): The search object with the intersecting geometry filter applied. + + Notes: + A geo_shape filter is added to the search object, set to intersect with the specified geometry. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": intersects.type.lower(), + "coordinates": intersects.coordinates, + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_stacql_filter(search: Search, op: str, field: str, value: float): + """Filter search results based on a comparison between a field and a value. + + Args: + search (Search): The search object to apply the filter to. + op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), + 'lt' (less than), or 'lte' (less than or equal). + field (str): The field to perform the comparison on. + value (float): The value to compare the field against. 
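As a rough sketch of the translation above: an `eq` comparison becomes a term filter, while any other operator becomes a range filter. The field and value below are only examples, not fields the backend defines.

```python
# Hypothetical field/value, shown only to illustrate the two query shapes.
eq_filter = {"term": {"properties.view:sun_elevation": 60.0}}
gt_filter = {"range": {"properties.view:sun_elevation": {"gt": 60.0}}}
```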
+ + Returns: + search (Search): The search object with the specified filter applied. + """ + if op != "eq": + key_filter = {field: {f"{op}": value}} + search = search.filter(Q("range", **key_filter)) + else: + search = search.filter("term", **{field: value}) + + return search + + @staticmethod + def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): + """Database logic to perform query for search endpoint.""" + if _filter is not None: + search = search.filter(filter.Clause.parse_obj(_filter).to_es()) + return search + + @staticmethod + def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: + """Database logic to sort search instance.""" + if sortby: + return {s.field: {"order": s.direction} for s in sortby} + else: + return None + + async def execute_search( + self, + search: Search, + limit: int, + token: Optional[str], + sort: Optional[Dict[str, Dict[str, str]]], + collection_ids: Optional[List[str]], + ignore_unavailable: bool = True, + ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: + """Execute a search query with limit and other optional parameters. + + Args: + search (Search): The search query to be executed. + limit (int): The maximum number of results to be returned. + token (Optional[str]): The token used to return the next set of results. + sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. + collection_ids (Optional[List[str]]): The collection ids to search. + ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. + + Returns: + Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: + - An iterable of search results, where each result is a dictionary with keys and values representing the + fields and values of each document. + - The total number of results (if the count could be computed), or None if the count could not be + computed. + - The token to be used to retrieve the next set of results, or None if there are no more results. + + Raises: + NotFoundError: If the collections specified in `collection_ids` do not exist. 
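A short sketch of the pagination token format used here, assuming some example sort values: the last hit's `sort` array is comma-joined and URL-safe base64 encoded on the way out, then decoded back into a `search_after` array on the next request.

```python
from base64 import urlsafe_b64decode, urlsafe_b64encode

# Hypothetical sort values taken from the last hit of a page of results.
sort_values = ["2024-01-01T00:00:00Z", "scene-001", "sentinel-2-l2a"]

# Encode: this is the opaque token handed back to clients.
token = urlsafe_b64encode(",".join(sort_values).encode()).decode()

# Decode on the follow-up request to rebuild the search_after array.
assert urlsafe_b64decode(token.encode()).decode().split(",") == sort_values
```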
+ """ + search_body: Dict[str, Any] = {} + query = search.query.to_dict() if search.query else None + if query: + search_body["query"] = query + if token: + search_after = urlsafe_b64decode(token.encode()).decode().split(",") + search_body["search_after"] = search_after + search_body["sort"] = sort if sort else DEFAULT_SORT + + index_param = indices(collection_ids) + + search_task = asyncio.create_task( + self.client.search( + index=index_param, + ignore_unavailable=ignore_unavailable, + body=search_body, + size=limit, + ) + ) + + count_task = asyncio.create_task( + self.client.count( + index=index_param, + ignore_unavailable=ignore_unavailable, + body=search.to_dict(count=True), + ) + ) + + try: + es_response = await search_task + except exceptions.NotFoundError: + raise NotFoundError(f"Collections '{collection_ids}' do not exist") + + hits = es_response["hits"]["hits"] + items = (hit["_source"] for hit in hits) + + next_token = None + if hits and (sort_array := hits[-1].get("sort")): + next_token = urlsafe_b64encode( + ",".join([str(x) for x in sort_array]).encode() + ).decode() + + # (1) count should not block returning results, so don't wait for it to be done + # (2) don't cancel the task so that it will populate the ES cache for subsequent counts + maybe_count = None + if count_task.done(): + try: + maybe_count = count_task.result().get("count") + except Exception as e: + logger.error(f"Count task failed: {e}") + + return items, maybe_count, next_token + + """ TRANSACTION LOGIC """ + + async def check_collection_exists(self, collection_id: str): + """Database logic to check if a collection exists.""" + if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + async def prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Preps an item for insertion into the database. + + Args: + item (Item): The item to be prepped for insertion. + base_url (str): The base URL used to create the item's self URL. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The prepped item. + + Raises: + ConflictError: If the item already exists in the database. + + """ + await self.check_collection_exists(collection_id=item["collection"]) + + if not exist_ok and await self.client.exists( + index=index_by_collection_id(item["collection"]), + id=mk_item_id(item["id"], item["collection"]), + ): + raise ConflictError( + f"Item {item['id']} in collection {item['collection']} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + def sync_prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Prepare an item for insertion into the database. + + This method performs pre-insertion preparation on the given `item`, + such as checking if the collection the item belongs to exists, + and optionally verifying that an item with the same ID does not already exist in the database. + + Args: + item (Item): The item to be inserted into the database. + base_url (str): The base URL used for constructing URLs for the item. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The item after preparation is done. + + Raises: + NotFoundError: If the collection that the item belongs to does not exist in the database. + ConflictError: If an item with the same ID already exists in the collection. 
+ """ + item_id = item["id"] + collection_id = item["collection"] + if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + if not exist_ok and self.sync_client.exists( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ): + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + async def create_item(self, item: Item, refresh: bool = False): + """Database logic for creating one item. + + Args: + item (Item): The item to be created. + refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. + + Raises: + ConflictError: If the item already exists in the database. + + Returns: + None + """ + # todo: check if collection exists, but cache + item_id = item["id"] + collection_id = item["collection"] + es_resp = await self.client.index( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + body=item, + refresh=refresh, + ) + + if (meta := es_resp.get("meta")) and meta.get("status") == 409: + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + async def delete_item( + self, item_id: str, collection_id: str, refresh: bool = False + ): + """Delete a single item from the database. + + Args: + item_id (str): The id of the Item to be deleted. + collection_id (str): The id of the Collection that the Item belongs to. + refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. + + Raises: + NotFoundError: If the Item does not exist in the database. + """ + try: + await self.client.delete( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + refresh=refresh, + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} in collection {collection_id} not found" + ) + + async def create_collection(self, collection: Collection, refresh: bool = False): + """Create a single collection in the database. + + Args: + collection (Collection): The Collection object to be created. + refresh (bool, optional): Whether to refresh the index after the creation. Default is False. + + Raises: + ConflictError: If a Collection with the same id already exists in the database. + + Notes: + A new index is created for the items in the Collection using the `create_item_index` function. + """ + collection_id = collection["id"] + + if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise ConflictError(f"Collection {collection_id} already exists") + + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + body=collection, + refresh=refresh, + ) + + await create_item_index(collection_id) + + async def find_collection(self, collection_id: str) -> Collection: + """Find and return a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be found. + + Returns: + Collection: The found collection, represented as a `Collection` object. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function searches for a collection in the database using the specified `collection_id` and returns the found + collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised. 
+ """ + try: + collection = await self.client.get( + index=COLLECTIONS_INDEX, id=collection_id + ) + except exceptions.NotFoundError: + raise NotFoundError(f"Collection {collection_id} not found") + + return collection["_source"] + + async def update_collection( + self, collection_id: str, collection: Collection, refresh: bool = False + ): + """Update a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be updated. + collection (Collection): The Collection object to be used for the update. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not + found in the database. + + Notes: + This function updates the collection in the database using the specified + `collection_id` and with the collection specified in the `Collection` object. + If the collection is not found, a `NotFoundError` is raised. + """ + await self.find_collection(collection_id=collection_id) + + if collection_id != collection["id"]: + await self.create_collection(collection, refresh=refresh) + + await self.client.reindex( + body={ + "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, + "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, + "script": { + "lang": "painless", + "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", + }, + }, + wait_for_completion=True, + refresh=refresh, + ) + + await self.delete_collection(collection_id) + + else: + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + body=collection, + refresh=refresh, + ) + + async def delete_collection(self, collection_id: str, refresh: bool = False): + """Delete a collection from the database. + + Parameters: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be deleted. + refresh (bool): Whether to refresh the index after the deletion (default: False). + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function first verifies that the collection with the specified `collection_id` exists in the database, and then + deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this + function also calls `delete_item_index` to delete the index for the items in the collection. + """ + await self.find_collection(collection_id=collection_id) + await self.client.delete( + index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh + ) + await delete_item_index(collection_id) + + async def bulk_async( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database asynchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The + `mk_actions` function is called to generate a list of actions for the bulk insert. 
If `refresh` is set to True, the + index is refreshed after the bulk insert. The function does not return any value. + """ + await helpers.async_bulk( + self.client, + mk_actions(collection_id, processed_items), + refresh=refresh, + raise_on_error=False, + ) + + def bulk_sync( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database synchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed synchronously and blocking, meaning that the function does not return until the insert has + completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to + True, the index is refreshed after the bulk insert. The function does not return any value. + """ + helpers.bulk( + self.sync_client, + mk_actions(collection_id, processed_items), + refresh=refresh, + raise_on_error=False, + ) + + # DANGER + async def delete_items(self) -> None: + """Danger. this is only for tests.""" + await self.client.delete_by_query( + index=ITEM_INDICES, + body={"query": {"match_all": {}}}, + wait_for_completion=True, + ) + + # DANGER + async def delete_collections(self) -> None: + """Danger. this is only for tests.""" + await self.client.delete_by_query( + index=COLLECTIONS_INDEX, + body={"query": {"match_all": {}}}, + wait_for_completion=True, + ) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py new file mode 100644 index 00000000..6b648e2b --- /dev/null +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py @@ -0,0 +1,2 @@ +"""library version.""" +__version__ = "2.0.0" diff --git a/stac_fastapi/tests/conftest.py b/stac_fastapi/tests/conftest.py index 108d0a56..01160ee1 100644 --- a/stac_fastapi/tests/conftest.py +++ b/stac_fastapi/tests/conftest.py @@ -24,10 +24,6 @@ DatabaseLogic, create_collection_index, ) -elif os.getenv("BACKEND", "elasticsearch").lower() == "mongo": - from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSettings - from stac_fastapi.mongo.config import MongoDBSettings as SearchSettings - from stac_fastapi.mongo.database_logic import DatabaseLogic, create_collection_index else: from stac_fastapi.elasticsearch.config import ( ElasticsearchSettings as SearchSettings, diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 341089fd..958d0703 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -20,8 +20,6 @@ if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": from stac_fastapi.opensearch.database_logic import DatabaseLogic -elif os.getenv("BACKEND", "elasticsearch").lower() == "mongo": - from stac_fastapi.mongo.database_logic import DatabaseLogic else: from stac_fastapi.elasticsearch.database_logic import DatabaseLogic
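With the Mongo branch removed from the test setup, backend selection comes down to the `BACKEND` environment variable, which the test modules read at import time, so it has to be set before pytest imports the suite (e.g. `BACKEND=opensearch pytest`). A rough sketch of the resulting switch:

```python
import os

# Mirrors the conditional import in the updated tests: anything other than
# "opensearch" (including the default) falls back to the Elasticsearch backend.
if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch":
    from stac_fastapi.opensearch.database_logic import DatabaseLogic
else:
    from stac_fastapi.elasticsearch.database_logic import DatabaseLogic
```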