From caac37373238e3509d29b2dfb852116e32637d99 Mon Sep 17 00:00:00 2001 From: rhysrevans3 <34507919+rhysrevans3@users.noreply.github.com> Date: Tue, 11 Feb 2025 01:46:57 +0000 Subject: [PATCH 1/2] Allow capitals in item index aliases (#329) **Related Issue(s):** - #328 **Description:** Separating alias and index names to allow for aliases to include capitalisation. Index names now include the lowered and hex encoded collection ID to prevent clashes of indices. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Jonathan Healy --- CHANGELOG.md | 1 + README.md | 10 ++--- .../elasticsearch/database_logic.py | 34 +++++++++++------ .../stac_fastapi/opensearch/database_logic.py | 38 +++++++++++++------ stac_fastapi/tests/database/test_database.py | 6 +-- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ec871e..b785cc5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Added note on the use of the default `*` use in route authentication dependecies. [#325](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/325) +- Update item index naming and aliasing to allow capitalisation of collection ids [#329](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/329) - Bugfixes for the `IsNull` operator and datetime filtering [#330](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/330) ## [v3.2.2] - 2024-12-15 diff --git a/README.md b/README.md index ad69d55c..84c38d12 100644 --- a/README.md +++ b/README.md @@ -267,17 +267,17 @@ A reindex operation might be useful to apply changes to documents or to correct The index templates will make sure that manually created indices will also have the correct mappings and settings. -In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase. +In this example, we will make a copy of an existing Item index `items_my-collection-lower_my-collection-hex-000001` but change the Item identifier to be lowercase. ```shell curl -X "POST" "http://localhost:9200/_reindex" \ -H 'Content-Type: application/json' \ -d $'{ "source": { - "index": "items_my-collection-000001" + "index": "items_my-collection-lower_my-collection-hex-000001" }, "dest": { - "index": "items_my-collection-000002" + "index": "items_my-collection-lower_my-collection-hex-000002" }, "script": { "source": "ctx._source.id = ctx._source.id.toLowerCase()", @@ -286,7 +286,7 @@ curl -X "POST" "http://localhost:9200/_reindex" \ }' ``` -If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`. +If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-lower_my-collection-hex-000002`. ```shell curl -X "POST" "http://localhost:9200/_aliases" \ -h 'Content-Type: application/json' \ @@ -300,7 +300,7 @@ curl -X "POST" "http://localhost:9200/_aliases" \ }, { "add": { - "index": "items_my-collection-000002", + "index": "items_my-collection-lower_my-collection-hex-000002", "alias": "items_my-collection" } } diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c404b5e5..0f272218 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -156,7 +156,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -172,7 +185,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -231,11 +244,10 @@ async def create_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - index_name = index_by_collection_id(collection_id) await client.options(ignore_status=400).indices.create( index=f"{index_by_collection_id(collection_id)}-000001", - aliases={index_name: {}}, + aliases={index_alias_by_collection_id(collection_id): {}}, ) await client.close() @@ -248,7 +260,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -288,7 +300,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -449,7 +461,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -808,7 +820,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -845,7 +857,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -871,7 +883,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), document=item, refresh=refresh, @@ -897,7 +909,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 63a42427..498c9c01 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -158,7 +158,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -174,7 +187,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -243,13 +256,14 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - index_name = index_by_collection_id(collection_id) search_body: Dict[str, Any] = { - "aliases": {index_name: {}}, + "aliases": {index_alias_by_collection_id(collection_id): {}}, } try: - await client.indices.create(index=f"{index_name}-000001", body=search_body) + await client.indices.create( + index=f"{index_by_collection_id(collection_id)}-000001", body=search_body + ) except TransportError as e: if e.status_code == 400: pass # Ignore 400 status codes @@ -267,7 +281,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -307,7 +321,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -476,7 +490,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -838,7 +852,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -875,7 +889,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -901,7 +915,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), body=item, refresh=refresh, @@ -927,7 +941,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py index 80acd82c..a5a01e60 100644 --- a/stac_fastapi/tests/database/test_database.py +++ b/stac_fastapi/tests/database/test_database.py @@ -11,14 +11,14 @@ COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) else: from stac_fastapi.elasticsearch.database_logic import ( COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) @@ -42,7 +42,7 @@ async def test_index_mapping_items(txn_client, load_test_data): api.Collection(**collection), request=MockRequest ) response = await database.client.indices.get_mapping( - index=index_by_collection_id(collection["id"]) + index=index_alias_by_collection_id(collection["id"]) ) if not isinstance(response, dict): response = response.body From 47ef97a44c6995879e45b861c6c92ee5a02fa8eb Mon Sep 17 00:00:00 2001 From: Jonathan Healy Date: Tue, 11 Feb 2025 10:02:57 +0800 Subject: [PATCH 2/2] update for v3.2.3 (#333) **Related Issue(s):** - # **Description:** **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --- CHANGELOG.md | 5 ++++- stac_fastapi/core/stac_fastapi/core/version.py | 2 +- stac_fastapi/elasticsearch/setup.py | 2 +- .../elasticsearch/stac_fastapi/elasticsearch/version.py | 2 +- stac_fastapi/opensearch/setup.py | 2 +- stac_fastapi/opensearch/stac_fastapi/opensearch/version.py | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b785cc5e..a9a3de6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed +## [v3.2.3] - 2025-02-11 + - Added note on the use of the default `*` use in route authentication dependecies. [#325](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/325) - Update item index naming and aliasing to allow capitalisation of collection ids [#329](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/329) - Bugfixes for the `IsNull` operator and datetime filtering [#330](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/330) @@ -289,7 +291,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Use genexp in execute_search and get_all_collections to return results. - Added db_to_stac serializer to item_collection method in core.py. -[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.2...main +[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.3...main +[v3.2.3]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.2...v3.2.3 [v3.2.2]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.1...v3.2.2 [v3.2.1]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.0...v3.2.1 [v3.2.0]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.1.0...v3.2.0 diff --git a/stac_fastapi/core/stac_fastapi/core/version.py b/stac_fastapi/core/stac_fastapi/core/version.py index 05a55a44..c210f05c 100644 --- a/stac_fastapi/core/stac_fastapi/core/version.py +++ b/stac_fastapi/core/stac_fastapi/core/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.2" +__version__ = "3.2.3" diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py index fd2723de..ec43d6b3 100644 --- a/stac_fastapi/elasticsearch/setup.py +++ b/stac_fastapi/elasticsearch/setup.py @@ -6,7 +6,7 @@ desc = f.read() install_requires = [ - "stac-fastapi.core==3.2.2", + "stac-fastapi.core==3.2.3", "elasticsearch[async]==8.11.0", "elasticsearch-dsl==8.11.0", "uvicorn", diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py index 05a55a44..c210f05c 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.2" +__version__ = "3.2.3" diff --git a/stac_fastapi/opensearch/setup.py b/stac_fastapi/opensearch/setup.py index 948e2e44..fea1f75d 100644 --- a/stac_fastapi/opensearch/setup.py +++ b/stac_fastapi/opensearch/setup.py @@ -6,7 +6,7 @@ desc = f.read() install_requires = [ - "stac-fastapi.core==3.2.2", + "stac-fastapi.core==3.2.3", "opensearch-py==2.4.2", "opensearch-py[async]==2.4.2", "uvicorn", diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py index 05a55a44..c210f05c 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.2" +__version__ = "3.2.3"