diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ec871e..b785cc5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Added note on the use of the default `*` use in route authentication dependecies. [#325](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/325) +- Update item index naming and aliasing to allow capitalisation of collection ids [#329](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/329) - Bugfixes for the `IsNull` operator and datetime filtering [#330](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/330) ## [v3.2.2] - 2024-12-15 diff --git a/README.md b/README.md index ad69d55c..84c38d12 100644 --- a/README.md +++ b/README.md @@ -267,17 +267,17 @@ A reindex operation might be useful to apply changes to documents or to correct The index templates will make sure that manually created indices will also have the correct mappings and settings. -In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase. +In this example, we will make a copy of an existing Item index `items_my-collection-lower_my-collection-hex-000001` but change the Item identifier to be lowercase. ```shell curl -X "POST" "http://localhost:9200/_reindex" \ -H 'Content-Type: application/json' \ -d $'{ "source": { - "index": "items_my-collection-000001" + "index": "items_my-collection-lower_my-collection-hex-000001" }, "dest": { - "index": "items_my-collection-000002" + "index": "items_my-collection-lower_my-collection-hex-000002" }, "script": { "source": "ctx._source.id = ctx._source.id.toLowerCase()", @@ -286,7 +286,7 @@ curl -X "POST" "http://localhost:9200/_reindex" \ }' ``` -If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`. +If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-lower_my-collection-hex-000002`. ```shell curl -X "POST" "http://localhost:9200/_aliases" \ -h 'Content-Type: application/json' \ @@ -300,7 +300,7 @@ curl -X "POST" "http://localhost:9200/_aliases" \ }, { "add": { - "index": "items_my-collection-000002", + "index": "items_my-collection-lower_my-collection-hex-000002", "alias": "items_my-collection" } } diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c404b5e5..0f272218 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -156,7 +156,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -172,7 +185,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -231,11 +244,10 @@ async def create_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - index_name = index_by_collection_id(collection_id) await client.options(ignore_status=400).indices.create( index=f"{index_by_collection_id(collection_id)}-000001", - aliases={index_name: {}}, + aliases={index_alias_by_collection_id(collection_id): {}}, ) await client.close() @@ -248,7 +260,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -288,7 +300,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -449,7 +461,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -808,7 +820,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -845,7 +857,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -871,7 +883,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), document=item, refresh=refresh, @@ -897,7 +909,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 63a42427..498c9c01 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -158,7 +158,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -174,7 +187,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -243,13 +256,14 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - index_name = index_by_collection_id(collection_id) search_body: Dict[str, Any] = { - "aliases": {index_name: {}}, + "aliases": {index_alias_by_collection_id(collection_id): {}}, } try: - await client.indices.create(index=f"{index_name}-000001", body=search_body) + await client.indices.create( + index=f"{index_by_collection_id(collection_id)}-000001", body=search_body + ) except TransportError as e: if e.status_code == 400: pass # Ignore 400 status codes @@ -267,7 +281,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -307,7 +321,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -476,7 +490,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -838,7 +852,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -875,7 +889,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -901,7 +915,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), body=item, refresh=refresh, @@ -927,7 +941,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py index 80acd82c..a5a01e60 100644 --- a/stac_fastapi/tests/database/test_database.py +++ b/stac_fastapi/tests/database/test_database.py @@ -11,14 +11,14 @@ COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) else: from stac_fastapi.elasticsearch.database_logic import ( COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) @@ -42,7 +42,7 @@ async def test_index_mapping_items(txn_client, load_test_data): api.Collection(**collection), request=MockRequest ) response = await database.client.indices.get_mapping( - index=index_by_collection_id(collection["id"]) + index=index_alias_by_collection_id(collection["id"]) ) if not isinstance(response, dict): response = response.body