diff --git a/CHANGELOG.md b/CHANGELOG.md index da86de52..1a1d2b8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Advanced comparison (LIKE, IN, BETWEEN) operators to the Filter extension [#178](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/178) + ### Changed - Extended Datetime Search to search on start_datetime and end_datetime as well as datetime fields. [#182](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/182) @@ -19,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Exclude unset fields in search response [#166](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/166) - Upgrade stac-fastapi to v2.4.9 [#172](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/172) +- Set correct default filter-lang for GET /search requests [#179](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/179) ## [v1.0.0] diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index e3c4bc64..8adcece4 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -24,6 +24,11 @@ settings = ElasticsearchSettings() session = Session.create_from_settings(settings) +filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +filter_extension.conformance_classes.append( + "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" +) + extensions = [ TransactionExtension(client=TransactionsClient(session=session), settings=settings), BulkTransactionExtension(client=BulkTransactionsClient(session=session)), @@ -32,7 +37,7 @@ SortExtension(), TokenPaginationExtension(), ContextExtension(), - FilterExtension(client=EsAsyncBaseFiltersClient()), + filter_extension, ] post_request_model = 
create_post_request_model(extensions) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py index 4f4d5ce2..b08f8474 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py @@ -368,12 +368,12 @@ async def get_search( base_args["sortby"] = sort_param if filter: - if filter_lang == "cql2-text": + if filter_lang == "cql2-json": base_args["filter-lang"] = "cql2-json" - base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter))) + base_args["filter"] = orjson.loads(unquote_plus(filter)) else: base_args["filter-lang"] = "cql2-json" - base_args["filter"] = orjson.loads(unquote_plus(filter)) + base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter))) if fields: includes = set() diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py index 6cc4ac28..fe691ddf 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py @@ -4,12 +4,16 @@ Basic CQL2 (AND, OR, NOT), comparison operators (=, <>, <, <=, >, >=), and IS NULL. The comparison operators are allowed against string, numeric, boolean, date, and datetime types. +Advanced comparison operators (http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators) +defines the LIKE, IN, and BETWEEN operators. + Basic Spatial Operators (http://www.opengis.net/spec/cql2/1.0/conf/basic-spatial-operators) defines the intersects operator (S_INTERSECTS). """ from __future__ import annotations import datetime +import re from enum import Enum from typing import List, Union @@ -78,6 +82,17 @@ def to_es(self): ) +class AdvancedComparisonOp(str, Enum): + """Advanced Comparison operator. 
+ + CQL2 advanced comparison operators like (~), between, and in. + """ + + like = "like" + between = "between" + _in = "in" + + class SpatialIntersectsOp(str, Enum): """Spatial intersections operator s_intersects.""" @@ -152,8 +167,8 @@ def validate(cls, v): class Clause(BaseModel): """Filter extension clause.""" - op: Union[LogicalOp, ComparisonOp, SpatialIntersectsOp] - args: List[Arg] + op: Union[LogicalOp, ComparisonOp, AdvancedComparisonOp, SpatialIntersectsOp] + args: List[Union[Arg, List[Arg]]] def to_es(self): """Generate an Elasticsearch expression for this Clause.""" @@ -171,6 +186,30 @@ def to_es(self): "must_not": [{"term": {to_es(self.args[0]): to_es(self.args[1])}}] } } + elif self.op == AdvancedComparisonOp.like: + return { + "wildcard": { + to_es(self.args[0]): { + "value": cql2_like_to_es(str(to_es(self.args[1]))), + "case_insensitive": "false", + } + } + } + elif self.op == AdvancedComparisonOp.between: + return { + "range": { + to_es(self.args[0]): { + "gte": to_es(self.args[1]), + "lte": to_es(self.args[2]), + } + } + } + elif self.op == AdvancedComparisonOp._in: + if not isinstance(self.args[1], List): + raise RuntimeError(f"Arg {self.args[1]} is not a list") + return { + "terms": {to_es(self.args[0]): [to_es(arg) for arg in self.args[1]]} + } elif ( self.op == ComparisonOp.lt or self.op == ComparisonOp.lte @@ -210,3 +249,19 @@ def to_es(arg: Arg): return arg else: raise RuntimeError(f"unknown arg {repr(arg)}") + + +def cql2_like_to_es(string): + """Convert wildcard characters in CQL2 ('_' and '%') to Elasticsearch wildcard characters ('?' and '*', respectively). 
Handle escape characters and pass through Elasticsearch wildcards.""" + percent_pattern = r"(?","args":[{"property":"properties.view:sun_elevation"},"-37.30891534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30691534"]}]}""" + """/search?filter-lang=cql2-json&filter={"op":"and","args":[{"op":"=","args":[{"property":"id"},"test-item"]},{"op":">","args":[{"property":"properties.view:sun_elevation"},"-37.30891534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30691534"]}]}""" ) assert resp.status_code == 200 assert len(resp.json()["features"]) == 1 resp = await app_client.get( - """/search?filter={"op":"and","args":[{"op":"=","args":[{"property":"id"},"test-item-7"]},{"op":">","args":[{"property":"properties.view:sun_elevation"},"-37.30891534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30691534"]}]}""" + """/search?filter-lang=cql2-json&filter={"op":"and","args":[{"op":"=","args":[{"property":"id"},"test-item-7"]},{"op":">","args":[{"property":"properties.view:sun_elevation"},"-37.30891534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30691534"]}]}""" ) assert resp.status_code == 200 assert len(resp.json()["features"]) == 0 resp = await app_client.get( - """/search?filter={"op":"and","args":[{"op":"=","args":[{"property":"id"},"test-item"]},{"op":">","args":[{"property":"properties.view:sun_elevation"},"-37.30591534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30491534"]}]}""" + """/search?filter-lang=cql2-json&filter={"op":"and","args":[{"op":"=","args":[{"property":"id"},"test-item"]},{"op":">","args":[{"property":"properties.view:sun_elevation"},"-37.30591534"]},{"op":"<","args":[{"property":"properties.view:sun_elevation"},"-37.30491534"]}]}""" ) assert resp.status_code == 200 @@ -213,3 +224,179 @@ async def test_search_filter_extension_floats_post(app_client, ctx): assert resp.status_code == 200 assert len(resp.json()["features"]) == 1 + 
+ +@pytest.mark.asyncio +async def test_search_filter_extension_wildcard_cql2(app_client, ctx): + single_char = ctx.item["id"][:-1] + "_" + multi_char = ctx.item["id"][:-3] + "%" + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}, + { + "op": "like", + "args": [ + {"property": "id"}, + single_char, + ], + }, + { + "op": "like", + "args": [ + {"property": "id"}, + multi_char, + ], + }, + ], + } + } + + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 200 + assert len(resp.json()["features"]) == 1 + + +@pytest.mark.asyncio +async def test_search_filter_extension_wildcard_es(app_client, ctx): + single_char = ctx.item["id"][:-1] + "?" + multi_char = ctx.item["id"][:-3] + "*" + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}, + { + "op": "like", + "args": [ + {"property": "id"}, + single_char, + ], + }, + { + "op": "like", + "args": [ + {"property": "id"}, + multi_char, + ], + }, + ], + } + } + + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 200 + assert len(resp.json()["features"]) == 1 + + +@pytest.mark.asyncio +async def test_search_filter_extension_escape_chars(app_client, ctx): + esc_chars = ( + ctx.item["properties"]["landsat:product_id"].replace("_", "\\_")[:-1] + "_" + ) + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}, + { + "op": "like", + "args": [ + {"property": "properties.landsat:product_id"}, + esc_chars, + ], + }, + ], + } + } + + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 200 + assert len(resp.json()["features"]) == 1 + + +@pytest.mark.asyncio +async def test_search_filter_extension_in(app_client, ctx): + product_id = ctx.item["properties"]["landsat:product_id"] + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": 
[{"property": "id"}, ctx.item["id"]]}, + { + "op": "in", + "args": [ + {"property": "properties.landsat:product_id"}, + [product_id], + ], + }, + ], + } + } + + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 200 + assert len(resp.json()["features"]) == 1 + + +@pytest.mark.asyncio +async def test_search_filter_extension_in_no_list(app_client, ctx): + product_id = ctx.item["properties"]["landsat:product_id"] + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}, + { + "op": "in", + "args": [ + {"property": "properties.landsat:product_id"}, + product_id, + ], + }, + ], + } + } + + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 400 + assert resp.json() == { + "detail": f"Error with cql2_json filter: Arg {product_id} is not a list" + } + + +@pytest.mark.asyncio +async def test_search_filter_extension_between(app_client, ctx): + sun_elevation = ctx.item["properties"]["view:sun_elevation"] + + params = { + "filter": { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}, + { + "op": "between", + "args": [ + {"property": "properties.view:sun_elevation"}, + sun_elevation - 0.01, + sun_elevation + 0.01, + ], + }, + ], + } + } + resp = await app_client.post("/search", json=params) + + assert resp.status_code == 200 + assert len(resp.json()["features"]) == 1