Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LIKE, IN, BETWEEN operators -- filter extension #178

Merged
merged 7 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Added

- Advanced comparison (LIKE, IN, BETWEEN) operators to the Filter extension [#178](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/178)

### Changed

- Elasticsearch drivers from 7.17.9 to 8.11.0 [#169](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/169)
Expand Down
7 changes: 6 additions & 1 deletion stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
settings = ElasticsearchSettings()
session = Session.create_from_settings(settings)

# Build the filter extension up front (rather than inline in `extensions`)
# so the CQL2 advanced-comparison-operators conformance class URI can be
# appended to its conformance classes before the instance is registered below.
filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient())
filter_extension.conformance_classes.append(
    "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators"
)

extensions = [
TransactionExtension(client=TransactionsClient(session=session), settings=settings),
BulkTransactionExtension(client=BulkTransactionsClient(session=session)),
Expand All @@ -32,7 +37,7 @@
SortExtension(),
TokenPaginationExtension(),
ContextExtension(),
FilterExtension(client=EsAsyncBaseFiltersClient()),
filter_extension,
]

post_request_model = create_post_request_model(extensions)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
Basic CQL2 (AND, OR, NOT), comparison operators (=, <>, <, <=, >, >=), and IS NULL.
The comparison operators are allowed against string, numeric, boolean, date, and datetime types.

Advanced comparison operators (http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators)
defines the LIKE, IN, and BETWEEN operators.

Basic Spatial Operators (http://www.opengis.net/spec/cql2/1.0/conf/basic-spatial-operators)
defines the intersects operator (S_INTERSECTS).
"""
from __future__ import annotations

import datetime
import re
from enum import Enum
from typing import List, Union

Expand Down Expand Up @@ -78,6 +82,17 @@ def to_es(self):
)


class AdvancedComparisonOp(str, Enum):
    """Advanced comparison operator.

    Enumerates the CQL2 advanced comparison operators: like, between, and in.
    Each member's value is the operator name as it appears in the "op" field
    of a CQL2 JSON clause.
    """

    like = "like"
    between = "between"
    # `in` is a reserved Python keyword, so the member is named `_in`;
    # its value is still the plain operator string "in".
    _in = "in"


class SpatialIntersectsOp(str, Enum):
"""Spatial intersections operator s_intersects."""

Expand Down Expand Up @@ -152,8 +167,8 @@ def validate(cls, v):
class Clause(BaseModel):
"""Filter extension clause."""

op: Union[LogicalOp, ComparisonOp, SpatialIntersectsOp]
args: List[Arg]
op: Union[LogicalOp, ComparisonOp, AdvancedComparisonOp, SpatialIntersectsOp]
args: List[Union[Arg, List[Arg]]]

def to_es(self):
"""Generate an Elasticsearch expression for this Clause."""
Expand All @@ -171,6 +186,30 @@ def to_es(self):
"must_not": [{"term": {to_es(self.args[0]): to_es(self.args[1])}}]
}
}
elif self.op == AdvancedComparisonOp.like:
return {
"wildcard": {
to_es(self.args[0]): {
"value": cql2_like_to_es(str(to_es(self.args[1]))),
"case_insensitive": "false",
}
}
}
elif self.op == AdvancedComparisonOp.between:
return {
"range": {
to_es(self.args[0]): {
"gte": to_es(self.args[1]),
"lte": to_es(self.args[2]),
}
}
}
elif self.op == AdvancedComparisonOp._in:
if not isinstance(self.args[1], List):
raise RuntimeError(f"Arg {self.args[1]} is not a list")
return {
"terms": {to_es(self.args[0]): [to_es(arg) for arg in self.args[1]]}
}
elif (
self.op == ComparisonOp.lt
or self.op == ComparisonOp.lte
Expand Down Expand Up @@ -210,3 +249,19 @@ def to_es(arg: Arg):
return arg
else:
raise RuntimeError(f"unknown arg {repr(arg)}")


# One token per match: either a backslash together with the character it
# escapes, or a bare CQL2 wildcard. Consuming the escaped pair as a unit means
# an escaped backslash ("\\") can never be mistaken for the escape of the
# character that follows it.
_CQL2_LIKE_TOKEN = re.compile(r"\\(.)|[%_]")

# CQL2 wildcard -> Elasticsearch wildcard.
_CQL2_TO_ES_WILDCARD = {"%": "*", "_": "?"}


def cql2_like_to_es(string):
    """Convert a CQL2 LIKE pattern into an Elasticsearch wildcard pattern.

    The pattern is scanned once, left to right:

    * an unescaped ``%`` becomes ``*`` and an unescaped ``_`` becomes ``?``;
    * ``\\%`` and ``\\_`` become the literal characters ``%`` and ``_``
      (the backslash is dropped);
    * any other backslash pair (including an escaped backslash) is passed
      through unchanged, and so is a lone trailing backslash;
    * everything else, including Elasticsearch's own ``*`` and ``?``
      wildcards, is passed through unchanged.

    Args:
        string: The CQL2 LIKE pattern.

    Returns:
        The equivalent Elasticsearch wildcard pattern.
    """

    def _translate(match):
        escaped = match.group(1)
        if escaped is None:
            # Bare CQL2 wildcard.
            return _CQL2_TO_ES_WILDCARD[match.group(0)]
        if escaped in _CQL2_TO_ES_WILDCARD:
            # Escaped CQL2 wildcard: emit the literal character.
            return escaped
        # Any other escape sequence: leave it for Elasticsearch to interpret.
        return match.group(0)

    return _CQL2_LIKE_TOKEN.sub(_translate, string)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"op": "like",
"args": [
{
"property": "scene_id"
},
"LC82030282019133%"
]
}
10 changes: 10 additions & 0 deletions stac_fastapi/elasticsearch/tests/extensions/cql2/example20.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"op": "like",
"args": [
{
"property": "scene_id"
},
"LC82030282019133LGN0_"
]
}

33 changes: 33 additions & 0 deletions stac_fastapi/elasticsearch/tests/extensions/cql2/example21.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"op": "and",
"args": [
{
"op": "between",
"args": [
{
"property": "cloud_cover"
},
0.1,
0.2
]
},
{
"op": "=",
"args": [
{
"property": "landsat:wrs_row"
},
28
]
},
{
"op": "=",
"args": [
{
"property": "landsat:wrs_path"
},
203
]
}
]
}
13 changes: 13 additions & 0 deletions stac_fastapi/elasticsearch/tests/extensions/cql2/example22.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"op": "and",
"args": [
{
"op": "in",
"args": [
{"property": "id"},
["LC08_L1TP_060247_20180905_20180912_01_T1_L1TP"]
]
},
{"op": "=", "args": [{"property": "collection"}, "landsat8_l1tp"]}
]
}
176 changes: 176 additions & 0 deletions stac_fastapi/elasticsearch/tests/extensions/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,179 @@ async def test_search_filter_extension_floats_post(app_client, ctx):

assert resp.status_code == 200
assert len(resp.json()["features"]) == 1


@pytest.mark.asyncio
async def test_search_filter_extension_wildcard_cql2(app_client, ctx):
    """LIKE with the CQL2 wildcards '_' and '%' returns exactly one feature."""
    item_id = ctx.item["id"]
    # '_' replaces the final character; '%' replaces the last three.
    like_patterns = (item_id[:-1] + "_", item_id[:-3] + "%")

    clauses = [{"op": "=", "args": [{"property": "id"}, item_id]}]
    clauses.extend(
        {"op": "like", "args": [{"property": "id"}, pattern]}
        for pattern in like_patterns
    )

    response = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": clauses}}
    )

    assert response.status_code == 200
    assert len(response.json()["features"]) == 1


@pytest.mark.asyncio
async def test_search_filter_extension_wildcard_es(app_client, ctx):
    """LIKE with the wildcard characters '?' and '*' returns exactly one feature."""
    item_id = ctx.item["id"]
    # '?' replaces the final character; '*' replaces the last three.
    like_patterns = (item_id[:-1] + "?", item_id[:-3] + "*")

    clauses = [{"op": "=", "args": [{"property": "id"}, item_id]}]
    clauses.extend(
        {"op": "like", "args": [{"property": "id"}, pattern]}
        for pattern in like_patterns
    )

    response = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": clauses}}
    )

    assert response.status_code == 200
    assert len(response.json()["features"]) == 1


@pytest.mark.asyncio
async def test_search_filter_extension_escape_chars(app_client, ctx):
    """LIKE with backslash-escaped underscores returns exactly one feature."""
    product_id = ctx.item["properties"]["landsat:product_id"]
    # Escape every underscore, then swap the final character for an
    # unescaped '_' so the pattern still contains one wildcard.
    escaped_id = product_id.replace("_", "\\_")
    like_pattern = escaped_id[:-1] + "_"

    id_clause = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    like_clause = {
        "op": "like",
        "args": [{"property": "properties.landsat:product_id"}, like_pattern],
    }
    body = {"filter": {"op": "and", "args": [id_clause, like_clause]}}

    response = await app_client.post("/search", json=body)

    assert response.status_code == 200
    assert len(response.json()["features"]) == 1


@pytest.mark.asyncio
async def test_search_filter_extension_in(app_client, ctx):
    """IN with a one-element list of product ids returns exactly one feature."""
    candidates = [ctx.item["properties"]["landsat:product_id"]]

    id_clause = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    in_clause = {
        "op": "in",
        "args": [{"property": "properties.landsat:product_id"}, candidates],
    }
    body = {"filter": {"op": "and", "args": [id_clause, in_clause]}}

    response = await app_client.post("/search", json=body)

    assert response.status_code == 200
    assert len(response.json()["features"]) == 1


@pytest.mark.asyncio
async def test_search_filter_extension_in_no_list(app_client, ctx):
    """IN with a bare value instead of a list is rejected with HTTP 400."""
    product_id = ctx.item["properties"]["landsat:product_id"]

    id_clause = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    # The second argument is deliberately a scalar, not a list.
    in_clause = {
        "op": "in",
        "args": [{"property": "properties.landsat:product_id"}, product_id],
    }
    body = {"filter": {"op": "and", "args": [id_clause, in_clause]}}

    response = await app_client.post("/search", json=body)

    expected_error = {
        "detail": f"Error with cql2_json filter: Arg {product_id} is not a list"
    }
    assert response.status_code == 400
    assert response.json() == expected_error


@pytest.mark.asyncio
async def test_search_filter_extension_between(app_client, ctx):
    """BETWEEN over a narrow range around the sun elevation returns one feature."""
    sun_elevation = ctx.item["properties"]["view:sun_elevation"]
    # Bracket the fixture value by 0.01 on each side.
    lower_bound = sun_elevation - 0.01
    upper_bound = sun_elevation + 0.01

    id_clause = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    between_clause = {
        "op": "between",
        "args": [
            {"property": "properties.view:sun_elevation"},
            lower_bound,
            upper_bound,
        ],
    }
    body = {"filter": {"op": "and", "args": [id_clause, between_clause]}}

    response = await app_client.post("/search", json=body)

    assert response.status_code == 200
    assert len(response.json()["features"]) == 1