diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2dbd1d..823f3c9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed +- Support escaped backslashes in CQL2 `LIKE` queries, and reject invalid (or incomplete) escape sequences. [#286](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/286) + ## [v3.0.0] - 2024-08-14 ### Changed diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py index 0be1ea8e..05613595 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py @@ -17,31 +17,41 @@ from enum import Enum from typing import Any, Dict +_cql2_like_patterns = re.compile(r"\\.|[%_]|\\$") +_valid_like_substitutions = { + "\\\\": "\\", + "\\%": "%", + "\\_": "_", + "%": "*", + "_": "?", +} + + +def _replace_like_patterns(match: re.Match) -> str: + pattern = match.group() + try: + return _valid_like_substitutions[pattern] + except KeyError: + raise ValueError(f"'{pattern}' is not a valid escape sequence") + def cql2_like_to_es(string: str) -> str: """ - Convert CQL2 wildcard characters to Elasticsearch wildcard characters. Specifically, it converts '_' to '?' and '%' to '*', handling escape characters properly. + Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters. Args: string (str): The string containing CQL2 wildcard characters. Returns: str: The converted string with Elasticsearch compatible wildcards. + + Raises: + ValueError: If an invalid escape sequence is encountered. """ - # Translate '%' and '_' only if they are not preceded by a backslash '\' - percent_pattern = r"(? None: + """Verify CQL2 LIKE query strings are converted correctly.""" + + assert cql2_like_to_es(cql2_value) == expected_es_value + + +@pytest.mark.parametrize( + "cql2_value", + ( + pytest.param("\\", id="trailing backslash escape"), + pytest.param("\\1", id="invalid escape sequence"), + ), +) +def test_cql2_like_to_es_invalid(cql2_value: str) -> None: + """Verify that incomplete or invalid escape sequences are rejected. + + CQL2 currently doesn't appear to define how to handle invalid escape sequences. + This test assumes that undefined behavior is caught. + """ + + with pytest.raises(ValueError): + cql2_like_to_es(cql2_value)