From ace0c7a3c831edc2f0a0bfe7ad5329a9fd5e6823 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Fri, 23 Aug 2024 07:11:42 -0500 Subject: [PATCH] Make `cql2_like_to_es()` understand escaped backslashes (#286) **Related Issue(s):** - Closes #285 **Description:** This is a break/fix PR. The first commit adds a suite of tests that document correct LIKE-to-wildcard query value conversions, and then fixes the `cql2_like_to_es()` code to correctly process escaped backslashes. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) _(`make test` fails because the Docker container port ranges don't match, but CI, which doesn't use `make test`, passes)_ - [ ] Documentation has been updated to reflect changes, if applicable _n/a, no docs in repo_ - [x] Changes are added to the changelog --- CHANGELOG.md | 5 ++ .../stac_fastapi/core/extensions/filter.py | 40 ++++++++++------ .../tests/extensions/test_cql2_like_to_es.py | 46 +++++++++++++++++++ 3 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 stac_fastapi/tests/extensions/test_cql2_like_to_es.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2dbd1d..823f3c9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed +- Support escaped backslashes in CQL2 `LIKE` queries, and reject invalid (or incomplete) escape sequences. 
[#286](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/286) + ## [v3.0.0] - 2024-08-14 ### Changed diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py index 0be1ea8e..05613595 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py @@ -17,31 +17,41 @@ from enum import Enum from typing import Any, Dict +_cql2_like_patterns = re.compile(r"\\.|[%_]|\\$") +_valid_like_substitutions = { + "\\\\": "\\", + "\\%": "%", + "\\_": "_", + "%": "*", + "_": "?", +} + + +def _replace_like_patterns(match: re.Match) -> str: + pattern = match.group() + try: + return _valid_like_substitutions[pattern] + except KeyError: + raise ValueError(f"'{pattern}' is not a valid escape sequence") + def cql2_like_to_es(string: str) -> str: """ - Convert CQL2 wildcard characters to Elasticsearch wildcard characters. Specifically, it converts '_' to '?' and '%' to '*', handling escape characters properly. + Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters. Args: string (str): The string containing CQL2 wildcard characters. Returns: str: The converted string with Elasticsearch compatible wildcards. + + Raises: + ValueError: If an invalid escape sequence is encountered. """ - # Translate '%' and '_' only if they are not preceded by a backslash '\' - percent_pattern = r"(? None: + """Verify CQL2 LIKE query strings are converted correctly.""" + + assert cql2_like_to_es(cql2_value) == expected_es_value + + +@pytest.mark.parametrize( + "cql2_value", + ( + pytest.param("\\", id="trailing backslash escape"), + pytest.param("\\1", id="invalid escape sequence"), + ), +) +def test_cql2_like_to_es_invalid(cql2_value: str) -> None: + """Verify that incomplete or invalid escape sequences are rejected. + + CQL2 currently doesn't appear to define how to handle invalid escape sequences. 
+ This test assumes that such sequences are rejected by raising a ValueError. + """ + + with pytest.raises(ValueError): + cql2_like_to_es(cql2_value)