Skip to content

Commit

Permalink
Number search wildcard (#491)
Browse files Browse the repository at this point in the history
* implement wildcard-sensitive number search

* add tests

* refactoring

* refactoring
  • Loading branch information
fsimonjetz authored Nov 24, 2023
1 parent 1844242 commit 27f2b86
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 14 deletions.
2 changes: 1 addition & 1 deletion ebl/fragmentarium/infrastructure/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def number_is(number: str) -> dict:

for number_class in [MuseumNumber, Accession, ExcavationNumber]:
try:
or_.append(query_number_is(number_class.of(number)))
or_.append(query_number_is(number_class.of(number), allow_wildcard=True))
except ValueError:
pass
return {"$or": or_}
Expand Down
97 changes: 93 additions & 4 deletions ebl/tests/fragmentarium/test_fragment_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
import random
from ebl.common.domain.period import Period, PeriodModifier
from ebl.common.domain.project import ResearchProject
from ebl.common.domain.scopes import Scope
from ebl.common.query.query_result import QueryItem, QueryResult

Expand Down Expand Up @@ -145,11 +146,64 @@ def test_create_join(database, fragment_repository):
}


# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# were lost, so two versions of the same test are interleaved. The next three
# lines (no `number` parameter) look like the removed side - confirm against
# the repository before relying on this.
def test_query_by_museum_number(database, fragment_repository):
fragment = LemmatizedFragmentFactory.build()
database[COLLECTION].insert_one(FragmentSchema(exclude=["joins"]).dump(fragment))
# Presumably the added side: the test is parametrized over a literal number
# ("IM.123") and a number containing "*" ("IM.*").
@pytest.mark.parametrize("number", ["IM.123", "IM.*"])
def test_query_by_museum_number(database, fragment_repository, number):
# One fragment is built per museum number and keyed by that number's string.
# The comprehension variable reuses the name of the `number` parameter.
fragments = {
number: LemmatizedFragmentFactory.build(number=MuseumNumber.of(number))
for number in ["IM.123", "IM.*"]
}

# Presumably a leftover assertion from the removed side: `fragment` is only
# bound in the first version above - confirm.
assert fragment_repository.query_by_museum_number(fragment.number) == fragment
# Both fragments are dumped with the "joins" field excluded and inserted.
database[COLLECTION].insert_many(
[
FragmentSchema(exclude=["joins"]).dump(fragment)
for fragment in fragments.values()
]
)

# Looking up the parametrized number must return the fragment built for it.
assert (
fragment_repository.query_by_museum_number(MuseumNumber.of(number))
== fragments[number]
)


@pytest.mark.parametrize(
    "query,expected",
    [
        ("IM.*", ["IM.1"]),
        ("BM.*", ["BM.1", "BM.2", "BM.2.a", "BM.2.b", "BM.3.a"]),
        ("BM.*.*", ["BM.1", "BM.2", "BM.2.a", "BM.2.b", "BM.3.a"]),
        ("BM.*.a", ["BM.2.a", "BM.3.a"]),
        ("*.1", ["BM.1", "IM.1"]),
        ("*.3.*", ["BM.3.a"]),
        ("*.*.b", ["BM.2.b"]),
        ("*.*.*", ["BM.1", "BM.2", "BM.2.a", "BM.2.b", "BM.3.a", "IM.1"]),
        ("*.*", ["BM.1", "BM.2", "BM.2.a", "BM.2.b", "BM.3.a", "IM.1"]),
    ],
)
def test_museum_number_wildcard(fragment_repository, query, expected):
    """Check that a ``number`` query containing ``*`` selects the right fragments.

    Six fragments with fixed museum numbers are stored. The repository is then
    queried with ``{"number": query}`` and the result must equal a
    ``QueryResult`` listing, in creation order, exactly those fragments whose
    number string appears in ``expected``.
    """
    all_numbers = ["BM.1", "BM.2", "BM.2.a", "BM.2.b", "BM.3.a", "IM.1"]

    fragments = [
        FragmentFactory.build(number=MuseumNumber.of(number), script=Script())
        for number in all_numbers
    ]

    fragment_repository.create_many(fragments)

    # Every expected item carries an empty tuple and 0 as its remaining
    # QueryItem arguments; the QueryResult's second argument is 0 as well.
    expected_result = QueryResult(
        [
            QueryItem(
                fragment.number,
                tuple(),
                0,
            )
            for fragment in fragments
            if str(fragment.number) in expected
        ],
        0,
    )

    assert fragment_repository.query({"number": query}) == expected_result


def test_query_by_museum_number_joins(database, fragment_repository):
Expand Down Expand Up @@ -1003,3 +1057,38 @@ def test_query_genres(fragment_repository, query, expected):
)

assert fragment_repository.query({"genre": query}) == expected_result


@pytest.mark.parametrize(
    "query,expected",
    [
        ("CAIC", [0]),
        ("aluGeneva", [1]),
        (None, [0, 1]),
    ],
)
def test_query_project(fragment_repository, query, expected):
    """Check that a ``project`` query selects fragments by research project.

    Two fragments are stored, numbered ``X.0`` and ``X.1``, one per project in
    ``projects``. ``expected`` holds the indices of the fragments that
    ``query({"project": query})`` must return; a ``None`` query expects both.
    """
    projects = [ResearchProject.CAIC, ResearchProject.ALU_GENEVA]

    fragments = [
        FragmentFactory.build(
            number=MuseumNumber.of(f"X.{i}"), projects=[project], script=Script()
        )
        for i, project in enumerate(projects)
    ]

    fragment_repository.create_many(fragments)

    # Every expected item carries an empty tuple and 0 as its remaining
    # QueryItem arguments; the QueryResult's second argument is 0 as well.
    expected_result = QueryResult(
        [
            QueryItem(
                fragments[i].number,
                tuple(),
                0,
            )
            for i in expected
        ],
        0,
    )

    assert fragment_repository.query({"project": query}) == expected_result
38 changes: 38 additions & 0 deletions ebl/tests/transliteration/test_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
from ebl.transliteration.infrastructure.queries import build_query


# Path prefix handed to build_query and prepended to the expected keys by
# add_path_prefix in this test module.
PATH_PREFIX = "someNumber"


def create_dto(prefix, number, suffix) -> dict:
    """Build a number DTO from its three components, omitting ``None`` ones.

    Args:
        prefix: Value for the ``"prefix"`` key.
        number: Value for the ``"number"`` key.
        suffix: Value for the ``"suffix"`` key.

    Returns:
        A dict with the keys ``"prefix"``, ``"number"`` and ``"suffix"``, minus
        any key whose value is ``None``. Empty strings are kept.
    """
    dto = {
        "prefix": prefix,
        "number": number,
        "suffix": suffix,
    }
    # Only None is dropped; other falsy values such as "" stay in the DTO.
    return {key: value for key, value in dto.items() if value is not None}


def add_path_prefix(dto: dict) -> dict:
    """Return a copy of ``dto`` whose keys are prefixed with ``PATH_PREFIX``.

    Each key ``k`` becomes ``f"{PATH_PREFIX}.{k}"``; values are left unchanged.
    """
    return {f"{PATH_PREFIX}.{key}": value for key, value in dto.items()}


# Parameter axes for test_build_query. Each component list holds one concrete
# value, the empty string and the "*" token; WILDCARDS supplies the value
# passed as build_query's allow_wildcard argument.
PREFIXES = ["X", "", "*"]
NUMBERS = ["123", "", "*"]
SUFFIXES = ["a", "", "*"]
WILDCARDS = [True, False]


@pytest.mark.parametrize("prefix", PREFIXES)
@pytest.mark.parametrize("number", NUMBERS)
@pytest.mark.parametrize("suffix", SUFFIXES)
@pytest.mark.parametrize("wildcard", WILDCARDS)
def test_build_query(prefix, number, suffix, wildcard):
    """Check ``build_query`` over every prefix/number/suffix/wildcard combination.

    The expected query holds one ``PATH_PREFIX``-prefixed entry per component.
    When ``wildcard`` is true, components equal to ``"*"`` are left out.
    """
    # With wildcards enabled, a "*" number paired with an empty suffix is
    # expected to behave as if the suffix were "*" too.
    suffix = "*" if wildcard and number == "*" and not suffix else suffix
    data = (prefix, number, suffix)
    dto = create_dto(*data)
    # Replacing "*" with None makes create_dto drop those components.
    expected = add_path_prefix(
        create_dto(*(None if wildcard and value == "*" else value for value in data))
    )
    assert build_query(PATH_PREFIX, dto, wildcard) == expected
33 changes: 24 additions & 9 deletions ebl/transliteration/infrastructure/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,37 @@ def query_number_is(number) -> dict:
raise ValueError(f"Unknown number type: {type(number)}")


def replace_suffix(serialized: dict) -> dict:
    """Return a copy of ``serialized`` with an empty suffix widened to ``"*"``.

    The suffix is replaced by ``"*"`` only when the ``"number"`` entry is
    ``"*"`` and the current suffix is falsy. In every other case the copy
    carries the original suffix. The input dict is not modified.

    Args:
        serialized: Mapping that contains at least ``"number"`` and ``"suffix"``.

    Returns:
        A new dict with the same entries and the possibly replaced suffix.

    Raises:
        KeyError: If ``"suffix"`` or ``"number"`` is missing.
    """
    suffix = serialized["suffix"]
    return {
        **serialized,
        "suffix": "*" if serialized["number"] == "*" and not suffix else suffix,
    }


def build_query(path_prefix: str, serialized: dict, allow_wildcard: bool) -> dict:
    """Turn a serialized number into a dict keyed by prefixed field paths.

    Args:
        path_prefix: Prepended to every key as ``f"{path_prefix}.{key}"``.
        serialized: Mapping of number components to their values.
        allow_wildcard: When true, ``serialized`` is first passed through
            ``replace_suffix`` and every entry whose value equals ``"*"`` is
            left out of the result. When false, all entries are kept as-is.

    Returns:
        A new dict with one prefixed entry per retained component.
    """
    serialized = replace_suffix(serialized) if allow_wildcard else serialized

    return {
        f"{path_prefix}.{key}": value
        for key, value in serialized.items()
        # "*" entries are dropped only when wildcards are enabled.
        if not (allow_wildcard and value == "*")
    }


# NOTE(review): rendered diff with +/- markers and indentation lost. The
# signature and return without `allow_wildcard` appear to be the removed side
# and the ones using build_query the added side - confirm against the repo.
@query_number_is.register
def _(number: MuseumNumber) -> dict:
def _(number: MuseumNumber, allow_wildcard=False) -> dict:
# The number is dumped with MuseumNumberSchema; its fields become
# "museumNumber.<key>" entries of the returned dict.
serialized = MuseumNumberSchema().dump(number)
return {f"museumNumber.{key}": value for key, value in serialized.items()}
# build_query leaves out fields equal to "*" when allow_wildcard is true.
return build_query("museumNumber", serialized, allow_wildcard)


# NOTE(review): rendered diff with +/- markers and indentation lost. The
# signature and return without `allow_wildcard` appear to be the removed side
# and the ones using build_query the added side - confirm against the repo.
@query_number_is.register
def _(accession: Accession) -> dict:
def _(accession: Accession, allow_wildcard=False) -> dict:
# The accession is dumped with AccessionSchema; its fields become
# "accession.<key>" entries of the returned dict.
serialized = AccessionSchema().dump(accession)
return {f"accession.{key}": value for key, value in serialized.items()}
# build_query leaves out fields equal to "*" when allow_wildcard is true.
return build_query("accession", serialized, allow_wildcard)


# NOTE(review): rendered diff with +/- markers and indentation lost. The
# signature and return without `allow_wildcard` appear to be the removed side
# and the ones using build_query the added side - confirm against the repo.
@query_number_is.register
def _(number: ExcavationNumber) -> dict:
def _(number: ExcavationNumber, allow_wildcard=False) -> dict:
# The number is dumped with ExcavationNumberSchema; its fields become
# "archaeology.excavationNumber.<key>" entries of the returned dict.
serialized = ExcavationNumberSchema().dump(number)
return {
f"archaeology.excavationNumber.{key}": value
for key, value in serialized.items()
}
# build_query leaves out fields equal to "*" when allow_wildcard is true.
return build_query("archaeology.excavationNumber", serialized, allow_wildcard)

0 comments on commit 27f2b86

Please sign in to comment.