Skip to content

Commit

Permalink
Implement collations & update tests
Browse files: browse the repository at this point in the history
  • Loading branch information
khoidt committed Oct 16, 2024
1 parent 491526b commit d06fb38
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 21 deletions.
9 changes: 8 additions & 1 deletion ebl/common/query/query_collation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Dict, Iterable, Literal, Sequence
from urllib.parse import parse_qsl

DataType = Literal["dictionary", "afo-register"]
DataType = Literal["dictionary", "afo-register", "colophons"]


class Fields(Enum):
Expand All @@ -18,13 +18,20 @@ class Fields(Enum):
"WILDCARD_FIELDS": [],
"MARKDOWN_FIELDS": ["text"],
}
COLOPHONS = {
"COLLATED_FIELDS": ["names"],
"WILDCARD_FIELDS": [],
"MARKDOWN_FIELDS": [],
}

@staticmethod
def findByDataType(data_type: DataType) -> Dict[str, Sequence[str]]:
    """Return the field configuration mapping registered for *data_type*.

    Raises:
        ValueError: if *data_type* is not "dictionary", "afo-register"
            or "colophons".
    """
    # Checked in order with ``==``, exactly like an if/elif chain would.
    known_types = (
        ("dictionary", Fields.DICTIONARY),
        ("afo-register", Fields.AFO_REGISTER),
        ("colophons", Fields.COLOPHONS),
    )
    for type_name, member in known_types:
        if data_type == type_name:
            return member.value
    raise ValueError("Invalid data type")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,42 @@
aggregate_random,
)
from ebl.transliteration.infrastructure.queries import query_number_is
from ebl.common.query.query_collation import CollatedFieldQuery


def has_none_values(dictionary: dict) -> bool:
    """Return True when at least one value in *dictionary* is falsy.

    Despite the name, every falsy value counts (``None``, ``0``, ``""``,
    empty containers). An empty dictionary yields ``False``.
    """
    return any(not value for value in dictionary.values())


def _get_colophon_names_query(name_regex: str) -> Sequence[dict]:
return [
{"$unwind": "$colophon.individuals"},
{
"$project": {
"names": [
"$colophon.individuals.name.value",
"$colophon.individuals.sonOf.value",
"$colophon.individuals.grandsonOf.value",
"$colophon.individuals.family.value",
]
}
},
{"$unwind": "$names"},
{
"$match": {
"names": {
"$regex": rf"{name_regex}",
"$options": "i",
}
}
},
{"$group": {"_id": None, "unique_names": {"$addToSet": "$names"}}},
{"$unwind": "$unique_names"},
{"$sort": {"unique_names": 1}},
{"$project": {"_id": 0, "name": "$unique_names"}},
]


class MongoFragmentRepositoryGetExtended(MongoFragmentRepositoryBase):
def __init__(self, database):
super().__init__(database)
Expand Down Expand Up @@ -162,24 +192,9 @@ def fetch_scopes(self, number: MuseumNumber) -> List[Scope]:
]

def fetch_names(self, name_query: str) -> List[str]:
    """Return the distinct colophon names matching *name_query*.

    Queries shorter than three characters return an empty list without
    running an aggregation. Otherwise the query is turned into a regex via
    ``CollatedFieldQuery`` for the ``names`` field of the ``colophons`` data
    type and passed to ``_get_colophon_names_query``, whose pipeline returns
    the unique matches in ascending order.

    Args:
        name_query: The (partial) name typed by the user.

    Returns:
        The matching names; falsy names (e.g. ``None`` or ``""``) are dropped.
    """
    # Guard first: nothing is built or sent to the database for short input.
    if len(name_query) < 3:
        return []
    name_regex = CollatedFieldQuery(name_query, "names", "colophons").value
    pipeline = _get_colophon_names_query(name_regex)
    cursor = self._fragments.aggregate(pipeline)
    return [data["name"] for data in cursor if data["name"]]
3 changes: 2 additions & 1 deletion ebl/tests/fragmentarium/test_fragment_repository_colophon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def test_fetch_names(fragment_repository):
names = ["barmarum", "garmarum", "harmarum", "zarmarum"]
names = ["barmarum", "garmarum", "harmarum", "zarmārum"]
[name, second_name, third_name, fourth_name] = [
NameAttestationFactory.build(value=name) for name in names
]
Expand All @@ -32,3 +32,4 @@ def test_fetch_names(fragment_repository):
fragment_repository.create(fragment)
assert names == fragment_repository.fetch_names("mar")
assert ["pallaqum"] == fragment_repository.fetch_names("pal")
assert [] == fragment_repository.fetch_names("ma")

0 comments on commit d06fb38

Please sign in to comment.