diff --git a/ebl/common/application/schemas.py b/ebl/common/application/schemas.py new file mode 100644 index 000000000..dfe66163e --- /dev/null +++ b/ebl/common/application/schemas.py @@ -0,0 +1,16 @@ +from marshmallow import Schema, fields, validate, post_load +from ebl.common.domain.accession import Accession + + +class AbstractMuseumNumberSchema(Schema): + prefix = fields.String(required=True, validate=validate.Length(min=1)) + number = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) + + +class AccessionSchema(AbstractMuseumNumberSchema): + @post_load + def create_accession(self, data, **kwargs) -> Accession: + return Accession(**data) diff --git a/ebl/common/domain/accession.py b/ebl/common/domain/accession.py new file mode 100644 index 000000000..80c2252b7 --- /dev/null +++ b/ebl/common/domain/accession.py @@ -0,0 +1,15 @@ +from ebl.transliteration.domain.museum_number import MuseumNumber +import functools +import attr +import re + + +@functools.total_ordering +@attr.s(auto_attribs=True, frozen=True, order=False) +class Accession(MuseumNumber): + @staticmethod + def of(source: str) -> "Accession": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return Accession(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid accession number.") diff --git a/ebl/fragmentarium/application/archaeology_schemas.py b/ebl/fragmentarium/application/archaeology_schemas.py index 9df906b2b..3ebe319bc 100644 --- a/ebl/fragmentarium/application/archaeology_schemas.py +++ b/ebl/fragmentarium/application/archaeology_schemas.py @@ -1,25 +1,24 @@ +from ebl.common.application.schemas import AbstractMuseumNumberSchema from ebl.bibliography.application.reference_schema import ReferenceSchema from ebl.fragmentarium.application.date_schemas import ( DateRangeSchema, DateWithNotesSchema, ) -from ebl.fragmentarium.domain.archaeology import Archaeology -from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ( + BuildingType, + ExcavationPlan, + Findspot, + ExcavationSite, +) from ebl.schemas import NameEnumField -from marshmallow import Schema, fields, post_load, validate -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite - +from marshmallow import Schema, fields, post_load -class ExcavationNumberSchema(Schema): - prefix = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) +class ExcavationNumberSchema(AbstractMuseumNumberSchema): @post_load def create_excavation_number(self, data, **kwargs) -> ExcavationNumber: return ExcavationNumber(**data) diff --git a/ebl/fragmentarium/application/fragment_info_schema.py b/ebl/fragmentarium/application/fragment_info_schema.py index 0ab4bf1da..84f262d1e 100644 --- a/ebl/fragmentarium/application/fragment_info_schema.py +++ b/ebl/fragmentarium/application/fragment_info_schema.py @@ -4,6 +4,7 @@ ReferenceSchema, ApiReferenceSchema, ) +from ebl.common.application.schemas import AccessionSchema from ebl.fragmentarium.application.fragment_schema import ScriptSchema from ebl.fragmentarium.application.genre_schema import GenreSchema from ebl.fragmentarium.domain.fragment_infos_pagination import FragmentInfosPagination @@ -13,7 +14,7 @@ class FragmentInfoSchema(Schema): number: fields.Field = fields.Nested(MuseumNumberSchema, required=True) - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) script = fields.Nested(ScriptSchema, required=True) description = fields.String(required=True) editor = fields.String(load_default="") diff --git a/ebl/fragmentarium/application/fragment_schema.py b/ebl/fragmentarium/application/fragment_schema.py index 758a67fbd..ebdf0d143 100644 --- a/ebl/fragmentarium/application/fragment_schema.py +++ b/ebl/fragmentarium/application/fragment_schema.py @@ -2,6 +2,7 @@ from marshmallow import Schema, fields, post_dump, post_load, EXCLUDE from ebl.bibliography.application.reference_schema import ReferenceSchema +from ebl.common.application.schemas import AccessionSchema from ebl.common.domain.period import Period, PeriodModifier from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema from ebl.fragmentarium.application.genre_schema import GenreSchema @@ -159,7 +160,7 @@ def omit_empty_numbers(self, data, **kwargs): class FragmentSchema(Schema): number = fields.Nested(MuseumNumberSchema, required=True, data_key="museumNumber") - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) edited_in_oracc_project = fields.String( required=True, data_key="editedInOraccProject" ) diff --git a/ebl/fragmentarium/domain/archaeology.py b/ebl/fragmentarium/domain/archaeology.py index 29050614d..957189a10 100644 --- a/ebl/fragmentarium/domain/archaeology.py +++ b/ebl/fragmentarium/domain/archaeology.py @@ -1,9 +1,18 @@ from typing import Optional, Sequence import attr from ebl.fragmentarium.domain.iso_date import DateWithNotes -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -from ebl.fragmentarium.domain.findspot import Findspot +from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.fragmentarium.domain.findspot import Findspot, ExcavationSite +import re + + +class ExcavationNumber(MuseumNumber): + @staticmethod + def of(source: str) -> "ExcavationNumber": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return ExcavationNumber(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid excavation number.") @attr.s(auto_attribs=True, frozen=True) diff --git a/ebl/fragmentarium/domain/findspot.py b/ebl/fragmentarium/domain/findspot.py index 0fe6b2182..e6a1bd29c 100644 --- a/ebl/fragmentarium/domain/findspot.py +++ b/ebl/fragmentarium/domain/findspot.py @@ -3,7 +3,10 @@ from enum import Enum, auto from ebl.bibliography.domain.reference import Reference from ebl.fragmentarium.domain.iso_date import DateRange -from ebl.corpus.domain.provenance import Provenance as ExcavationSite +from ebl.corpus.domain.provenance import Provenance + + +ExcavationSite = Provenance class BuildingType(Enum): diff --git a/ebl/fragmentarium/domain/fragment.py b/ebl/fragmentarium/domain/fragment.py index fe87462db..0a8162e14 100644 --- a/ebl/fragmentarium/domain/fragment.py +++ b/ebl/fragmentarium/domain/fragment.py @@ -5,6 +5,7 @@ import pydash from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.scopes import Scope from ebl.fragmentarium.application.matches.create_line_to_vec import create_line_to_vec @@ -113,7 +114,7 @@ class ExternalNumbers: @attr.s(auto_attribs=True, frozen=True) class Fragment: number: MuseumNumber - accession: str = "" + accession: Optional[Accession] = None edited_in_oracc_project: str = "" publication: str = "" description: str = "" diff --git a/ebl/fragmentarium/domain/fragment_info.py b/ebl/fragmentarium/domain/fragment_info.py index 72ab20883..6f1482c6e 100644 --- a/ebl/fragmentarium/domain/fragment_info.py +++ b/ebl/fragmentarium/domain/fragment_info.py @@ -3,6 +3,7 @@ import attr from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.fragmentarium.domain.fragment import Fragment, Genre, Script from ebl.fragmentarium.domain.record import RecordEntry, RecordType from ebl.transliteration.domain.museum_number import MuseumNumber @@ -12,7 +13,7 @@ @attr.s(frozen=True, auto_attribs=True) class FragmentInfo: number: MuseumNumber - accession: str + accession: Optional[Accession] script: Script description: str matching_lines: Optional[Text] diff --git a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py index 2c6b93505..8242e90d3 100644 --- a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py +++ b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py @@ -36,7 +36,7 @@ from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.infrastructure.collections import FRAGMENTS_COLLECTION -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is RETRIEVE_ALL_LIMIT = 1000 @@ -171,7 +171,7 @@ def query_by_museum_number( ): data = self._fragments.aggregate( [ - {"$match": museum_number_is(number)}, + {"$match": query_number_is(number)}, *( self._omit_text_lines() if exclude_lines @@ -206,7 +206,7 @@ def fetch_date(self, number: MuseumNumber) -> Optional[Date]: def fetch_scopes(self, number: MuseumNumber) -> List[Scope]: fragment = next( self._fragments.find_many( - museum_number_is(number), projection={"authorizedScopes": True} + query_number_is(number), projection={"authorizedScopes": True} ), {}, ) diff --git a/ebl/fragmentarium/infrastructure/queries.py b/ebl/fragmentarium/infrastructure/queries.py index 5cc0f12f3..f7addaf8e 100644 --- a/ebl/fragmentarium/infrastructure/queries.py +++ b/ebl/fragmentarium/infrastructure/queries.py @@ -1,5 +1,7 @@ from typing import List, Sequence +from ebl.common.domain.accession import Accession from ebl.common.domain.scopes import Scope +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.domain.record import RecordType @@ -9,7 +11,7 @@ FRAGMENTS_COLLECTION, FINDSPOTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is HAS_TRANSLITERATION: dict = {"text.lines.type": {"$exists": True}} NUMBER_OF_LATEST_TRANSLITERATIONS: int = 50 @@ -18,15 +20,17 @@ def fragment_is(fragment: Fragment) -> dict: - return museum_number_is(fragment.number) + return query_number_is(fragment.number) def number_is(number: str) -> dict: - or_ = [{"externalNumbers.cdliNumber": number}, {"accession": number}] - try: - or_.append(museum_number_is(MuseumNumber.of(number))) - except ValueError: - pass + or_ = [{"externalNumbers.cdliNumber": number}] + + for number_class in [MuseumNumber, Accession, ExcavationNumber]: + try: + or_.append(query_number_is(number_class.of(number))) + except ValueError: + pass return {"$or": or_} diff --git a/ebl/tests/common/test_accession.py b/ebl/tests/common/test_accession.py new file mode 100644 index 000000000..725a84415 --- /dev/null +++ b/ebl/tests/common/test_accession.py @@ -0,0 +1,24 @@ +import pytest +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession + + +ACCESSION = Accession("A", "38") +ACCESSION_DTO = {"prefix": "A", "number": "38", "suffix": ""} + + +def test_of(): + assert Accession.of("A.38") == ACCESSION + + +def test_of_invalid(): + with pytest.raises(ValueError, match="'invalid.' is not a valid accession number."): + Accession.of("invalid.") + + +def test_serialize(): + assert AccessionSchema().dump(ACCESSION) == ACCESSION_DTO + + +def test_deserialize(): + assert AccessionSchema().load(ACCESSION_DTO) == ACCESSION diff --git a/ebl/tests/factories/archaeology.py b/ebl/tests/factories/archaeology.py index 553049ee7..58a58fc65 100644 --- a/ebl/tests/factories/archaeology.py +++ b/ebl/tests/factories/archaeology.py @@ -1,9 +1,8 @@ -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import Archaeology, ExcavationNumber from ebl.fragmentarium.domain.iso_date import DateRange, DateWithNotes from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot from ebl.tests.factories.bibliography import ReferenceFactory from ebl.tests.factories.collections import TupleFactory -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber from ebl.corpus.domain.provenance import Provenance as ExcavationSite import factory.fuzzy diff --git a/ebl/tests/factories/fragment.py b/ebl/tests/factories/fragment.py index a11421ce7..8242036f3 100644 --- a/ebl/tests/factories/fragment.py +++ b/ebl/tests/factories/fragment.py @@ -2,6 +2,7 @@ import factory.fuzzy import random +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.project import ResearchProject @@ -181,7 +182,7 @@ class Meta: number = factory.Sequence(lambda n: MuseumNumber("X", str(n))) edited_in_oracc_project = factory.Sequence(lambda n: f"editedInOracc-{n}") - accession = factory.Sequence(lambda n: f"accession-{n}") + accession = factory.Sequence(lambda n: Accession("A", str(n))) museum = factory.Faker("word") collection = factory.Faker("word") publication = factory.Faker("sentence") diff --git a/ebl/tests/fragmentarium/test_dtos.py b/ebl/tests/fragmentarium/test_dtos.py index ece347e19..acb896d98 100644 --- a/ebl/tests/fragmentarium/test_dtos.py +++ b/ebl/tests/fragmentarium/test_dtos.py @@ -1,6 +1,7 @@ import attr import pydash import pytest +from ebl.common.application.schemas import AccessionSchema from ebl.errors import DataError from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema @@ -44,7 +45,7 @@ def expected_dto(lemmatized_fragment, has_photo): return pydash.omit_by( { "museumNumber": attr.asdict(lemmatized_fragment.number), - "accession": lemmatized_fragment.accession, + "accession": AccessionSchema().dump(lemmatized_fragment.accession), "editedInOraccProject": lemmatized_fragment.edited_in_oracc_project, "publication": lemmatized_fragment.publication, "description": lemmatized_fragment.description, @@ -131,7 +132,7 @@ def test_create_fragment_info_dto(): is_transliteration = record_entry.type == RecordType.TRANSLITERATION assert ApiFragmentInfoSchema().dump(info) == { "number": str(info.number), - "accession": info.accession, + "accession": AccessionSchema().dump(info.accession), "script": ScriptSchema().dump(info.script), "description": info.description, "matchingLines": TextSchema().dump(text), diff --git a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py index b6c2a70c3..0c7a548d0 100644 --- a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py +++ b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py @@ -4,16 +4,19 @@ import falcon import pytest from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ExcavationSite from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.web.dtos import create_response_dto from ebl.tests.factories.archaeology import DateWithNotesFactory from ebl.tests.factories.fragment import FragmentFactory -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -ARCHAEOLOGY = Archaeology(MuseumNumber("F", "1"), ExcavationSite.KALHU) + +ARCHAEOLOGY = Archaeology(ExcavationNumber("F", "1"), ExcavationSite.KALHU) ARCHAEOLOGIES = [ ARCHAEOLOGY, attr.evolve(ARCHAEOLOGY, site=None), diff --git a/ebl/tests/fragmentarium/test_fragment_schema.py b/ebl/tests/fragmentarium/test_fragment_schema.py index 412a4443c..47abbfb94 100644 --- a/ebl/tests/fragmentarium/test_fragment_schema.py +++ b/ebl/tests/fragmentarium/test_fragment_schema.py @@ -61,3 +61,16 @@ def test_scope_deserialization(): "authorizedScopes": SERIALIZED_SCOPES, } assert FragmentSchema().load(data).authorized_scopes == SCOPES + + +def test_empty_accession_serialization(): + fragment = FragmentFactory.build(accession=None) + assert "accession" not in FragmentSchema().dump(fragment) + + +def test_empty_accession_deserialization(): + data = { + **FragmentSchema().dump(FragmentFactory.build()), + "accession": None, + } + assert FragmentSchema().load(data).accession is None diff --git a/ebl/tests/fragmentarium/test_fragments_search_route.py b/ebl/tests/fragmentarium/test_fragments_search_route.py index 2b670b5b7..8aadddd07 100644 --- a/ebl/tests/fragmentarium/test_fragments_search_route.py +++ b/ebl/tests/fragmentarium/test_fragments_search_route.py @@ -54,7 +54,8 @@ def query_item_of( [ lambda fragment: str(fragment.number), lambda fragment: fragment.cdli_number, - lambda fragment: fragment.accession, + lambda fragment: str(fragment.accession), + lambda fragment: str(fragment.archaeology.excavation_number), ], ) def test_query_fragmentarium_number(get_number, client, fragmentarium): diff --git a/ebl/transliteration/application/museum_number_schema.py b/ebl/transliteration/application/museum_number_schema.py index fe2094a06..8d87c8d80 100644 --- a/ebl/transliteration/application/museum_number_schema.py +++ b/ebl/transliteration/application/museum_number_schema.py @@ -1,15 +1,9 @@ -from marshmallow import Schema, fields, post_load, validate - +from marshmallow import post_load from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.common.application.schemas import AbstractMuseumNumberSchema -class MuseumNumberSchema(Schema): - prefix = fields.String(required=True, validate=validate.Length(min=1)) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) - +class MuseumNumberSchema(AbstractMuseumNumberSchema): @post_load def create_museum_number(self, data, **kwargs) -> MuseumNumber: return MuseumNumber(**data) diff --git a/ebl/transliteration/infrastructure/mongo_parallel_repository.py b/ebl/transliteration/infrastructure/mongo_parallel_repository.py index fcb2c0b96..f47156b2f 100644 --- a/ebl/transliteration/infrastructure/mongo_parallel_repository.py +++ b/ebl/transliteration/infrastructure/mongo_parallel_repository.py @@ -15,7 +15,7 @@ CHAPTERS_COLLECTION, FRAGMENTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is class MongoParallelRepository(ParallelRepository): @@ -27,7 +27,7 @@ def __init__(self, database: Database): self._chapters = MongoCollection(database, CHAPTERS_COLLECTION) def fragment_exists(self, museum_number: MuseumNumber) -> bool: - return self._fragments.count_documents(museum_number_is(museum_number)) > 0 + return self._fragments.count_documents(query_number_is(museum_number)) > 0 def find_implicit_chapter(self, text_id: TextId) -> ChapterName: try: diff --git a/ebl/transliteration/infrastructure/queries.py b/ebl/transliteration/infrastructure/queries.py index c175480be..7d10ce613 100644 --- a/ebl/transliteration/infrastructure/queries.py +++ b/ebl/transliteration/infrastructure/queries.py @@ -1,7 +1,33 @@ +from functools import singledispatch +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession +from ebl.fragmentarium.application.archaeology_schemas import ExcavationNumberSchema +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber -def museum_number_is(number: MuseumNumber) -> dict: +@singledispatch +def query_number_is(number) -> dict: + raise ValueError(f"Unknown number type: {type(number)}") + + +@query_number_is.register +def _(number: MuseumNumber) -> dict: serialized = MuseumNumberSchema().dump(number) return {f"museumNumber.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(accession: Accession) -> dict: + serialized = AccessionSchema().dump(accession) + return {f"accession.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(number: ExcavationNumber) -> dict: + serialized = ExcavationNumberSchema().dump(number) + return { + f"archaeology.excavationNumber.{key}": value + for key, value in serialized.items() + }