From 4a144ba01a3ddda6b3497bcfd0475d37a76683c7 Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Tue, 21 Nov 2023 15:26:13 +0100 Subject: [PATCH 1/3] Update accession schema (#486) * add Accession class * add AbstractMuseumNumberSchema and AccessionSchema * refactor ExcavationNumberSchema * refactor ExcavationNumber * Refactor ExcavationSite * refactor MuseumNumberSchema * update Accession dtype * add Accession tests * refactor number queries; add query by excavation number * add ExcavationNumber to number query --- ebl/common/application/schemas.py | 16 +++++++++++ ebl/common/domain/accession.py | 15 ++++++++++ .../application/archaeology_schemas.py | 27 +++++++++--------- .../application/fragment_info_schema.py | 3 +- .../application/fragment_schema.py | 3 +- ebl/fragmentarium/domain/archaeology.py | 15 ++++++++-- ebl/fragmentarium/domain/findspot.py | 5 +++- ebl/fragmentarium/domain/fragment.py | 3 +- ebl/fragmentarium/domain/fragment_info.py | 3 +- .../mongo_fragment_repository.py | 6 ++-- ebl/fragmentarium/infrastructure/queries.py | 18 +++++++----- ebl/tests/common/test_accession.py | 24 ++++++++++++++++ ebl/tests/factories/archaeology.py | 3 +- ebl/tests/factories/fragment.py | 3 +- ebl/tests/fragmentarium/test_dtos.py | 5 ++-- .../test_fragment_archaeology_route.py | 11 +++++--- .../fragmentarium/test_fragment_schema.py | 13 +++++++++ .../test_fragments_search_route.py | 3 +- .../application/museum_number_schema.py | 12 ++------ .../mongo_parallel_repository.py | 4 +-- ebl/transliteration/infrastructure/queries.py | 28 ++++++++++++++++++- 21 files changed, 166 insertions(+), 54 deletions(-) create mode 100644 ebl/common/application/schemas.py create mode 100644 ebl/common/domain/accession.py create mode 100644 ebl/tests/common/test_accession.py diff --git a/ebl/common/application/schemas.py b/ebl/common/application/schemas.py new file mode 100644 index 000000000..dfe66163e --- /dev/null +++ b/ebl/common/application/schemas.py @@ -0,0 +1,16 @@ +from marshmallow import Schema, fields, validate, post_load +from ebl.common.domain.accession import Accession + + +class AbstractMuseumNumberSchema(Schema): + prefix = fields.String(required=True, validate=validate.Length(min=1)) + number = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) + + +class AccessionSchema(AbstractMuseumNumberSchema): + @post_load + def create_accession(self, data, **kwargs) -> Accession: + return Accession(**data) diff --git a/ebl/common/domain/accession.py b/ebl/common/domain/accession.py new file mode 100644 index 000000000..80c2252b7 --- /dev/null +++ b/ebl/common/domain/accession.py @@ -0,0 +1,15 @@ +from ebl.transliteration.domain.museum_number import MuseumNumber +import functools +import attr +import re + + +@functools.total_ordering +@attr.s(auto_attribs=True, frozen=True, order=False) +class Accession(MuseumNumber): + @staticmethod + def of(source: str) -> "Accession": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return Accession(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid accession number.") diff --git a/ebl/fragmentarium/application/archaeology_schemas.py b/ebl/fragmentarium/application/archaeology_schemas.py index 9df906b2b..3ebe319bc 100644 --- a/ebl/fragmentarium/application/archaeology_schemas.py +++ b/ebl/fragmentarium/application/archaeology_schemas.py @@ -1,25 +1,24 @@ +from ebl.common.application.schemas import AbstractMuseumNumberSchema from ebl.bibliography.application.reference_schema import ReferenceSchema from ebl.fragmentarium.application.date_schemas import ( DateRangeSchema, DateWithNotesSchema, ) -from ebl.fragmentarium.domain.archaeology import Archaeology -from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ( + BuildingType, + ExcavationPlan, + Findspot, + ExcavationSite, +) from ebl.schemas import NameEnumField -from marshmallow import Schema, fields, post_load, validate -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite - +from marshmallow import Schema, fields, post_load -class ExcavationNumberSchema(Schema): - prefix = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) +class ExcavationNumberSchema(AbstractMuseumNumberSchema): @post_load def create_excavation_number(self, data, **kwargs) -> ExcavationNumber: return ExcavationNumber(**data) diff --git a/ebl/fragmentarium/application/fragment_info_schema.py b/ebl/fragmentarium/application/fragment_info_schema.py index 0ab4bf1da..84f262d1e 100644 --- a/ebl/fragmentarium/application/fragment_info_schema.py +++ b/ebl/fragmentarium/application/fragment_info_schema.py @@ -4,6 +4,7 @@ ReferenceSchema, ApiReferenceSchema, ) +from ebl.common.application.schemas import AccessionSchema from ebl.fragmentarium.application.fragment_schema import ScriptSchema from ebl.fragmentarium.application.genre_schema import GenreSchema from ebl.fragmentarium.domain.fragment_infos_pagination import FragmentInfosPagination @@ -13,7 +14,7 @@ class FragmentInfoSchema(Schema): number: fields.Field = fields.Nested(MuseumNumberSchema, required=True) - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) script = fields.Nested(ScriptSchema, required=True) description = fields.String(required=True) editor = fields.String(load_default="") diff --git a/ebl/fragmentarium/application/fragment_schema.py b/ebl/fragmentarium/application/fragment_schema.py index 758a67fbd..ebdf0d143 100644 --- a/ebl/fragmentarium/application/fragment_schema.py +++ b/ebl/fragmentarium/application/fragment_schema.py @@ -2,6 +2,7 @@ from marshmallow import Schema, fields, post_dump, post_load, EXCLUDE from ebl.bibliography.application.reference_schema import ReferenceSchema +from ebl.common.application.schemas import AccessionSchema from ebl.common.domain.period import Period, PeriodModifier from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema from ebl.fragmentarium.application.genre_schema import GenreSchema @@ -159,7 +160,7 @@ def omit_empty_numbers(self, data, **kwargs): class FragmentSchema(Schema): number = fields.Nested(MuseumNumberSchema, required=True, data_key="museumNumber") - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) edited_in_oracc_project = fields.String( required=True, data_key="editedInOraccProject" ) diff --git a/ebl/fragmentarium/domain/archaeology.py b/ebl/fragmentarium/domain/archaeology.py index 29050614d..957189a10 100644 --- a/ebl/fragmentarium/domain/archaeology.py +++ b/ebl/fragmentarium/domain/archaeology.py @@ -1,9 +1,18 @@ from typing import Optional, Sequence import attr from ebl.fragmentarium.domain.iso_date import DateWithNotes -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -from ebl.fragmentarium.domain.findspot import Findspot +from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.fragmentarium.domain.findspot import Findspot, ExcavationSite +import re + + +class ExcavationNumber(MuseumNumber): + @staticmethod + def of(source: str) -> "ExcavationNumber": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return ExcavationNumber(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid excavation number.") @attr.s(auto_attribs=True, frozen=True) diff --git a/ebl/fragmentarium/domain/findspot.py b/ebl/fragmentarium/domain/findspot.py index 0fe6b2182..e6a1bd29c 100644 --- a/ebl/fragmentarium/domain/findspot.py +++ b/ebl/fragmentarium/domain/findspot.py @@ -3,7 +3,10 @@ from enum import Enum, auto from ebl.bibliography.domain.reference import Reference from ebl.fragmentarium.domain.iso_date import DateRange -from ebl.corpus.domain.provenance import Provenance as ExcavationSite +from ebl.corpus.domain.provenance import Provenance + + +ExcavationSite = Provenance class BuildingType(Enum): diff --git a/ebl/fragmentarium/domain/fragment.py b/ebl/fragmentarium/domain/fragment.py index fe87462db..0a8162e14 100644 --- a/ebl/fragmentarium/domain/fragment.py +++ b/ebl/fragmentarium/domain/fragment.py @@ -5,6 +5,7 @@ import pydash from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.scopes import Scope from ebl.fragmentarium.application.matches.create_line_to_vec import create_line_to_vec @@ -113,7 +114,7 @@ class ExternalNumbers: @attr.s(auto_attribs=True, frozen=True) class Fragment: number: MuseumNumber - accession: str = "" + accession: Optional[Accession] = None edited_in_oracc_project: str = "" publication: str = "" description: str = "" diff --git a/ebl/fragmentarium/domain/fragment_info.py b/ebl/fragmentarium/domain/fragment_info.py index 72ab20883..6f1482c6e 100644 --- a/ebl/fragmentarium/domain/fragment_info.py +++ b/ebl/fragmentarium/domain/fragment_info.py @@ -3,6 +3,7 @@ import attr from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.fragmentarium.domain.fragment import Fragment, Genre, Script from ebl.fragmentarium.domain.record import RecordEntry, RecordType from ebl.transliteration.domain.museum_number import MuseumNumber @@ -12,7 +13,7 @@ @attr.s(frozen=True, auto_attribs=True) class FragmentInfo: number: MuseumNumber - accession: str + accession: Optional[Accession] script: Script description: str matching_lines: Optional[Text] diff --git a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py index 2c6b93505..8242e90d3 100644 --- a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py +++ b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py @@ -36,7 +36,7 @@ from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.infrastructure.collections import FRAGMENTS_COLLECTION -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is RETRIEVE_ALL_LIMIT = 1000 @@ -171,7 +171,7 @@ def query_by_museum_number( ): data = self._fragments.aggregate( [ - {"$match": museum_number_is(number)}, + {"$match": query_number_is(number)}, *( self._omit_text_lines() if exclude_lines @@ -206,7 +206,7 @@ def fetch_date(self, number: MuseumNumber) -> Optional[Date]: def fetch_scopes(self, number: MuseumNumber) -> List[Scope]: fragment = next( self._fragments.find_many( - museum_number_is(number), projection={"authorizedScopes": True} + query_number_is(number), projection={"authorizedScopes": True} ), {}, ) diff --git a/ebl/fragmentarium/infrastructure/queries.py b/ebl/fragmentarium/infrastructure/queries.py index 5cc0f12f3..f7addaf8e 100644 --- a/ebl/fragmentarium/infrastructure/queries.py +++ b/ebl/fragmentarium/infrastructure/queries.py @@ -1,5 +1,7 @@ from typing import List, Sequence +from ebl.common.domain.accession import Accession from ebl.common.domain.scopes import Scope +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.domain.record import RecordType @@ -9,7 +11,7 @@ FRAGMENTS_COLLECTION, FINDSPOTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is HAS_TRANSLITERATION: dict = {"text.lines.type": {"$exists": True}} NUMBER_OF_LATEST_TRANSLITERATIONS: int = 50 @@ -18,15 +20,17 @@ def fragment_is(fragment: Fragment) -> dict: - return museum_number_is(fragment.number) + return query_number_is(fragment.number) def number_is(number: str) -> dict: - or_ = [{"externalNumbers.cdliNumber": number}, {"accession": number}] - try: - or_.append(museum_number_is(MuseumNumber.of(number))) - except ValueError: - pass + or_ = [{"externalNumbers.cdliNumber": number}] + + for number_class in [MuseumNumber, Accession, ExcavationNumber]: + try: + or_.append(query_number_is(number_class.of(number))) + except ValueError: + pass return {"$or": or_} diff --git a/ebl/tests/common/test_accession.py b/ebl/tests/common/test_accession.py new file mode 100644 index 000000000..725a84415 --- /dev/null +++ b/ebl/tests/common/test_accession.py @@ -0,0 +1,24 @@ +import pytest +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession + + +ACCESSION = Accession("A", "38") +ACCESSION_DTO = {"prefix": "A", "number": "38", "suffix": ""} + + +def test_of(): + assert Accession.of("A.38") == ACCESSION + + +def test_of_invalid(): + with pytest.raises(ValueError, match="'invalid.' is not a valid accession number."): + Accession.of("invalid.") + + +def test_serialize(): + assert AccessionSchema().dump(ACCESSION) == ACCESSION_DTO + + +def test_deserialize(): + assert AccessionSchema().load(ACCESSION_DTO) == ACCESSION diff --git a/ebl/tests/factories/archaeology.py b/ebl/tests/factories/archaeology.py index 553049ee7..58a58fc65 100644 --- a/ebl/tests/factories/archaeology.py +++ b/ebl/tests/factories/archaeology.py @@ -1,9 +1,8 @@ -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import Archaeology, ExcavationNumber from ebl.fragmentarium.domain.iso_date import DateRange, DateWithNotes from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot from ebl.tests.factories.bibliography import ReferenceFactory from ebl.tests.factories.collections import TupleFactory -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber from ebl.corpus.domain.provenance import Provenance as ExcavationSite import factory.fuzzy diff --git a/ebl/tests/factories/fragment.py b/ebl/tests/factories/fragment.py index a11421ce7..8242036f3 100644 --- a/ebl/tests/factories/fragment.py +++ b/ebl/tests/factories/fragment.py @@ -2,6 +2,7 @@ import factory.fuzzy import random +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.project import ResearchProject @@ -181,7 +182,7 @@ class Meta: number = factory.Sequence(lambda n: MuseumNumber("X", str(n))) edited_in_oracc_project = factory.Sequence(lambda n: f"editedInOracc-{n}") - accession = factory.Sequence(lambda n: f"accession-{n}") + accession = factory.Sequence(lambda n: Accession("A", str(n))) museum = factory.Faker("word") collection = factory.Faker("word") publication = factory.Faker("sentence") diff --git a/ebl/tests/fragmentarium/test_dtos.py b/ebl/tests/fragmentarium/test_dtos.py index ece347e19..acb896d98 100644 --- a/ebl/tests/fragmentarium/test_dtos.py +++ b/ebl/tests/fragmentarium/test_dtos.py @@ -1,6 +1,7 @@ import attr import pydash import pytest +from ebl.common.application.schemas import AccessionSchema from ebl.errors import DataError from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema @@ -44,7 +45,7 @@ def expected_dto(lemmatized_fragment, has_photo): return pydash.omit_by( { "museumNumber": attr.asdict(lemmatized_fragment.number), - "accession": lemmatized_fragment.accession, + "accession": AccessionSchema().dump(lemmatized_fragment.accession), "editedInOraccProject": lemmatized_fragment.edited_in_oracc_project, "publication": lemmatized_fragment.publication, "description": lemmatized_fragment.description, @@ -131,7 +132,7 @@ def test_create_fragment_info_dto(): is_transliteration = record_entry.type == RecordType.TRANSLITERATION assert ApiFragmentInfoSchema().dump(info) == { "number": str(info.number), - "accession": info.accession, + "accession": AccessionSchema().dump(info.accession), "script": ScriptSchema().dump(info.script), "description": info.description, "matchingLines": TextSchema().dump(text), diff --git a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py index b6c2a70c3..0c7a548d0 100644 --- a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py +++ b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py @@ -4,16 +4,19 @@ import falcon import pytest from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ExcavationSite from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.web.dtos import create_response_dto from ebl.tests.factories.archaeology import DateWithNotesFactory from ebl.tests.factories.fragment import FragmentFactory -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -ARCHAEOLOGY = Archaeology(MuseumNumber("F", "1"), ExcavationSite.KALHU) + +ARCHAEOLOGY = Archaeology(ExcavationNumber("F", "1"), ExcavationSite.KALHU) ARCHAEOLOGIES = [ ARCHAEOLOGY, attr.evolve(ARCHAEOLOGY, site=None), diff --git a/ebl/tests/fragmentarium/test_fragment_schema.py b/ebl/tests/fragmentarium/test_fragment_schema.py index 412a4443c..47abbfb94 100644 --- a/ebl/tests/fragmentarium/test_fragment_schema.py +++ b/ebl/tests/fragmentarium/test_fragment_schema.py @@ -61,3 +61,16 @@ def test_scope_deserialization(): "authorizedScopes": SERIALIZED_SCOPES, } assert FragmentSchema().load(data).authorized_scopes == SCOPES + + +def test_empty_accession_serialization(): + fragment = FragmentFactory.build(accession=None) + assert "accession" not in FragmentSchema().dump(fragment) + + +def test_empty_accession_deserialization(): + data = { + **FragmentSchema().dump(FragmentFactory.build()), + "accession": None, + } + assert FragmentSchema().load(data).accession is None diff --git a/ebl/tests/fragmentarium/test_fragments_search_route.py b/ebl/tests/fragmentarium/test_fragments_search_route.py index 2b670b5b7..8aadddd07 100644 --- a/ebl/tests/fragmentarium/test_fragments_search_route.py +++ b/ebl/tests/fragmentarium/test_fragments_search_route.py @@ -54,7 +54,8 @@ def query_item_of( [ lambda fragment: str(fragment.number), lambda fragment: fragment.cdli_number, - lambda fragment: fragment.accession, + lambda fragment: str(fragment.accession), + lambda fragment: str(fragment.archaeology.excavation_number), ], ) def test_query_fragmentarium_number(get_number, client, fragmentarium): diff --git a/ebl/transliteration/application/museum_number_schema.py b/ebl/transliteration/application/museum_number_schema.py index fe2094a06..8d87c8d80 100644 --- a/ebl/transliteration/application/museum_number_schema.py +++ b/ebl/transliteration/application/museum_number_schema.py @@ -1,15 +1,9 @@ -from marshmallow import Schema, fields, post_load, validate - +from marshmallow import post_load from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.common.application.schemas import AbstractMuseumNumberSchema -class MuseumNumberSchema(Schema): - prefix = fields.String(required=True, validate=validate.Length(min=1)) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) - +class MuseumNumberSchema(AbstractMuseumNumberSchema): @post_load def create_museum_number(self, data, **kwargs) -> MuseumNumber: return MuseumNumber(**data) diff --git a/ebl/transliteration/infrastructure/mongo_parallel_repository.py b/ebl/transliteration/infrastructure/mongo_parallel_repository.py index fcb2c0b96..f47156b2f 100644 --- a/ebl/transliteration/infrastructure/mongo_parallel_repository.py +++ b/ebl/transliteration/infrastructure/mongo_parallel_repository.py @@ -15,7 +15,7 @@ CHAPTERS_COLLECTION, FRAGMENTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is class MongoParallelRepository(ParallelRepository): @@ -27,7 +27,7 @@ def __init__(self, database: Database): self._chapters = MongoCollection(database, CHAPTERS_COLLECTION) def fragment_exists(self, museum_number: MuseumNumber) -> bool: - return self._fragments.count_documents(museum_number_is(museum_number)) > 0 + return self._fragments.count_documents(query_number_is(museum_number)) > 0 def find_implicit_chapter(self, text_id: TextId) -> ChapterName: try: diff --git a/ebl/transliteration/infrastructure/queries.py b/ebl/transliteration/infrastructure/queries.py index c175480be..7d10ce613 100644 --- a/ebl/transliteration/infrastructure/queries.py +++ b/ebl/transliteration/infrastructure/queries.py @@ -1,7 +1,33 @@ +from functools import singledispatch +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession +from ebl.fragmentarium.application.archaeology_schemas import ExcavationNumberSchema +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber -def museum_number_is(number: MuseumNumber) -> dict: +@singledispatch +def query_number_is(number) -> dict: + raise ValueError(f"Unknown number type: {type(number)}") + + +@query_number_is.register +def _(number: MuseumNumber) -> dict: serialized = MuseumNumberSchema().dump(number) return {f"museumNumber.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(accession: Accession) -> dict: + serialized = AccessionSchema().dump(accession) + return {f"accession.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(number: ExcavationNumber) -> dict: + serialized = ExcavationNumberSchema().dump(number) + return { + f"archaeology.excavationNumber.{key}": value + for key, value in serialized.items() + } From 75e6a313a4f213138111160e4c85dc42848493df Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Tue, 21 Nov 2023 17:17:17 +0100 Subject: [PATCH 2/3] Revert "Update accession schema (#486)" (#487) This reverts commit 4a144ba01a3ddda6b3497bcfd0475d37a76683c7. --- ebl/common/application/schemas.py | 16 ----------- ebl/common/domain/accession.py | 15 ---------- .../application/archaeology_schemas.py | 27 +++++++++--------- .../application/fragment_info_schema.py | 3 +- .../application/fragment_schema.py | 3 +- ebl/fragmentarium/domain/archaeology.py | 15 ++-------- ebl/fragmentarium/domain/findspot.py | 5 +--- ebl/fragmentarium/domain/fragment.py | 3 +- ebl/fragmentarium/domain/fragment_info.py | 3 +- .../mongo_fragment_repository.py | 6 ++-- ebl/fragmentarium/infrastructure/queries.py | 18 +++++------- ebl/tests/common/test_accession.py | 24 ---------------- ebl/tests/factories/archaeology.py | 3 +- ebl/tests/factories/fragment.py | 3 +- ebl/tests/fragmentarium/test_dtos.py | 5 ++-- .../test_fragment_archaeology_route.py | 11 +++----- .../fragmentarium/test_fragment_schema.py | 13 --------- .../test_fragments_search_route.py | 3 +- .../application/museum_number_schema.py | 12 ++++++-- .../mongo_parallel_repository.py | 4 +-- ebl/transliteration/infrastructure/queries.py | 28 +------------------ 21 files changed, 54 insertions(+), 166 deletions(-) delete mode 100644 ebl/common/application/schemas.py delete mode 100644 ebl/common/domain/accession.py delete mode 100644 ebl/tests/common/test_accession.py diff --git a/ebl/common/application/schemas.py b/ebl/common/application/schemas.py deleted file mode 100644 index dfe66163e..000000000 --- a/ebl/common/application/schemas.py +++ /dev/null @@ -1,16 +0,0 @@ -from marshmallow import Schema, fields, validate, post_load -from ebl.common.domain.accession import Accession - - -class AbstractMuseumNumberSchema(Schema): - prefix = fields.String(required=True, validate=validate.Length(min=1)) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) - - -class AccessionSchema(AbstractMuseumNumberSchema): - @post_load - def create_accession(self, data, **kwargs) -> Accession: - return Accession(**data) diff --git a/ebl/common/domain/accession.py b/ebl/common/domain/accession.py deleted file mode 100644 index 80c2252b7..000000000 --- a/ebl/common/domain/accession.py +++ /dev/null @@ -1,15 +0,0 @@ -from ebl.transliteration.domain.museum_number import MuseumNumber -import functools -import attr -import re - - -@functools.total_ordering -@attr.s(auto_attribs=True, frozen=True, order=False) -class Accession(MuseumNumber): - @staticmethod - def of(source: str) -> "Accession": - if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): - return Accession(match[1], match[2], match[3] or "") - else: - raise ValueError(f"'{source}' is not a valid accession number.") diff --git a/ebl/fragmentarium/application/archaeology_schemas.py b/ebl/fragmentarium/application/archaeology_schemas.py index 3ebe319bc..9df906b2b 100644 --- a/ebl/fragmentarium/application/archaeology_schemas.py +++ b/ebl/fragmentarium/application/archaeology_schemas.py @@ -1,24 +1,25 @@ -from ebl.common.application.schemas import AbstractMuseumNumberSchema from ebl.bibliography.application.reference_schema import ReferenceSchema from ebl.fragmentarium.application.date_schemas import ( DateRangeSchema, DateWithNotesSchema, ) -from ebl.fragmentarium.domain.archaeology import ( - Archaeology, - ExcavationNumber, -) -from ebl.fragmentarium.domain.findspot import ( - BuildingType, - ExcavationPlan, - Findspot, - ExcavationSite, -) +from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot from ebl.schemas import NameEnumField -from marshmallow import Schema, fields, post_load +from marshmallow import Schema, fields, post_load, validate +from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber +from ebl.corpus.domain.provenance import Provenance as ExcavationSite + +class ExcavationNumberSchema(Schema): + prefix = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + number = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) -class ExcavationNumberSchema(AbstractMuseumNumberSchema): @post_load def create_excavation_number(self, data, **kwargs) -> ExcavationNumber: return ExcavationNumber(**data) diff --git a/ebl/fragmentarium/application/fragment_info_schema.py b/ebl/fragmentarium/application/fragment_info_schema.py index 84f262d1e..0ab4bf1da 100644 --- a/ebl/fragmentarium/application/fragment_info_schema.py +++ b/ebl/fragmentarium/application/fragment_info_schema.py @@ -4,7 +4,6 @@ ReferenceSchema, ApiReferenceSchema, ) -from ebl.common.application.schemas import AccessionSchema from ebl.fragmentarium.application.fragment_schema import ScriptSchema from ebl.fragmentarium.application.genre_schema import GenreSchema from ebl.fragmentarium.domain.fragment_infos_pagination import FragmentInfosPagination @@ -14,7 +13,7 @@ class FragmentInfoSchema(Schema): number: fields.Field = fields.Nested(MuseumNumberSchema, required=True) - accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) + accession = fields.String(required=True) script = fields.Nested(ScriptSchema, required=True) description = fields.String(required=True) editor = fields.String(load_default="") diff --git a/ebl/fragmentarium/application/fragment_schema.py b/ebl/fragmentarium/application/fragment_schema.py index ebdf0d143..758a67fbd 100644 --- a/ebl/fragmentarium/application/fragment_schema.py +++ b/ebl/fragmentarium/application/fragment_schema.py @@ -2,7 +2,6 @@ from marshmallow import Schema, fields, post_dump, post_load, EXCLUDE from ebl.bibliography.application.reference_schema import ReferenceSchema -from ebl.common.application.schemas import AccessionSchema from ebl.common.domain.period import Period, PeriodModifier from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema from ebl.fragmentarium.application.genre_schema import GenreSchema @@ -160,7 +159,7 @@ def omit_empty_numbers(self, data, **kwargs): class FragmentSchema(Schema): number = fields.Nested(MuseumNumberSchema, required=True, data_key="museumNumber") - accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) + accession = fields.String(required=True) edited_in_oracc_project = fields.String( required=True, data_key="editedInOraccProject" ) diff --git a/ebl/fragmentarium/domain/archaeology.py b/ebl/fragmentarium/domain/archaeology.py index 957189a10..29050614d 100644 --- a/ebl/fragmentarium/domain/archaeology.py +++ b/ebl/fragmentarium/domain/archaeology.py @@ -1,18 +1,9 @@ from typing import Optional, Sequence import attr from ebl.fragmentarium.domain.iso_date import DateWithNotes -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.fragmentarium.domain.findspot import Findspot, ExcavationSite -import re - - -class ExcavationNumber(MuseumNumber): - @staticmethod - def of(source: str) -> "ExcavationNumber": - if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): - return ExcavationNumber(match[1], match[2], match[3] or "") - else: - raise ValueError(f"'{source}' is not a valid excavation number.") +from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber +from ebl.corpus.domain.provenance import Provenance as ExcavationSite +from ebl.fragmentarium.domain.findspot import Findspot @attr.s(auto_attribs=True, frozen=True) diff --git a/ebl/fragmentarium/domain/findspot.py b/ebl/fragmentarium/domain/findspot.py index e6a1bd29c..0fe6b2182 100644 --- a/ebl/fragmentarium/domain/findspot.py +++ b/ebl/fragmentarium/domain/findspot.py @@ -3,10 +3,7 @@ from enum import Enum, auto from ebl.bibliography.domain.reference import Reference from ebl.fragmentarium.domain.iso_date import DateRange -from ebl.corpus.domain.provenance import Provenance - - -ExcavationSite = Provenance +from ebl.corpus.domain.provenance import Provenance as ExcavationSite class BuildingType(Enum): diff --git a/ebl/fragmentarium/domain/fragment.py b/ebl/fragmentarium/domain/fragment.py index 0a8162e14..fe87462db 100644 --- a/ebl/fragmentarium/domain/fragment.py +++ b/ebl/fragmentarium/domain/fragment.py @@ -5,7 +5,6 @@ import pydash from ebl.bibliography.domain.reference import Reference -from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.scopes import Scope from ebl.fragmentarium.application.matches.create_line_to_vec import create_line_to_vec @@ -114,7 +113,7 @@ class ExternalNumbers: @attr.s(auto_attribs=True, frozen=True) class Fragment: number: MuseumNumber - accession: Optional[Accession] = None + accession: str = "" edited_in_oracc_project: str = "" publication: str = "" description: str = "" diff --git a/ebl/fragmentarium/domain/fragment_info.py b/ebl/fragmentarium/domain/fragment_info.py index 6f1482c6e..72ab20883 100644 --- a/ebl/fragmentarium/domain/fragment_info.py +++ b/ebl/fragmentarium/domain/fragment_info.py @@ -3,7 +3,6 @@ import attr from ebl.bibliography.domain.reference import Reference -from ebl.common.domain.accession import Accession from ebl.fragmentarium.domain.fragment import Fragment, Genre, Script from ebl.fragmentarium.domain.record import RecordEntry, RecordType from ebl.transliteration.domain.museum_number import MuseumNumber @@ -13,7 +12,7 @@ @attr.s(frozen=True, auto_attribs=True) class FragmentInfo: number: MuseumNumber - accession: Optional[Accession] + accession: str script: Script description: str matching_lines: Optional[Text] diff --git a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py index 8242e90d3..2c6b93505 100644 --- a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py +++ b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py @@ -36,7 +36,7 @@ from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.infrastructure.collections import FRAGMENTS_COLLECTION -from ebl.transliteration.infrastructure.queries import query_number_is +from ebl.transliteration.infrastructure.queries import museum_number_is RETRIEVE_ALL_LIMIT = 1000 @@ -171,7 +171,7 @@ def query_by_museum_number( ): data = self._fragments.aggregate( [ - {"$match": query_number_is(number)}, + {"$match": museum_number_is(number)}, *( self._omit_text_lines() if exclude_lines @@ -206,7 +206,7 @@ def fetch_date(self, number: MuseumNumber) -> Optional[Date]: def fetch_scopes(self, number: MuseumNumber) -> List[Scope]: fragment = next( self._fragments.find_many( - query_number_is(number), projection={"authorizedScopes": True} + museum_number_is(number), projection={"authorizedScopes": True} ), {}, ) diff --git a/ebl/fragmentarium/infrastructure/queries.py b/ebl/fragmentarium/infrastructure/queries.py index f7addaf8e..5cc0f12f3 100644 --- a/ebl/fragmentarium/infrastructure/queries.py +++ b/ebl/fragmentarium/infrastructure/queries.py @@ -1,7 +1,5 @@ from typing import List, Sequence -from ebl.common.domain.accession import Accession from ebl.common.domain.scopes import Scope -from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.domain.record import RecordType @@ -11,7 +9,7 @@ FRAGMENTS_COLLECTION, FINDSPOTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import query_number_is +from ebl.transliteration.infrastructure.queries import museum_number_is HAS_TRANSLITERATION: dict = {"text.lines.type": {"$exists": True}} NUMBER_OF_LATEST_TRANSLITERATIONS: int = 50 @@ -20,17 +18,15 @@ def fragment_is(fragment: Fragment) -> dict: - return query_number_is(fragment.number) + return museum_number_is(fragment.number) def number_is(number: str) -> dict: - or_ = [{"externalNumbers.cdliNumber": number}] - - for number_class in [MuseumNumber, Accession, ExcavationNumber]: - try: - or_.append(query_number_is(number_class.of(number))) - except ValueError: - pass + or_ = [{"externalNumbers.cdliNumber": number}, {"accession": number}] + try: + or_.append(museum_number_is(MuseumNumber.of(number))) + except ValueError: + pass return {"$or": or_} diff --git a/ebl/tests/common/test_accession.py b/ebl/tests/common/test_accession.py deleted file mode 100644 index 725a84415..000000000 --- a/ebl/tests/common/test_accession.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest -from ebl.common.application.schemas import AccessionSchema -from ebl.common.domain.accession import Accession - - -ACCESSION = Accession("A", "38") -ACCESSION_DTO = {"prefix": "A", "number": "38", "suffix": ""} - - -def test_of(): - assert Accession.of("A.38") == ACCESSION - - -def test_of_invalid(): - with pytest.raises(ValueError, match="'invalid.' is not a valid accession number."): - Accession.of("invalid.") - - -def test_serialize(): - assert AccessionSchema().dump(ACCESSION) == ACCESSION_DTO - - -def test_deserialize(): - assert AccessionSchema().load(ACCESSION_DTO) == ACCESSION diff --git a/ebl/tests/factories/archaeology.py b/ebl/tests/factories/archaeology.py index 58a58fc65..553049ee7 100644 --- a/ebl/tests/factories/archaeology.py +++ b/ebl/tests/factories/archaeology.py @@ -1,8 +1,9 @@ -from ebl.fragmentarium.domain.archaeology import Archaeology, ExcavationNumber +from ebl.fragmentarium.domain.archaeology import Archaeology from ebl.fragmentarium.domain.iso_date import DateRange, DateWithNotes from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot from ebl.tests.factories.bibliography import ReferenceFactory from ebl.tests.factories.collections import TupleFactory +from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber from ebl.corpus.domain.provenance import Provenance as ExcavationSite import factory.fuzzy diff --git a/ebl/tests/factories/fragment.py b/ebl/tests/factories/fragment.py index 8242036f3..a11421ce7 100644 --- a/ebl/tests/factories/fragment.py +++ b/ebl/tests/factories/fragment.py @@ -2,7 +2,6 @@ import factory.fuzzy import random -from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.project import ResearchProject @@ -182,7 +181,7 @@ class Meta: number = factory.Sequence(lambda n: MuseumNumber("X", str(n))) edited_in_oracc_project = factory.Sequence(lambda n: f"editedInOracc-{n}") - accession = factory.Sequence(lambda n: Accession("A", str(n))) + accession = factory.Sequence(lambda n: f"accession-{n}") museum = factory.Faker("word") collection = factory.Faker("word") publication = factory.Faker("sentence") diff --git a/ebl/tests/fragmentarium/test_dtos.py b/ebl/tests/fragmentarium/test_dtos.py index acb896d98..ece347e19 100644 --- a/ebl/tests/fragmentarium/test_dtos.py +++ b/ebl/tests/fragmentarium/test_dtos.py @@ -1,7 +1,6 @@ import attr import pydash import pytest -from ebl.common.application.schemas import AccessionSchema from ebl.errors import DataError from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema @@ -45,7 +44,7 @@ def expected_dto(lemmatized_fragment, has_photo): return pydash.omit_by( { "museumNumber": attr.asdict(lemmatized_fragment.number), - "accession": AccessionSchema().dump(lemmatized_fragment.accession), + "accession": lemmatized_fragment.accession, "editedInOraccProject": lemmatized_fragment.edited_in_oracc_project, "publication": lemmatized_fragment.publication, "description": lemmatized_fragment.description, @@ -132,7 +131,7 @@ def test_create_fragment_info_dto(): is_transliteration = record_entry.type == RecordType.TRANSLITERATION assert ApiFragmentInfoSchema().dump(info) == { "number": str(info.number), - "accession": AccessionSchema().dump(info.accession), + "accession": info.accession, "script": ScriptSchema().dump(info.script), "description": info.description, "matchingLines": TextSchema().dump(text), diff --git a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py index 0c7a548d0..b6c2a70c3 100644 --- a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py +++ b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py @@ -4,19 +4,16 @@ import falcon import pytest from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema -from ebl.fragmentarium.domain.archaeology import ( - Archaeology, - ExcavationNumber, -) -from ebl.fragmentarium.domain.findspot import ExcavationSite +from ebl.fragmentarium.domain.archaeology import Archaeology from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.web.dtos import create_response_dto from ebl.tests.factories.archaeology import DateWithNotesFactory from ebl.tests.factories.fragment import FragmentFactory +from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.corpus.domain.provenance import Provenance as ExcavationSite - -ARCHAEOLOGY = Archaeology(ExcavationNumber("F", "1"), ExcavationSite.KALHU) +ARCHAEOLOGY = Archaeology(MuseumNumber("F", "1"), ExcavationSite.KALHU) ARCHAEOLOGIES = [ ARCHAEOLOGY, attr.evolve(ARCHAEOLOGY, site=None), diff --git a/ebl/tests/fragmentarium/test_fragment_schema.py b/ebl/tests/fragmentarium/test_fragment_schema.py index 47abbfb94..412a4443c 100644 --- a/ebl/tests/fragmentarium/test_fragment_schema.py +++ b/ebl/tests/fragmentarium/test_fragment_schema.py @@ -61,16 +61,3 @@ def test_scope_deserialization(): "authorizedScopes": SERIALIZED_SCOPES, } assert FragmentSchema().load(data).authorized_scopes == SCOPES - - -def test_empty_accession_serialization(): - fragment = FragmentFactory.build(accession=None) - assert "accession" not in FragmentSchema().dump(fragment) - - -def test_empty_accession_deserialization(): - data = { - **FragmentSchema().dump(FragmentFactory.build()), - "accession": None, - } - assert FragmentSchema().load(data).accession is None diff --git a/ebl/tests/fragmentarium/test_fragments_search_route.py b/ebl/tests/fragmentarium/test_fragments_search_route.py index 8aadddd07..2b670b5b7 100644 --- a/ebl/tests/fragmentarium/test_fragments_search_route.py +++ b/ebl/tests/fragmentarium/test_fragments_search_route.py @@ -54,8 +54,7 @@ def query_item_of( [ lambda fragment: str(fragment.number), lambda fragment: fragment.cdli_number, - lambda fragment: str(fragment.accession), - lambda fragment: str(fragment.archaeology.excavation_number), + lambda fragment: fragment.accession, ], ) def test_query_fragmentarium_number(get_number, client, fragmentarium): diff --git a/ebl/transliteration/application/museum_number_schema.py b/ebl/transliteration/application/museum_number_schema.py index 8d87c8d80..fe2094a06 100644 --- a/ebl/transliteration/application/museum_number_schema.py +++ b/ebl/transliteration/application/museum_number_schema.py @@ -1,9 +1,15 @@ -from marshmallow import post_load +from marshmallow import Schema, fields, post_load, validate + from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.common.application.schemas import AbstractMuseumNumberSchema -class MuseumNumberSchema(AbstractMuseumNumberSchema): +class MuseumNumberSchema(Schema): + prefix = fields.String(required=True, validate=validate.Length(min=1)) + number = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) + @post_load def create_museum_number(self, data, **kwargs) -> MuseumNumber: return MuseumNumber(**data) diff --git a/ebl/transliteration/infrastructure/mongo_parallel_repository.py b/ebl/transliteration/infrastructure/mongo_parallel_repository.py index f47156b2f..fcb2c0b96 100644 --- a/ebl/transliteration/infrastructure/mongo_parallel_repository.py +++ b/ebl/transliteration/infrastructure/mongo_parallel_repository.py @@ -15,7 +15,7 @@ CHAPTERS_COLLECTION, FRAGMENTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import query_number_is +from ebl.transliteration.infrastructure.queries import museum_number_is class MongoParallelRepository(ParallelRepository): @@ -27,7 +27,7 @@ def __init__(self, database: Database): self._chapters = MongoCollection(database, CHAPTERS_COLLECTION) def fragment_exists(self, museum_number: MuseumNumber) -> bool: - return self._fragments.count_documents(query_number_is(museum_number)) > 0 + return self._fragments.count_documents(museum_number_is(museum_number)) > 0 def find_implicit_chapter(self, text_id: TextId) -> ChapterName: try: diff --git a/ebl/transliteration/infrastructure/queries.py b/ebl/transliteration/infrastructure/queries.py index 7d10ce613..c175480be 100644 --- a/ebl/transliteration/infrastructure/queries.py +++ b/ebl/transliteration/infrastructure/queries.py @@ -1,33 +1,7 @@ -from functools import singledispatch -from ebl.common.application.schemas import AccessionSchema -from ebl.common.domain.accession import Accession -from ebl.fragmentarium.application.archaeology_schemas import ExcavationNumberSchema -from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber -@singledispatch -def query_number_is(number) -> dict: - raise ValueError(f"Unknown number type: {type(number)}") - - -@query_number_is.register -def _(number: MuseumNumber) -> dict: +def museum_number_is(number: MuseumNumber) -> dict: serialized = MuseumNumberSchema().dump(number) return {f"museumNumber.{key}": value for key, value in serialized.items()} - - -@query_number_is.register -def _(accession: Accession) -> dict: - serialized = AccessionSchema().dump(accession) - return {f"accession.{key}": value for key, value in serialized.items()} - - -@query_number_is.register -def _(number: ExcavationNumber) -> dict: - serialized = ExcavationNumberSchema().dump(number) - return { - f"archaeology.excavationNumber.{key}": value - for key, value in serialized.items() - } From ed6c504918ec8c37e1d7cab72bfe425b94d73cd5 Mon Sep 17 00:00:00 2001 From: fsimonjetz Date: Wed, 22 Nov 2023 13:37:17 +0100 Subject: [PATCH 3/3] Update Accession Schema (#488) * add Accession class * add AbstractMuseumNumberSchema and AccessionSchema * refactor ExcavationNumberSchema * refactor ExcavationNumber * Refactor ExcavationSite * refactor MuseumNumberSchema * update Accession dtype * add Accession tests * refactor number queries; add query by excavation number * add ExcavationNumber to number query --- ebl/common/application/schemas.py | 16 +++++++++++ ebl/common/domain/accession.py | 15 ++++++++++ .../application/archaeology_schemas.py | 27 +++++++++--------- .../application/fragment_info_schema.py | 3 +- .../application/fragment_schema.py | 3 +- ebl/fragmentarium/domain/archaeology.py | 15 ++++++++-- ebl/fragmentarium/domain/findspot.py | 5 +++- ebl/fragmentarium/domain/fragment.py | 3 +- ebl/fragmentarium/domain/fragment_info.py | 3 +- .../mongo_fragment_repository.py | 6 ++-- ebl/fragmentarium/infrastructure/queries.py | 18 +++++++----- ebl/tests/common/test_accession.py | 24 ++++++++++++++++ ebl/tests/factories/archaeology.py | 3 +- ebl/tests/factories/fragment.py | 3 +- ebl/tests/fragmentarium/test_dtos.py | 5 ++-- .../test_fragment_archaeology_route.py | 11 +++++--- .../fragmentarium/test_fragment_schema.py | 13 +++++++++ .../test_fragments_search_route.py | 3 +- .../application/museum_number_schema.py | 12 ++------ .../mongo_parallel_repository.py | 4 +-- ebl/transliteration/infrastructure/queries.py | 28 ++++++++++++++++++- 21 files changed, 166 insertions(+), 54 deletions(-) create mode 100644 ebl/common/application/schemas.py create mode 100644 ebl/common/domain/accession.py create mode 100644 ebl/tests/common/test_accession.py diff --git a/ebl/common/application/schemas.py b/ebl/common/application/schemas.py new file mode 100644 index 000000000..dfe66163e --- /dev/null +++ b/ebl/common/application/schemas.py @@ -0,0 +1,16 @@ +from marshmallow import Schema, fields, validate, post_load +from ebl.common.domain.accession import Accession + + +class AbstractMuseumNumberSchema(Schema): + prefix = fields.String(required=True, validate=validate.Length(min=1)) + number = fields.String( + required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) + ) + suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) + + +class AccessionSchema(AbstractMuseumNumberSchema): + @post_load + def create_accession(self, data, **kwargs) -> Accession: + return Accession(**data) diff --git a/ebl/common/domain/accession.py b/ebl/common/domain/accession.py new file mode 100644 index 000000000..80c2252b7 --- /dev/null +++ b/ebl/common/domain/accession.py @@ -0,0 +1,15 @@ +from ebl.transliteration.domain.museum_number import MuseumNumber +import functools +import attr +import re + + +@functools.total_ordering +@attr.s(auto_attribs=True, frozen=True, order=False) +class Accession(MuseumNumber): + @staticmethod + def of(source: str) -> "Accession": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return Accession(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid accession number.") diff --git a/ebl/fragmentarium/application/archaeology_schemas.py b/ebl/fragmentarium/application/archaeology_schemas.py index 9df906b2b..3ebe319bc 100644 --- a/ebl/fragmentarium/application/archaeology_schemas.py +++ b/ebl/fragmentarium/application/archaeology_schemas.py @@ -1,25 +1,24 @@ +from ebl.common.application.schemas import AbstractMuseumNumberSchema from ebl.bibliography.application.reference_schema import ReferenceSchema from ebl.fragmentarium.application.date_schemas import ( DateRangeSchema, DateWithNotesSchema, ) -from ebl.fragmentarium.domain.archaeology import Archaeology -from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ( + BuildingType, + ExcavationPlan, + Findspot, + ExcavationSite, +) from ebl.schemas import NameEnumField -from marshmallow import Schema, fields, post_load, validate -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite - +from marshmallow import Schema, fields, post_load -class ExcavationNumberSchema(Schema): - prefix = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) +class ExcavationNumberSchema(AbstractMuseumNumberSchema): @post_load def create_excavation_number(self, data, **kwargs) -> ExcavationNumber: return ExcavationNumber(**data) diff --git a/ebl/fragmentarium/application/fragment_info_schema.py b/ebl/fragmentarium/application/fragment_info_schema.py index 0ab4bf1da..84f262d1e 100644 --- a/ebl/fragmentarium/application/fragment_info_schema.py +++ b/ebl/fragmentarium/application/fragment_info_schema.py @@ -4,6 +4,7 @@ ReferenceSchema, ApiReferenceSchema, ) +from ebl.common.application.schemas import AccessionSchema from ebl.fragmentarium.application.fragment_schema import ScriptSchema from ebl.fragmentarium.application.genre_schema import GenreSchema from ebl.fragmentarium.domain.fragment_infos_pagination import FragmentInfosPagination @@ -13,7 +14,7 @@ class FragmentInfoSchema(Schema): number: fields.Field = fields.Nested(MuseumNumberSchema, required=True) - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) script = fields.Nested(ScriptSchema, required=True) description = fields.String(required=True) editor = fields.String(load_default="") diff --git a/ebl/fragmentarium/application/fragment_schema.py b/ebl/fragmentarium/application/fragment_schema.py index 758a67fbd..ebdf0d143 100644 --- a/ebl/fragmentarium/application/fragment_schema.py +++ b/ebl/fragmentarium/application/fragment_schema.py @@ -2,6 +2,7 @@ from marshmallow import Schema, fields, post_dump, post_load, EXCLUDE from ebl.bibliography.application.reference_schema import ReferenceSchema +from ebl.common.application.schemas import AccessionSchema from ebl.common.domain.period import Period, PeriodModifier from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema from ebl.fragmentarium.application.genre_schema import GenreSchema @@ -159,7 +160,7 @@ def omit_empty_numbers(self, data, **kwargs): class FragmentSchema(Schema): number = fields.Nested(MuseumNumberSchema, required=True, data_key="museumNumber") - accession = fields.String(required=True) + accession = fields.Nested(AccessionSchema, allow_none=True, load_default=None) edited_in_oracc_project = fields.String( required=True, data_key="editedInOraccProject" ) diff --git a/ebl/fragmentarium/domain/archaeology.py b/ebl/fragmentarium/domain/archaeology.py index 29050614d..957189a10 100644 --- a/ebl/fragmentarium/domain/archaeology.py +++ b/ebl/fragmentarium/domain/archaeology.py @@ -1,9 +1,18 @@ from typing import Optional, Sequence import attr from ebl.fragmentarium.domain.iso_date import DateWithNotes -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -from ebl.fragmentarium.domain.findspot import Findspot +from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.fragmentarium.domain.findspot import Findspot, ExcavationSite +import re + + +class ExcavationNumber(MuseumNumber): + @staticmethod + def of(source: str) -> "ExcavationNumber": + if match := re.compile(r"(.+?)\.([^.]+)(?:\.([^.]+))?").fullmatch(source): + return ExcavationNumber(match[1], match[2], match[3] or "") + else: + raise ValueError(f"'{source}' is not a valid excavation number.") @attr.s(auto_attribs=True, frozen=True) diff --git a/ebl/fragmentarium/domain/findspot.py b/ebl/fragmentarium/domain/findspot.py index 0fe6b2182..e6a1bd29c 100644 --- a/ebl/fragmentarium/domain/findspot.py +++ b/ebl/fragmentarium/domain/findspot.py @@ -3,7 +3,10 @@ from enum import Enum, auto from ebl.bibliography.domain.reference import Reference from ebl.fragmentarium.domain.iso_date import DateRange -from ebl.corpus.domain.provenance import Provenance as ExcavationSite +from ebl.corpus.domain.provenance import Provenance + + +ExcavationSite = Provenance class BuildingType(Enum): diff --git a/ebl/fragmentarium/domain/fragment.py b/ebl/fragmentarium/domain/fragment.py index fe87462db..0a8162e14 100644 --- a/ebl/fragmentarium/domain/fragment.py +++ b/ebl/fragmentarium/domain/fragment.py @@ -5,6 +5,7 @@ import pydash from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.scopes import Scope from ebl.fragmentarium.application.matches.create_line_to_vec import create_line_to_vec @@ -113,7 +114,7 @@ class ExternalNumbers: @attr.s(auto_attribs=True, frozen=True) class Fragment: number: MuseumNumber - accession: str = "" + accession: Optional[Accession] = None edited_in_oracc_project: str = "" publication: str = "" description: str = "" diff --git a/ebl/fragmentarium/domain/fragment_info.py b/ebl/fragmentarium/domain/fragment_info.py index 72ab20883..6f1482c6e 100644 --- a/ebl/fragmentarium/domain/fragment_info.py +++ b/ebl/fragmentarium/domain/fragment_info.py @@ -3,6 +3,7 @@ import attr from ebl.bibliography.domain.reference import Reference +from ebl.common.domain.accession import Accession from ebl.fragmentarium.domain.fragment import Fragment, Genre, Script from ebl.fragmentarium.domain.record import RecordEntry, RecordType from ebl.transliteration.domain.museum_number import MuseumNumber @@ -12,7 +13,7 @@ @attr.s(frozen=True, auto_attribs=True) class FragmentInfo: number: MuseumNumber - accession: str + accession: Optional[Accession] script: Script description: str matching_lines: Optional[Text] diff --git a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py index 2c6b93505..8242e90d3 100644 --- a/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py +++ b/ebl/fragmentarium/infrastructure/mongo_fragment_repository.py @@ -36,7 +36,7 @@ from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.infrastructure.collections import FRAGMENTS_COLLECTION -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is RETRIEVE_ALL_LIMIT = 1000 @@ -171,7 +171,7 @@ def query_by_museum_number( ): data = self._fragments.aggregate( [ - {"$match": museum_number_is(number)}, + {"$match": query_number_is(number)}, *( self._omit_text_lines() if exclude_lines @@ -206,7 +206,7 @@ def fetch_date(self, number: MuseumNumber) -> Optional[Date]: def fetch_scopes(self, number: MuseumNumber) -> List[Scope]: fragment = next( self._fragments.find_many( - museum_number_is(number), projection={"authorizedScopes": True} + query_number_is(number), projection={"authorizedScopes": True} ), {}, ) diff --git a/ebl/fragmentarium/infrastructure/queries.py b/ebl/fragmentarium/infrastructure/queries.py index 5cc0f12f3..f7addaf8e 100644 --- a/ebl/fragmentarium/infrastructure/queries.py +++ b/ebl/fragmentarium/infrastructure/queries.py @@ -1,5 +1,7 @@ from typing import List, Sequence +from ebl.common.domain.accession import Accession from ebl.common.domain.scopes import Scope +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.domain.record import RecordType @@ -9,7 +11,7 @@ FRAGMENTS_COLLECTION, FINDSPOTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is HAS_TRANSLITERATION: dict = {"text.lines.type": {"$exists": True}} NUMBER_OF_LATEST_TRANSLITERATIONS: int = 50 @@ -18,15 +20,17 @@ def fragment_is(fragment: Fragment) -> dict: - return museum_number_is(fragment.number) + return query_number_is(fragment.number) def number_is(number: str) -> dict: - or_ = [{"externalNumbers.cdliNumber": number}, {"accession": number}] - try: - or_.append(museum_number_is(MuseumNumber.of(number))) - except ValueError: - pass + or_ = [{"externalNumbers.cdliNumber": number}] + + for number_class in [MuseumNumber, Accession, ExcavationNumber]: + try: + or_.append(query_number_is(number_class.of(number))) + except ValueError: + pass return {"$or": or_} diff --git a/ebl/tests/common/test_accession.py b/ebl/tests/common/test_accession.py new file mode 100644 index 000000000..725a84415 --- /dev/null +++ b/ebl/tests/common/test_accession.py @@ -0,0 +1,24 @@ +import pytest +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession + + +ACCESSION = Accession("A", "38") +ACCESSION_DTO = {"prefix": "A", "number": "38", "suffix": ""} + + +def test_of(): + assert Accession.of("A.38") == ACCESSION + + +def test_of_invalid(): + with pytest.raises(ValueError, match="'invalid.' is not a valid accession number."): + Accession.of("invalid.") + + +def test_serialize(): + assert AccessionSchema().dump(ACCESSION) == ACCESSION_DTO + + +def test_deserialize(): + assert AccessionSchema().load(ACCESSION_DTO) == ACCESSION diff --git a/ebl/tests/factories/archaeology.py b/ebl/tests/factories/archaeology.py index 553049ee7..58a58fc65 100644 --- a/ebl/tests/factories/archaeology.py +++ b/ebl/tests/factories/archaeology.py @@ -1,9 +1,8 @@ -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import Archaeology, ExcavationNumber from ebl.fragmentarium.domain.iso_date import DateRange, DateWithNotes from ebl.fragmentarium.domain.findspot import BuildingType, ExcavationPlan, Findspot from ebl.tests.factories.bibliography import ReferenceFactory from ebl.tests.factories.collections import TupleFactory -from ebl.transliteration.domain.museum_number import MuseumNumber as ExcavationNumber from ebl.corpus.domain.provenance import Provenance as ExcavationSite import factory.fuzzy diff --git a/ebl/tests/factories/fragment.py b/ebl/tests/factories/fragment.py index a11421ce7..8242036f3 100644 --- a/ebl/tests/factories/fragment.py +++ b/ebl/tests/factories/fragment.py @@ -2,6 +2,7 @@ import factory.fuzzy import random +from ebl.common.domain.accession import Accession from ebl.common.domain.period import Period, PeriodModifier from ebl.common.domain.project import ResearchProject @@ -181,7 +182,7 @@ class Meta: number = factory.Sequence(lambda n: MuseumNumber("X", str(n))) edited_in_oracc_project = factory.Sequence(lambda n: f"editedInOracc-{n}") - accession = factory.Sequence(lambda n: f"accession-{n}") + accession = factory.Sequence(lambda n: Accession("A", str(n))) museum = factory.Faker("word") collection = factory.Faker("word") publication = factory.Faker("sentence") diff --git a/ebl/tests/fragmentarium/test_dtos.py b/ebl/tests/fragmentarium/test_dtos.py index ece347e19..acb896d98 100644 --- a/ebl/tests/fragmentarium/test_dtos.py +++ b/ebl/tests/fragmentarium/test_dtos.py @@ -1,6 +1,7 @@ import attr import pydash import pytest +from ebl.common.application.schemas import AccessionSchema from ebl.errors import DataError from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema @@ -44,7 +45,7 @@ def expected_dto(lemmatized_fragment, has_photo): return pydash.omit_by( { "museumNumber": attr.asdict(lemmatized_fragment.number), - "accession": lemmatized_fragment.accession, + "accession": AccessionSchema().dump(lemmatized_fragment.accession), "editedInOraccProject": lemmatized_fragment.edited_in_oracc_project, "publication": lemmatized_fragment.publication, "description": lemmatized_fragment.description, @@ -131,7 +132,7 @@ def test_create_fragment_info_dto(): is_transliteration = record_entry.type == RecordType.TRANSLITERATION assert ApiFragmentInfoSchema().dump(info) == { "number": str(info.number), - "accession": info.accession, + "accession": AccessionSchema().dump(info.accession), "script": ScriptSchema().dump(info.script), "description": info.description, "matchingLines": TextSchema().dump(text), diff --git a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py index b6c2a70c3..0c7a548d0 100644 --- a/ebl/tests/fragmentarium/test_fragment_archaeology_route.py +++ b/ebl/tests/fragmentarium/test_fragment_archaeology_route.py @@ -4,16 +4,19 @@ import falcon import pytest from ebl.fragmentarium.application.archaeology_schemas import ArchaeologySchema -from ebl.fragmentarium.domain.archaeology import Archaeology +from ebl.fragmentarium.domain.archaeology import ( + Archaeology, + ExcavationNumber, +) +from ebl.fragmentarium.domain.findspot import ExcavationSite from ebl.fragmentarium.domain.fragment import Fragment from ebl.fragmentarium.web.dtos import create_response_dto from ebl.tests.factories.archaeology import DateWithNotesFactory from ebl.tests.factories.fragment import FragmentFactory -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.corpus.domain.provenance import Provenance as ExcavationSite -ARCHAEOLOGY = Archaeology(MuseumNumber("F", "1"), ExcavationSite.KALHU) + +ARCHAEOLOGY = Archaeology(ExcavationNumber("F", "1"), ExcavationSite.KALHU) ARCHAEOLOGIES = [ ARCHAEOLOGY, attr.evolve(ARCHAEOLOGY, site=None), diff --git a/ebl/tests/fragmentarium/test_fragment_schema.py b/ebl/tests/fragmentarium/test_fragment_schema.py index 412a4443c..47abbfb94 100644 --- a/ebl/tests/fragmentarium/test_fragment_schema.py +++ b/ebl/tests/fragmentarium/test_fragment_schema.py @@ -61,3 +61,16 @@ def test_scope_deserialization(): "authorizedScopes": SERIALIZED_SCOPES, } assert FragmentSchema().load(data).authorized_scopes == SCOPES + + +def test_empty_accession_serialization(): + fragment = FragmentFactory.build(accession=None) + assert "accession" not in FragmentSchema().dump(fragment) + + +def test_empty_accession_deserialization(): + data = { + **FragmentSchema().dump(FragmentFactory.build()), + "accession": None, + } + assert FragmentSchema().load(data).accession is None diff --git a/ebl/tests/fragmentarium/test_fragments_search_route.py b/ebl/tests/fragmentarium/test_fragments_search_route.py index 2b670b5b7..8aadddd07 100644 --- a/ebl/tests/fragmentarium/test_fragments_search_route.py +++ b/ebl/tests/fragmentarium/test_fragments_search_route.py @@ -54,7 +54,8 @@ def query_item_of( [ lambda fragment: str(fragment.number), lambda fragment: fragment.cdli_number, - lambda fragment: fragment.accession, + lambda fragment: str(fragment.accession), + lambda fragment: str(fragment.archaeology.excavation_number), ], ) def test_query_fragmentarium_number(get_number, client, fragmentarium): diff --git a/ebl/transliteration/application/museum_number_schema.py b/ebl/transliteration/application/museum_number_schema.py index fe2094a06..8d87c8d80 100644 --- a/ebl/transliteration/application/museum_number_schema.py +++ b/ebl/transliteration/application/museum_number_schema.py @@ -1,15 +1,9 @@ -from marshmallow import Schema, fields, post_load, validate - +from marshmallow import post_load from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.common.application.schemas import AbstractMuseumNumberSchema -class MuseumNumberSchema(Schema): - prefix = fields.String(required=True, validate=validate.Length(min=1)) - number = fields.String( - required=True, validate=(validate.Length(min=1), validate.ContainsNoneOf(".")) - ) - suffix = fields.String(required=True, validate=validate.ContainsNoneOf(".")) - +class MuseumNumberSchema(AbstractMuseumNumberSchema): @post_load def create_museum_number(self, data, **kwargs) -> MuseumNumber: return MuseumNumber(**data) diff --git a/ebl/transliteration/infrastructure/mongo_parallel_repository.py b/ebl/transliteration/infrastructure/mongo_parallel_repository.py index fcb2c0b96..f47156b2f 100644 --- a/ebl/transliteration/infrastructure/mongo_parallel_repository.py +++ b/ebl/transliteration/infrastructure/mongo_parallel_repository.py @@ -15,7 +15,7 @@ CHAPTERS_COLLECTION, FRAGMENTS_COLLECTION, ) -from ebl.transliteration.infrastructure.queries import museum_number_is +from ebl.transliteration.infrastructure.queries import query_number_is class MongoParallelRepository(ParallelRepository): @@ -27,7 +27,7 @@ def __init__(self, database: Database): self._chapters = MongoCollection(database, CHAPTERS_COLLECTION) def fragment_exists(self, museum_number: MuseumNumber) -> bool: - return self._fragments.count_documents(museum_number_is(museum_number)) > 0 + return self._fragments.count_documents(query_number_is(museum_number)) > 0 def find_implicit_chapter(self, text_id: TextId) -> ChapterName: try: diff --git a/ebl/transliteration/infrastructure/queries.py b/ebl/transliteration/infrastructure/queries.py index c175480be..7d10ce613 100644 --- a/ebl/transliteration/infrastructure/queries.py +++ b/ebl/transliteration/infrastructure/queries.py @@ -1,7 +1,33 @@ +from functools import singledispatch +from ebl.common.application.schemas import AccessionSchema +from ebl.common.domain.accession import Accession +from ebl.fragmentarium.application.archaeology_schemas import ExcavationNumberSchema +from ebl.fragmentarium.domain.archaeology import ExcavationNumber from ebl.transliteration.application.museum_number_schema import MuseumNumberSchema from ebl.transliteration.domain.museum_number import MuseumNumber -def museum_number_is(number: MuseumNumber) -> dict: +@singledispatch +def query_number_is(number) -> dict: + raise ValueError(f"Unknown number type: {type(number)}") + + +@query_number_is.register +def _(number: MuseumNumber) -> dict: serialized = MuseumNumberSchema().dump(number) return {f"museumNumber.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(accession: Accession) -> dict: + serialized = AccessionSchema().dump(accession) + return {f"accession.{key}": value for key, value in serialized.items()} + + +@query_number_is.register +def _(number: ExcavationNumber) -> dict: + serialized = ExcavationNumberSchema().dump(number) + return { + f"archaeology.excavationNumber.{key}": value + for key, value in serialized.items() + }