diff --git a/tests/test_models.py b/tests/test_models.py index ec2600c..d87a73d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,15 +1,6 @@ import pytest -from transmogrifier.models import ( - AlternateTitle, - Contributor, - Date, - DateRange, - Identifier, - Link, - Note, - Subject, -) +import transmogrifier.models as timdex def test_timdex_record_required_fields_only(timdex_record_required_fields): @@ -45,14 +36,16 @@ def test_timdex_record_required_fields_only(timdex_record_required_fields): def test_timdex_record_required_subfields_only(timdex_record_required_fields): - timdex_record_required_fields.contributors = [Contributor(value="Smith, Jane")] - timdex_record_required_fields.identifiers = [Identifier(value="123")] + timdex_record_required_fields.contributors = [timdex.Contributor(value="Smith, Jane")] + timdex_record_required_fields.identifiers = [timdex.Identifier(value="123")] timdex_record_required_fields.links = [ - Link(url="http://dx.doi.org/10.1007/978-94-017-0726-8") + timdex.Link(url="http://dx.doi.org/10.1007/978-94-017-0726-8") ] - timdex_record_required_fields.notes = [Note(value=["This book is awesome"])] - timdex_record_required_fields.alternate_titles = [AlternateTitle(value="Alt Title")] - timdex_record_required_fields.subjects = [Subject(value=["Stuff"])] + timdex_record_required_fields.notes = [timdex.Note(value=["This book is awesome"])] + timdex_record_required_fields.alternate_titles = [ + timdex.AlternateTitle(value="Alt Title") + ] + timdex_record_required_fields.subjects = [timdex.Subject(value=["Stuff"])] assert timdex_record_required_fields.source == "A Cool Repository" assert timdex_record_required_fields.source_link == "https://example.com/123" assert timdex_record_required_fields.timdex_record_id == "cool-repo:123" @@ -337,8 +330,10 @@ def test_timdex_record_date_range_both_gt_and_gte_raises_error( match="range may have a 'gt' or 'gte' value, but not both;", ): timdex_record_required_fields.dates = [ - Date( - range=DateRange(gt="2019-01-01", gte="2019-01-01", lt="2019-06-30"), + timdex.Date( + range=timdex.DateRange( + gt="2019-01-01", gte="2019-01-01", lt="2019-06-30" + ), ) ] @@ -350,8 +345,10 @@ def test_timdex_record_date_range_both_lt_and_lte_raises_error( ValueError, match="range may have a 'lt' or 'lte' value, but not both;" ): timdex_record_required_fields.dates = [ - Date( - range=DateRange(gt="2019-01-01", lt="2019-06-30", lte="2019-06-30"), + timdex.Date( + range=timdex.DateRange( + gt="2019-01-01", lt="2019-06-30", lte="2019-06-30" + ), ) ] @@ -374,3 +371,239 @@ def test_timdex_record_not_a_list_raises_error(timdex_record_required_fields): match="'dates' must be ", ): timdex_record_required_fields.dates = "test" + + +def test_timdex_record_dedupe_alternate_titles(timdex_record_required_fields): + timdex_record_required_fields.alternate_titles = [ + timdex.AlternateTitle(value="My Octopus Teacher"), + timdex.AlternateTitle(value="My Octopus Teacher"), + ] + assert timdex_record_required_fields.alternate_titles == [ + timdex.AlternateTitle(value="My Octopus Teacher") + ] + + +def test_timdex_record_dedupe_call_numbers(timdex_record_required_fields): + timdex_record_required_fields.call_numbers = ["a", "a"] + assert timdex_record_required_fields.call_numbers == ["a"] + + +def test_timdex_record_dedupe_content_type(timdex_record_required_fields): + timdex_record_required_fields.content_type = ["thesis", "thesis"] + assert timdex_record_required_fields.content_type == ["thesis"] + + +def test_timdex_record_dedupe_contents(timdex_record_required_fields): + timdex_record_required_fields.contents = ["Chapter 1", "Chapter 1"] + assert timdex_record_required_fields.contents == ["Chapter 1"] + + +def test_timdex_record_dedupe_contributors(timdex_record_required_fields): + timdex_record_required_fields.contributors = [ + timdex.Contributor( + value="Joe Hisaishi", + affiliation=["Kunitachi College of Music"], + kind="Composer", + ), + timdex.Contributor( + value="Joe Hisaishi", + affiliation=["Kunitachi College of Music"], + kind="Composer", + ), + ] + assert timdex_record_required_fields.contributors == [ + timdex.Contributor( + value="Joe Hisaishi", + affiliation=["Kunitachi College of Music"], + kind="Composer", + ) + ] + + +def test_timdex_record_dedupe_dates(timdex_record_required_fields): + timdex_record_required_fields.dates = [ + timdex.Date(value="2022-01-01", kind="Publication date"), + timdex.Date(value="2022-01-01", kind="Publication date"), + timdex.Date( + range=timdex.DateRange(gt="2019-01-01", lt="2019-06-30"), + ), + timdex.Date( + range=timdex.DateRange(gt="2019-01-01", lt="2019-06-30"), + ), + ] + assert timdex_record_required_fields.dates == [ + timdex.Date(value="2022-01-01", kind="Publication date"), + timdex.Date( + range=timdex.DateRange(gt="2019-01-01", lt="2019-06-30"), + ), + ] + + +def test_timdex_record_dedupe_file_formats(timdex_record_required_fields): + timdex_record_required_fields.file_formats = [ + "application/pdf", + "application/pdf", + ] + assert timdex_record_required_fields.file_formats == ["application/pdf"] + + +def test_timdex_record_dedupe_funding_information(timdex_record_required_fields): + timdex_record_required_fields.funding_information = [ + timdex.Funder(funder_name="NPR Foundation"), + timdex.Funder(funder_name="NPR Foundation"), + ] + assert timdex_record_required_fields.funding_information == [ + timdex.Funder(funder_name="NPR Foundation") + ] + + +def test_timdex_record_dedupe_holdings(timdex_record_required_fields): + timdex_record_required_fields.holdings = [ + timdex.Holding( + call_number="PL2687.L8.A28 1994", + collection="Stacks", + format="Print volume", + location="Hayden Library", + ), + timdex.Holding( + call_number="PL2687.L8.A28 1994", + collection="Stacks", + format="Print volume", + location="Hayden Library", + ), + ] + assert timdex_record_required_fields.holdings == [ + timdex.Holding( + call_number="PL2687.L8.A28 1994", + collection="Stacks", + format="Print volume", + location="Hayden Library", + ) + ] + + +def test_timdex_record_dedupe_identifiers(timdex_record_required_fields): + timdex_record_required_fields.identifiers = [ + timdex.Identifier(value="9781250185969. hardcover", kind="ISBN"), + timdex.Identifier(value="9781250185969. hardcover", kind="ISBN"), + ] + assert timdex_record_required_fields.identifiers == [ + timdex.Identifier(value="9781250185969. hardcover", kind="ISBN") + ] + + +def test_timdex_record_dedupe_languages(timdex_record_required_fields): + timdex_record_required_fields.languages = ["Spanish", "Spanish"] + assert timdex_record_required_fields.languages == ["Spanish"] + + +def test_timdex_record_dedupe_links(timdex_record_required_fields): + timdex_record_required_fields.links = [ + timdex.Link( + url="https://geodata.libraries.mit.edu/record/gismit" + ":GISPORTAL_GISOWNER01_BOSTONWATER95", + kind="Website", + text="Website", + ), + timdex.Link( + url="https://geodata.libraries.mit.edu/record/gismit" + ":GISPORTAL_GISOWNER01_BOSTONWATER95", + kind="Website", + text="Website", + ), + ] + assert timdex_record_required_fields.links == [ + timdex.Link( + url="https://geodata.libraries.mit.edu/record/gismit" + ":GISPORTAL_GISOWNER01_BOSTONWATER95", + kind="Website", + text="Website", + ) + ] + + +def test_timdex_record_dedupe_locations(timdex_record_required_fields): + timdex_record_required_fields.locations = [ + timdex.Location(value="One Place", kind="Place of Publication"), + timdex.Location(value="One Place", kind="Place of Publication"), + ] + assert timdex_record_required_fields.locations == [ + timdex.Location(value="One Place", kind="Place of Publication") + ] + + +def test_timdex_record_dedupe_notes(timdex_record_required_fields): + timdex_record_required_fields.notes = [ + timdex.Note(value=["Survey Data"], kind="Datacite resource type"), + timdex.Note(value=["Survey Data"], kind="Datacite resource type"), + ] + assert timdex_record_required_fields.notes == [ + timdex.Note(value=["Survey Data"], kind="Datacite resource type"), + ] + + +def test_timdex_record_dedupe_publication_frequency(timdex_record_required_fields): + timdex_record_required_fields.publication_frequency = [ + "Three times a year", + "Three times a year", + ] + assert timdex_record_required_fields.publication_frequency == ["Three times a year"] + + +def test_timdex_record_dedupe_publishers(timdex_record_required_fields): + timdex_record_required_fields.publishers = [ + timdex.Publisher(name="Harvard Dataverse"), + timdex.Publisher(name="Harvard Dataverse"), + ] + assert timdex_record_required_fields.publishers == [ + timdex.Publisher(name="Harvard Dataverse") + ] + + +def test_timdex_record_dedupe_related_items(timdex_record_required_fields): + timdex_record_required_fields.related_items = [ + timdex.RelatedItem(description="Nature Communications", relationship="host"), + timdex.RelatedItem(description="Nature Communications", relationship="host"), + ] + assert timdex_record_required_fields.related_items == [ + timdex.RelatedItem(description="Nature Communications", relationship="host") + ] + + +def test_timdex_record_dedupe_rights(timdex_record_required_fields): + timdex_record_required_fields.rights = [ + timdex.Rights(description="MIT authentication required", kind="Access to files"), + timdex.Rights(description="MIT authentication required", kind="Access to files"), + ] + assert timdex_record_required_fields.rights == [ + timdex.Rights(description="MIT authentication required", kind="Access to files") + ] + + +def test_timdex_record_dedupe_subjects(timdex_record_required_fields): + timdex_record_required_fields.subjects = [ + timdex.Subject( + value=["Social Sciences", "Educational materials"], + kind="Subject scheme not provided", + ), + timdex.Subject( + value=["Social Sciences", "Educational materials"], + kind="Subject scheme not provided", + ), + ] + assert timdex_record_required_fields.subjects == [ + timdex.Subject( + value=["Social Sciences", "Educational materials"], + kind="Subject scheme not provided", + ) + ] + + +def test_timdex_record_dedupe_summary(timdex_record_required_fields): + timdex_record_required_fields.summary = [ + "Mitochondria is the powerhouse of the cell.", + "Mitochondria is the powerhouse of the cell.", + ] + assert timdex_record_required_fields.summary == [ + "Mitochondria is the powerhouse of the cell." + ] diff --git a/transmogrifier/models.py b/transmogrifier/models.py index f3ccc0b..6cc7bf1 100644 --- a/transmogrifier/models.py +++ b/transmogrifier/models.py @@ -35,6 +35,12 @@ def list_of(item_type: Any) -> Callable: # noqa: ANN401 ) +def dedupe(item_list: list | Any) -> list | None: # noqa: ANN401 + if not isinstance(item_list, list): + return item_list + return list(dict.fromkeys(item_list)) + + def not_empty( _instance: "TimdexRecord", attribute: "attrs.Attribute", value: "list" ) -> None: @@ -43,11 +49,26 @@ def not_empty( raise ValueError(message) +@define +class ListField: + def __hash__(self) -> int: + """Hash method to create unique identifier for Location objects.""" + values = tuple( + [ + tuple(attrib) if isinstance(attrib, list) else attrib + for attrib in attrs.astuple(self) + ] + ) + return hash(values) + + @define class AlternateTitle: value: str = field(validator=instance_of(str)) # Required subfield kind: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Contributor: @@ -59,6 +80,8 @@ class Contributor: default=None, validator=optional(instance_of(bool)) ) + __hash__ = ListField.__hash__ + @define class DateRange: @@ -73,10 +96,13 @@ class Date: kind: str | None = field(default=None, validator=optional(instance_of(str))) note: str | None = field(default=None, validator=optional(instance_of(str))) range: DateRange | None = field( # type: ignore[misc] - default=None, validator=[optional(instance_of(DateRange)), check_range] + default=None, + validator=[optional(instance_of(DateRange)), check_range], ) value: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Funder: @@ -90,6 +116,8 @@ class Funder: award_number: str | None = field(default=None, validator=optional(instance_of(str))) award_uri: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Holding: @@ -99,12 +127,16 @@ class Holding: location: str | None = field(default=None, validator=optional(instance_of(str))) note: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Identifier: value: str = field(validator=instance_of(str)) # Required subfield kind: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Link: @@ -113,6 +145,8 @@ class Link: restrictions: str | None = field(default=None, validator=optional(instance_of(str))) text: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Location: @@ -120,12 +154,16 @@ class Location: kind: str | None = field(default=None, validator=optional(instance_of(str))) geoshape: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Note: value: list[str] = field(validator=list_of(str)) # Required subfield kind: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Publisher: @@ -133,6 +171,8 @@ class Publisher: date: str | None = field(default=None, validator=optional(instance_of(str))) location: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class RelatedItem: @@ -141,6 +181,8 @@ class RelatedItem: relationship: str | None = field(default=None, validator=optional(instance_of(str))) uri: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Rights: @@ -148,12 +190,16 @@ class Rights: kind: str | None = field(default=None, validator=optional(instance_of(str))) uri: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class Subject: value: list[str] = field(validator=list_of(str)) # Required subfield kind: str | None = field(default=None, validator=optional(instance_of(str))) + __hash__ = ListField.__hash__ + @define class TimdexRecord: @@ -165,54 +211,74 @@ class TimdexRecord: # Optional fields alternate_titles: list[AlternateTitle] | None = field( - default=None, validator=optional(list_of(AlternateTitle)) + default=None, converter=dedupe, validator=optional(list_of(AlternateTitle)) + ) + call_numbers: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) ) - call_numbers: list[str] | None = field(default=None, validator=optional(list_of(str))) citation: str | None = field(default=None, validator=optional(instance_of(str))) - content_type: list[str] | None = field(default=None, validator=optional(list_of(str))) - contents: list[str] | None = field(default=None, validator=optional(list_of(str))) + content_type: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) + ) + contents: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) + ) contributors: list[Contributor] | None = field( - default=None, validator=optional(list_of(Contributor)) + default=None, converter=dedupe, validator=optional(list_of(Contributor)) + ) + dates: list[Date] | None = field( + default=None, converter=dedupe, validator=optional(list_of(Date)) ) - dates: list[Date] | None = field(default=None, validator=optional(list_of(Date))) edition: str | None = field(default=None, validator=optional(instance_of(str))) - file_formats: list[str] | None = field(default=None, validator=optional(list_of(str))) + file_formats: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) + ) format: str | None = field(default=None, validator=optional(instance_of(str))) funding_information: list[Funder] | None = field( - default=None, validator=optional(list_of(Funder)) + default=None, converter=dedupe, validator=optional(list_of(Funder)) ) holdings: list[Holding] | None = field( - default=None, validator=optional(list_of(Holding)) + default=None, converter=dedupe, validator=optional(list_of(Holding)) ) identifiers: list[Identifier] | None = field( - default=None, validator=optional(list_of(Identifier)) + default=None, converter=dedupe, validator=optional(list_of(Identifier)) + ) + languages: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) + ) + links: list[Link] | None = field( + default=None, converter=dedupe, validator=optional(list_of(Link)) ) - languages: list[str] | None = field(default=None, validator=optional(list_of(str))) - links: list[Link] | None = field(default=None, validator=optional(list_of(Link))) literary_form: str | None = field(default=None, validator=optional(instance_of(str))) locations: list[Location] | None = field( - default=None, validator=optional(list_of(Location)) + default=None, converter=dedupe, validator=optional(list_of(Location)) + ) + notes: list[Note] | None = field( + default=None, converter=dedupe, validator=optional(list_of(Note)) ) - notes: list[Note] | None = field(default=None, validator=optional(list_of(Note))) numbering: str | None = field(default=None, validator=optional(instance_of(str))) physical_description: str | None = field( default=None, validator=optional(instance_of(str)) ) provider: str | None = field(default=None, validator=optional(instance_of(str))) publication_frequency: list[str] | None = field( - default=None, validator=optional(list_of(str)) + default=None, converter=dedupe, validator=optional(list_of(str)) ) publishers: list[Publisher] | None = field( - default=None, validator=optional(list_of(Publisher)) + default=None, converter=dedupe, validator=optional(list_of(Publisher)) ) related_items: list[RelatedItem] | None = field( - default=None, validator=optional(list_of(RelatedItem)) + default=None, converter=dedupe, validator=optional(list_of(RelatedItem)) + ) + rights: list[Rights] | None = field( + default=None, converter=dedupe, validator=optional(list_of(Rights)) ) - rights: list[Rights] | None = field(default=None, validator=optional(list_of(Rights))) subjects: list[Subject] | None = field( - default=None, validator=optional(list_of(Subject)) + default=None, converter=dedupe, validator=optional(list_of(Subject)) + ) + summary: list[str] | None = field( + default=None, converter=dedupe, validator=optional(list_of(str)) ) - summary: list[str] | None = field(default=None, validator=optional(list_of(str))) def asdict(self) -> dict[str, Any]: return asdict(self, filter=lambda _, value: value is not None)