From 869522b06febb13d6a3796cbb89b6af315e1d7d3 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 18 Oct 2024 14:42:13 +0200 Subject: [PATCH 1/3] Support for multiple agents when parsing The logic for parsing and serializing has been updated to support multiple instances by default. The behaviour in the legacy profiles of just keeping one of the instances has been kept for backwards compatibility but when using `dcat_ap_scheming` and `dcat_ap_3` profiles multiple instances of dct:publisher (although this goes against the DCAT AP spec) and dct:creator are supported. --- ckanext/dcat/profiles/base.py | 21 +++- ckanext/dcat/profiles/euro_dcat_ap_base.py | 33 ++--- .../dcat/profiles/euro_dcat_ap_scheming.py | 119 ++++++++++-------- .../tests/profiles/base/test_base_profile.py | 4 +- 4 files changed, 105 insertions(+), 72 deletions(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index ce942720..01df0460 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -421,9 +421,10 @@ def _insert_or_update_temporal(self, dataset_dict, key, value): else: dataset_dict["extras"].append({"key": key, "value": value}) - def _agent_details(self, subject, predicate): + def _agents_details(self, subject, predicate): """ - Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent + Returns a list of dicts with details about a foaf:Agent property, e.g. + dct:publisher or dct:creator entity. Both subject and predicate must be rdflib URIRef or BNode objects @@ -441,17 +442,22 @@ def _agent_details(self, subject, predicate): an empty string if they could not be found. 
""" - agent_details = {} - + agents = [] for agent in self.g.objects(subject, predicate): + agent_details = {} agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else "" agent_details["name"] = self._object_value(agent, FOAF.name) agent_details["email"] = self._object_value(agent, FOAF.mbox) + if not agent_details["email"]: + agent_details["email"] = self._without_mailto( + self._object_value(agent, VCARD.hasEmail) + ) agent_details["url"] = self._object_value(agent, FOAF.homepage) agent_details["type"] = self._object_value(agent, DCT.type) agent_details['identifier'] = self._object_value(agent, DCT.identifier) + agents.append(agent_details) - return agent_details + return agents def _contact_details(self, subject, predicate): """ @@ -1148,10 +1154,13 @@ def _extract_catalog_dict(self, catalog_ref): if val: out.append({"key": key, "value": val}) + publishers = self._agents_details(catalog_ref, DCT.publisher) + if publishers: + publisher = publishers[0] out.append( { "key": "source_catalog_publisher", - "value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)), + "value": json.dumps(publisher), } ) return out diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index ad40d988..da365be2 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -120,21 +120,24 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): {"key": "contact_{0}".format(key), "value": contact.get(key)} ) - # Publisher - publisher = self._agent_details(dataset_ref, DCT.publisher) - for key in ("uri", "name", "email", "url", "type", "identifier"): - if publisher.get(key): - dataset_dict["extras"].append( - {"key": "publisher_{0}".format(key), "value": publisher.get(key)} - ) - - # Creator - creator = self._agent_details(dataset_ref, DCT.creator) - for key in ("uri", "name", "email", "url", "type", "identifier"): - if creator.get(key): - dataset_dict["extras"].append( 
- {"key": "creator_{0}".format(key), "value": creator.get(key)} - ) + # Publishers and creators + for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]: + agent_key, predicate = item + if self._schema_field(agent_key): + # This is a scheming field, will be hanlded in a separate profile + pass + else: + agents = self._agents_details(dataset_ref, predicate) + if agents: + agent = agents[0] + for key in ("uri", "name", "email", "url", "type", "identifier"): + if agent.get(key): + dataset_dict["extras"].append( + { + "key": f"{agent_key}_{key}", + "value": agent.get(key) + } + ) # Temporal start, end = self._time_interval(dataset_ref, DCT.temporal) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index f87c94ca..a0f7493b 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -87,7 +87,7 @@ def _parse_list_value(data_dict, field_name): check_name = new_fields_mapping.get(field_name, field_name) for extra in dataset_dict.get("extras", []): if extra["key"].startswith(f"{check_name}_"): - subfield = extra["key"][extra["key"].index("_") + 1:] + subfield = extra["key"][extra["key"].index("_") + 1 :] if subfield in [ f["field_name"] for f in schema_field["repeating_subfields"] ]: @@ -100,6 +100,15 @@ def _parse_list_value(data_dict, field_name): dataset_dict[field_name] = [new_dict] dataset_dict["extras"] = new_extras + # Publishers and creators + for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]: + key, predicate = item + agents = self._agents_details(dataset_ref, predicate) + if agents: + dataset_dict[key] = [] + for publisher in agents: + dataset_dict[key].append(publisher) + # Repeating subfields: resources for schema_field in self._dataset_schema["resource_fields"]: if "repeating_subfields" in schema_field: @@ -124,7 +133,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): Add triples to the 
graph from new repeating subfields """ contact = dataset_dict.get("contact") - if isinstance(contact, list) and len(contact) and self._not_empty_dict(contact[0]): + if ( + isinstance(contact, list) + and len(contact) + and self._not_empty_dict(contact[0]) + ): for item in contact: contact_uri = item.get("uri") if contact_uri: @@ -150,11 +163,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): contact_details, VCARD.hasUID, "identifier", - _type=URIRefOrLiteral + _type=URIRefOrLiteral, ) - self._add_agent(dataset_ref, dataset_dict, "publisher", DCT.publisher) - self._add_agent(dataset_ref, dataset_dict, "creator", DCT.creator) + self._add_agents(dataset_ref, dataset_dict, "publisher", DCT.publisher) + self._add_agents(dataset_ref, dataset_dict, "creator", DCT.creator) temporal = dataset_dict.get("temporal_coverage") if ( @@ -172,7 +185,11 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): self.g.add((dataset_ref, DCT.temporal, temporal_ref)) spatial = dataset_dict.get("spatial_coverage") - if isinstance(spatial, list) and len(spatial) and self._not_empty_dict(spatial[0]): + if ( + isinstance(spatial, list) + and len(spatial) + and self._not_empty_dict(spatial[0]) + ): for item in spatial: if item.get("uri"): spatial_ref = CleanedURIRef(item["uri"]) @@ -205,55 +222,59 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): except ValueError: pass - def _add_agent(self, dataset_ref, dataset_dict, agent_key, rdf_predicate): + def _add_agents( + self, dataset_ref, dataset_dict, agent_key, rdf_predicate, first_only=False + ): """ - Adds an agent (publisher or creator) to the RDF graph. + Adds one or more agents (e.g. publisher or creator) to the RDF graph. 
:param dataset_ref: The RDF reference of the dataset :param dataset_dict: The dataset dictionary containing agent information - :param agent_key: 'publisher' or 'creator' to specify the agent - :param rdf_predicate: The RDF predicate (DCT.publisher or DCT.creator) + :param agent_key: field name in the CKAN dict (.e.g. "publisher", "creator", etc) + :param rdf_predicate: The RDF predicate (DCT.publisher, DCT.creator, etc) + :first_only: Add the first item found only (used for 0..1 properties) """ agent = dataset_dict.get(agent_key) - if ( - isinstance(agent, list) - and len(agent) - and self._not_empty_dict(agent[0]) - ): - agent = agent[0] - agent_uri = agent.get("uri") - if agent_uri: - agent_ref = CleanedURIRef(agent_uri) - else: - agent_ref = BNode() - - self.g.add((agent_ref, RDF.type, FOAF.Agent)) - self.g.add((dataset_ref, rdf_predicate, agent_ref)) - - self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name") - self._add_triple_from_dict(agent, agent_ref, FOAF.homepage, "url", _type=URIRef) - self._add_triple_from_dict( - agent, - agent_ref, - DCT.type, - "type", - _type=URIRefOrLiteral, - ) - self._add_triple_from_dict( - agent, - agent_ref, - VCARD.hasEmail, - "email", - _type=URIRef, - value_modifier=self._add_mailto, - ) - self._add_triple_from_dict( - agent, - agent_ref, - DCT.identifier, - "identifier", - _type=URIRefOrLiteral - ) + if isinstance(agent, list) and len(agent) and self._not_empty_dict(agent[0]): + agents = [agent[0]] if first_only else agent + + for agent in agents: + + agent_uri = agent.get("uri") + if agent_uri: + agent_ref = CleanedURIRef(agent_uri) + else: + agent_ref = BNode() + + self.g.add((agent_ref, RDF.type, FOAF.Agent)) + self.g.add((dataset_ref, rdf_predicate, agent_ref)) + + self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name") + self._add_triple_from_dict( + agent, agent_ref, FOAF.homepage, "url", _type=URIRef + ) + self._add_triple_from_dict( + agent, + agent_ref, + DCT.type, + "type", + 
_type=URIRefOrLiteral, + ) + self._add_triple_from_dict( + agent, + agent_ref, + VCARD.hasEmail, + "email", + _type=URIRef, + value_modifier=self._add_mailto, + ) + self._add_triple_from_dict( + agent, + agent_ref, + DCT.identifier, + "identifier", + _type=URIRefOrLiteral, + ) @staticmethod def _not_empty_dict(data_dict): diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 9b341efc..b5d03064 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -660,7 +660,7 @@ def test_publisher_foaf(self): p = RDFProfile(g) - publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher) + publisher = p._agents_details(URIRef('http://example.org'), DCT.publisher)[0] assert publisher['uri'] == 'http://orgs.vocab.org/some-org' assert publisher['name'] == 'Publishing Organization for dataset 1' @@ -688,7 +688,7 @@ def test_publisher_ref(self): p = RDFProfile(g) - publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher) + publisher = p._agents_details(URIRef('http://example.org'), DCT.publisher)[0] assert publisher['uri'] == 'http://orgs.vocab.org/some-org' From 5c948a8625568522190c6dd1b3dd8a0a67b59e1d Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 22 Oct 2024 13:18:32 +0200 Subject: [PATCH 2/3] Support for multiple contact points, add tests --- ckanext/dcat/profiles/base.py | 9 +- ckanext/dcat/profiles/euro_dcat_ap_base.py | 29 +++-- .../dcat/profiles/euro_dcat_ap_scheming.py | 9 +- .../tests/profiles/base/test_base_profile.py | 2 + .../dcat_ap_2/test_scheming_support.py | 121 +++++++++++++++++- 5 files changed, 146 insertions(+), 24 deletions(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 01df0460..08c78d0d 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -461,7 +461,7 @@ def _agents_details(self, subject, predicate): def 
_contact_details(self, subject, predicate): """ - Returns a dict with details about a vcard expression + Returns a list of dicts with details about vcard expressions Both subject and predicate must be rdflib URIRef or BNode objects @@ -469,10 +469,10 @@ def _contact_details(self, subject, predicate): an empty string if they could not be found """ - contact = {} - + contacts = [] for agent in self.g.objects(subject, predicate): + contact = {} contact["uri"] = str(agent) if isinstance(agent, URIRef) else "" contact["name"] = self._get_vcard_property_value( @@ -484,8 +484,9 @@ def _contact_details(self, subject, predicate): ) contact["identifier"] = self._get_vcard_property_value(agent, VCARD.hasUID) + contacts.append(contact) - return contact + return contacts def _parse_geodata(self, spatial, datatype, cur_value): """ diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index da365be2..57458c6a 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -108,17 +108,24 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): dataset_dict["extras"].append({"key": key, "value": json.dumps(values)}) # Contact details - contact = self._contact_details(dataset_ref, DCAT.contactPoint) - if not contact: - # adms:contactPoint was supported on the first version of DCAT-AP - contact = self._contact_details(dataset_ref, ADMS.contactPoint) - - if contact: - for key in ("uri", "name", "email", "identifier"): - if contact.get(key): - dataset_dict["extras"].append( - {"key": "contact_{0}".format(key), "value": contact.get(key)} - ) + if self._schema_field("contact"): + # This is a scheming field, will be hanlded in a separate profile + pass + else: + contact = self._contact_details(dataset_ref, DCAT.contactPoint) + if not contact: + # adms:contactPoint was supported on the first version of DCAT-AP + contact = self._contact_details(dataset_ref, ADMS.contactPoint) + if contact: + 
contact = contact[0] + for key in ("uri", "name", "email", "identifier"): + if contact.get(key): + dataset_dict["extras"].append( + { + "key": "contact_{0}".format(key), + "value": contact.get(key) + } + ) # Publishers and creators for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]: diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index a0f7493b..ca72cd21 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -100,14 +100,17 @@ def _parse_list_value(data_dict, field_name): dataset_dict[field_name] = [new_dict] dataset_dict["extras"] = new_extras + # Contact details + contacts = self._contact_details(dataset_ref, DCAT.contactPoint) + if contacts: + dataset_dict["contact"] = contacts + # Publishers and creators for item in [("publisher", DCT.publisher), ("creator", DCT.creator)]: key, predicate = item agents = self._agents_details(dataset_ref, predicate) if agents: - dataset_dict[key] = [] - for publisher in agents: - dataset_dict[key].append(publisher) + dataset_dict[key] = agents # Repeating subfields: resources for schema_field in self._dataset_schema["resource_fields"]: diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index b5d03064..fb08f51e 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -721,6 +721,8 @@ def test_contact_details(self): contact = p._contact_details(URIRef('http://example.org'), ADMS.contactPoint) + contact = contact[0] + assert contact['name'] == 'Point of Contact' # mailto gets removed for storage and is added again on output assert contact['email'] == 'contact@some.org' diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index 3b1d99f5..c4367613 100644 --- 
a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -190,7 +190,7 @@ def test_e2e_ckan_to_dcat(self): g, publisher[0][2], DCT.identifier, - URIRef(dataset_dict["publisher"][0]["identifier"]) + URIRef(dataset_dict["publisher"][0]["identifier"]), ) creator = [t for t in g.triples((dataset_ref, DCT.creator, None))] @@ -221,10 +221,9 @@ def test_e2e_ckan_to_dcat(self): g, creator[0][2], DCT.identifier, - URIRef(dataset_dict["creator"][0]["identifier"]) + URIRef(dataset_dict["creator"][0]["identifier"]), ) - temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))] assert len(temporal) == len(dataset["temporal_coverage"]) @@ -275,8 +274,8 @@ def test_e2e_ckan_to_dcat(self): # Statements for item in [ - ('access_rights', DCT.accessRights), - ('provenance', DCT.provenance), + ("access_rights", DCT.accessRights), + ("provenance", DCT.provenance), ]: statement = [s for s in g.objects(dataset_ref, item[1])][0] assert self._triple(g, statement, RDFS.label, dataset[item[0]]) @@ -388,7 +387,7 @@ def test_e2e_ckan_to_dcat(self): # Resources: statements statement = [s for s in g.objects(distribution_ref, DCT.rights)][0] - assert self._triple(g, statement, RDFS.label, resource['rights']) + assert self._triple(g, statement, RDFS.label, resource["rights"]) def test_publisher_fallback_org(self): @@ -839,6 +838,116 @@ def test_statement_literal(self): assert dataset["notes"] == "This is a dataset" assert dataset["access_rights"] == "Some statement" + def test_multiple_contacts(self): + + data = """ + @prefix dcat: . + @prefix dct: . + @prefix rdfs: . + @prefix vcard: . + + + a dcat:Dataset ; + dct:title "Dataset 1" ; + dct:description "This is a dataset" ; + dcat:contactPoint [ a vcard:Kind ; + vcard:fn "Test Contact 1" ; + vcard:hasEmail ], + [ a vcard:Kind ; + vcard:fn "Test Contact 2" ; + vcard:hasEmail ] ; + . 
+ """ + + p = RDFParser() + + p.parse(data, _format="ttl") + datasets = [d for d in p.datasets()] + + dataset = datasets[0] + assert len(dataset["contact"]) == 2 + assert dataset["contact"][0]["name"] == "Test Contact 1" + assert dataset["contact"][0]["email"] == "contact1@example.org" + assert dataset["contact"][1]["name"] == "Test Contact 2" + assert dataset["contact"][1]["email"] == "contact2@example.org" + + def test_multiple_publishers(self): + + data = """ + @prefix dcat: . + @prefix dct: . + @prefix rdfs: . + @prefix org: . + @prefix skos: . + @prefix foaf: . + @prefix vcard: . + + + a dcat:Dataset ; + dct:title "Dataset 1" ; + dct:description "This is a dataset" ; + dct:publisher [ a org:Organization ; + skos:prefLabel "Test Publisher 1" ; + vcard:hasEmail ; + foaf:name "Test Publisher 1" ], + [ a org:Organization ; + skos:prefLabel "Test Publisher 2" ; + vcard:hasEmail ; + foaf:name "Test Publisher 2" ] ; + . + """ + + p = RDFParser() + + p.parse(data, _format="ttl") + datasets = [d for d in p.datasets()] + + dataset = datasets[0] + assert len(dataset["publisher"]) == 2 + assert dataset["publisher"][0]["name"] == "Test Publisher 1" + assert dataset["publisher"][0]["email"] == "publisher1@example.org" + assert dataset["publisher"][1]["name"] == "Test Publisher 2" + assert dataset["publisher"][1]["email"] == "publisher2@example.org" + + def test_multiple_creators(self): + + data = """ + @prefix dcat: . + @prefix dct: . + @prefix rdfs: . + @prefix org: . + @prefix skos: . + @prefix foaf: . + @prefix vcard: . + + + a dcat:Dataset ; + dct:title "Dataset 1" ; + dct:description "This is a dataset" ; + dct:creator [ a org:Organization ; + skos:prefLabel "Test Creator 1" ; + vcard:hasEmail ; + foaf:name "Test Creator 1" ], + [ a org:Organization ; + skos:prefLabel "Test Creator 2" ; + vcard:hasEmail ; + foaf:name "Test Creator 2" ] ; + . 
+ """ + + p = RDFParser() + + p.parse(data, _format="ttl") + datasets = [d for d in p.datasets()] + + dataset = datasets[0] + assert len(dataset["creator"]) == 2 + assert dataset["creator"][0]["name"] == "Test Creator 1" + assert dataset["creator"][0]["email"] == "creator1@example.org" + assert dataset["creator"][1]["name"] == "Test Creator 2" + assert dataset["creator"][1]["email"] == "creator2@example.org" + + @pytest.mark.usefixtures("with_plugins", "clean_db", "clean_index") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( From 09c64c22e5664175fbe810e4a1aca3a1b961e94b Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 28 Oct 2024 15:58:50 +0100 Subject: [PATCH 3/3] Add properties to agent tests --- .../dcat_ap_2/test_scheming_support.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index c4367613..383de651 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -852,10 +852,14 @@ def test_multiple_contacts(self): dct:description "This is a dataset" ; dcat:contactPoint [ a vcard:Kind ; vcard:fn "Test Contact 1" ; - vcard:hasEmail ], + vcard:hasEmail ; + vcard:hasUID "https://orcid.org/0000-0002-9095-9201" + ], [ a vcard:Kind ; vcard:fn "Test Contact 2" ; - vcard:hasEmail ] ; + vcard:hasEmail ; + vcard:hasUID "https://orcid.org/0000-0002-9095-9202" + ] ; . 
""" @@ -868,8 +872,16 @@ def test_multiple_contacts(self): assert len(dataset["contact"]) == 2 assert dataset["contact"][0]["name"] == "Test Contact 1" assert dataset["contact"][0]["email"] == "contact1@example.org" + assert ( + dataset["contact"][0]["identifier"] + == "https://orcid.org/0000-0002-9095-9201" + ) assert dataset["contact"][1]["name"] == "Test Contact 2" assert dataset["contact"][1]["email"] == "contact2@example.org" + assert ( + dataset["contact"][1]["identifier"] + == "https://orcid.org/0000-0002-9095-9202" + ) def test_multiple_publishers(self): @@ -889,10 +901,12 @@ def test_multiple_publishers(self): dct:publisher [ a org:Organization ; skos:prefLabel "Test Publisher 1" ; vcard:hasEmail ; + dct:identifier "https://orcid.org/0000-0002-9095-9201" ; foaf:name "Test Publisher 1" ], [ a org:Organization ; skos:prefLabel "Test Publisher 2" ; vcard:hasEmail ; + dct:identifier "https://orcid.org/0000-0002-9095-9202" ; foaf:name "Test Publisher 2" ] ; . """ @@ -906,8 +920,16 @@ def test_multiple_publishers(self): assert len(dataset["publisher"]) == 2 assert dataset["publisher"][0]["name"] == "Test Publisher 1" assert dataset["publisher"][0]["email"] == "publisher1@example.org" + assert ( + dataset["publisher"][0]["identifier"] + == "https://orcid.org/0000-0002-9095-9201" + ) assert dataset["publisher"][1]["name"] == "Test Publisher 2" assert dataset["publisher"][1]["email"] == "publisher2@example.org" + assert ( + dataset["publisher"][1]["identifier"] + == "https://orcid.org/0000-0002-9095-9202" + ) def test_multiple_creators(self):