From cf306879d8168a67c4e0f2e476922d1ba9b43247 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Thu, 3 Oct 2024 16:35:32 +0200 Subject: [PATCH 1/6] Solves URI from contact point --- ckanext/dcat/profiles/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index fd5af492..88b684fc 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -486,7 +486,8 @@ def _contact_details(self, subject, predicate): for agent in self.g.objects(subject, predicate): - contact["uri"] = str(agent) if isinstance(agent, term.URIRef) else "" + contact["uri"] = (str(agent) if isinstance(agent, URIRef) + else self._get_vcard_property_value(agent, VCARD.hasUID)) contact["name"] = self._get_vcard_property_value( agent, VCARD.hasFN, VCARD.fn From 6f4628b716d83a927a075943fd16dd75cf66b1cb Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Sat, 12 Oct 2024 22:32:26 +0200 Subject: [PATCH 2/6] Apply review comments --- ckanext/dcat/profiles/base.py | 2 ++ ckanext/dcat/profiles/euro_dcat_ap_base.py | 2 +- ckanext/dcat/profiles/euro_dcat_ap_scheming.py | 8 ++++++++ ckanext/dcat/schemas/dcat_ap_full.yaml | 5 +++++ ckanext/dcat/tests/profiles/base/test_base_profile.py | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 88b684fc..d07869ca 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -497,6 +497,8 @@ def _contact_details(self, subject, predicate): self._get_vcard_property_value(agent, VCARD.hasEmail) ) + contact["identifier"] = self._get_vcard_property_value(agent, VCARD.hasUID) + return contact def _parse_geodata(self, spatial, datatype, cur_value): diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index 28b476bb..2fbcbc13 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ 
b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -115,7 +115,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): contact = self._contact_details(dataset_ref, ADMS.contactPoint) if contact: - for key in ("uri", "name", "email"): + for key in ("uri", "name", "email", "identifier"): if contact.get(key): dataset_dict["extras"].append( {"key": "contact_{0}".format(key), "value": contact.get(key)} diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index 8d0ffb79..23bc2a94 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -150,6 +150,14 @@ def _not_empty_dict(data_dict): value_modifier=self._add_mailto, ) + self._add_triple_from_dict( + item, + contact_details, + VCARD.hasUID, + "identifier", + _type=URIRefOrLiteral + ) + publisher = dataset_dict.get("publisher") if ( isinstance(publisher, list) diff --git a/ckanext/dcat/schemas/dcat_ap_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml index eea83b20..e19d4796 100644 --- a/ckanext/dcat/schemas/dcat_ap_full.yaml +++ b/ckanext/dcat/schemas/dcat_ap_full.yaml @@ -42,6 +42,11 @@ dataset_fields: - field_name: email label: Email display_snippet: email.html + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the contact point. Such as a ROR ID. + help_text: Contact information for enquiries about the dataset. 
- field_name: publisher diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 221c772c..89b0f43e 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -706,6 +706,7 @@ def test_contact_details(self): Point of Contact + @@ -723,3 +724,5 @@ def test_contact_details(self): assert contact['name'] == 'Point of Contact' # mailto gets removed for storage and is added again on output assert contact['email'] == 'contact@some.org' + + assert contact['identifier'] == 'http://some.org' From 1702e7ebbfe3ad922cc0100ce33e4576b0f85f59 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 14 Oct 2024 14:23:39 +0200 Subject: [PATCH 3/6] Add UT contact point identifier --- .../tests/profiles/base/test_base_profile.py | 4 ++-- .../test_euro_dcatap_3_profile_serialize.py | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 89b0f43e..98ee6852 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -706,7 +706,7 @@ def test_contact_details(self): Point of Contact - + @@ -725,4 +725,4 @@ def test_contact_details(self): # mailto gets removed for storage and is added again on output assert contact['email'] == 'contact@some.org' - assert contact['identifier'] == 'http://some.org' + assert contact['identifier'] == 'https://orcid.org/0000-0002-9095-9201' diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py index e0001526..77fec47a 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py @@ 
-82,8 +82,8 @@ def test_e2e_ckan_to_dcat(self): ], # Repeating subfields "contact": [ - {"name": "Contact 1", "email": "contact1@example.org"}, - {"name": "Contact 2", "email": "contact2@example.org"}, + {"name": "Contact 1", "email": "contact1@example.org", "identifier": "123"}, + {"name": "Contact 2", "email": "contact2@example.org", "identifier": "456"}, ], "publisher": [ { @@ -260,6 +260,12 @@ def test_e2e_ckan_to_dcat(self): VCARD.hasEmail, URIRef("mailto:" + dataset_dict["contact"][0]["email"]), ) + assert self._triple( + g, + contact_details[0][2], + VCARD.hasUID, + dataset_dict["contact"][0]["identifier"], + ) assert self._triple( g, contact_details[1][2], VCARD.fn, dataset_dict["contact"][1]["name"] ) @@ -269,6 +275,12 @@ def test_e2e_ckan_to_dcat(self): VCARD.hasEmail, URIRef("mailto:" + dataset_dict["contact"][1]["email"]), ) + assert self._triple( + g, + contact_details[1][2], + VCARD.hasUID, + dataset_dict["contact"][1]["identifier"], + ) publisher = [t for t in g.triples((dataset_ref, DCT.publisher, None))] From 448c3111083ffa546f0f7e60dfb00acf91dc4f0d Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 14 Oct 2024 14:47:30 +0200 Subject: [PATCH 4/6] revert back deletion --- .../profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py index 8d56fc25..7dc8fa2f 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py @@ -1,3 +1,4 @@ +import json import pytest from rdflib.namespace import RDF From b47f3892f20770c91fc0e4374b32c20364030e21 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 14 Oct 2024 15:15:38 +0200 Subject: [PATCH 5/6] Keep this part as it is --- ckanext/dcat/profiles/base.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 17ac718e..890fc131 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -465,8 +465,7 @@ def _contact_details(self, subject, predicate): for agent in self.g.objects(subject, predicate): - contact["uri"] = (str(agent) if isinstance(agent, URIRef) - else self._get_vcard_property_value(agent, VCARD.hasUID)) + contact["uri"] = str(agent) if isinstance(agent, URIRef) else "" contact["name"] = self._get_vcard_property_value( agent, VCARD.hasFN, VCARD.fn From 41ab477091e6f3b6a94fbbe91b2220cbdba16b8d Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 14 Oct 2024 15:19:02 +0200 Subject: [PATCH 6/6] add contact identifier to mapping --- docs/mapping.md | 153 ++++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 76 deletions(-) diff --git a/docs/mapping.md b/docs/mapping.md index ce4048c4..97261b08 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -18,82 +18,83 @@ some cases the way metadata is stored internally and presented at the CKAN API l fields are properly validated, can use the scheming snippets etc. See [Schemas](getting-started.md#schemas) for more details. 
-| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | -|-------------------|------------------------|---------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| -| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Dataset | dct:title | title | | text | | -| dcat:Dataset | dct:description | notes | | text | | -| dcat:Dataset | dcat:keyword | tags | | text | | -| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | -| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | -| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | -| dcat:Dataset | dct:issued | custom:issued | metadata_created | text | | -| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | -| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | -| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | -| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | -| dcat:Dataset | dcat:landingPage | url | | text | | -| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | -| dcat:Dataset | dct:conformsTo | custom:conforms_to | | list | See [Lists](#lists) | -| dcat:Dataset | dct:accessRights | custom:access_rights | | text | | -| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | -| dcat:Dataset | dct:provenance | custom:provenance | | text | | -| dcat:Dataset | dct:type | custom:dcat_type | | text | | -| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). 
It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to dcat:Distribution instances | -| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | -| dcat:Dataset | dct:temporal | custom:temporal_start + custom:temporal_end | | text | None, one or both extras can be present | -| dcat:Dataset | dcat:temporalResolution| custom:temporal_resolution | | list | | -| dcat:Dataset | dcat:spatialResolutionInMeters| custom:spatial_resolution_in_meters | | list | | -| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | -| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | -| foaf:Agent | foaf:name | custom:publisher_name | | text | | -| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | -| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | -| foaf:Agent | dct:type | custom:publisher_type | | text | | -| foaf:Agent | dct:identifier | custom:publisher_id | | text | -| dcat:Dataset | dct:creator | custom:creator_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | -| foaf:Agent | foaf:name | custom:creator_name | | text | | -| foaf:Agent | foaf:mbox | custom:creator_email | organization:title | text | | -| foaf:Agent | foaf:homepage | custom:creator_url | | text | | -| foaf:Agent | dct:type | custom:creator_type | | text | | -| foaf:Agent | dct:identifier | custom:creator_id | | text | -| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | 
-| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | -| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | -| dcat:Dataset | dcat:distribution | resources | | text | | -| dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Distribution | dct:title | resource:name | | text | | -| dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | -| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | -| dcat:Distribution | dct:description | resource:description | | text | | -| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | -| dcat:Distribution | dct:format | resource:format | | text | | -| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | -| dcat:Distribution | adms:status | resource:status | | text | | -| dcat:Distribution | dcat:byteSize | resource:size | | number | | -| dcat:Distribution | dct:issued | resource:issued | created | text | | -| dcat:Distribution | dct:modified | resource:modified | metadata_modified | text | | -| dcat:Distribution | dct:rights | resource:rights | | text | | -| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | -| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | -| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | -| dcat:Distribution | dcatap:availability | resource:availability | | text | | -| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | -| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | -| dcat:Distribution | dcat:accessService | resource:access_services | | text | | -| dcat:DataService | dct:title | access_service:title | | text | | 
-| dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | -| dcat:DataService | dcat:endpointDescription| access_service:endpoint_description | | text | | -| dcat:DataService | dcatap:availability | access_service:availability | | text | | -| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | -| dcat:DataService | dct:description | access_service:description | | text | | -| dcat:DataService | dct:license | access_service:license | | text | | -| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | -| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | -| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | +| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | +|-------------------|--------------------------------|--------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Dataset | dct:title | title | | text | | +| dcat:Dataset | dct:description | notes | | text | | +| dcat:Dataset | dcat:keyword | tags | | text | | +| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | +| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | +| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | +| dcat:Dataset | dct:issued | custom:issued | metadata_created | text | | +| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | +| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | +| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | +| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | +| dcat:Dataset | 
dcat:landingPage | url | | text | | +| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | +| dcat:Dataset | dct:conformsTo | custom:conforms_to | | list | See [Lists](#lists) | +| dcat:Dataset | dct:accessRights | custom:access_rights | | text | | +| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | +| dcat:Dataset | dct:provenance | custom:provenance | | text | | +| dcat:Dataset | dct:type | custom:dcat_type | | text | | +| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). 
It is assumed that these are one or more URIs referring to dcat:Distribution instances | +| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | +| dcat:Dataset | dct:temporal | custom:temporal_start + custom:temporal_end | | text | None, one or both extras can be present | +| dcat:Dataset | dcat:temporalResolution | custom:temporal_resolution | | list | | +| dcat:Dataset | dcat:spatialResolutionInMeters | custom:spatial_resolution_in_meters | | list | | +| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | +| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | +| foaf:Agent | foaf:name | custom:publisher_name | | text | | +| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | +| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | +| foaf:Agent | dct:type | custom:publisher_type | | text | | +| foaf:Agent | dct:identifier | custom:publisher_id | | text | +| dcat:Dataset | dct:creator | custom:creator_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | +| foaf:Agent | foaf:name | custom:creator_name | | text | | +| foaf:Agent | foaf:mbox | custom:creator_email | organization:title | text | | +| foaf:Agent | foaf:homepage | custom:creator_url | | text | | +| foaf:Agent | dct:type | custom:creator_type | | text | | +| foaf:Agent | dct:identifier | custom:creator_id | | text | +| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | +| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | +| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | +| vcard:Kind | vcard:hasUID | custom:contact_identifier | | text | | +| dcat:Dataset | dcat:distribution | resources | | text | | +| 
dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Distribution | dct:title | resource:name | | text | | +| dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | +| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | +| dcat:Distribution | dct:description | resource:description | | text | | +| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | +| dcat:Distribution | dct:format | resource:format | | text | | +| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | +| dcat:Distribution | adms:status | resource:status | | text | | +| dcat:Distribution | dcat:byteSize | resource:size | | number | | +| dcat:Distribution | dct:issued | resource:issued | created | text | | +| dcat:Distribution | dct:modified | resource:modified | metadata_modified | text | | +| dcat:Distribution | dct:rights | resource:rights | | text | | +| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | +| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | +| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | +| dcat:Distribution | dcatap:availability | resource:availability | | text | | +| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | +| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | +| dcat:Distribution | dcat:accessService | resource:access_services | | text | | +| dcat:DataService | dct:title | access_service:title | | text | | +| dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | +| dcat:DataService | dcat:endpointDescription | access_service:endpoint_description | | text | | +| dcat:DataService | dcatap:availability | 
access_service:availability | | text | | +| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | +| dcat:DataService | dct:description | access_service:description | | text | | +| dcat:DataService | dct:license | access_service:license | | text | | +| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | +| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | +| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | ### Custom fields