Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(extras): handle multiple contacts and publishers with 1-based ind… #310

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 40 additions & 44 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.validators import is_year, is_year_month, is_date
from typing import List

DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
Expand Down Expand Up @@ -419,13 +420,13 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})

def _publisher(self, subject, predicate):
def _publisher(self, subject, predicate) -> List:
"""
Returns a dict with details about a dct:publisher entity, a foaf:Agent
Returns a list of dicts, each containing details about a dct:publisher entity, a foaf:Agent.

Both subject and predicate must be rdflib URIRef or BNode objects
Both subject and predicate must be rdflib URIRef or BNode objects.

Examples:
Examples of a single publisher entry:

<dct:publisher>
<foaf:Organization rdf:about="http://orgs.vocab.org/some-org">
Expand All @@ -436,43 +437,41 @@ def _publisher(self, subject, predicate):
</foaf:Organization>
</dct:publisher>

{
'uri': 'http://orgs.vocab.org/some-org',
'name': 'Publishing Organization for dataset 1',
'email': 'contact@some.org',
'url': 'http://some.org',
'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation',
}

<dct:publisher rdf:resource="http://publications.europa.eu/resource/authority/corporate-body/EURCOU" />

{
'uri': 'http://publications.europa.eu/resource/authority/corporate-body/EURCOU'
}
The resulting list:
[
{
'uri': 'http://orgs.vocab.org/some-org',
'name': 'Publishing Organization for dataset 1',
'email': 'contact@some.org',
'url': 'http://some.org',
'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation',
'identifier': ''
},
...
]

Returns keys for uri, name, email, url and type with the values set to
an empty string if they could not be found
Returns keys for uri, name, email, url, type, and identifier with values
set to an empty string if they could not be found.
"""

publisher = {}
publishers = [] # Initialize an empty list to hold multiple publisher entries

# Iterate over all matching publisher agents
for agent in self.g.objects(subject, predicate):
publisher = {
"uri": str(agent) if isinstance(agent, term.URIRef) else "",
"name": self._object_value(agent, FOAF.name),
"email": self._object_value(agent, FOAF.mbox),
"url": self._object_value(agent, FOAF.homepage),
"type": self._object_value(agent, DCT.type),
"identifier": self._object_value(agent, DCT.identifier)
}

publisher["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""

publisher["name"] = self._object_value(agent, FOAF.name)

publisher["email"] = self._object_value(agent, FOAF.mbox)

publisher["url"] = self._object_value(agent, FOAF.homepage)

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)
publishers.append(publisher)

return publisher
return publishers # Return the list of publisher dictionaries

def _contact_details(self, subject, predicate):
def _contact_details(self, subject, predicate) -> List:
"""
Returns a list of dicts, each with details about a vcard expression

Expand All @@ -482,21 +481,18 @@ def _contact_details(self, subject, predicate):
an empty string if they could not be found
"""

contact = {}
contact_list = []

for agent in self.g.objects(subject, predicate):
contact = {
'uri': (str(agent) if isinstance(agent, URIRef)
else self._get_vcard_property_value(agent, VCARD.hasUID)),
'name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
'email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}

contact["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""

contact["name"] = self._get_vcard_property_value(
agent, VCARD.hasFN, VCARD.fn
)

contact["email"] = self._without_mailto(
self._get_vcard_property_value(agent, VCARD.hasEmail)
)
contact_list.append(contact)

return contact
return contact_list

def _parse_geodata(self, spatial, datatype, cur_value):
"""
Expand Down
39 changes: 22 additions & 17 deletions ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,25 +109,30 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
dataset_dict["extras"].append({"key": key, "value": json.dumps(values)})

# Contact details
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contact:
# adms:contactPoint was supported on the first version of DCAT-AP
contact = self._contact_details(dataset_ref, ADMS.contactPoint)

if contact:
for key in ("uri", "name", "email"):
if contact.get(key):
dataset_dict["extras"].append(
{"key": "contact_{0}".format(key), "value": contact.get(key)}
)
contacts = self._contact_details(dataset_ref, DCAT.contactPoint)
if not contacts:
# adms:contactPoint was supported in the first version of DCAT-AP
contacts = self._contact_details(dataset_ref, ADMS.contactPoint)

# Emit one set of extras per contact, keyed contact_<field>_<n> with n starting at 1
if contacts:
for index, contact in enumerate(contacts, start=1):
for key in ("uri", "name", "email"):
if contact.get(key):
dataset_dict["extras"].append(
{"key": "contact_{0}_{1}".format(key, index), "value": contact.get(key)}
)

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)
publishers = self._publisher(dataset_ref, DCT.publisher)

if publishers:
for index, publisher in enumerate(publishers, start=1):
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}_{1}".format(key, index), "value": publisher.get(key)}
)

# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
Expand Down
Loading