diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py
index 02bd395c..857a53a3 100644
--- a/ckanext/dcat/profiles.py
+++ b/ckanext/dcat/profiles.py
@@ -731,6 +731,18 @@ def _schema_field(self, key):
if field['field_name'] == key:
return field
+ def _schema_resource_field(self, key):
+ '''
+ Returns the schema field information if the provided key exists as a field in
+ the resources fields of the dataset schema (if one was provided)
+ '''
+ if not self._dataset_schema:
+ return None
+
+ for field in self._dataset_schema['resource_fields']:
+ if field['field_name'] == key:
+ return field
+
def _set_dataset_value(self, dataset_dict, key, value):
'''
Sets the value for a given key in a CKAN dataset dict
@@ -758,6 +770,15 @@ def _set_list_dataset_value(self, dataset_dict, key, value):
else:
return self._set_dataset_value(dataset_dict, key, json.dumps(value))
+ def _set_list_resource_value(self, resource_dict, key, value):
+ schema_field = self._schema_resource_field(key)
+ if schema_field and 'scheming_multiple_text' in schema_field.get('validators', []):
+ resource_dict[key] = value
+ else:
+ resource_dict[key] = json.dumps(value)
+
+ return resource_dict
+
def _get_dataset_value(self, dataset_dict, key, default=None):
'''
Returns the value for the given key on a CKAN dict
@@ -1084,7 +1105,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):
):
value = self._object_value(dataset_ref, predicate)
if value:
- self._set_dataset_value(dataset_dict, key, value)
+ dataset_dict['extras'].append({'key': key, 'value': value})
# Lists
for key, predicate, in (
@@ -1101,7 +1122,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
):
values = self._object_value_list(dataset_ref, predicate)
if values:
- self._set_list_dataset_value(dataset_dict, key, values)
+ dataset_dict['extras'].append({'key': key,
+ 'value': json.dumps(values)})
# Contact details
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
@@ -1110,7 +1132,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):
contact = self._contact_details(dataset_ref, ADMS.contactPoint)
if contact:
- for key in ('uri', 'name', 'email'):
+ for key in ('uri', 'name', 'email'):
if contact.get(key):
dataset_dict['extras'].append(
{'key': 'contact_{0}'.format(key),
@@ -1336,32 +1358,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
_type=URIRef, value_modifier=self._add_mailto
)
- # TODO: this will go into a separate profile
- contact = dataset_dict.get("contact")
- if isinstance(contact, list) and len(contact):
- for item in contact:
- contact_uri = item.get('uri')
- if contact_uri:
- contact_details = CleanedURIRef(contact_uri)
- else:
- contact_details = BNode()
-
- g.add((contact_details, RDF.type, VCARD.Organization))
- g.add((dataset_ref, DCAT.contactPoint, contact_details))
-
- self._add_triple_from_dict(
- item, contact_details,
- VCARD.fn, 'name'
- )
- # Add mail address as URIRef, and ensure it has a mailto: prefix
- self._add_triple_from_dict(
- item, contact_details,
- VCARD.hasEmail, 'email',
- _type=URIRef, value_modifier=self._add_mailto
- )
-
-
-
# Publisher
if any([
self._get_dataset_value(dataset_dict, 'publisher_uri'),
@@ -1752,8 +1748,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
]
self._add_list_triples_from_dict(resource_dict, distribution, items)
- # TODO: this will go into a separate profile
-
access_service_list = resource_dict.get('access_services', [])
if isinstance(access_service_list, str):
try:
@@ -1796,9 +1790,8 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
]
self._add_list_triples_from_dict(access_service_dict, access_service_node, items)
- # TODO: re-enable when separating into a profile
- # if access_service_list:
- # resource_dict['access_services'] = json.dumps(access_service_list)
+ if access_service_list:
+ resource_dict['access_services'] = json.dumps(access_service_list)
def graph_from_catalog(self, catalog_dict, catalog_ref):
@@ -2097,3 +2090,88 @@ def _distribution_url_graph(self, distribution, resource_dict):
def _distribution_numbers_graph(self, distribution, resource_dict):
if resource_dict.get('size'):
self.g.add((distribution, SCHEMA.contentSize, Literal(resource_dict['size'])))
+
+
+# TODO: split all these classes in different files
+class EuropeanDCATAPSchemingProfile(RDFProfile):
+ '''
+ This is a compatibility profile meant to add support for ckanext-scheming to the existing
+ `euro_dcat_ap` and `euro_dcat_ap_2` profiles.
+
+ It does not add or remove any properties from these profiles, it just transforms the
+ resulting dataset_dict so it is compatible with a ckanext-scheming schema
+
+ TODO: summarize changes and link to docs
+ '''
+
+ def parse_dataset(self, dataset_dict, dataset_ref):
+
+ if not self._dataset_schema:
+ # Not using scheming
+ return dataset_dict
+
+ # Move extras to root
+
+ extras_to_remove = []
+ extras = dataset_dict.get('extras', [])
+ for extra in extras:
+ if self._schema_field(extra['key']):
+ # This is a field defined in the dataset schema
+ dataset_dict[extra['key']] = extra['value']
+ extras_to_remove.append(extra['key'])
+
+ dataset_dict['extras'] = [e for e in extras if e['key'] not in extras_to_remove]
+
+
+ # Parse lists
+ def _parse_list_value(data_dict, field_name):
+ schema_field = self._schema_field(field_name) or self._schema_resource_field(field_name)
+
+ if schema_field and 'scheming_multiple_text' in schema_field.get('validators', []):
+ if isinstance(data_dict[field_name], str):
+ try:
+ data_dict[field_name] = json.loads(data_dict[field_name])
+ except ValueError:
+ pass
+
+ for field_name in dataset_dict.keys():
+ _parse_list_value(dataset_dict, field_name)
+
+ for resource_dict in dataset_dict.get('resources', []):
+ for field_name in resource_dict.keys():
+ _parse_list_value(resource_dict, field_name)
+
+
+ # Repeating subfields
+ for schema_field in self._dataset_schema['dataset_fields']:
+ if 'repeating_subfields' in schema_field:
+ # Check if existing extras need to be migrated
+ field_name = schema_field['field_name']
+ new_extras = []
+ new_dict = {}
+ for extra in dataset_dict.get('extras', []):
+ if extra['key'].startswith(f'{field_name}_'):
+ subfield = extra['key'][len(field_name) + 1:]  # strip the '<field_name>_' prefix (field_name may itself contain '_')
+ if subfield in [f['field_name'] for f in schema_field['repeating_subfields']]:
+ new_dict[subfield] = extra['value']
+ else:
+ new_extras.append(extra)
+ else:
+ new_extras.append(extra)
+ if new_dict:
+ dataset_dict[field_name] = [new_dict]
+ dataset_dict['extras'] = new_extras
+
+ for schema_field in self._dataset_schema['resource_fields']:
+ if 'repeating_subfields' in schema_field:
+ # Check if the value needs to be loaded from JSON
+ field_name = schema_field['field_name']
+ for resource_dict in dataset_dict.get('resources', []):
+ if resource_dict.get(field_name) and isinstance(resource_dict[field_name], str):
+ try:
+ # TODO: load only subfields in schema?
+ resource_dict[field_name] = json.loads(resource_dict[field_name])
+ except ValueError:
+ pass
+
+ return dataset_dict
diff --git a/examples/dataset.rdf b/examples/dataset.rdf
index fed71cc9..6b445dff 100644
--- a/examples/dataset.rdf
+++ b/examples/dataset.rdf
@@ -3,6 +3,7 @@
xmlns:time="http://www.w3.org/2006/time#"
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
+ xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:adms="http://www.w3.org/ns/adms#"
xmlns:schema="http://schema.org/"
@@ -96,7 +97,19 @@
-
+
+
+
+ Sparql-end Point
+
+ This SPARQL end point allow to directly query the EU Whoiswho content (organization / membership / person)
+ SPARQL url description
+
+
+
+
+
+
diff --git a/setup.py b/setup.py
index fda14619..78fb19fa 100644
--- a/setup.py
+++ b/setup.py
@@ -43,6 +43,7 @@
[ckan.rdf.profiles]
euro_dcat_ap=ckanext.dcat.profiles:EuropeanDCATAPProfile
euro_dcat_ap_2=ckanext.dcat.profiles:EuropeanDCATAP2Profile
+ euro_dcat_ap_scheming=ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile
schemaorg=ckanext.dcat.profiles:SchemaOrgProfile
[babel.extractors]