Skip to content

Commit

Permalink
Merge branch 'master' into Health-RI-healthdcat_ap
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Jan 9, 2025
2 parents cb04e7e + 868b81e commit 73b74b8
Show file tree
Hide file tree
Showing 16 changed files with 118 additions and 77 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# ckanext-dcat


[![Tests](https://github.com/ckan/ckanext-dcat/workflows/Tests/badge.svg?branch=master)](https://github.com/ckan/ckanext-dcat/actions)
[![Code Coverage](http://codecov.io/github/ckan/ckanext-dcat/coverage.svg?branch=master)](http://codecov.io/github/ckan/ckanext-dcat?branch=master)
[![Tests](https://github.com/ckan/ckanext-dcat/actions/workflows/test.yml/badge.svg)](https://github.com/ckan/ckanext-dcat/actions)


Ckanext-dcat is a [CKAN](https://github.com/ckan/ckan) extension that helps data publishers expose and consume metadata as serialized RDF documents using [DCAT](https://www.w3.org/TR/vocab-dcat/).
Expand Down
5 changes: 3 additions & 2 deletions ckanext/dcat/blueprints.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
from flask import Blueprint, jsonify, make_response
from flask import Blueprint, jsonify

from ckantoolkit import config

from ckan.views.dataset import CreateView

import ckan.plugins.toolkit as toolkit
import ckanext.dcat.utils as utils
from ckanext.dcat.helpers import endpoints_enabled

dcat = Blueprint(
'dcat',
Expand All @@ -23,7 +24,7 @@ def read_dataset(_id, _format=None, package_type=None):
return utils.read_dataset_page(_id, _format)


if utils.endpoints_enabled():
if endpoints_enabled():

# requirements={'_format': 'xml|rdf|n3|ttl|jsonld'}
dcat.add_url_rule(config.get('ckanext.dcat.catalog_endpoint',
Expand Down
52 changes: 52 additions & 0 deletions ckanext/dcat/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Helpers used by templates
"""
import simplejson as json

import ckantoolkit as toolkit

from ckanext.dcat.processors import RDFSerializer

config = toolkit.config


ENABLE_RDF_ENDPOINTS_CONFIG = "ckanext.dcat.enable_rdf_endpoints"


def endpoints_enabled():
    """
    Tell whether the RDF endpoints are switched on.

    Looks up the ``ckanext.dcat.enable_rdf_endpoints`` option in the CKAN
    config (treated as enabled when the option is absent) and coerces the
    stored value to a boolean with ``toolkit.asbool``.
    """
    raw_setting = config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True)
    return toolkit.asbool(raw_setting)


def get_endpoint(_type="dataset"):
    """
    Map a resource kind to the name of the matching ``dcat`` endpoint.

    ``"dataset"`` (the default) selects the dataset endpoint; any other
    value selects the catalog endpoint.
    """
    if _type == "dataset":
        return "dcat.read_dataset"
    return "dcat.read_catalog"


def structured_data(dataset_dict, profiles=None, _format="jsonld"):
    """
    Return a string with the structured data for the given dataset.

    The dataset is serialized directly from ``dataset_dict`` (the dataset
    dict itself, not a dataset id) with an ``RDFSerializer`` configured
    with the requested profiles. The resulting string can be embedded in
    the frontend.

    :param dataset_dict: the dataset dict to serialize
    :param profiles: list of profile names to use; when empty or not
        supplied, ``["schemaorg"]`` is used
    :param _format: serialization format handed to the serializer
        (``"jsonld"`` by default)
    :returns: the serialization re-encoded as sorted, indented JSON when
        it parses as JSON, otherwise the serializer output unchanged
    """
    if not profiles:
        profiles = ["schemaorg"]

    serializer = RDFSerializer(profiles=profiles)
    output = serializer.serialize_dataset(dataset_dict, _format=_format)

    # Parse the result again to prevent UnicodeDecodeError and to be able
    # to add formatting. Only the parsing step is guarded: a non-JSON
    # serialization is not an error here.
    try:
        json_data = json.loads(output)
    except ValueError:
        # result was not JSON, return anyway
        return output

    # JSONEncoderForHTML comes from simplejson; the output is meant to be
    # placed inside a <script> tag in the page.
    return json.dumps(
        json_data,
        sort_keys=True,
        indent=4,
        separators=(",", ": "),
        cls=json.JSONEncoderForHTML,
    )
17 changes: 12 additions & 5 deletions ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
dcat_datasets_list,
dcat_auth,
)
from ckanext.dcat import helpers
from ckanext.dcat import utils
from ckanext.dcat.validators import dcat_validators

Expand Down Expand Up @@ -81,7 +82,7 @@ def i18n_directory(self):
# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates')
p.toolkit.add_template_directory(config, '../templates/dcat')

# Check catalog URI on startup to emit a warning if necessary
utils.catalog_uri()
Expand All @@ -102,9 +103,8 @@ def update_config(self, config):

def get_helpers(self):
return {
'helper_available': utils.helper_available,
'dcat_get_endpoint': utils.get_endpoint,
'dcat_endpoints_enabled': utils.endpoints_enabled,
'dcat_get_endpoint': helpers.get_endpoint,
'dcat_endpoints_enabled': helpers.endpoints_enabled,
}

# IActions
Expand Down Expand Up @@ -243,11 +243,18 @@ def get_auth_functions(self):


class StructuredDataPlugin(p.SingletonPlugin):

p.implements(p.IConfigurer, inherit=True)
p.implements(p.ITemplateHelpers, inherit=True)

# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates/structured_data')

# ITemplateHelpers

def get_helpers(self):
return {
'structured_data': utils.structured_data,
'structured_data': helpers.structured_data,
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,3 @@
{% endwith %}
{% endif %}
{% endblock -%}
{% block scripts %}
{{ super() }}
{% block structured_data %}
{#
h.structured_data is defined in the 'structured_data' plugin,
you have to activate the plugin (or implement the method yourself)
to make use of this feature.
More information about structured data:
https://developers.google.com/search/docs/guides/intro-structured-data
#}
{% if h.helper_available('structured_data') %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id)|safe }}
</script>
{% endif %}
{% endblock %}
{% endblock %}
File renamed without changes.
13 changes: 13 additions & 0 deletions ckanext/dcat/templates/structured_data/package/read_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% ckan_extends %}

{% block scripts %}

{{ super() }}

{% block structured_data %}
<!-- Structured data -->
<script type="application/ld+json">
{{ h.structured_data(pkg) | safe }}
</script>
{% endblock %}
{% endblock %}
46 changes: 0 additions & 46 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
DCAT_CLEAN_TAGS = 'ckanext.dcat.clean_tags'

DEFAULT_CATALOG_ENDPOINT = '/catalog.{_format}'
ENABLE_RDF_ENDPOINTS_CONFIG = 'ckanext.dcat.enable_rdf_endpoints'
ENABLE_CONTENT_NEGOTIATION_CONFIG = 'ckanext.dcat.enable_content_negotiation'


Expand Down Expand Up @@ -95,43 +94,6 @@ def field_labels():
'created': _('Created'),
}

def helper_available(helper_name):
    '''
    Report whether `h` exposes a helper called `helper_name`.

    The lookup is attempted with `getattr`; an `AttributeError` or
    `HelperError` raised by that lookup means the helper is missing.
    '''
    try:
        getattr(h, helper_name)
    except (AttributeError, HelperError):
        found = False
    else:
        found = True
    return found

def structured_data(dataset_id, profiles=None, _format='jsonld'):
    '''
    Build the structured data string for the dataset with the given id.

    The `dcat_dataset_show` action is called (with an empty context) to
    serialize the dataset using `profiles`, falling back to the
    `schemaorg` profile when none are supplied. If the action result
    parses as JSON it is returned re-encoded with sorted keys and
    indentation; otherwise the raw result is returned untouched.
    The string can be used in the frontend.
    '''
    selected_profiles = profiles if profiles else ['schemaorg']

    show_action = toolkit.get_action('dcat_dataset_show')
    request = {
        'id': dataset_id,
        'profiles': selected_profiles,
        'format': _format,
    }
    data = show_action({}, request)

    # Round-trip through the JSON parser to prevent UnicodeDecodeError
    # and to apply formatting
    try:
        parsed = json.loads(data)
        formatted = json.dumps(
            parsed,
            sort_keys=True,
            indent=4,
            separators=(',', ': '),
            cls=json.JSONEncoderForHTML,
        )
    except ValueError:
        # result was not JSON, hand it back as it came
        return data
    return formatted

def catalog_uri():
'''
Expand Down Expand Up @@ -459,11 +421,3 @@ def read_catalog_page(_format):
response.headers['Content-type'] = CONTENT_TYPES[_format]

return response


def endpoints_enabled():
    '''
    Tell whether the RDF endpoints are switched on.

    Reads the option named by `ENABLE_RDF_ENDPOINTS_CONFIG` from the CKAN
    config (enabled when the option is absent) and coerces it to a
    boolean with `toolkit.asbool`.
    '''
    configured = config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True)
    return toolkit.asbool(configured)


def get_endpoint(_type='dataset'):
    '''
    Map a resource kind to the name of the matching `dcat` endpoint.

    `'dataset'` (the default) selects the dataset endpoint; any other
    value selects the catalog endpoint.
    '''
    if _type == 'dataset':
        return 'dcat.read_dataset'
    return 'dcat.read_catalog'
27 changes: 27 additions & 0 deletions docs/contributing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
As part of the CKAN ecosystem, ckanext-dcat is entirely open source and welcomes all forms of contributions from the community.
Besides the general guidance provided in the [CKAN documentation](https://docs.ckan.org/en/latest/contributing/index.html), please follow these points:

* Format your code with [Black](https://github.com/psf/black).
* Make sure to include tests for your changes. The extension has an extensive test suite, so in most cases you just need to copy some of the existing tests and adapt them.
* It's better to submit a pull request early, even if in draft state, to get feedback and make sure the contribution will be accepted.

### Including new profiles


New [profiles](profiles.md) that are useful to the wider community are welcome, provided that they are sustainable in the long term. A maintainer unfamiliar with the profile should be able to understand what the profile does and be confident that everything works as expected. The way to achieve this is with tests (lots of them!) and documentation.

More localized profiles are better placed in dedicated extensions.

A contribution that adds a new profile should include:

* A new [profile class](https://github.com/ckan/ckanext-dcat/tree/master/ckanext/dcat/profiles) with parse and serialize methods (extending the DCAT v3 one)
* A new dataset [schema](https://github.com/ckan/ckanext-dcat/tree/master/ckanext/dcat/schemas) that contains all new properties supported in the new profile (it can contain just the base DCAT 3 recommended ones)
* [Example](https://github.com/ckan/ckanext-dcat/tree/master/examples) CKAN dataset and DCAT serialization of the new profile
* Tests:
* [SHACL validation](https://github.com/ckan/ckanext-dcat/tree/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/shacl) if SHACL shapes are provided
* [End to end](https://github.com/ckan/ckanext-dcat/blob/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py#L44) tests covering parsing and serialization
* Parsing and serialization tests covering [specific functionality](https://github.com/ckan/ckanext-dcat/blob/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py#L368) for the profile
* [Documentation](https://github.com/ckan/ckanext-dcat/tree/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/docs) about the new profile (compatibility with DCAT AP versions, other profiles required, config options etc)

This might seem like a lot of requirements, but using the existing linked resources as templates should make things much easier. Do not hesitate to ask for help if you are unsure about any of these points.

14 changes: 9 additions & 5 deletions docs/google-dataset-search.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@

The `structured_data` plugin will add the necessary markup to dataset pages in order to get your datasets indexed by [Google Dataset Search](https://toolbox.google.com/datasetsearch). This markup is a [structured data](https://developers.google.com/search/docs/guides/intro-structured-data) JSON-LD snippet that uses the [schema.org](https://schema.org) vocabulary to describe the dataset.

ckan.plugins = dcat structured_data
ckan.plugins = structured_data

By default this uses the `schemaorg` profile (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.
To change the schema, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/package/read_base.html) and call the template helper function with different parameters:
You don't need to load the `dcat` plugin to use the `structured_data` plugin, but you can load them both to enable both functionalities.

The plugin uses the `schemaorg` profile by default (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.

To use a custom profile, you have to override the Jinja template block called `structured_data` in [`templates/structured_data/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/structured_data/package/read_base.html) and call the template helper function with different parameters:

{% block structured_data %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id, ['my_custom_schema'])|safe }}
{{ h.structured_data(pkg, ['my_custom_schema']) | safe }}
</script>
{% endblock %}

Example output of structured data in JSON-LD:

Below is an example of the structured data in JSON-LD embedded in the dataset page source:

```html
<script type="application/ld+json">
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,5 @@ nav:
- Google Dataset Search: 'google-dataset-search.md'
- CLI: 'cli.md'
- Configuration reference: 'configuration.md'
- Contributing: 'contributing.md'
- CHANGELOG: 'changelog.md'

0 comments on commit 73b74b8

Please sign in to comment.