Skip to content

Commit

Permalink
Merge pull request #329 from ckan/decouple-structured_data
Browse files Browse the repository at this point in the history
Decouple `dcat` and `structured_data` plugins
  • Loading branch information
amercader authored Jan 8, 2025
2 parents 04788a4 + 86474f7 commit 868b81e
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 75 deletions.
5 changes: 3 additions & 2 deletions ckanext/dcat/blueprints.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
from flask import Blueprint, jsonify, make_response
from flask import Blueprint, jsonify

from ckantoolkit import config

from ckan.views.dataset import CreateView

import ckan.plugins.toolkit as toolkit
import ckanext.dcat.utils as utils
from ckanext.dcat.helpers import endpoints_enabled

dcat = Blueprint(
'dcat',
Expand All @@ -23,7 +24,7 @@ def read_dataset(_id, _format=None, package_type=None):
return utils.read_dataset_page(_id, _format)


if utils.endpoints_enabled():
if endpoints_enabled():

# requirements={'_format': 'xml|rdf|n3|ttl|jsonld'}
dcat.add_url_rule(config.get('ckanext.dcat.catalog_endpoint',
Expand Down
52 changes: 52 additions & 0 deletions ckanext/dcat/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Helpers used by templates
"""
import simplejson as json

import ckantoolkit as toolkit

from ckanext.dcat.processors import RDFSerializer

# Module-level alias for the configuration object exposed by the toolkit
config = toolkit.config


# Name of the config option read by endpoints_enabled() (defaults to True there)
ENABLE_RDF_ENDPOINTS_CONFIG = "ckanext.dcat.enable_rdf_endpoints"


def endpoints_enabled():
    """
    Tell whether the RDF endpoints are enabled in the configuration.

    Reads the option named by ENABLE_RDF_ENDPOINTS_CONFIG, falling back
    to True when it is not set, and coerces the value to a boolean with
    toolkit.asbool.
    """
    raw_setting = config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True)
    return toolkit.asbool(raw_setting)


def get_endpoint(_type="dataset"):
    """
    Return the blueprint endpoint name for the given type.

    "dataset" maps to the dataset endpoint; any other value maps to the
    catalog endpoint.
    """
    if _type == "dataset":
        return "dcat.read_dataset"
    return "dcat.read_catalog"


def structured_data(dataset_dict, profiles=None, _format="jsonld"):
    """
    Serialize a dataset dict into a structured data string for templates.

    The dataset is serialized with RDFSerializer using the given profiles
    (["schemaorg"] when none are supplied) and the requested format.

    If the serialized output parses as JSON it is re-dumped with sorted
    keys, 4-space indentation and the JSONEncoderForHTML encoder;
    otherwise the serializer output is returned unchanged.
    """
    selected_profiles = profiles or ["schemaorg"]

    serialized = RDFSerializer(profiles=selected_profiles).serialize_dataset(
        dataset_dict, _format=_format
    )

    # Round-trip through the JSON parser to prevent UnicodeDecodeError
    # and to pretty-print the result
    try:
        parsed = json.loads(serialized)
        pretty = json.dumps(
            parsed,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
            cls=json.JSONEncoderForHTML,
        )
    except ValueError:
        # Output was not JSON (e.g. another RDF format): hand it back as is
        return serialized
    return pretty
17 changes: 12 additions & 5 deletions ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
dcat_datasets_list,
dcat_auth,
)
from ckanext.dcat import helpers
from ckanext.dcat import utils
from ckanext.dcat.validators import dcat_validators

Expand Down Expand Up @@ -81,7 +82,7 @@ def i18n_directory(self):
# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates')
p.toolkit.add_template_directory(config, '../templates/dcat')

# Check catalog URI on startup to emit a warning if necessary
utils.catalog_uri()
Expand All @@ -102,9 +103,8 @@ def update_config(self, config):

def get_helpers(self):
return {
'helper_available': utils.helper_available,
'dcat_get_endpoint': utils.get_endpoint,
'dcat_endpoints_enabled': utils.endpoints_enabled,
'dcat_get_endpoint': helpers.get_endpoint,
'dcat_endpoints_enabled': helpers.endpoints_enabled,
}

# IActions
Expand Down Expand Up @@ -243,11 +243,18 @@ def get_auth_functions(self):


class StructuredDataPlugin(p.SingletonPlugin):

p.implements(p.IConfigurer, inherit=True)
p.implements(p.ITemplateHelpers, inherit=True)

# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates/structured_data')

# ITemplateHelpers

def get_helpers(self):
return {
'structured_data': utils.structured_data,
'structured_data': helpers.structured_data,
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,3 @@
{% endwith %}
{% endif %}
{% endblock -%}
{% block scripts %}
{{ super() }}
{% block structured_data %}
{#
h.structured_data is defined in the 'structured_data' plugin,
you have to activate the plugin (or implement the method yourself)
to make use of this feature.
More information about structured data:
https://developers.google.com/search/docs/guides/intro-structured-data
#}
{% if h.helper_available('structured_data') %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id)|safe }}
</script>
{% endif %}
{% endblock %}
{% endblock %}
File renamed without changes.
13 changes: 13 additions & 0 deletions ckanext/dcat/templates/structured_data/package/read_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% ckan_extends %}

{% block scripts %}

{{ super() }}

{% block structured_data %}
<!-- Structured data -->
<script type="application/ld+json">
{{ h.structured_data(pkg) | safe }}
</script>
{% endblock %}
{% endblock %}
46 changes: 0 additions & 46 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
DCAT_CLEAN_TAGS = 'ckanext.dcat.clean_tags'

DEFAULT_CATALOG_ENDPOINT = '/catalog.{_format}'
ENABLE_RDF_ENDPOINTS_CONFIG = 'ckanext.dcat.enable_rdf_endpoints'
ENABLE_CONTENT_NEGOTIATION_CONFIG = 'ckanext.dcat.enable_content_negotiation'


Expand Down Expand Up @@ -95,43 +94,6 @@ def field_labels():
'created': _('Created'),
}

def helper_available(helper_name):
'''
Checks if a given helper name is available on `h`
'''
try:
getattr(h, helper_name)
except (AttributeError, HelperError):
return False
return True

def structured_data(dataset_id, profiles=None, _format='jsonld'):
'''
Returns a string containing the structured data of the given
dataset id and using the given profiles (if no profiles are supplied
the default profiles are used).
This string can be used in the frontend.
'''
if not profiles:
profiles = ['schemaorg']

data = toolkit.get_action('dcat_dataset_show')(
{},
{
'id': dataset_id,
'profiles': profiles,
'format': _format,
}
)
# parse result again to prevent UnicodeDecodeError and add formatting
try:
json_data = json.loads(data)
return json.dumps(json_data, sort_keys=True,
indent=4, separators=(',', ': '), cls=json.JSONEncoderForHTML)
except ValueError:
# result was not JSON, return anyway
return data

def catalog_uri():
'''
Expand Down Expand Up @@ -459,11 +421,3 @@ def read_catalog_page(_format):
response.headers['Content-type'] = CONTENT_TYPES[_format]

return response


def endpoints_enabled():
return toolkit.asbool(config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True))


def get_endpoint(_type='dataset'):
return 'dcat.read_dataset' if _type == 'dataset' else 'dcat.read_catalog'
14 changes: 9 additions & 5 deletions docs/google-dataset-search.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@

The `structured_data` plugin will add the necessary markup to dataset pages in order to get your datasets indexed by [Google Dataset Search](https://toolbox.google.com/datasetsearch). This markup is a [structured data](https://developers.google.com/search/docs/guides/intro-structured-data) JSON-LD snippet that uses the [schema.org](https://schema.org) vocabulary to describe the dataset.

ckan.plugins = dcat structured_data
ckan.plugins = structured_data

By default this uses the `schemaorg` profile (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.
To change the schema, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/package/read_base.html) and call the template helper function with different parameters:
You don't need to load the `dcat` plugin to use the `structured_data` plugin, but you can load both plugins if you want the functionality of each.

The plugin uses the `schemaorg` profile by default (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.

To use a custom profile, you have to override the Jinja template block called `structured_data` in [`templates/structured_data/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/structured_data/package/read_base.html) and call the template helper function with different parameters:

{% block structured_data %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id, ['my_custom_schema'])|safe }}
{{ h.structured_data(pkg, ['my_custom_schema']) | safe }}
</script>
{% endblock %}

Example output of structured data in JSON-LD:

Below is an example of the structured data in JSON-LD embedded in the dataset page source:

```html
<script type="application/ld+json">
Expand Down

0 comments on commit 868b81e

Please sign in to comment.