Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decouple dcat and structured_data plugins #329

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions ckanext/dcat/blueprints.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
from flask import Blueprint, jsonify, make_response
from flask import Blueprint, jsonify

from ckantoolkit import config

from ckan.views.dataset import CreateView

import ckan.plugins.toolkit as toolkit
import ckanext.dcat.utils as utils
from ckanext.dcat.helpers import endpoints_enabled

dcat = Blueprint(
'dcat',
Expand All @@ -23,7 +24,7 @@ def read_dataset(_id, _format=None, package_type=None):
return utils.read_dataset_page(_id, _format)


if utils.endpoints_enabled():
if endpoints_enabled():

# requirements={'_format': 'xml|rdf|n3|ttl|jsonld'}
dcat.add_url_rule(config.get('ckanext.dcat.catalog_endpoint',
Expand Down
52 changes: 52 additions & 0 deletions ckanext/dcat/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Helpers used by templates
"""
import simplejson as json

import ckantoolkit as toolkit

from ckanext.dcat.processors import RDFSerializer

config = toolkit.config


ENABLE_RDF_ENDPOINTS_CONFIG = "ckanext.dcat.enable_rdf_endpoints"


def endpoints_enabled():
    """
    Report whether the RDF endpoints are switched on.

    Looks up the ``ENABLE_RDF_ENDPOINTS_CONFIG`` option in the config
    (using ``True`` when the option is not set) and coerces the value
    to a boolean with ``toolkit.asbool``.
    """
    raw_setting = config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True)
    return toolkit.asbool(raw_setting)


def get_endpoint(_type="dataset"):
    """
    Return the endpoint name string matching the given type.

    ``"dataset"`` (the default) yields ``"dcat.read_dataset"``; every
    other value yields ``"dcat.read_catalog"``.
    """
    if _type == "dataset":
        return "dcat.read_dataset"
    return "dcat.read_catalog"


def structured_data(dataset_dict, profiles=None, _format="jsonld"):
    """
    Serialize a dataset dict into a structured data string for the frontend.

    The dataset is run through an ``RDFSerializer`` built with the given
    profiles (``["schemaorg"]`` when none are supplied) and serialized in
    the requested ``_format``.

    If the serialized output parses as JSON it is re-dumped with sorted
    keys, a 4-space indent and ``JSONEncoderForHTML``; otherwise the raw
    serializer output is returned unchanged.
    """
    active_profiles = profiles or ["schemaorg"]

    serialized = RDFSerializer(profiles=active_profiles).serialize_dataset(
        dataset_dict, _format=_format
    )

    # Round-trip through the JSON parser to prevent UnicodeDecodeError and
    # to get a consistently formatted result.
    try:
        parsed = json.loads(serialized)
        pretty = json.dumps(
            parsed,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
            cls=json.JSONEncoderForHTML,
        )
    except ValueError:
        # Not JSON (e.g. another serialization format): hand it back as is.
        return serialized
    return pretty
17 changes: 12 additions & 5 deletions ckanext/dcat/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
dcat_datasets_list,
dcat_auth,
)
from ckanext.dcat import helpers
from ckanext.dcat import utils
from ckanext.dcat.validators import dcat_validators

Expand Down Expand Up @@ -81,7 +82,7 @@ def i18n_directory(self):
# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates')
p.toolkit.add_template_directory(config, '../templates/dcat')

# Check catalog URI on startup to emit a warning if necessary
utils.catalog_uri()
Expand All @@ -102,9 +103,8 @@ def update_config(self, config):

def get_helpers(self):
return {
'helper_available': utils.helper_available,
'dcat_get_endpoint': utils.get_endpoint,
'dcat_endpoints_enabled': utils.endpoints_enabled,
'dcat_get_endpoint': helpers.get_endpoint,
'dcat_endpoints_enabled': helpers.endpoints_enabled,
}

# IActions
Expand Down Expand Up @@ -243,11 +243,18 @@ def get_auth_functions(self):


class StructuredDataPlugin(p.SingletonPlugin):

p.implements(p.IConfigurer, inherit=True)
p.implements(p.ITemplateHelpers, inherit=True)

# IConfigurer

def update_config(self, config):
p.toolkit.add_template_directory(config, '../templates/structured_data')

# ITemplateHelpers

def get_helpers(self):
return {
'structured_data': utils.structured_data,
'structured_data': helpers.structured_data,
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,3 @@
{% endwith %}
{% endif %}
{% endblock -%}
{% block scripts %}
{{ super() }}
{% block structured_data %}
{#
h.structured_data is defined in the 'structured_data' plugin,
you have to activate the plugin (or implement the method yourself)
to make use of this feature.
More information about structured data:
https://developers.google.com/search/docs/guides/intro-structured-data
#}
{% if h.helper_available('structured_data') %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id)|safe }}
</script>
{% endif %}
{% endblock %}
{% endblock %}
13 changes: 13 additions & 0 deletions ckanext/dcat/templates/structured_data/package/read_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% ckan_extends %}
{#
Adds a JSON-LD <script> element to the `scripts` block of the dataset page.
The content of the element is whatever the `h.structured_data` helper
returns for `pkg`, emitted with the `safe` filter (no auto-escaping).
The nested `structured_data` block can be overridden to call the helper
with different arguments.
#}

{% block scripts %}

{{ super() }}

{% block structured_data %}
<!-- Structured data -->
<script type="application/ld+json">
{{ h.structured_data(pkg) | safe }}
</script>
{% endblock %}
{% endblock %}
46 changes: 0 additions & 46 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
DCAT_CLEAN_TAGS = 'ckanext.dcat.clean_tags'

DEFAULT_CATALOG_ENDPOINT = '/catalog.{_format}'
ENABLE_RDF_ENDPOINTS_CONFIG = 'ckanext.dcat.enable_rdf_endpoints'
ENABLE_CONTENT_NEGOTIATION_CONFIG = 'ckanext.dcat.enable_content_negotiation'


Expand Down Expand Up @@ -95,43 +94,6 @@ def field_labels():
'created': _('Created'),
}

def helper_available(helper_name):
    '''
    Tell whether `h` exposes a helper called `helper_name`.

    Returns False when looking the name up on `h` raises AttributeError
    or HelperError, True otherwise.
    '''
    found = True
    try:
        getattr(h, helper_name)
    except (AttributeError, HelperError):
        found = False
    return found

def structured_data(dataset_id, profiles=None, _format='jsonld'):
    '''
    Build the structured data string for a dataset, for use in the frontend.

    The dataset identified by `dataset_id` is fetched through the
    `dcat_dataset_show` action with the given profiles (`schemaorg` when
    none are supplied) and the requested `_format`.

    If the action result parses as JSON it is re-dumped with sorted keys,
    a 4-space indent and `JSONEncoderForHTML`; otherwise the raw result is
    returned unchanged.
    '''
    request = {
        'id': dataset_id,
        'profiles': profiles or ['schemaorg'],
        'format': _format,
    }
    serialized = toolkit.get_action('dcat_dataset_show')({}, request)

    # Round-trip through the JSON parser to prevent UnicodeDecodeError and
    # to get a consistently formatted result.
    try:
        parsed = json.loads(serialized)
        pretty = json.dumps(
            parsed,
            sort_keys=True,
            indent=4,
            separators=(',', ': '),
            cls=json.JSONEncoderForHTML,
        )
    except ValueError:
        # Not JSON: hand the action result back as is.
        return serialized
    return pretty

def catalog_uri():
'''
Expand Down Expand Up @@ -459,11 +421,3 @@ def read_catalog_page(_format):
response.headers['Content-type'] = CONTENT_TYPES[_format]

return response


def endpoints_enabled():
    '''
    Report whether the RDF endpoints are switched on.

    Looks up the `ENABLE_RDF_ENDPOINTS_CONFIG` option in the config
    (using True when the option is not set) and coerces the value to a
    boolean with `toolkit.asbool`.
    '''
    raw_setting = config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True)
    return toolkit.asbool(raw_setting)


def get_endpoint(_type='dataset'):
    '''
    Return the endpoint name string matching the given type.

    'dataset' (the default) yields 'dcat.read_dataset'; every other
    value yields 'dcat.read_catalog'.
    '''
    if _type == 'dataset':
        return 'dcat.read_dataset'
    return 'dcat.read_catalog'
14 changes: 9 additions & 5 deletions docs/google-dataset-search.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@

The `structured_data` plugin will add the necessary markup to dataset pages in order to get your datasets indexed by [Google Dataset Search](https://toolbox.google.com/datasetsearch). This markup is a [structured data](https://developers.google.com/search/docs/guides/intro-structured-data) JSON-LD snippet that uses the [schema.org](https://schema.org) vocabulary to describe the dataset.

ckan.plugins = dcat structured_data
ckan.plugins = structured_data

By default this uses the `schemaorg` profile (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.
To change the schema, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/package/read_base.html) and call the template helper function with different parameters:
You don't need to load the `dcat` plugin to use the `structured_data` plugin, but you can load them both to enable both functionalities.

The plugin uses the `schemaorg` profile by default (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.

To use a custom profile, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/structured_data/package/read_base.html) and call the template helper function with different parameters:

{% block structured_data %}
<script type="application/ld+json">
{{ h.structured_data(pkg.id, ['my_custom_schema'])|safe }}
{{ h.structured_data(pkg, ['my_custom_schema']) | safe }}
</script>
{% endblock %}

Example output of structured data in JSON-LD:

Below is an example of the structured data in JSON-LD embedded in the dataset page source:

```html
<script type="application/ld+json">
Expand Down
Loading