Merge branch 'master' into Health-RI-healthdcat_ap

ckan · Jan 9, 2025 · 73b74b8 · 73b74b8
2 parents cb04e7e + 868b81e
commit 73b74b8
Show file tree

Hide file tree

Showing 16 changed files with 118 additions and 77 deletions.
diff --git a/README.md b/README.md
@@ -1,8 +1,7 @@
 # ckanext-dcat
 
 
-[![Tests](https://github.com/ckan/ckanext-dcat/workflows/Tests/badge.svg?branch=master)](https://github.com/ckan/ckanext-dcat/actions)
-[![Code Coverage](http://codecov.io/github/ckan/ckanext-dcat/coverage.svg?branch=master)](http://codecov.io/github/ckan/ckanext-dcat?branch=master)
+[![Tests](https://github.com/ckan/ckanext-dcat/actions/workflows/test.yml/badge.svg)](https://github.com/ckan/ckanext-dcat/actions)
 
 
 Ckanext-dcat is a [CKAN](https://github.com/ckan/ckan) extension that helps data publishers expose and consume metadata as serialized RDF documents using [DCAT](https://www.w3.org/TR/vocab-dcat/).

diff --git a/ckanext/dcat/blueprints.py b/ckanext/dcat/blueprints.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
-from flask import Blueprint, jsonify, make_response
+from flask import Blueprint, jsonify
 
 from ckantoolkit import config
 
 from ckan.views.dataset import CreateView
 
 import ckan.plugins.toolkit as toolkit
 import ckanext.dcat.utils as utils
+from ckanext.dcat.helpers import endpoints_enabled
 
 dcat = Blueprint(
     'dcat',
@@ -23,7 +24,7 @@ def read_dataset(_id, _format=None, package_type=None):
     return utils.read_dataset_page(_id, _format)
 
 
-if utils.endpoints_enabled():
+if endpoints_enabled():
 
     # requirements={'_format': 'xml|rdf|n3|ttl|jsonld'}
     dcat.add_url_rule(config.get('ckanext.dcat.catalog_endpoint',

diff --git a/ckanext/dcat/helpers.py b/ckanext/dcat/helpers.py
@@ -0,0 +1,52 @@
+"""
+Helpers used by templates
+"""
+import simplejson as json
+
+import ckantoolkit as toolkit
+
+from ckanext.dcat.processors import RDFSerializer
+
+config = toolkit.config
+
+
+ENABLE_RDF_ENDPOINTS_CONFIG = "ckanext.dcat.enable_rdf_endpoints"
+
+
+def endpoints_enabled():
+    return toolkit.asbool(config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True))
+
+
+def get_endpoint(_type="dataset"):
+    return "dcat.read_dataset" if _type == "dataset" else "dcat.read_catalog"
+
+
+def structured_data(dataset_dict, profiles=None, _format="jsonld"):
+    """
+    Returns a string containing the structured data of the given
+    dataset dict, serialized using the given profiles (if no profiles
+    are supplied, the `schemaorg` profile is used).
+
+    This string can be used in the frontend.
+    """
+
+    if not profiles:
+        profiles = ["schemaorg"]
+
+    serializer = RDFSerializer(profiles=profiles)
+
+    output = serializer.serialize_dataset(dataset_dict, _format=_format)
+
+    # parse result again to prevent UnicodeDecodeError and add formatting
+    try:
+        json_data = json.loads(output)
+        return json.dumps(
+            json_data,
+            sort_keys=True,
+            indent=4,
+            separators=(",", ": "),
+            cls=json.JSONEncoderForHTML,
+        )
+    except ValueError:
+        # result was not JSON, return anyway
+        return output
diff --git a/ckanext/dcat/plugins/__init__.py b/ckanext/dcat/plugins/__init__.py
@@ -19,6 +19,7 @@
                                 dcat_datasets_list,
                                 dcat_auth,
                                 )
+from ckanext.dcat import helpers
 from ckanext.dcat import utils
 from ckanext.dcat.validators import dcat_validators
 
@@ -81,7 +82,7 @@ def i18n_directory(self):
     # IConfigurer
 
     def update_config(self, config):
-        p.toolkit.add_template_directory(config, '../templates')
+        p.toolkit.add_template_directory(config, '../templates/dcat')
 
         # Check catalog URI on startup to emit a warning if necessary
         utils.catalog_uri()
@@ -102,9 +103,8 @@ def update_config(self, config):
 
     def get_helpers(self):
         return {
-            'helper_available': utils.helper_available,
-            'dcat_get_endpoint': utils.get_endpoint,
-            'dcat_endpoints_enabled': utils.endpoints_enabled,
+            'dcat_get_endpoint': helpers.get_endpoint,
+            'dcat_endpoints_enabled': helpers.endpoints_enabled,
         }
 
     # IActions
@@ -243,11 +243,18 @@ def get_auth_functions(self):
 
 
 class StructuredDataPlugin(p.SingletonPlugin):
+
+    p.implements(p.IConfigurer, inherit=True)
     p.implements(p.ITemplateHelpers, inherit=True)
 
+    # IConfigurer
+
+    def update_config(self, config):
+        p.toolkit.add_template_directory(config, '../templates/structured_data')
+
     # ITemplateHelpers
 
     def get_helpers(self):
         return {
-            'structured_data': utils.structured_data,
+            'structured_data': helpers.structured_data,
         }
diff --git a/ckanext/dcat/templates/home/index.html → ckanext/dcat/templates/dcat/home/index.html b/ckanext/dcat/templates/home/index.html → ckanext/dcat/templates/dcat/home/index.html
diff --git a/...ext/dcat/templates/package/read_base.html → ...cat/templates/dcat/package/read_base.html b/...ext/dcat/templates/package/read_base.html → ...cat/templates/dcat/package/read_base.html
@@ -12,20 +12,3 @@
       {% endwith %}
     {% endif %}
 {% endblock -%}
-{% block scripts %}
-  {{ super() }}
-  {% block structured_data %}
-    {#
-    h.structured_data is defined in the 'structured_data' plugin,
-    you have to activate the plugin (or implement the method yourself)
-    to make use of this feature.
-    More information about structured data:
-    https://developers.google.com/search/docs/guides/intro-structured-data
-    #}
-      {% if h.helper_available('structured_data') %}
-          <script type="application/ld+json">
-           {{ h.structured_data(pkg.id)|safe }}
-          </script>
-      {% endif %}
-  {% endblock %}
-{% endblock %}
diff --git a/ckanext/dcat/templates/package/search.html → ...t/dcat/templates/dcat/package/search.html b/ckanext/dcat/templates/package/search.html → ...t/dcat/templates/dcat/package/search.html
diff --git a/.../scheming/display_snippets/dcat_date.html → .../scheming/display_snippets/dcat_date.html b/.../scheming/display_snippets/dcat_date.html → .../scheming/display_snippets/dcat_date.html
diff --git a/.../scheming/display_snippets/file_size.html → .../scheming/display_snippets/file_size.html b/.../scheming/display_snippets/file_size.html → .../scheming/display_snippets/file_size.html
diff --git a/...plates/scheming/form_snippets/number.html → ...s/dcat/scheming/form_snippets/number.html b/...plates/scheming/form_snippets/number.html → ...s/dcat/scheming/form_snippets/number.html
diff --git a/...ng/form_snippets/repeating_subfields.html → ...ng/form_snippets/repeating_subfields.html b/...ng/form_snippets/repeating_subfields.html → ...ng/form_snippets/repeating_subfields.html
diff --git a/ckanext/dcat/templates/structured_data/package/read_base.html b/ckanext/dcat/templates/structured_data/package/read_base.html
@@ -0,0 +1,13 @@
+{% ckan_extends %}
+
+{% block scripts %}
+
+  {{ super() }}
+
+  {% block structured_data %}
+    <!-- Structured data -->
+    <script type="application/ld+json">
+      {{ h.structured_data(pkg) | safe }}
+    </script>
+  {% endblock %}
+{% endblock %}
diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py
@@ -37,7 +37,6 @@
 DCAT_CLEAN_TAGS = 'ckanext.dcat.clean_tags'
 
 DEFAULT_CATALOG_ENDPOINT = '/catalog.{_format}'
-ENABLE_RDF_ENDPOINTS_CONFIG = 'ckanext.dcat.enable_rdf_endpoints'
 ENABLE_CONTENT_NEGOTIATION_CONFIG = 'ckanext.dcat.enable_content_negotiation'
 
 
@@ -95,43 +94,6 @@ def field_labels():
         'created': _('Created'),
     }
 
-def helper_available(helper_name):
-    '''
-    Checks if a given helper name is available on `h`
-    '''
-    try:
-        getattr(h, helper_name)
-    except (AttributeError, HelperError):
-        return False
-    return True
-
-def structured_data(dataset_id, profiles=None, _format='jsonld'):
-    '''
-    Returns a string containing the structured data of the given
-    dataset id and using the given profiles (if no profiles are supplied
-    the default profiles are used).
-
-    This string can be used in the frontend.
-    '''
-    if not profiles:
-        profiles = ['schemaorg']
-
-    data = toolkit.get_action('dcat_dataset_show')(
-        {},
-        {
-            'id': dataset_id,
-            'profiles': profiles,
-            'format': _format,
-        }
-    )
-    # parse result again to prevent UnicodeDecodeError and add formatting
-    try:
-        json_data = json.loads(data)
-        return json.dumps(json_data, sort_keys=True,
-                          indent=4, separators=(',', ': '), cls=json.JSONEncoderForHTML)
-    except ValueError:
-        # result was not JSON, return anyway
-        return data
 
 def catalog_uri():
     '''
@@ -459,11 +421,3 @@ def read_catalog_page(_format):
     response.headers['Content-type'] = CONTENT_TYPES[_format]
 
     return response
-
-
-def endpoints_enabled():
-    return toolkit.asbool(config.get(ENABLE_RDF_ENDPOINTS_CONFIG, True))
-
-
-def get_endpoint(_type='dataset'):
-    return 'dcat.read_dataset' if _type == 'dataset' else 'dcat.read_catalog'
diff --git a/docs/contributing.md b/docs/contributing.md
@@ -0,0 +1,27 @@
+As part of the CKAN ecosystem, ckanext-dcat is entirely open source and welcomes all forms of contributions from the community.
+In addition to the general guidance provided in the [CKAN documentation](https://docs.ckan.org/en/latest/contributing/index.html), please follow these points:
+
+* Format your code with [Black](https://github.com/psf/black).
+* Make sure to include tests for your changes. The extension has an extensive test suite so in most cases you just need to copy some of the existing tests and adapt them.
+* It's better to submit a pull request early, even if in draft state, to get feedback and make sure the contribution will be accepted.
+
+### Including new profiles
+
+
+New [profiles](profiles.md) that are useful to the wider community are welcome, provided that they are sustainable in the long term. A maintainer unfamiliar with the profile should be able to understand what the profile does and be confident that everything works as expected. The way to achieve this is with tests (lots of them!) and documentation.
+
+More localized profiles are better placed in dedicated extensions.
+
+A contribution that adds a new profile should include:
+
+* A new [profile class](https://github.com/ckan/ckanext-dcat/tree/master/ckanext/dcat/profiles) with parse and serialize methods (extending the DCAT v3 one)
+* A new dataset [schema](https://github.com/ckan/ckanext-dcat/tree/master/ckanext/dcat/schemas) that contains all new properties supported in the new profile (it can contain just the base DCAT 3 recommended ones)
+* [Example](https://github.com/ckan/ckanext-dcat/tree/master/examples) CKAN dataset and DCAT serialization of the new profile
+* Tests:
+    * [SHACL validation](https://github.com/ckan/ckanext-dcat/tree/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/shacl) if SHACL shapes are provided
+    * [End to end](https://github.com/ckan/ckanext-dcat/blob/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py#L44) tests covering parsing and serialization
+    * Parsing and serialization tests covering [specific functionality](https://github.com/ckan/ckanext-dcat/blob/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py#L368) for the profile
+* [Documentation](https://github.com/ckan/ckanext-dcat/tree/1e945b6e79f0e0bae1ff76989ef9789abb5e32a8/docs) about the new profile (compatibility with DCAT AP versions, other profiles required, config options etc)
+
+This might seem like a lot of requirements, but using the existing linked resources as a template should make things much easier. Do not hesitate to ask for help if you are unsure about any of these points.
+
diff --git a/docs/google-dataset-search.md b/docs/google-dataset-search.md
@@ -2,18 +2,22 @@
 
 The `structured_data` plugin will add the necessary markup to dataset pages in order to get your datasets indexed by [Google Dataset Search](https://toolbox.google.com/datasetsearch). This markup is a [structured data](https://developers.google.com/search/docs/guides/intro-structured-data) JSON-LD snippet that uses the [schema.org](https://schema.org) vocabulary to describe the dataset.
 
-    ckan.plugins = dcat structured_data
+    ckan.plugins = structured_data
 
-By default this uses the `schemaorg` profile (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.
-To change the schema, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/package/read_base.html) and call the template helper function with different parameters:
+You don't need to load the `dcat` plugin to use the `structured_data` plugin, but you can load them both to enable both functionalities.
+
+The plugin uses the `schemaorg` profile by default (see [Profiles](profiles.md#profiles)) to serialize the dataset to JSON-LD, which is then added to the dataset detail page.
+
+To use a custom profile, you have to override the Jinja template block called `structured_data` in [`templates/package/read_base.html`](https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/templates/structured_data/package/read_base.html) and call the template helper function with different parameters:
 
     {% block structured_data %}
       <script type="application/ld+json">
-      {{ h.structured_data(pkg.id, ['my_custom_schema'])|safe }}
+      {{ h.structured_data(pkg, ['my_custom_schema']) | safe }}
       </script>
     {% endblock %}
 
-Example output of structured data in JSON-LD:
+
+Below is an example of the structured data in JSON-LD embedded in the dataset page source:
 
 ```html
     <script type="application/ld+json">

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -76,4 +76,5 @@ nav:
     - Google Dataset Search: 'google-dataset-search.md'
     - CLI: 'cli.md'
     - Configuration reference: 'configuration.md'
+  - Contributing: 'contributing.md'
   - CHANGELOG: 'changelog.md'