Skip to content

Commit

Permalink
Merge pull request #279 from ckan/new-cli
Browse files Browse the repository at this point in the history
New `ckan dcat consume` and `ckan dcat produce` commands
  • Loading branch information
amercader authored May 7, 2024
2 parents 6fb1780 + 576e26c commit 48b5e61
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 66 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ It also offers other features related to Semantic Data like exposing the necessa
- [XML DCAT harvester (deprecated)](#xml-dcat-harvester-deprecated)
- [Translation of fields](#translation-of-fields)
- [Structured Data and Google Dataset Search indexing](#structured-data-and-google-dataset-search-indexing)
- [CLI](#cli)
- [Running the Tests](#running-the-tests)
- [Releases](#releases)
- [Acknowledgements](#acknowledgements)
Expand Down Expand Up @@ -944,6 +945,25 @@ Example output of structured data in JSON-LD:
</html>


## CLI

The `ckan dcat` command offers utilities to transform DCAT RDF serializations into CKAN datasets (`ckan dcat consume`) and
vice versa (`ckan dcat produce`). In both cases the input can be provided as a path to a file:

ckan dcat consume -f ttl examples/dataset.ttl

ckan dcat produce -f jsonld examples/ckan_datasets.json

or be read from stdin:

ckan dcat consume -

The latter form allows chaining commands for more complex metadata processing, e.g.:

curl https://demo.ckan.org/api/action/package_search | jq .result.results | ckan dcat produce -f jsonld -

For the full list of options check `ckan dcat consume --help` and `ckan dcat produce --help`.

## Running the Tests

To run the tests do:
Expand Down
127 changes: 117 additions & 10 deletions ckanext/dcat/cli.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,132 @@
# -*- coding: utf-8 -*-
import json

import click

import ckan.plugins.toolkit as tk

import ckanext.dcat.utils as utils
from ckanext.dcat.processors import RDFParser, RDFSerializer, DEFAULT_RDF_PROFILES

@click.group()
def generate_static():
"""Generates static files containing all datasets.

"""
@click.group()
def dcat():
"""DCAT utilities for CKAN"""
pass

@generate_static.command()
@click.argument('output', type=click.File(mode="w"))
def json(output):
"""The generate command will generate a static file containing all of
the datasets in the catalog in JSON format.

@dcat.command()
@click.argument("output", type=click.File(mode="w"))
def generate_static(output):
"""[Deprecated] Generate a static datasets file in JSON format
(requires the dcat_json_interface plugin) .
"""
utils.generate_static_json(output)


@dcat.command(context_settings={"show_default": True})
@click.argument("input", type=click.File(mode="r"))
@click.option(
    "-o",
    "--output",
    type=click.File(mode="w"),
    default="-",
    help="By default the command will output the result to stdout, "
    "alternatively you can provide a file path with this option",
)
@click.option(
    "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)"
)
@click.option(
    "-p",
    "--profiles",
    default=" ".join(DEFAULT_RDF_PROFILES),
    help="RDF profiles to use",
)
@click.option(
    "-P", "--pretty", is_flag=True, help="Make the output more human readable"
)
@click.option(
    "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
)
def consume(input, output, format, profiles, pretty, compat_mode):
    """
    Parses DCAT RDF graphs into CKAN dataset JSON objects.

    The input serializations can be provided as a path to a file, e.g.:

        ckan dcat consume examples/dataset.ttl

    Or be read from stdin:

        ckan dcat consume -
    """
    # NOTE: the docstring above is shown verbatim as the command's --help
    # text, so the parameter notes live in this comment instead.
    #
    #   input       -- readable text stream opened by click ("-" is stdin)
    #   output      -- writable text stream opened by click ("-" is stdout)
    #   format      -- serialization format name handed to RDFParser.parse
    #   profiles    -- whitespace separated profile names
    #   pretty      -- when set, the JSON output is indented by 4 spaces
    #   compat_mode -- forwarded to RDFParser as `compatibility_mode`
    contents = input.read()

    # The option arrives as one whitespace separated string; an empty value
    # is passed through untouched so RDFParser decides what to do with it.
    if profiles:
        profiles = profiles.split()

    parser = RDFParser(profiles=profiles, compatibility_mode=compat_mode)
    parser.parse(contents, _format=format)

    # Materialise the parsed datasets so they can be dumped as a JSON array.
    ckan_datasets = list(parser.datasets())

    indent = 4 if pretty else None
    output.write(json.dumps(ckan_datasets, indent=indent))


@dcat.command(context_settings={"show_default": True})
@click.argument("input", type=click.File(mode="r"))
@click.option(
    "-o",
    "--output",
    type=click.File(mode="w"),
    default="-",
    help="By default the command will output the result to stdout, "
    "alternatively you can provide a file path with this option",
)
@click.option(
    "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)"
)
@click.option(
    "-p",
    "--profiles",
    default=" ".join(DEFAULT_RDF_PROFILES),
    help="RDF profiles to use",
)
@click.option(
    "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
)
def produce(input, output, format, profiles, compat_mode):
    """
    Transforms CKAN dataset JSON objects into DCAT RDF serializations.

    The input datasets can be provided as a path to a file, e.g.:

        ckan dcat produce examples/ckan_dataset.json

    Or be read from stdin:

        ckan dcat produce -
    """
    # NOTE: the docstring above is shown verbatim as the command's --help
    # text, so the parameter notes live in this comment instead.
    #
    #   input       -- readable text stream opened by click ("-" is stdin)
    #   output      -- writable text stream opened by click ("-" is stdout)
    #   format      -- serialization format name handed to the serializer
    #   profiles    -- whitespace separated profile names
    #   compat_mode -- forwarded to RDFSerializer as `compatibility_mode`
    contents = input.read()

    # The option arrives as one whitespace separated string; an empty value
    # is passed through untouched so RDFSerializer decides what to do with it.
    if profiles:
        profiles = profiles.split()

    serializer = RDFSerializer(
        profiles=profiles,
        compatibility_mode=compat_mode,
    )

    # The input may be a single dataset object or a JSON array of datasets
    # (e.g. the `results` list of a package_search response).
    dataset = json.loads(contents)
    if isinstance(dataset, list):
        out = serializer.serialize_datasets(dataset, _format=format)
    else:
        out = serializer.serialize_dataset(dataset, _format=format)

    output.write(out)


def get_commands():
return [generate_static]
return [dcat]
72 changes: 16 additions & 56 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,22 @@ def serialize_dataset(self, dataset_dict, _format='xml'):

return output

def serialize_datasets(self, dataset_dicts, _format='xml'):
    '''
    Serialize several CKAN dataset dicts to RDF in a single call

    Every item of `dataset_dicts` is handed to `serialize_dataset` together
    with `_format` (one of the formats supported by RDFLib, `xml` when not
    given) and the individual results are glued together with newlines.

    Returns a string with the serialized datasets
    '''
    # NOTE(review): the per-dataset outputs are concatenated as-is; confirm
    # this gives a usable single document for formats like xml or jsonld.
    return '\n'.join(
        self.serialize_dataset(one_dataset, _format)
        for one_dataset in dataset_dicts
    )



def serialize_catalog(self, catalog_dict=None, dataset_dicts=None,
_format='xml', pagination_info=None):
'''
Expand Down Expand Up @@ -394,59 +410,3 @@ def _get_from_extra(key):
g.add((agent, predicate, _type(val)))

return catalog_ref


if __name__ == '__main__':

parser = argparse.ArgumentParser(
description='DCAT RDF - CKAN operations')
parser.add_argument('mode',
default='consume',
help='''
Operation mode.
`consume` parses DCAT RDF graphs to CKAN dataset JSON objects.
`produce` serializes CKAN dataset JSON objects into DCAT RDF.
''')
parser.add_argument('file', nargs='?', type=argparse.FileType('r'),
default=sys.stdin,
help='Input file. If omitted will read from stdin')
parser.add_argument('-f', '--format',
default='xml',
help='''Serialization format (as understood by rdflib)
eg: xml, n3 ... Defaults to \'xml\'.''')
parser.add_argument('-P', '--pretty',
action='store_true',
help='Make the output more human readable')
parser.add_argument('-p', '--profile', nargs='*',
action='store',
help='RDF Profiles to use, defaults to euro_dcat_ap_2')
parser.add_argument('-m', '--compat-mode',
action='store_true',
help='Enable compatibility mode')

parser.add_argument('-s', '--subcatalogs', action='store_true', dest='subcatalogs',
default=False,
help="Enable subcatalogs handling (dct:hasPart support)")
args = parser.parse_args()

contents = args.file.read()

config.update({DCAT_EXPOSE_SUBCATALOGS: args.subcatalogs})

if args.mode == 'produce':
serializer = RDFSerializer(profiles=args.profile,
compatibility_mode=args.compat_mode)

dataset = json.loads(contents)
out = serializer.serialize_dataset(dataset, _format=args.format)
print(out)
else:
parser = RDFParser(profiles=args.profile,
compatibility_mode=args.compat_mode)

parser.parse(contents, _format=args.format)

ckan_datasets = [d for d in parser.datasets()]

indent = 4 if args.pretty else None
print(json.dumps(ckan_datasets, indent=indent))
33 changes: 33 additions & 0 deletions ckanext/dcat/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
import os

from ckanext.dcat.cli import dcat as dcat_cli


def test_consume(cli):
    """`consume -f ttl` on the example Turtle file exits cleanly and emits
    a JSON list whose first dataset carries the expected title."""
    tests_dir = os.path.dirname(__file__)
    ttl_file = os.path.join(
        tests_dir, "..", "..", "..", "examples", "dataset_afs.ttl"
    )

    outcome = cli.invoke(dcat_cli, ["consume", "-f", "ttl", ttl_file])

    assert outcome.exit_code == 0

    datasets = json.loads(outcome.stdout)
    assert datasets[0]["title"] == "A test dataset on your catalogue"


def test_produce(cli):
    """`produce -f jsonld` on the example CKAN dataset exits cleanly and
    emits JSON-LD whose context binds the `dcat` prefix."""
    tests_dir = os.path.dirname(__file__)
    json_file = os.path.join(
        tests_dir, "..", "..", "..", "examples", "full_ckan_dataset.json"
    )

    outcome = cli.invoke(dcat_cli, ["produce", "-f", "jsonld", json_file])

    assert outcome.exit_code == 0

    document = json.loads(outcome.stdout)
    assert document["@context"]["dcat"] == "http://www.w3.org/ns/dcat#"

0 comments on commit 48b5e61

Please sign in to comment.