From 9e9447f6c798f26bd68265a49ec759c9e95a3c29 Mon Sep 17 00:00:00 2001 From: Deepak Date: Tue, 6 Dec 2022 17:00:42 +0100 Subject: [PATCH] Add tutorial notebook (#19) * Add tutorial notebooks * Fix reference and how plugins are supplied * Update README.md * Bump version --- README.md | 2 +- examples/example_data1_validation_report.json | 4 +- examples/example_data2_validation_report.json | 4 +- linkml_validator/__init__.py | 2 +- linkml_validator/cli.py | 6 +- linkml_validator/validator.py | 10 +- tutorials/Data validation (Part 1).ipynb | 372 ++++++++++++++++++ tutorials/Data validation (Part 2).ipynb | 207 ++++++++++ .../Data validation with custom plugins.ipynb | 235 +++++++++++ 9 files changed, 828 insertions(+), 14 deletions(-) create mode 100644 tutorials/Data validation (Part 1).ipynb create mode 100644 tutorials/Data validation (Part 2).ipynb create mode 100644 tutorials/Data validation with custom plugins.ipynb diff --git a/README.md b/README.md index df94cb1..77c40ac 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,7 @@ data_obj = { "name": "Object 1", "type": "X" } -validator = Validator(schema="examples/example_schema.yaml", plugins={MyCustomPlugin}) +validator = Validator(schema="examples/example_schema.yaml", plugins=[{"plugin_class": "MyCustomPlugin", "args": {}}]) validator.validate(obj=data_obj, target_class="NamedThing") ``` diff --git a/examples/example_data1_validation_report.json b/examples/example_data1_validation_report.json index ed6140f..f3bc5b1 100644 --- a/examples/example_data1_validation_report.json +++ b/examples/example_data1_validation_report.json @@ -9,7 +9,7 @@ "valid": true, "validation_results": [ { - "plugin_name": "JsonschemaValidationPlugin", + "plugin_name": "JsonSchemaValidationPlugin", "valid": true, "validation_messages": [] } @@ -25,7 +25,7 @@ "valid": true, "validation_results": [ { - "plugin_name": "JsonschemaValidationPlugin", + "plugin_name": "JsonSchemaValidationPlugin", "valid": true, "validation_messages": [] } 
diff --git a/examples/example_data2_validation_report.json b/examples/example_data2_validation_report.json index ed6140f..f3bc5b1 100644 --- a/examples/example_data2_validation_report.json +++ b/examples/example_data2_validation_report.json @@ -9,7 +9,7 @@ "valid": true, "validation_results": [ { - "plugin_name": "JsonschemaValidationPlugin", + "plugin_name": "JsonSchemaValidationPlugin", "valid": true, "validation_messages": [] } @@ -25,7 +25,7 @@ "valid": true, "validation_results": [ { - "plugin_name": "JsonschemaValidationPlugin", + "plugin_name": "JsonSchemaValidationPlugin", "valid": true, "validation_messages": [] } diff --git a/linkml_validator/__init__.py b/linkml_validator/__init__.py index cd1ee63..98a433b 100644 --- a/linkml_validator/__init__.py +++ b/linkml_validator/__init__.py @@ -1 +1 @@ -__version__ = "0.4.4" +__version__ = "0.4.5" diff --git a/linkml_validator/cli.py b/linkml_validator/cli.py index 680cf2b..cbfe0b8 100644 --- a/linkml_validator/cli.py +++ b/linkml_validator/cli.py @@ -5,7 +5,7 @@ PLUGINS = { - "JsonschemaValidationPlugin": "linkml_validator.plugins.jsonschema_validation.JsonschemaValidationPlugin", + "JsonSchemaValidationPlugin": "linkml_validator.plugins.jsonschema_validation.JsonSchemaValidationPlugin", "RangeValidationPlugin": "linkml_validator.plugins.range_validation.RangeValidationPlugin", } @@ -50,7 +50,7 @@ def cli(inputs, schema, output, target_class, plugins, strict): """ Run the Validator on data from one or more files. 
""" - plugin_class_references = set() + plugin_class_references = [] if not plugins: plugins = DEFAULT_PLUGINS.values() for plugin in plugins: @@ -59,7 +59,7 @@ def cli(inputs, schema, output, target_class, plugins, strict): plugin_module_name = ".".join(plugin.split(".")[:-1]) plugin_class_name = plugin.split(".")[-1] plugin_class = import_plugin(plugin_module_name, plugin_class_name) - plugin_class_references.add(plugin_class) + plugin_class_references.append({'plugin_class': plugin_class}) validator = Validator(schema=schema, plugins=plugin_class_references) for filename in inputs: reports = [x for x in validator.validate_file(filename=filename, target_class=target_class, strict=strict)] diff --git a/linkml_validator/validator.py b/linkml_validator/validator.py index da748c9..4bf4b99 100644 --- a/linkml_validator/validator.py +++ b/linkml_validator/validator.py @@ -7,7 +7,7 @@ DEFAULT_PLUGINS = { - "JsonschemaValidationPlugin": JsonSchemaValidationPlugin + "JsonSchemaValidationPlugin": JsonSchemaValidationPlugin } @@ -17,13 +17,13 @@ class Validator: Args: schema: Path or URL to schema YAML - plugins: A set of plugin classes to use for validation + plugins: A list of plugin classes to use for validation """ def __init__(self, schema: str, plugins: List[Dict] = None) -> None: self.schema = schema - self.plugins = set() + self.plugins = [] if plugins: for plugin in plugins: plugin_class = plugin["plugin_class"] @@ -33,11 +33,11 @@ def __init__(self, schema: str, plugins: List[Dict] = None) -> None: if not issubclass(plugin_class, BasePlugin): raise Exception(f"{plugin_class} must be a subclass of {BasePlugin}") instance = plugin_class(schema=self.schema, **plugin_args) - self.plugins.add(instance) + self.plugins.append(instance) else: for plugin_class in DEFAULT_PLUGINS.values(): instance = plugin_class(schema=self.schema) - self.plugins.add(instance) + self.plugins.append(instance) def validate( self, obj: Dict, target_class: str, strict: bool = False, **kwargs 
diff --git a/tutorials/Data validation (Part 1).ipynb b/tutorials/Data validation (Part 1).ipynb new file mode 100644 index 0000000..daec4f9 --- /dev/null +++ b/tutorials/Data validation (Part 1).ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7984f5ae", + "metadata": {}, + "source": [ + "# Data validation (Part 1)" + ] + }, + { + "cell_type": "markdown", + "id": "dfd9add2", + "metadata": {}, + "source": [ + "This is a quick example on how to use the linkml-validator to validate an object against a given LinkML schema." + ] + }, + { + "cell_type": "markdown", + "id": "58397c59", + "metadata": {}, + "source": [ + "## Schema" + ] + }, + { + "cell_type": "markdown", + "id": "0b68b0b0", + "metadata": {}, + "source": [ + "First you define the schema YAML:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "36b3798c", + "metadata": {}, + "outputs": [], + "source": [ + "schema = \"\"\"\n", + "\n", + "id: https://w3id.org/Example-Schema\n", + "name: Example-Schema\n", + "description: >-\n", + " An Example Schema\n", + "version: 0.0.0\n", + "imports:\n", + " - linkml:types\n", + "\n", + "prefixes:\n", + " linkml: https://w3id.org/linkml/\n", + " example: https://w3id.org/example/\n", + "\n", + "default_prefix: example\n", + "\n", + "classes:\n", + " named thing:\n", + " slots:\n", + " - id\n", + " - name\n", + " - type\n", + "\n", + "slots:\n", + " id:\n", + " required: true\n", + "\n", + " name:\n", + " range: string\n", + "\n", + " type:\n", + " range: type_enum\n", + "\n", + "enums:\n", + " type_enum:\n", + " permissible_values:\n", + " X:\n", + " Y:\n", + " Z:\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "id": "236b9d96", + "metadata": {}, + "source": [ + "## Data as an object" + ] + }, + { + "cell_type": "markdown", + "id": "afb34848", + "metadata": {}, + "source": [ + "Then define the data as a JSON object:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0ce2d92d", + 
"metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"id\": \"obj1\",\n", + " \"name\": \"Object 1\",\n", + " \"type\": \"X\"\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "1a9af55a", + "metadata": {}, + "source": [ + "Now, you can instantiate the Validator with the defined schema:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7472e454", + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_validator.validator import Validator\n", + "\n", + "validator = Validator(schema=schema)" + ] + }, + { + "cell_type": "markdown", + "id": "344619d6", + "metadata": {}, + "source": [ + "And then run the `validate` method to validate the defined object:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c77ca267", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Object valid: True\n" + ] + } + ], + "source": [ + "report = validator.validate(obj=data, target_class='NamedThing')\n", + "print(f\"Object valid: {report.valid}\")" + ] + }, + { + "cell_type": "markdown", + "id": "38e4375e", + "metadata": {}, + "source": [ + "## Data as a list of objects" + ] + }, + { + "cell_type": "markdown", + "id": "4dc570be", + "metadata": {}, + "source": [ + "If your data is a list of objects:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ef7fbd98", + "metadata": {}, + "outputs": [], + "source": [ + "data = [\n", + " {\n", + " \"id\": \"obj1\",\n", + " \"name\": \"Object 1\",\n", + " \"type\": \"X\"\n", + " },\n", + " {\n", + " \"id\": \"obj2\",\n", + " \"name\": \"Object 2\",\n", + " \"type\": \"Y\"\n", + " }\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "63e6e6b7", + "metadata": {}, + "source": [ + "You can run the validate method to validate each object in the list:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a4990faa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Object valid: True\n", + "Object valid: True\n" + ] + } + ], + "source": [ + "for obj in data:\n", + " report = validator.validate(obj=obj, target_class='NamedThing')\n", + " print(f\"Object valid: {report.valid}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a03fbc91", + "metadata": {}, + "source": [ + "## Validating invalid data" + ] + }, + { + "cell_type": "markdown", + "id": "a7e41ede", + "metadata": {}, + "source": [ + "Let's assume we have a list of objects, two of which violate the schema:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7107f342", + "metadata": {}, + "outputs": [], + "source": [ + "data = [\n", + " {\n", + " \"id\": \"obj1\",\n", + " \"name\": \"Object 1\",\n", + " \"type\": \"X\"\n", + " },\n", + " {\n", + " \"id\": \"obj2\",\n", + " \"name\": \"Object 2\",\n", + " \"type\": \"Y\"\n", + " },\n", + " {\n", + " \"name\": \"Object 3\", # <-- Missing 'id' field\n", + " \"type\": \"Y\"\n", + " },\n", + " {\n", + " \"id\": \"obj4\",\n", + " \"name\": \"Object 4\",\n", + " \"type\": \"ABC\" # <-- Incorrect enum used for 'type'\n", + " }\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "99e9ed95", + "metadata": {}, + "source": [ + "Then when we run the validation on all the objects in the list, we should see some errors reported:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "00b6810e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Object valid: True\n", + "Object valid: True\n", + "Object valid: False\n", + "Object valid: False\n" + ] + } + ], + "source": [ + "for obj in data:\n", + " report = validator.validate(obj=obj, target_class='NamedThing')\n", + " print(f\"Object valid: {report.valid}\")" + ] + }, + { + "cell_type": "markdown", + "id": "24b980e9", + "metadata": {}, + "source": [ + "But why?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e4515063", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Object valid: True\n", + "Object valid: True\n", + "Object valid: False\n", + "[JsonSchemaValidationPlugin] 'id' is a required property for {'name': 'Object 3', 'type': 'Y'}\n", + "Object valid: False\n", + "[JsonSchemaValidationPlugin] 'ABC' is not one of ['X', 'Y', 'Z'] for {'id': 'obj4', 'name': 'Object 4', 'type': 'ABC'}\n" + ] + } + ], + "source": [ + "for obj in data:\n", + " report = validator.validate(obj=obj, target_class='NamedThing')\n", + " print(f\"Object valid: {report.valid}\")\n", + " if not report.valid:\n", + " for result in report.validation_results:\n", + " for message in result.validation_messages:\n", + " print(f\"[{result.plugin_name}] {message.message} for {report.object}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "881f486e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/Data validation (Part 2).ipynb b/tutorials/Data validation (Part 2).ipynb new file mode 100644 index 0000000..f28c21b --- /dev/null +++ b/tutorials/Data validation (Part 2).ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b69e2b52", + "metadata": {}, + "source": [ + "# Data validation (Part 2)" + ] + }, + { + "cell_type": "markdown", + "id": "d41bb302", + "metadata": {}, + "source": [ + "Let us consider a more realistic scenario where our data is to be validated against the Biolink 
Model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "36c596c6", + "metadata": {}, + "source": [ + "## Schema" + ] + }, + { + "cell_type": "markdown", + "id": "55c61534", + "metadata": {}, + "source": [ + "We will use Biolink Model v3.1.1." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a45ede11", + "metadata": {}, + "outputs": [], + "source": [ + "schema_url = \"https://raw.githubusercontent.com/biolink/biolink-model/v3.1.1/biolink-model.yaml\"" + ] + }, + { + "cell_type": "markdown", + "id": "fccf92d3", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "markdown", + "id": "4c7b922a", + "metadata": {}, + "source": [ + "And we have a list of Gene objects:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bd90f9cf", + "metadata": {}, + "outputs": [], + "source": [ + "data = [\n", + " {\n", + " \"category\": [\n", + " \"biolink:Gene\"\n", + " ],\n", + " \"id\": \"HGNC:10848\",\n", + " \"name\": \"SHH (human)\",\n", + " \"provided_by\": [\n", + " \"graph_nodes.tsv\"\n", + " ],\n", + " \"taxon\": \"NCBITaxon:9606\"\n", + " },\n", + " {\n", + " \"category\": [\n", + " \"biolink:Gene\"\n", + " ],\n", + " \"id\": \"NCBIGene:6469\",\n", + " \"name\": \"SHH\",\n", + " \"provided_by\": [\n", + " \"graph_nodes.tsv\"\n", + " ],\n", + " \"taxon\": \"NCBITaxon:9606\"\n", + " },\n", + " {\n", + " \"category\": [\n", + " \"biolink:Gene\"\n", + " ],\n", + " \"id\": \"HGNC:9398\",\n", + " \"name\": \"OLIG2\",\n", + " \"provided_by\": [\n", + " \"graph_nodes.tsv\"\n", + " ],\n", + " \"taxon\": \"NCBITaxon:9606\"\n", + " },\n", + " {\n", + " \"id\": \"HGNC:9399\", # <-- 'category' missing for object\n", + " \"name\": \"PRKCD\",\n", + " \"provided_by\": [\n", + " \"graph_nodes.tsv\"\n", + " ],\n", + " \"taxon\": \"NCBITaxon:9606\"\n", + " }\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "79c70738", + "metadata": {}, + "source": [ + "## Validate data against the schema" + ] + }, + { + "cell_type": 
"markdown", + "id": "dc05ddc6", + "metadata": {}, + "source": [ + "First we instantiate the Validator with the Biolink Model YAML:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ec720b3c", + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_validator.validator import Validator\n", + "\n", + "validator = Validator(schema=schema_url)" + ] + }, + { + "cell_type": "markdown", + "id": "995fdc62", + "metadata": {}, + "source": [ + "Then we can validate our data against the Biolink Model:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7c418d3e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Object valid: True\n", + "Object valid: True\n", + "Object valid: True\n", + "Object valid: False\n", + "[JsonSchemaValidationPlugin] 'category' is a required property for {'id': 'HGNC:9399', 'name': 'PRKCD', 'provided_by': ['graph_nodes.tsv'], 'taxon': 'NCBITaxon:9606'}\n" + ] + } + ], + "source": [ + "for obj in data:\n", + " report = validator.validate(obj=obj, target_class='Gene')\n", + " print(f\"Object valid: {report.valid}\")\n", + " if not report.valid:\n", + " for result in report.validation_results:\n", + " for message in result.validation_messages:\n", + " print(f\"[{result.plugin_name}] {message.message} for {report.object}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ac48bf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/Data validation with custom plugins.ipynb b/tutorials/Data 
validation with custom plugins.ipynb new file mode 100644 index 0000000..9623a39 --- /dev/null +++ b/tutorials/Data validation with custom plugins.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bd6b112d", + "metadata": {}, + "source": [ + "# Data validation with custom plugins" + ] + }, + { + "cell_type": "markdown", + "id": "217889fb", + "metadata": {}, + "source": [ + "Let us consider another scenario where we know our data is wrong but it is not possible to validate and identify such errors via JSONSchema.\n", + "\n", + "This will happen when we translate an RDF oriented data model into JSONSchema. There is loss of semantics and the constraints of JSONSchema become apparent." + ] + }, + { + "cell_type": "markdown", + "id": "f3b0a09f", + "metadata": {}, + "source": [ + "## Schema" + ] + }, + { + "cell_type": "markdown", + "id": "388b88ea", + "metadata": {}, + "source": [ + "We will use Biolink Model v3.1.1." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "79c8faab", + "metadata": {}, + "outputs": [], + "source": [ + "schema_url = \"https://raw.githubusercontent.com/biolink/biolink-model/v3.1.1/biolink-model.yaml\"" + ] + }, + { + "cell_type": "markdown", + "id": "ff86df72", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "markdown", + "id": "ba0d2771", + "metadata": {}, + "source": [ + "And we have an object that is incorrect:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6acb0985", + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"id\": \"HGNC:9399\",\n", + " \"name\": \"PRKCD\",\n", + " \"category\": [\n", + " \"biolink:GeneEntity\" # <-- This should be a valid category from Biolink Model\n", + " ],\n", + " \"provided_by\": [\n", + " \"graph_nodes.tsv\"\n", + " ],\n", + " \"taxon\": \"NCBITaxon:9606\"\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "4b8a2744", + "metadata": {}, + "source": [ + "## Define a custom validation 
plugin" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "34aea3cf", + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_runtime.utils.schemaview import SchemaView\n", + "\n", + "from linkml_validator.validator import Validator\n", + "from linkml_validator.plugins.base import BasePlugin\n", + "from linkml_validator.models import ValidationResult, ValidationMessage\n", + "from linkml_validator.utils import camelcase_to_sentencecase\n", + "\n", + "class MyCustomPlugin(BasePlugin):\n", + " \"\"\"\n", + " A plugin that checks if a given category of an object\n", + " is valid and exists in Biolink Model.\n", + " \"\"\"\n", + " NAME = \"MyCustomPlugin\"\n", + "\n", + " def __init__(self, schema: str, **kwargs) -> None:\n", + " super().__init__(schema)\n", + " self.schemaview = SchemaView(schema)\n", + "\n", + " def process(self, obj: dict, **kwargs) -> ValidationResult:\n", + " valid = True\n", + " categories = obj['category']\n", + " validation_messages = []\n", + " for category in categories:\n", + " category_name = camelcase_to_sentencecase(category.split(':')[1])\n", + " if category_name not in self.schemaview.all_classes():\n", + " valid = False\n", + " validation_message = ValidationMessage(\n", + " severity='Error',\n", + " field='category',\n", + " value=category,\n", + " message=f'Category {category} not in the schema'\n", + " )\n", + " validation_messages.append(validation_message)\n", + " break\n", + " result = ValidationResult(\n", + " plugin_name=self.NAME,\n", + " valid=valid,\n", + " validation_messages=validation_messages\n", + " )\n", + " return result\n" + ] + }, + { + "cell_type": "markdown", + "id": "5f0358f1", + "metadata": {}, + "source": [ + "## Validating data using custom validation plugin" + ] + }, + { + "cell_type": "markdown", + "id": "e693cdca", + "metadata": {}, + "source": [ + "First we instantiate the Validator with the Biolink Model YAML and provide a list of plugins:" + ] + }, + { + "cell_type": "code", + 
"execution_count": 4, + "id": "cf08b198", + "metadata": {}, + "outputs": [], + "source": [ + "from linkml_validator.validator import Validator\n", + "\n", + "plugins = [\n", + " {\n", + " \"plugin_class\": MyCustomPlugin,\n", + " \"args\": {}\n", + " }\n", + "]\n", + "validator = Validator(schema=schema_url, plugins=plugins)" + ] + }, + { + "cell_type": "markdown", + "id": "bc79c3ef", + "metadata": {}, + "source": [ + "Then we can validate our data against the Biolink Model:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5be706a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[MyCustomPlugin] Category biolink:GeneEntity not in the schema for {'id': 'HGNC:9399', 'name': 'PRKCD', 'category': ['biolink:GeneEntity'], 'provided_by': ['graph_nodes.tsv'], 'taxon': 'NCBITaxon:9606'}\n" + ] + } + ], + "source": [ + "report = validator.validate(obj=data, target_class='Gene')\n", + "\n", + "if not report.valid:\n", + " for result in report.validation_results:\n", + " for message in result.validation_messages:\n", + " print(f\"[{result.plugin_name}] {message.message} for {report.object}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff44abe6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}