Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

csv testing and bug fixes #205

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions linkml_runtime/loaders/rdflib_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def from_rdf_graph(self, graph: Graph, schemaview: SchemaView, target_class: Typ
v = namespaces.curie_for(o)
for enum_name in enum_names:
e = schemaview.get_enum(enum_name)
if e is None:
raise ValueError(f'no enum found for {slot.range}: {o} (ns={v})')
for pv in e.permissible_values.values():
if v == pv.meaning or str(o) == pv.meaning:
v = pv.text
Expand Down Expand Up @@ -178,9 +180,16 @@ def from_rdf_graph(self, graph: Graph, schemaview: SchemaView, target_class: Typ
# Step 2: replace inline pointers with object dicts
def repl(v):
    """Swap an inline ``Pointer`` placeholder for the object it refers to.

    Values that are not ``Pointer`` instances are returned unchanged.

    :param v: a slot value, possibly a ``Pointer``
    :return: the ``obj_map`` entry for ``v.obj`` when ``v`` is a ``Pointer``, otherwise ``v``
    :raises Exception: when ``obj_map`` holds no (non-None) entry for ``v.obj``; the
        message lists the graph triples surrounding the unresolved node
    """
    if not isinstance(v, Pointer):
        return v
    # Use .get() rather than indexing so that a missing node reaches the
    # diagnostic below instead of surfacing as a bare KeyError.
    v2 = obj_map.get(v.obj)
    if v2 is None:
        lines = [f'No mapping for pointer {v}. Triples:']
        # Triples pointing at the unresolved node, each preceded by the
        # triples that point at its subject (one extra hop of context).
        for s, p, o in graph.triples((None, None, v.obj)):
            for s2, p2, o2 in graph.triples((None, None, s)):
                lines.append(f"{s2} {p2} {o2}.")
            lines.append(f"{s} {p} {o}.")
        # Triples that have the unresolved node as their subject.
        for s, p, o in graph.triples((v.obj, None, None)):
            lines.append(f"{s} {p} {o}.")
        raise Exception("\n".join(lines))
    return v2
Expand All @@ -207,6 +216,8 @@ def repl(v):
def _get_id_dict(self, node: VALID_SUBJECT, schemaview: SchemaView, cn: ClassDefinitionName) -> ANYDICT:
id_slot = schemaview.get_identifier_slot(cn)
if not isinstance(node, BNode):
if id_slot is None:
raise Exception(f'no slot found for {cn}: bnode={node}')
id_val = self._uri_to_id(node, id_slot, schemaview)
#id_val = schemaview.namespaces().curie_for(node)
if id_val == None:
Expand Down
4 changes: 3 additions & 1 deletion linkml_runtime/utils/yamlutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def __post_init__(self, *args: List[str], **kwargs):
for k in kwargs.keys():
v = repr(kwargs[k])[:40].replace('\n', '\\n')
messages.append(f"{TypedNode.yaml_loc(k)} Unknown argument: {k} = {v}")
raise ValueError('\n'.join(messages))
msg = f"Unknown arguments for: {self}\n"
msg += '\n'.join(messages)
raise ValueError(msg)

def _default(self, obj, filtr: Callable[[dict], dict] = None):
""" JSON serializer callback.
Expand Down
2 changes: 2 additions & 0 deletions tests/test_loaders_dumpers/input/table-inlined.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
columnA columnB columnB columnC
table:row1 first value second value something else
2 changes: 2 additions & 0 deletions tests/test_loaders_dumpers/input/table-json.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
columnA objectB columnC
table:row1 {\"name\": \"foo\", \"value\": \"bar\"} something else
146 changes: 146 additions & 0 deletions tests/test_loaders_dumpers/models/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Auto generated from table.yaml by pythongen.py version: 0.9.0
# Generation date: 2022-10-10T16:55:54
# Schema: table
#
# id: https://w3id.org/linkml/examples/table
# description: Represent a table in linkml
# license: https://creativecommons.org/publicdomain/zero/1.0/

import dataclasses
import sys
import re
from jsonasobj2 import JsonObj, as_dict
from typing import Optional, List, Union, Dict, ClassVar, Any
from dataclasses import dataclass
from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions

from linkml_runtime.utils.slot import Slot
from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode
from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int
from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs
from linkml_runtime.utils.formatutils import camelcase, underscore, sfx
from linkml_runtime.utils.enumerations import EnumDefinitionImpl
from rdflib import Namespace, URIRef
from linkml_runtime.utils.curienamespace import CurieNamespace
from linkml_runtime.linkml_model.types import String, Uriorcurie
from linkml_runtime.utils.metamodelcore import URIorCURIE

# Generator-emitted version markers; `version` is presumably the schema's own
# version, which is unset here.
metamodel_version = "1.7.0"
version = None

# Overwrite dataclasses _init_fn to add **kwargs in __init__
# NOTE(review): this rebinds an attribute on the stdlib `dataclasses` module
# itself, so it is presumably visible to every dataclass defined after this
# module is imported, not only the classes below.
dataclasses._init_fn = dataclasses_init_fn_with_kwargs

# Namespaces
# Prefix -> base URI bindings used for the class and slot URIs below.
LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/')
TABLE = CurieNamespace('table', 'https://w3id.org/linkml/examples/table/')
# The default namespace is the table namespace.
DEFAULT_ = TABLE


# Types

# Class references
class RowColumnA(URIorCURIE):
    """Identifier type for ``Row``: the URI-or-CURIE value held in its ``columnA`` slot."""


@dataclass
class Object(YAMLRoot):
    """
    An object (bnode) that has to be embedded inside a single row
    """
    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = TABLE.Object
    class_class_curie: ClassVar[str] = "table:Object"
    class_name: ClassVar[str] = "Object"
    class_model_uri: ClassVar[URIRef] = TABLE.Object

    name: Optional[str] = None
    value: Optional[str] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        # Coerce every populated, non-string slot to str, in declaration order,
        # before handing the leftover keyword arguments to the base class.
        for slot_name in ("name", "value"):
            current = getattr(self, slot_name)
            if not (current is None or isinstance(current, str)):
                setattr(self, slot_name, str(current))

        super().__post_init__(**kwargs)


@dataclass
class Row(YAMLRoot):
    """
    One data point of the table, made up of columns.
    """
    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = TABLE.Row
    class_class_curie: ClassVar[str] = "table:Row"
    class_name: ClassVar[str] = "Row"
    class_model_uri: ClassVar[URIRef] = TABLE.Row

    columnA: Union[str, RowColumnA] = None
    objectB: Optional[Union[dict, Object]] = None
    columnC: Optional[str] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        # columnA is required: report it when empty, then wrap it in its
        # identifier type.
        if self._is_empty(self.columnA):
            self.MissingRequiredField("columnA")
        if not isinstance(self.columnA, RowColumnA):
            self.columnA = RowColumnA(self.columnA)

        # A populated objectB that is not yet an Object is rebuilt from its
        # dict form.
        embedded = self.objectB
        if not (embedded is None or isinstance(embedded, Object)):
            self.objectB = Object(**as_dict(embedded))

        # columnC, when populated, is always stored as a plain string.
        third = self.columnC
        if not (third is None or isinstance(third, str)):
            self.columnC = str(third)

        super().__post_init__(**kwargs)


@dataclass
class Table(YAMLRoot):
    """
    Container holding the rows.
    """
    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = TABLE.Table
    class_class_curie: ClassVar[str] = "table:Table"
    class_name: ClassVar[str] = "Table"
    class_model_uri: ClassVar[URIRef] = TABLE.Table

    rows: Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]] = empty_dict()

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        # `rows` is accepted as either a dict or a list (see the annotation);
        # the base-class helper is asked to normalise it as Row entries keyed
        # by columnA.
        self._normalize_inlined_as_list(
            slot_name="rows",
            slot_type=Row,
            key_name="columnA",
            keyed=True,
        )

        super().__post_init__(**kwargs)


# Enumerations


# Slots
class slots:
    """Plain holder class; one ``Slot`` is attached per schema slot below."""


slots.rows = Slot(
    uri=TABLE.rows,
    name="rows",
    curie=TABLE.curie('rows'),
    model_uri=TABLE.rows,
    domain=None,
    range=Optional[Union[Dict[Union[str, RowColumnA], Union[dict, Row]], List[Union[dict, Row]]]],
)

slots.columnA = Slot(
    uri=TABLE.columnA,
    name="columnA",
    curie=TABLE.curie('columnA'),
    model_uri=TABLE.columnA,
    domain=None,
    range=URIRef,
)

slots.objectB = Slot(
    uri=TABLE.objectB,
    name="objectB",
    curie=TABLE.curie('objectB'),
    model_uri=TABLE.objectB,
    domain=None,
    range=Optional[Union[dict, Object]],
)

slots.columnC = Slot(
    uri=TABLE.columnC,
    name="columnC",
    curie=TABLE.curie('columnC'),
    model_uri=TABLE.columnC,
    domain=None,
    range=Optional[str],
)

slots.name = Slot(
    uri=TABLE.name,
    name="name",
    curie=TABLE.curie('name'),
    model_uri=TABLE.name,
    domain=None,
    range=Optional[str],
)

slots.value = Slot(
    uri=TABLE.value,
    name="value",
    curie=TABLE.curie('value'),
    model_uri=TABLE.value,
    domain=None,
    range=Optional[str],
)
56 changes: 56 additions & 0 deletions tests/test_loaders_dumpers/models/table.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
id: https://w3id.org/linkml/examples/table
name: table
description: |-
Represent a table in linkml
license: https://creativecommons.org/publicdomain/zero/1.0/
imports:
- linkml:types
prefixes:
table: https://w3id.org/linkml/examples/table/
linkml: https://w3id.org/linkml/
default_prefix: table
default_range: string

classes:

Object:
description: |-
An object (bnode) which needs embedding in a single row
slots:
- name
- value

Row:
description: |-
A single data point made up of columns.
slots:
- columnA
- objectB
- columnC

Table:
description: |-
Container of rows.
tree_root: true
slots:
- rows

slots:
rows:
range: Row
inlined: true
inlined_as_list: true
multivalued: true
columnA:
range: uriorcurie
identifier: true
objectB:
range: Object
inlined: true
columnC:
range: string
multivalued: false
name:
range: string
value:
range: string
19 changes: 12 additions & 7 deletions tests/test_loaders_dumpers/test_csv_tsv_loader_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from linkml_runtime.loaders import csv_loader, tsv_loader
from linkml_runtime.utils.yamlutils import as_json_object
from tests.test_loaders_dumpers.models.books_normalized import Author, Review, Shop, Book, GenreEnum, BookSeries
from tests.test_loaders_dumpers.models.table import Table, Row


ROOT = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -26,6 +27,11 @@
OUTPUT = os.path.join(OUTPUT_DIR, 'books_flattened.tsv')
OUTPUT2 = os.path.join(OUTPUT_DIR, 'books_flattened_02.tsv')

# Fixtures for the table model: its schema plus two TSV inputs. Going by the
# file names, one carries the embedded object as a JSON cell and the other as
# inlined columns.
TABLE_SCHEMA = os.path.join(MODEL_DIR, 'table.yaml')
TABLE_DATA_JSON = os.path.join(INPUT_DIR, 'table-json.tsv')
TABLE_DATA_INLINED = os.path.join(INPUT_DIR, 'table-inlined.tsv')


def _json(obj) -> str:
return json.dumps(obj, indent=' ', sort_keys=True)

Expand Down Expand Up @@ -107,6 +113,12 @@ def test_csvgen_unroundtrippable(self):
logging.debug(json_dumper.dumps(roundtrip))
assert roundtrip == data

def test_table_model(self):
    """Load the JSON-cell table fixture into ``Table`` and check the embedded object."""
    # Build the view from the table schema: 'rows' is declared there, and
    # TABLE_SCHEMA was otherwise unused while the shared SCHEMA constant was
    # passed in for a Table target.
    schemaview = SchemaView(TABLE_SCHEMA)
    # The fixture is a .tsv file, so read it with the TSV loader.
    table_json = tsv_loader.load(TABLE_DATA_JSON, target_class=Table, index_slot='rows', schemaview=schemaview)
    # The fixture holds exactly one data row; checking this stops the loop
    # below from passing vacuously on an empty table.
    assert len(table_json.rows) == 1
    for row in table_json.rows:
        # NOTE(review): the Row model declares objectB, not columnB, so the
        # embedded object is checked through that slot against the fixture
        # values; confirm this matches the intended loader behaviour.
        assert row.objectB.name == "foo"
        assert row.objectB.value == "bar"

def test_tsvgen_unroundtrippable(self):
schemaview = SchemaView(SCHEMA)
data = yaml_loader.load(DATA2, target_class=Shop)
Expand All @@ -115,12 +127,5 @@ def test_tsvgen_unroundtrippable(self):
roundtrip = tsv_loader.load(OUTPUT2, target_class=Shop, index_slot='all_book_series', schemaview=schemaview)
assert roundtrip == data








# Allow this test module to be run directly as a script.
if __name__ == '__main__':
unittest.main()
25 changes: 25 additions & 0 deletions tests/test_utils/test_csv_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pytest
import unittest

from linkml_runtime.utils.csvutils import _get_key_config, get_configmap
from linkml_runtime.utils.schemaview import SchemaView
from tests.support.test_environment import TestEnvironmentTestCase
from tests.test_utils.environment import env


class CsvUtilTestCase(TestEnvironmentTestCase):
    """Smoke tests that call ``get_configmap`` with an unknown index slot."""
    env = env

    def test_null_configmap(self):
        """Call ``get_configmap`` without a schema; only checks that it does not raise."""
        get_configmap(None, "unknown")
        # TODO: with pytest, use caplog to verify the output
        # assert 'Index slot or schema not specified' in caplog.text

    def test_get_configmap(self):
        """Call ``get_configmap`` on the kitchen-sink schema with an unknown slot name."""
        view = SchemaView(env.input_path('kitchen_sink.yaml'))
        get_configmap(view, "unknown")


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
unittest.main()