Skip to content

Commit

Permalink
Reformatting code for flake8 config (#1)
Browse files Browse the repository at this point in the history
Co-authored-by: Ryan Ly <[email protected]>
  • Loading branch information
cmungall and rly authored Feb 5, 2024
1 parent 37f5e93 commit ee2a063
Show file tree
Hide file tree
Showing 24 changed files with 1,048 additions and 636 deletions.
15 changes: 7 additions & 8 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import re
import sys
from datetime import date
from linkml_arrays import __version__

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'linkml-arrays'
project = "linkml-arrays"
copyright = f"{date.today().year}, Ryan Ly <[email protected]>"
author = 'Ryan Ly <[email protected]>'
author = "Ryan Ly <[email protected]>"
release = __version__

# -- General configuration ---------------------------------------------------
Expand All @@ -25,7 +24,7 @@
"sphinx_rtd_theme",
"sphinx_click",
"sphinx_autodoc_typehints",
"myst_parser"
"myst_parser",
]

# generate autosummary pages
Expand All @@ -46,13 +45,13 @@
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

templates_path = ['_templates']
templates_path = ["_templates"]

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
Expand Down
1,145 changes: 718 additions & 427 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
#setuptools = "^65.5.0"
#tox = "^3.25.1"
tox = "^3.25.1"
#click = "^8.1.3"
#importlib-metadata = "^4.8.0"
linkml-runtime = "^1.6.0"
Expand Down
1 change: 1 addition & 0 deletions src/linkml_arrays/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""linkml-arrays package."""

import importlib_metadata

try:
Expand Down
7 changes: 5 additions & 2 deletions src/linkml_arrays/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Command line interface for linkml-arrays."""
import click

import logging

import click

from linkml_arrays import __version__
from linkml_arrays.main import demo

Expand All @@ -11,6 +13,7 @@

logger = logging.getLogger(__name__)


@click.group()
@click.option("-v", "--verbose", count=True)
@click.option("-q", "--quiet")
Expand All @@ -30,11 +33,11 @@ def main(verbose: int, quiet: bool):
if quiet:
logger.setLevel(level=logging.ERROR)


@main.command()
def run():
"""Run the linkml-arrays's demo command."""
demo()



if __name__ == "__main__":
Expand Down
14 changes: 11 additions & 3 deletions src/linkml_arrays/dumpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_dumper import YamlNumpyDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper

from .hdf5_dumper import Hdf5Dumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper
from .yaml_numpy_dumper import YamlNumpyDumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper

__all__ = [
"Hdf5Dumper",
"YamlHdf5Dumper",
"YamlNumpyDumper",
"ZarrDirectoryStoreDumper",
]
35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/hdf5_dumper.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to an HDF5 file."""

from typing import Union

import h5py
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: h5py.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: h5py.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.
Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

Expand All @@ -25,23 +29,26 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class Hdf5Dumper(Dumper):
"""Dumper class for LinkML models to HDF5 files."""

# TODO is this the right method to overwrite? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to an HDF5 file.
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.h5"
with h5py.File(output_file_path, "w") as f:
iterate_element(element, schemaview, f)



_iterate_element(element, schemaview, f)
34 changes: 24 additions & 10 deletions src/linkml_arrays/dumpers/yaml_hdf5_dumper.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

from typing import Union

import h5py
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.
Returns a dictionary with the same structure as the input element, but where the slots
that implement "linkml:elements" (arrays) are written to HDF5 files and the paths to
these files are returned in the dictionary. Each array is written to an HDF5 dataset at
path "/data" in a new HDF5 file.
def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

Expand All @@ -33,24 +46,25 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
output_file_path = f"{parent_identifier}.{found_class.name}.{found_slot.name}.h5"
else:
output_file_path = f"{found_class.name}.{found_slot.name}.h5"
with h5py.File(output_file_path, "w") as f: # TODO do not assume that there is only one by this name
with h5py.File(
output_file_path, "w"
) as f: # TODO do not assume that there is only one by this name
f.create_dataset("data", data=v)
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlHdf5Dumper(Dumper):
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)


30 changes: 21 additions & 9 deletions src/linkml_arrays/dumpers/yaml_numpy_dumper.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
"""Class for dumpling a LinkML model to a YAML file with paths to NumPy files."""

from typing import Union

import numpy as np
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.
Returns a dictionary with the same structure as the input element, but with the slots
that implement "linkml:elements" (arrays) are written to HDF5 files and the paths to these
files are returned in the dictionary. Each array is written to an HDF5 dataset at path
"/data" in a new HDF5 file.
def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

Expand Down Expand Up @@ -37,19 +50,18 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlNumpyDumper(Dumper):
"""Dumper class for LinkML models to YAML files with paths to NumPy files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to numpy files containing the ndarrays"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)


35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/zarr_directory_store_dumper.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to a Zarr directory store."""

from typing import Union

import zarr
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: zarr.hierarchy.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: zarr.hierarchy.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.
Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

Expand All @@ -25,24 +29,27 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class ZarrDirectoryStoreDumper(Dumper):
"""Dumper class for LinkML models to Zarr directory stores."""

# TODO is this the right method to overwrite? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to a Zarr directory store.
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.zarr"
store = zarr.DirectoryStore(output_file_path)
root = zarr.group(store=store, overwrite=True)
iterate_element(element, schemaview, root)



_iterate_element(element, schemaview, root)
14 changes: 11 additions & 3 deletions src/linkml_arrays/loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_loader import YamlNumpyLoader
from .yaml_hdf5_loader import YamlHdf5Loader

from .hdf5_loader import Hdf5Loader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader
from .yaml_hdf5_loader import YamlHdf5Loader
from .yaml_numpy_loader import YamlNumpyLoader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader

__all__ = [
"Hdf5Loader",
"YamlHdf5Loader",
"YamlNumpyLoader",
"ZarrDirectoryStoreLoader",
]
Loading

0 comments on commit ee2a063

Please sign in to comment.