feat: Create validation context classes from schema.meta.context #2

Draft
Wants to merge 21 commits into main

Commits (21)
c92db33
feat: Create validation context classes from schema.meta.context
effigies May 27, 2024
789682f
chore: Add attrs and httpx to dependencies
effigies May 27, 2024
5f710e9
fix: Nested f-strings are not permitted in older Python
effigies May 28, 2024
2281296
DOC: Add module docstrings
effigies May 28, 2024
42c90ce
doc: Update docstrings with pydocstyle fixes
effigies May 28, 2024
4399279
PY39: Use explicit Union
effigies Jun 18, 2024
3ab7295
RF: Factor out typespec_to_type
effigies Jun 18, 2024
7ec9368
feat(types): Add FileTree type
effigies Jun 19, 2024
9be8a8e
feat(bidsignore): Add initial bidsignore implementation
effigies Jun 19, 2024
fe7cc86
feat(test): Add fixture to return bids-examples directory
effigies Jun 19, 2024
c3e39fd
fix: Clean up FileTree API
effigies Jun 19, 2024
8f3be4c
feat(filetree): Add relative_path property that matches ignore expect…
effigies Jun 19, 2024
4184594
feat(test): Validate Ignore class functionality
effigies Jun 19, 2024
9b30058
feat(ignore): Add tree filtering function, record filtered files
effigies Jun 19, 2024
4c2baf7
refactor(ignore): Use an explicit chain of ignores so each Ignore can…
effigies Jun 19, 2024
6d072b7
fix(filetree): Relative path is always without a root slash
effigies Jun 19, 2024
02136fe
test(filetree): Initial tests
effigies Jun 19, 2024
828eac9
Update to schema 0.10.0+
effigies Jul 11, 2024
f1591d1
fix: Import Self from typing_extensions
effigies Aug 15, 2024
24f6b5d
fix: Get type names in a py<310 compatible manner
effigies Sep 1, 2024
bfb3e11
Update src/bids_validator/context_generator.py
effigies Nov 11, 2024
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -23,6 +23,9 @@ classifiers = [
requires-python = ">=3.8"
dependencies = [
"bidsschematools >=0.11",
"typing_extensions",
"attrs",
"httpx",
]

[project.optional-dependencies]
129 changes: 129 additions & 0 deletions src/bids_validator/bidsignore.py
@@ -0,0 +1,129 @@
"""Utilities for working with .bidsignore files."""

import os
import re
from functools import lru_cache
from typing import List, Protocol, Union

import attrs

from .types.files import FileTree


@lru_cache
def compile_pat(pattern: str) -> Union[re.Pattern, None]:
    """Compile .gitignore-style ignore lines to regular expressions."""
    orig = pattern
    # A line starting with # serves as a comment.
    if pattern.startswith('#'):
        return None

    # An optional prefix "!" which negates the pattern;
    invert = pattern.startswith('!')

    # Put a backslash ("\") in front of the first hash for patterns that begin with a hash.
    # Put a backslash ("\") in front of the first "!" for patterns that begin with a literal "!"
    if pattern.startswith((r'\#', r'\!')):
        pattern = pattern[1:]  # Unescape

    # Trailing spaces are ignored unless they are quoted with backslash ("\").
    pattern = re.sub(r'(?<!\\) +$', '', pattern)

    # A blank line matches no files, so it can serve as a separator for readability.
    if pattern == '':
        return None

    # If there is a separator at the beginning or middle (or both) of the pattern,
    # then the pattern is relative to the [root]
    relative_match = pattern == '/' or '/' in pattern[:-1]
    # If there is a separator at the end of the pattern then the pattern will only match
    # directories, otherwise the pattern can match both files and directories.
    directory_match = pattern.endswith('/')

    # This does not handle character ranges correctly except when they are also valid regex
    parts = [
        '.*'
        if part == '**'
        else part.replace('*', '[^/]*').replace('?', '[^/]').replace('.', r'\.')
        for part in pattern.strip('/').split('/')
    ]

    prefix = '^' if relative_match else '^(.*/|)'
    postfix = r'/\Z' if directory_match else r'/?\Z'

    # "**/" matches zero or more directories, so the separating slash needs to be optional
    out_pattern = '/'.join(parts).replace('.*/', '.*/?')
    out_pattern = f'{prefix}{out_pattern}{postfix}'

    if invert:
        raise ValueError(f'Inverted patterns not supported: {orig}')
        # out_pattern = f'(?!{out_pattern})'

    return re.compile(out_pattern)


class HasMatch(Protocol):  # noqa: D101
    def match(self, relpath: str) -> bool: ...  # noqa: D102


@attrs.define
class Ignore:
    """Collection of .gitignore-style patterns.

    Tracks successfully matched files for reporting.
    """

    patterns: List[str] = attrs.field(factory=list)
    history: List[str] = attrs.field(factory=list, init=False)

    @classmethod
    def from_file(cls, pathlike: os.PathLike):
        """Load Ignore contents from file."""
        with open(pathlike) as fobj:
            return cls([line.rstrip('\n') for line in fobj])

    def match(self, relpath: str) -> bool:
        """Match a relative path against a collection of ignore patterns."""
        # compile_pat() returns None for comments and blank lines; skip those.
        compiled = (compile_pat(pattern) for pattern in self.patterns)
        if any(pat.match(relpath) for pat in compiled if pat is not None):
            self.history.append(relpath)
            return True
        return False


@attrs.define
class IgnoreMany:
    """Match against several ignore filters."""

    ignores: List[Ignore] = attrs.field()

    def match(self, relpath: str) -> bool:
        """Return true if any filters match the given file.

        Will short-circuit, so ordering is significant for side-effects,
        such as recording files ignored by a particular filter.
        """
        return any(ignore.match(relpath) for ignore in self.ignores)


def filter_file_tree(filetree: FileTree) -> FileTree:
    """Read .bidsignore and filter file tree."""
    bidsignore = filetree.children.get('.bidsignore')
    if not bidsignore:
        return filetree
    ignore = IgnoreMany([Ignore.from_file(bidsignore), Ignore(['/.bidsignore'])])
    return _filter(filetree, ignore)


def _filter(filetree: FileTree, ignore: HasMatch) -> FileTree:
    items = filetree.children.items()
    children = {
        name: _filter(child, ignore)
        for name, child in items
        if not ignore.match(child.relative_path)
    }

    # XXX This check may not be worth the time. Profile this.
    if any(children.get(name) is not child for name, child in items):
        filetree = attrs.evolve(filetree, children=children)

    return filetree
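
Compiled patterns are anchored regular expressions: a pattern with a trailing slash matches only directory paths, while a pattern without a leading or internal slash matches at any depth. A minimal sketch of exercising compile_pat and Ignore directly, assuming the package from this branch is installed; the pattern and path strings are purely illustrative:

# Sketch only: exercises compile_pat/Ignore as defined above.
from bids_validator.bidsignore import Ignore, compile_pat

# Comments and blank lines compile to None and never match anything.
assert compile_pat('# a comment') is None

# Unanchored patterns match at any depth; trailing-slash patterns match directories.
assert compile_pat('*.log').match('sub-01/anat/notes.log')
assert compile_pat('code/').match('code/')

ignore = Ignore(['code/', '*.log'])
ignore.match('sub-01/anat/notes.log')  # True
print(ignore.history)                  # ['sub-01/anat/notes.log']
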
38 changes: 38 additions & 0 deletions src/bids_validator/context.py
@@ -0,0 +1,38 @@
"""Validation context for schema-based BIDS validation."""

from .context_generator import get_schema, load_schema_into_namespace

schema = get_schema()
load_schema_into_namespace(schema['meta']['context'], globals(), 'Context')


__all__ = [  # noqa: F822
    'Context',
    'Schema',
    'Dataset',
    'DatasetDescription',
    'Tree',
    'Subjects',
    'Subject',
    'Sessions',
    'Entities',
    'Sidecar',
    'Associations',
    'Events',
    'Aslcontext',
    'M0scan',
    'Magnitude',
    'Magnitude1',
    'Bval',
    'Bvec',
    'Channels',
    'Coordsystem',
    'Columns',
    'Json',
    'Gzip',
    'NiftiHeader',
    'DimInfo',
    'XyztUnits',
    'Ome',
    'Tiff',
]
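
Each name exported above is an attrs class generated at import time from the installed schema's meta.context; every generated field defaults to None, so a context can be filled in incrementally as validation proceeds. A minimal sketch of inspecting the generated root class, assuming the package from this branch is installed as bids_validator:

import attrs

from bids_validator import context

# The root class and its nested classes are created when the module is imported.
ctx = context.Context()  # every generated field defaults to None
print([f.name for f in attrs.fields(context.Context)])  # field names mirror meta.context
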
212 changes: 212 additions & 0 deletions src/bids_validator/context_generator.py
@@ -0,0 +1,212 @@
"""Utilities for generating validation context classes from a BIDS schema.

For default contexts based on the installed BIDS schema, use the `context` module.
These functions allow generating classes from alternative schemas.

Basic usage:

.. code-block:: python

    from bids_validator.context_generator import get_schema, load_schema_into_namespace

    schema = get_schema('https://bids-specification.readthedocs.io/en/stable/schema.json')
    load_schema_into_namespace(schema['meta']['context']['context'], globals(), 'Context')
"""

import json
from typing import Any, Dict, List, Union

import attrs
import bidsschematools as bst
import bidsschematools.schema
import httpx

LATEST_SCHEMA_URL = 'https://bids-specification.readthedocs.io/en/latest/schema.json'
STABLE_SCHEMA_URL = 'https://bids-specification.readthedocs.io/en/stable/schema.json'


def get_schema(url: Union[str, None] = None) -> Dict[str, Any]:
    """Load a BIDS schema from a URL or return the bundled schema if no URL is provided.

    Parameters
    ----------
    url : str | None
        The URL to load the schema from. If None, the bundled schema is returned.
        The strings 'latest' and 'stable' are also accepted as shortcuts.

    Returns
    -------
    Dict[str, Any]
        The loaded schema as a dictionary.

    """
    if url is None:
        return bst.schema.load_schema()

    if url == 'latest':
        url = LATEST_SCHEMA_URL
    elif url == 'stable':
        url = STABLE_SCHEMA_URL

    with httpx.Client() as client:
        return client.get(url).json()


def snake_to_pascal(val: str):
    """Convert snake_case string to PascalCase."""
    return ''.join(sub.capitalize() for sub in val.split('_'))


def typespec_to_type(name: str, typespec: Dict[str, Any]):
    """Convert JSON-schema style specification to type and metadata dictionary."""
    tp = typespec.get('type')
    if not tp:
        raise ValueError(f'Invalid typespec: {json.dumps(typespec)}')
    metadata = {key: typespec[key] for key in ('name', 'description') if key in typespec}
    if tp == 'object':
        properties = typespec.get('properties')
        if properties:
            type_ = create_attrs_class(name, properties=properties, metadata=metadata)
        else:
            type_ = Dict[str, Any]
    elif tp == 'array':
        if 'items' in typespec:
            subtype, md = typespec_to_type(name, typespec['items'])
        else:
            subtype = Any
        type_ = List[subtype]
    else:
        type_ = {
            'number': float,
            'string': str,
            'integer': int,
        }[tp]
    return type_, metadata


def _type_name(tp: type) -> str:
    try:
        return tp.__name__
    except AttributeError:
        return str(tp)


def create_attrs_class(
    class_name: str,
    properties: Dict[str, Any],
    metadata: Dict[str, Any],
) -> type:
    """Dynamically create an attrs class with the given properties.

    Parameters
    ----------
    class_name
        The name of the class to create.
    properties
        A dictionary of property names and their corresponding schema information.
        If a nested object is encountered, a nested class is created.
    metadata
        A short description of the class, included in the docstring.

    Returns
    -------
    cls : type
        The dynamically created attrs class.

    """
    attributes = {}
    for prop_name, prop_info in properties.items():
        type_, md = typespec_to_type(prop_name, prop_info)
        attributes[prop_name] = attrs.field(
            type=type_, repr=prop_name != 'schema', default=None, metadata=md
        )

    return attrs.make_class(
        snake_to_pascal(class_name),
        attributes,
        class_body={
            '__doc__': f"""\
{metadata.get('description', '')}

attrs data class auto-generated from BIDS schema

Attributes
----------
"""
            + '\n'.join(
                [
                    f'{k}: {_type_name(v.type)}\n\t{v.metadata["description"]}'
                    for k, v in attributes.items()
                ]
            ),
        },
    )


def generate_attrs_classes_from_schema(
    schema: Dict[str, Any],
    root_class_name: str,
) -> type:
    """Generate attrs classes from a JSON schema.

    Parameters
    ----------
    schema : Dict[str, Any]
        The JSON schema to generate classes from. Must contain a 'properties' field.
    root_class_name : str
        The name of the root class to create.

    Returns
    -------
    cls : type
        The root class created from the schema.

    """
    if 'properties' not in schema:
        raise ValueError("Invalid schema: 'properties' field is required")

    type_, _ = typespec_to_type(root_class_name, schema)
    return type_


def populate_namespace(attrs_class: type, namespace: Dict[str, Any]) -> None:
    """Populate a namespace with nested attrs classes.

    Parameters
    ----------
    attrs_class : type
        The root attrs class to add to the namespace.
    namespace : Dict[str, Any]
        The namespace to populate with nested classes.

    """
    for attr in attrs_class.__attrs_attrs__:
        attr_type = attr.type

        if isinstance(attr_type, type) and hasattr(attr_type, '__attrs_attrs__'):
            namespace[attr_type.__name__] = attr_type
            populate_namespace(attr_type, namespace)


def load_schema_into_namespace(
    schema: Dict[str, Any],
    namespace: Dict[str, Any],
    root_class_name: str,
) -> None:
    """Load a JSON schema into a namespace as attrs classes.

    Intended to be used with globals() or locals() to create classes in the current module.

    Parameters
    ----------
    schema : Dict[str, Any]
        The JSON schema to load into the namespace.
    namespace : Dict[str, Any]
        The namespace to load the schema into.
    root_class_name : str
        The name of the root class to create.

    """
    attrs_class = generate_attrs_classes_from_schema(schema, root_class_name)
    namespace[root_class_name] = attrs_class
    populate_namespace(attrs_class, namespace)
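
The same machinery accepts any JSON-schema-style mapping, not only the bundled BIDS schema. A small sketch using a hand-written toy spec; the spec below is illustrative and not part of the BIDS schema:

from bids_validator.context_generator import generate_attrs_classes_from_schema

toy_spec = {
    'type': 'object',
    'description': 'Toy context for demonstration',
    'properties': {
        'path': {'type': 'string', 'description': 'Path of the current file'},
        'subjects': {
            'type': 'array',
            'description': 'Participant labels',
            'items': {'type': 'string', 'description': 'One participant label'},
        },
    },
}

# Builds a PascalCase attrs class ('Toy') with one generated field per property.
Toy = generate_attrs_classes_from_schema(toy_spec, 'toy')
print(Toy(path='sub-01/anat/sub-01_T1w.nii.gz', subjects=['01', '02']))
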
1 change: 1 addition & 0 deletions src/bids_validator/types/__init__.py
@@ -0,0 +1 @@
"""Modules for providing types."""