Skip to content

Commit

Permalink
Allow Metric to use dataclasses or attr
Browse files Browse the repository at this point in the history
* Update util.metric and related util.inspect modules to work with
  dataclasses or attr
* Update test_metric to test both dataclasses and attr classes

Closes #45
  • Loading branch information
TedBrookings committed Nov 27, 2023
1 parent 66b2200 commit a1df504
Show file tree
Hide file tree
Showing 7 changed files with 426 additions and 198 deletions.
3 changes: 3 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ Metric files

.. seealso::

https://docs.python.org/3/library/dataclasses.html
Documentation for the dataclasses standard module

https://www.attrs.org/en/stable/examples.html

The attrs website for bringing back the joy to writing classes.
Expand Down
8 changes: 4 additions & 4 deletions fgpyo/read_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def __str__(self) -> str:
return self.value


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class SubReadWithoutQuals:
"""Contains the bases that correspond to the given read segment"""

Expand All @@ -99,7 +99,7 @@ def kind(self) -> SegmentType:
return self.segment.kind


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class SubReadWithQuals:
"""Contains the bases and qualities that correspond to the given read segment"""

Expand All @@ -112,7 +112,7 @@ def kind(self) -> SegmentType:
return self.segment.kind


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class ReadSegment:
"""Encapsulates all the information about a segment within a read structure. A segment can
either have a definite length, in which case length must be Some(Int), or an indefinite length
Expand Down Expand Up @@ -187,7 +187,7 @@ def __str__(self) -> str:
return f"{ANY_LENGTH_CHAR}{self.kind.value}"


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
class ReadStructure(Iterable[ReadSegment]):
"""Describes the structure of a given read. A read contains one or more read segments. A read
segment describes a contiguous stretch of bases of the same type (ex. template bases) of some
Expand Down
35 changes: 17 additions & 18 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def is_clipping(self) -> bool:
return self == CigarOp.S or self == CigarOp.H


@attr.s(frozen=True, slots=True)
@attr.s(frozen=True, slots=True, auto_attribs=True)
class CigarElement:
"""Represents an element in a Cigar
Expand All @@ -394,14 +394,13 @@ class CigarElement:
- operator (CigarOp): the operator of the element
"""

length: int = attr.ib()
operator: CigarOp = attr.ib()
length: int
operator: CigarOp

def __attrs_post_init__(self) -> None:
    """Validates the length attribute is greater than zero.

    Raises:
        ValueError: if the length is not greater than zero
    """
    # The __init__ generated by attrs invokes __attrs_post_init__; __post_init__ is the
    # dataclasses hook and is never called on an attr.s class, which would leave this
    # check unexecuted.
    if self.length <= 0:
        raise ValueError(f"Cigar element must have a length > 0, found {self.length}")

# Keep the previous method name bound for any code that calls it explicitly.
__post_init__ = __attrs_post_init__

Check warning on line 403 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L403

Added line #L403 was not covered by tests

@property
def length_on_query(self) -> int:
Expand All @@ -423,15 +422,15 @@ class CigarParsingException(Exception):
pass


@attr.s(frozen=True, slots=True)
@attr.s(frozen=True, slots=True, auto_attribs=True)
class Cigar:
"""Class representing a cigar string.
Attributes:
- elements (Tuple[CigarElement, ...]): zero or more cigar elements
"""

elements: Tuple[CigarElement, ...] = attr.ib(default=())
elements: Tuple[CigarElement, ...] = ()

@classmethod
def from_cigartuples(cls, cigartuples: Optional[List[Tuple[int, int]]]) -> "Cigar":
Expand Down Expand Up @@ -518,7 +517,7 @@ def length_on_target(self) -> int:
return sum([elem.length_on_target for elem in self.elements])


@attr.s(auto_attribs=True, frozen=True)
@attr.s(frozen=True, auto_attribs=True)
class SupplementaryAlignment:
"""Stores a supplementary alignment record produced by BWA and stored in the SA SAM tag.
Expand All @@ -531,12 +530,12 @@ class SupplementaryAlignment:
nm: the number of edits
"""

reference_name: str = attr.ib()
start: int = attr.ib()
is_forward: bool = attr.ib()
cigar: Cigar = attr.ib()
mapq: int = attr.ib()
nm: int = attr.ib()
reference_name: str
start: int
is_forward: bool
cigar: Cigar
mapq: int
nm: int

def __str__(self) -> str:
return ",".join(
Expand Down Expand Up @@ -620,7 +619,7 @@ def set_pair_info(r1: AlignedSegment, r2: AlignedSegment, proper_pair: bool = Tr
r2.template_length = -insert_size


@attr.s(auto_attribs=True, frozen=True)
@attr.s(frozen=True, auto_attribs=True)
class ReadEditInfo:
"""
Counts various stats about how a read compares to a reference sequence.
Expand Down Expand Up @@ -709,7 +708,7 @@ def calculate_edit_info(
)


@attr.s(auto_attribs=True, frozen=True)
@attr.s(frozen=True, auto_attribs=True)
class Template:
"""A container for alignment records corresponding to a single sequenced template
or insert.
Expand Down
111 changes: 100 additions & 11 deletions fgpyo/util/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,85 @@

if sys.version_info >= (3, 8):
from typing import Literal
from typing import Protocol
else:
from typing_extensions import Literal
from typing_extensions import Protocol
if sys.version_info >= (3, 12):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias

import dataclasses
import functools
from dataclasses import MISSING as DATACLASSES_MISSING
from dataclasses import fields as get_dataclasses_fields
from dataclasses import is_dataclass as is_dataclasses_class
from enum import Enum
from functools import partial
from pathlib import PurePath
from typing import TYPE_CHECKING
from typing import Callable
from typing import Optional

import attr
from typing import TypeVar

import fgpyo.util.types as types

try:
import attr

_use_attr = True
from attr import NOTHING as ATTR_NOTHING
from attr import fields as get_attr_fields
from attr import fields_dict as get_attr_fields_dict

Attribute = attr.Attribute

MISSING = {DATACLASSES_MISSING, ATTR_NOTHING}
except ImportError:
_use_attr = False
attr = None
ATTR_NOTHING = None
Attribute = TypeVar("Attribute", bound=object) # type: ignore

Check warning on line 51 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L47-L51

Added lines #L47 - L51 were not covered by tests

def get_attr_fields(cls: type) -> Tuple[dataclasses.Field, ...]: # type: ignore
return ()

Check warning on line 54 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L53-L54

Added lines #L53 - L54 were not covered by tests

def get_attr_fields_dict(cls: type) -> Dict[str, dataclasses.Field]: # type: ignore
return {}

Check warning on line 57 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L56-L57

Added lines #L56 - L57 were not covered by tests

MISSING = {DATACLASSES_MISSING}

Check warning on line 59 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L59

Added line #L59 was not covered by tests

if TYPE_CHECKING:
from _typeshed import DataclassInstance as DataclassesProtocol

Check warning on line 62 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L62

Added line #L62 was not covered by tests
else:

class DataclassesProtocol(Protocol):
__dataclasses_fields__: Dict[str, dataclasses.Field]


if TYPE_CHECKING and _use_attr:
from attr import AttrsInstance

Check warning on line 70 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L70

Added line #L70 was not covered by tests
else:

class AttrsInstance(Protocol): # type: ignore
__attrs_attrs__: Dict[str, Any]


def is_attr_class(cls: type) -> bool:  # type: ignore
    """Returns True if the class exposes an `__attrs_attrs__` attribute, False otherwise"""
    try:
        cls.__attrs_attrs__
    except AttributeError:
        return False
    return True


# The "no default supplied" sentinels plus None.
MISSING_OR_NONE = {*MISSING, None}
# Either flavour of data class accepted by the helpers in this module.
DataclassesOrAttrClass = Union[DataclassesProtocol, AttrsInstance]
# Refer to the module-level `Attribute` alias rather than `attr.Attribute`: when attrs is
# not installed `attr` is bound to None, so `attr.Attribute` would raise AttributeError
# while this module is being imported.
FieldType: TypeAlias = Union[dataclasses.Field, Attribute]


def get_dataclasses_fields_dict(
    class_or_instance: Union[DataclassesProtocol, Type[DataclassesProtocol]],
) -> Dict[str, dataclasses.Field]:
    """Returns the dataclasses fields of a class or instance, keyed by field name"""
    fields_by_name: Dict[str, dataclasses.Field] = {}
    for entry in get_dataclasses_fields(class_or_instance):
        fields_by_name[entry.name] = entry
    return fields_by_name


class ParserNotFoundException(Exception):
pass
Expand Down Expand Up @@ -254,9 +315,36 @@ def dict_parse(dict_string: str) -> Dict[Any, Any]:
return parser


def get_fields_dict(cls: Type[DataclassesOrAttrClass]) -> Dict[str, FieldType]:
    """
    Get the fields dict from either a dataclasses or attr dataclass.

    Combine results in case someone chooses to mix them through inheritance.

    Args:
        cls: the dataclasses or attr class to inspect

    Returns:
        a dictionary from field name to field. The attr fields are merged in after the
        dataclasses fields, so an attr field replaces a dataclasses field of the same name.

    Raises:
        ValueError: if cls is neither a dataclasses class nor an attr class
    """
    # Evaluate each predicate once; both the guard and the merge depend on them.
    is_dataclasses = is_dataclasses_class(cls)
    is_attr = is_attr_class(cls)
    if not (is_dataclasses or is_attr):
        raise ValueError("cls must be a dataclasses or attr class")

    fields_dict: Dict[str, FieldType] = {}
    if is_dataclasses:
        fields_dict.update(get_dataclasses_fields_dict(cls))
    if is_attr:
        fields_dict.update(get_attr_fields_dict(cls))  # type: ignore
    return fields_dict


def get_fields(cls: Type[DataclassesOrAttrClass]) -> Tuple[FieldType, ...]:
    """
    Get the fields tuple from either a dataclasses or attr dataclass.

    Args:
        cls: the dataclasses or attr class to inspect

    Returns:
        the dataclasses fields (if any) followed by the attr fields (if any)

    Raises:
        ValueError: if cls is neither a dataclasses class nor an attr class
    """
    # Evaluate each predicate once; both the guard and the concatenation depend on them.
    is_dataclasses = is_dataclasses_class(cls)
    is_attr = is_attr_class(cls)
    if not (is_dataclasses or is_attr):
        raise ValueError("cls must be a dataclasses or attr class")

    dataclasses_fields = get_dataclasses_fields(cls) if is_dataclasses else ()
    attr_fields = get_attr_fields(cls) if is_attr else ()  # type: ignore
    return dataclasses_fields + attr_fields


AttrFromType = TypeVar("AttrFromType")


def attr_from(
cls: Type, kwargs: Dict[str, str], parsers: Optional[Dict[type, Callable[[str], Any]]] = None
) -> Any:
cls: Type[AttrFromType],
kwargs: Dict[str, str],
parsers: Optional[Dict[type, Callable[[str], Any]]] = None,
) -> AttrFromType:
"""Builds an attr class from key-word arguments
Args:
Expand All @@ -265,15 +353,16 @@ def attr_from(
parsers: a dictionary of parser functions to apply to specific types
"""
return_values: Dict[str, Any] = {}
for attribute in attr.fields(cls):
for attribute in get_fields(cls): # type: ignore
return_value: Any
if attribute.name in kwargs:
str_value: str = kwargs[attribute.name]
set_value: bool = False

# Use the converter if provided
if attribute.converter is not None:
return_value = attribute.converter(str_value)
converter = getattr(attribute, "converter", None)
if converter is not None:
return_value = converter(str_value)

Check warning on line 365 in fgpyo/util/inspect.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/util/inspect.py#L365

Added line #L365 was not covered by tests
set_value = True

# try getting a known parser
Expand Down Expand Up @@ -305,21 +394,21 @@ def attr_from(
), f"No value given and no default for attribute `{attribute.name}`"
return_value = attribute.default
# when the default is a missing-value sentinel (attr.NOTHING or dataclasses.MISSING), just use None
if return_value is attr.NOTHING:
if return_value in MISSING:
return_value = None

return_values[attribute.name] = return_value

return cls(**return_values)


def attribute_is_optional(attribute: FieldType) -> bool:
    """Returns True if the attribute is optional, False otherwise"""
    declared_type = attribute.type
    # Only a Union can be optional; anything else is rejected before inspecting its args.
    if types.get_origin_type(declared_type) is not Union:
        return False
    return isinstance(None, types.get_arg_types(declared_type))


def attribute_has_default(attribute: FieldType) -> bool:
    """Returns True if the attribute has a default value, False otherwise

    A default that is one of the missing-value sentinels, or None, does not count as a
    default by itself; such an attribute is only reported as having a default when its
    type is optional.
    """
    default = attribute.default
    # Compare by identity instead of set membership: membership hashes the default, which
    # raises TypeError for an unhashable default such as a list.
    default_is_unset = any(default is sentinel for sentinel in MISSING_OR_NONE)
    return not default_is_unset or attribute_is_optional(attribute)
Loading

0 comments on commit a1df504

Please sign in to comment.