Skip to content

Commit

Permalink
refactor: reorder
Browse files Browse the repository at this point in the history
  • Loading branch information
msto committed May 5, 2024
1 parent 1ce2978 commit 7cf2e65
Showing 1 changed file with 96 additions and 96 deletions.
192 changes: 96 additions & 96 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,46 +385,39 @@ class MetricFileHeader:
fieldnames: list[str]


def get_header(
reader: io.ReadableFileHandle,
file_format: MetricFileFormat,
) -> Optional[MetricFileHeader]:
"""
Read the header from an open file.
The first row after any commented or empty lines will be used as the fieldnames.
def asdict(metric: Metric) -> dict[str, Any]:
"""Convert a Metric instance to a dictionary."""
assert_is_metric(type(metric))

Lines preceding the fieldnames will be returned in the `preface`.
if dataclasses.is_dataclass(metric):
return dataclasses.asdict(metric)
elif attr.has(metric):
return attr.asdict(metric)
else:
assert False, "Unreachable"

NB: This function returns `Optional` instead of raising an error because the name of the
source file is not in scope, making it difficult to provide a helpful error message. It is
the responsibility of the caller to raise an error if the file is empty.

See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103
def get_fieldnames(metric_class: type[Metric]) -> list[str]:
"""
Get the fieldnames of the specified metric class.
Args:
reader: An open, readable file handle.
file_format: A dataclass containing (at minimum) the file's delimiter and the string
prefixing any comment lines.
metric_class: A Metric class.
Returns:
A `MetricFileHeader` containing the field names and any preceding lines.
None if the file was empty or contained only comments or empty lines.
"""
A list of fieldnames.
preface: list[str] = []
Raises:
TypeError: If the given class is not a Metric.
"""
assert_is_metric(metric_class)

for line in reader:
if line.startswith(file_format.comment) or line.strip() == "":
preface.append(line.strip())
else:
break
if dataclasses.is_dataclass(metric_class):
return [f.name for f in dataclasses.fields(metric_class)]
elif attr.has(metric_class):
return [f.name for f in attr.fields(metric_class)]
else:
return None

fieldnames = line.strip().split(file_format.delimiter)

return MetricFileHeader(preface=preface, fieldnames=fieldnames)
assert False, "Unreachable"


class MetricWriter:
Expand Down Expand Up @@ -546,53 +539,96 @@ def writeall(self, metrics: Iterable[Metric]) -> None:
self.write(metric)


def assert_is_metric(cls: type[Metric]) -> None:
def _get_header(
reader: io.ReadableFileHandle,
file_format: MetricFileFormat,
) -> Optional[MetricFileHeader]:
"""
Assert that the given class is a Metric.
Read the header from an open file.
The first row after any commented or empty lines will be used as the fieldnames.
Lines preceding the fieldnames will be returned in the `preface`.
NB: This function returns `Optional` instead of raising an error because the name of the
source file is not in scope, making it difficult to provide a helpful error message. It is
the responsibility of the caller to raise an error if the file is empty.
See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103
Args:
cls: A class object.
reader: An open, readable file handle.
file_format: A dataclass containing (at minimum) the file's delimiter and the string
prefixing any comment lines.
Raises:
TypeError: If the given class is not a Metric.
Returns:
A `MetricFileHeader` containing the field names and any preceding lines.
None if the file was empty or contained only comments or empty lines.
"""
if not is_metric(cls):
raise TypeError(f"Not a dataclass or attr decorated Metric: {cls}")

preface: list[str] = []

def asdict(metric: Metric) -> dict[str, Any]:
"""Convert a Metric instance to a dictionary."""
assert_is_metric(type(metric))
for line in reader:
if line.startswith(file_format.comment) or line.strip() == "":
preface.append(line.strip())
else:
break
else:
return None

if dataclasses.is_dataclass(metric):
return dataclasses.asdict(metric)
elif attr.has(metric):
return attr.asdict(metric)
fieldnames = line.strip().split(file_format.delimiter)

return MetricFileHeader(preface=preface, fieldnames=fieldnames)


def _validate_output_fieldnames(
metric_class: type[MetricType],
include_fields: list[str] | None = None,
exclude_fields: list[str] | None = None,
) -> list[str]:
"""
Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists.
* Only one of `include_fields` and `exclude_fields` may be specified.
* All fieldnames specified in `include_fields` must be fields on `metric_class`. If this
argument is specified, fields will be returned in the order they appear in the list.
* All fieldnames specified in `exclude_fields` must be fields on `metric_class`. (This is
technically unnecessary, but is a safeguard against passing an incorrect list.)
* If neither `include_fields` nor `exclude_fields` is specified, return the `metric_class`'s
fieldnames.
Raises:
ValueError: If both `include_fields` and `exclude_fields` are specified.
"""

if include_fields is not None and exclude_fields is not None:
raise ValueError(
"Only one of `include_fields` and `exclude_fields` may be specified, not both."
)
elif exclude_fields is not None:
assert_fieldnames_are_metric_attributes(exclude_fields, metric_class)
output_fieldnames = [f for f in get_fieldnames(metric_class) if f not in exclude_fields]
elif include_fields is not None:
assert_fieldnames_are_metric_attributes(include_fields, metric_class)
output_fieldnames = include_fields
else:
assert False, "Unreachable"
output_fieldnames = get_fieldnames(metric_class)

return output_fieldnames

def get_fieldnames(metric_class: type[Metric]) -> list[str]:

def assert_is_metric(cls: type[Metric]) -> None:
"""
Get the fieldnames of the specified metric class.
Assert that the given class is a Metric.
Args:
metric_class: A Metric class.
Returns:
A list of fieldnames.
cls: A class object.
Raises:
TypeError: If the given class is not a Metric.
"""
assert_is_metric(metric_class)

if dataclasses.is_dataclass(metric_class):
return [f.name for f in dataclasses.fields(metric_class)]
elif attr.has(metric_class):
return [f.name for f in attr.fields(metric_class)]
else:
assert False, "Unreachable"
if not is_metric(cls):
raise TypeError(f"Not a dataclass or attr decorated Metric: {cls}")


def assert_file_header_matches_metric(
Expand All @@ -604,7 +640,7 @@ def assert_file_header_matches_metric(
Check that the specified file has a header and its fields match those of the provided Metric.
"""
with path.open("r") as fin:
header: MetricFileHeader = get_header(fin, file_format=file_format)
header: MetricFileHeader = _get_header(fin, file_format=file_format)

if header is None:
raise ValueError(f"Could not find a header in the provided file: {path}")
Expand Down Expand Up @@ -637,39 +673,3 @@ def assert_fieldnames_are_metric_attributes(
+ f"{metric_class.__name__}: "
+ ", ".join(invalid_fieldnames)
)


def _validate_output_fieldnames(
    metric_class: type[MetricType],
    include_fields: list[str] | None = None,
    exclude_fields: list[str] | None = None,
) -> list[str]:
    """
    Resolve the fieldnames to output for a metric class, given an include or exclude list.

    At most one of `include_fields` and `exclude_fields` may be provided:

    * When `include_fields` is given, its names are checked against `metric_class` via
      `assert_fieldnames_are_metric_attributes` and the list is returned as passed, so the
      caller's ordering is preserved.
    * When `exclude_fields` is given, its names are checked the same way (a safeguard against
      passing an incorrect list) and the result is the fieldnames of `metric_class`, in the
      order reported by `get_fieldnames`, minus the excluded names.
    * When neither is given, the result is `get_fieldnames(metric_class)`.

    Args:
        metric_class: The Metric class whose fieldnames are being selected.
        include_fields: The fieldnames to output, in the desired order.
        exclude_fields: The fieldnames to omit from the output.

    Returns:
        The list of fieldnames to output.

    Raises:
        ValueError: If both `include_fields` and `exclude_fields` are specified.
    """
    # Guard clause: the two selection modes are mutually exclusive.
    if include_fields is not None and exclude_fields is not None:
        raise ValueError(
            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
        )

    if exclude_fields is not None:
        assert_fieldnames_are_metric_attributes(exclude_fields, metric_class)
        return [name for name in get_fieldnames(metric_class) if name not in exclude_fields]

    if include_fields is not None:
        assert_fieldnames_are_metric_attributes(include_fields, metric_class)
        return include_fields

    # No subsetting requested: fall back to every field on the metric class.
    return get_fieldnames(metric_class)

0 comments on commit 7cf2e65

Please sign in to comment.