refactor: reorder

fulcrumgenomics · May 5, 2024 · 7cf2e65 · 7cf2e65
1 parent 1ce2978
commit 7cf2e65
Showing 1 changed file with 96 additions and 96 deletions.
diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py
@@ -385,46 +385,39 @@ class MetricFileHeader:
     fieldnames: list[str]
 
 
-def get_header(
-    reader: io.ReadableFileHandle,
-    file_format: MetricFileFormat,
-) -> Optional[MetricFileHeader]:
-    """
-    Read the header from an open file.
-
-    The first row after any commented or empty lines will be used as the fieldnames.
+def asdict(metric: Metric) -> dict[str, Any]:
+    """Convert a Metric instance to a dictionary."""
+    assert_is_metric(type(metric))
 
-    Lines preceding the fieldnames will be returned in the `preface.`
+    if dataclasses.is_dataclass(metric):
+        return dataclasses.asdict(metric)
+    elif attr.has(metric):
+        return attr.asdict(metric)
+    else:
+        assert False, "Unreachable"
 
-    NB: This function returns `Optional` instead of raising an error because the name of the
-    source file is not in scope, making it difficult to provide a helpful error message. It is
-    the responsibility of the caller to raise an error if the file is empty.
 
-    See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103
+def get_fieldnames(metric_class: type[Metric]) -> list[str]:
+    """
+    Get the fieldnames of the specified metric class.
 
     Args:
-        reader: An open, readable file handle.
-        file_format: A dataclass containing (at minimum) the file's delimiter and the string
-            prefixing any comment lines.
+        metric_class: A Metric class.
 
     Returns:
-        A `FileHeader` containing the field names and any preceding lines.
-        None if the file was empty or contained only comments or empty lines.
-    """
+        A list of fieldnames.
 
-    preface: list[str] = []
+    Raises:
+        TypeError: If the given class is not a Metric.
+    """
+    assert_is_metric(metric_class)
 
-    for line in reader:
-        if line.startswith(file_format.comment) or line.strip() == "":
-            preface.append(line.strip())
-        else:
-            break
+    if dataclasses.is_dataclass(metric_class):
+        return [f.name for f in dataclasses.fields(metric_class)]
+    elif attr.has(metric_class):
+        return [f.name for f in attr.fields(metric_class)]
     else:
-        return None
-
-    fieldnames = line.strip().split(file_format.delimiter)
-
-    return MetricFileHeader(preface=preface, fieldnames=fieldnames)
+        assert False, "Unreachable"
 
 
 class MetricWriter:
@@ -546,53 +539,96 @@ def writeall(self, metrics: Iterable[Metric]) -> None:
             self.write(metric)
 
 
-def assert_is_metric(cls: type[Metric]) -> None:
+def _get_header(
+    reader: io.ReadableFileHandle,
+    file_format: MetricFileFormat,
+) -> Optional[MetricFileHeader]:
     """
-    Assert that the given class is a Metric.
+    Read the header from an open file.
+
+    The first row after any commented or empty lines will be used as the fieldnames.
+
+    Lines preceding the fieldnames will be returned in the `preface`.
+
+    NB: This function returns `Optional` instead of raising an error because the name of the
+    source file is not in scope, making it difficult to provide a helpful error message. It is
+    the responsibility of the caller to raise an error if the file is empty.
+
+    See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103
 
     Args:
-        cls: A class object.
+        reader: An open, readable file handle.
+        file_format: A dataclass containing (at minimum) the file's delimiter and the string
+            prefixing any comment lines.
 
-    Raises:
-        TypeError: If the given class is not a Metric.
+    Returns:
+        A `MetricFileHeader` containing the field names and any preceding lines.
+        None if the file was empty or contained only comments or empty lines.
     """
-    if not is_metric(cls):
-        raise TypeError(f"Not a dataclass or attr decorated Metric: {cls}")
 
+    preface: list[str] = []
 
-def asdict(metric: Metric) -> dict[str, Any]:
-    """Convert a Metric instance to a dictionary."""
-    assert_is_metric(type(metric))
+    for line in reader:
+        if line.startswith(file_format.comment) or line.strip() == "":
+            preface.append(line.strip())
+        else:
+            break
+    else:
+        return None
 
-    if dataclasses.is_dataclass(metric):
-        return dataclasses.asdict(metric)
-    elif attr.has(metric):
-        return attr.asdict(metric)
+    fieldnames = line.strip().split(file_format.delimiter)
+
+    return MetricFileHeader(preface=preface, fieldnames=fieldnames)
+
+
+def _validate_output_fieldnames(
+    metric_class: type[MetricType],
+    include_fields: list[str] | None = None,
+    exclude_fields: list[str] | None = None,
+) -> list[str]:
+    """
+    Subset and/or re-order the metric's fieldnames based on the specified include/exclude lists.
+
+    * Only one of `include_fields` and `exclude_fields` may be specified.
+    * All fieldnames specified in `include_fields` must be fields on `metric_class`. If this
+      argument is specified, fields will be returned in the order they appear in the list.
+    * All fieldnames specified in `exclude_fields` must be fields on `metric_class`. (This is
+      technically unnecessary, but is a safeguard against passing an incorrect list.)
+    * If neither `include_fields` nor `exclude_fields` is specified, return the `metric_class`'s
+      fieldnames.
+
+    Raises:
+        ValueError: If both `include_fields` and `exclude_fields` are specified.
+    """
+
+    if include_fields is not None and exclude_fields is not None:
+        raise ValueError(
+            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
+        )
+    elif exclude_fields is not None:
+        assert_fieldnames_are_metric_attributes(exclude_fields, metric_class)
+        output_fieldnames = [f for f in get_fieldnames(metric_class) if f not in exclude_fields]
+    elif include_fields is not None:
+        assert_fieldnames_are_metric_attributes(include_fields, metric_class)
+        output_fieldnames = include_fields
     else:
-        assert False, "Unreachable"
+        output_fieldnames = get_fieldnames(metric_class)
 
+    return output_fieldnames
 
-def get_fieldnames(metric_class: type[Metric]) -> list[str]:
+
+def assert_is_metric(cls: type[Metric]) -> None:
     """
-    Get the fieldnames of the specified metric class.
+    Assert that the given class is a Metric.
 
     Args:
-        metric_class: A Metric class.
-
-    Returns:
-        A list of fieldnames.
+        cls: A class object.
 
     Raises:
         TypeError: If the given class is not a Metric.
     """
-    assert_is_metric(metric_class)
-
-    if dataclasses.is_dataclass(metric_class):
-        return [f.name for f in dataclasses.fields(metric_class)]
-    elif attr.has(metric_class):
-        return [f.name for f in attr.fields(metric_class)]
-    else:
-        assert False, "Unreachable"
+    if not is_metric(cls):
+        raise TypeError(f"Not a dataclass or attr decorated Metric: {cls}")
 
 
 def assert_file_header_matches_metric(
@@ -604,7 +640,7 @@ def assert_file_header_matches_metric(
     Check that the specified file has a header and its fields match those of the provided Metric.
     """
     with path.open("r") as fin:
-        header: MetricFileHeader = get_header(fin, file_format=file_format)
+        header: MetricFileHeader = _get_header(fin, file_format=file_format)
 
     if header is None:
         raise ValueError(f"Could not find a header in the provided file: {path}")
@@ -637,39 +673,3 @@ def assert_fieldnames_are_metric_attributes(
             + f"{metric_class.__name__}: "
             + ", ".join(invalid_fieldnames)
         )
-
-
-def _validate_output_fieldnames(
-    metric_class: type[MetricType],
-    include_fields: list[str] | None = None,
-    exclude_fields: list[str] | None = None,
-) -> list[str]:
-    """
-    Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists.
-
-    * Only one of `include_fields` and `exclude_fields` may be specified.
-    * All fieldnames specified in `include_fields` must be fields on `dataclass_type`. If this
-      argument is specified, fields will be returned in the order they appear in the list.
-    * All fieldnames specified in `exclude_fields` must be fields on `dataclass_type`. (This is
-      technically unnecessary, but is a safeguard against passing an incorrect list.)
-    * If neither `include_fields` or `exclude_fields` are specified, return the `dataclass_type`'s
-      fieldnames.
-
-    Raises:
-        ValueError: If both `include_fields` and `exclude_fields` are specified.
-    """
-
-    if include_fields is not None and exclude_fields is not None:
-        raise ValueError(
-            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
-        )
-    elif exclude_fields is not None:
-        assert_fieldnames_are_metric_attributes(exclude_fields, metric_class)
-        output_fieldnames = [f for f in get_fieldnames(metric_class) if f not in exclude_fields]
-    elif include_fields is not None:
-        assert_fieldnames_are_metric_attributes(include_fields, metric_class)
-        output_fieldnames = include_fields
-    else:
-        output_fieldnames = get_fieldnames(metric_class)
-
-    return output_fieldnames