From 59cbd9e56dad7d7b9f9699340223132dd59c2b9c Mon Sep 17 00:00:00 2001 From: clintval Date: Fri, 24 Nov 2023 20:34:19 -0500 Subject: [PATCH 1/3] Add VCF module to docs and fixup docs warnings --- docs/api.rst | 12 +++++++++++- fgpyo/fasta/builder.py | 13 +++++++++---- fgpyo/io/__init__.py | 15 +++++++++------ fgpyo/read_structure.py | 10 +++------- fgpyo/sam/builder.py | 6 ++++-- fgpyo/sam/clipping.py | 10 ++++------ fgpyo/util/inspect.py | 1 + fgpyo/vcf/__init__.py | 13 +++++++------ 8 files changed, 48 insertions(+), 32 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 63da3ba7..fa0e833a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -18,6 +18,15 @@ SAM/BAM/CRAM files .. automodule:: fgpyo.sam.clipping :members: +VCF/BCF files +============= + +.. automodule:: fgpyo.vcf + :members: + +.. automodule:: fgpyo.vcf.builder + :members: + FASTA files =========== @@ -31,6 +40,7 @@ Metric files :members: .. autofunction:: fgpyo.util.inspect.attr_from + :noindex: .. seealso:: @@ -69,7 +79,7 @@ Logging :members: IO -======= +== .. automodule:: fgpyo.io :members: diff --git a/fgpyo/fasta/builder.py b/fgpyo/fasta/builder.py index dee611e6..bf4eeac8 100755 --- a/fgpyo/fasta/builder.py +++ b/fgpyo/fasta/builder.py @@ -7,25 +7,30 @@ Examples of creating sets of contigs for writing to fasta ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Writing a FASTA with two contigs each with 100 bases. -.. code-block:: python +Writing a FASTA with two contigs each with 100 bases: + >>> from fgpyo.fasta.builder import FastaBuilder >>> builder = FastaBuilder() >>> builder.add("chr10").add("AAAAAAAAAA", 10) >>> builder.add("chr11").add("GGGGGGGGGG", 10) >>> builder.to_file(path = pathlib.Path("test.fasta")) -Writing a FASTA with one contig with 100 A's and 50 T's + +Writing a FASTA with one contig with 100 A's and 50 T's: + >>> from fgpyo.fasta.builder import FastaBuilder >>> builder = FastaBuilder() >>> builder.add("chr10").add("AAAAAAAAAA", 10).add("TTTTTTTTTT", 5) >>> builder.to_file(path = pathlib.Path("test.fasta")) -Add bases to existing contig + +Add bases to existing contig: + >>> from fgpyo.fasta.builder import FastaBuilder >>> builder = FastaBuilder() >>> contig_one = builder.add("chr10").add("AAAAAAAAAA", 1) >>> contig_one.add("NNN", 1) >>> contig_one.bases 'AAAAAAAAAANNN' + """ import textwrap from pathlib import Path diff --git a/fgpyo/io/__init__.py b/fgpyo/io/__init__.py index 34c95139..3457c7d1 100644 --- a/fgpyo/io/__init__.py +++ b/fgpyo/io/__init__.py @@ -40,6 +40,7 @@ "gzip file" >>> next(lines) "10" + """ import gzip import io @@ -134,9 +135,10 @@ def to_reader(path: Path) -> Union[io.TextIOWrapper, TextIO, IO[Any]]: path: Path to read from Example: - reader = fio.to_reader(path = Path("reader.txt")) - reader.readlines() - reader.close() + >>> reader = fio.to_reader(path = Path("reader.txt")) + >>> reader.readlines() + >>> reader.close() + """ if path.suffix in COMPRESSED_FILE_EXTENSIONS: return io.TextIOWrapper(cast(IO[bytes], gzip.open(path, mode="rb")), encoding="utf-8") @@ -151,9 +153,10 @@ def to_writer(path: Path, append: bool = False) -> Union[IO[Any], io.TextIOWrapp path: Path to write (or append) to Example: - writer = fio.to_writer(path = Path("writer.txt")) - writer.write(f'{something}\n') - writer.close() + >>> writer = fio.to_writer(path = Path("writer.txt")) + >>> writer.write(f'{something}\\n') + >>> writer.close() + """ mode_prefix = "w" if append: diff --git a/fgpyo/read_structure.py b/fgpyo/read_structure.py index 91d7622f..59ee6414 100644 --- a/fgpyo/read_structure.py +++ b/fgpyo/read_structure.py @@ -142,16 +142,12 @@ def fixed_length(self) -> int: return self.length def extract(self, bases: str) -> SubReadWithoutQuals: - """Gets the bases associated with this read segment. If strict is false then only return - the sub-sequence for which we have bases in `bases`, otherwise throw an exception. - """ + """Gets the bases associated with this read segment.""" end = self._calculate_end(bases) return SubReadWithoutQuals(bases=bases[self.offset : end], segment=self._resized(end)) def extract_with_quals(self, bases: str, quals: str) -> SubReadWithQuals: - """Gets the bases and qualities associated with this read segment. If strict is false then - only return the sub-sequence for which we have bases in `bases`, otherwise throw an - exception.""" + """Gets the bases and qualities associated with this read segment.""" assert len(bases) == len(quals), f"Bases and quals differ in length: {bases} {quals}" end = self._calculate_end(bases) return SubReadWithQuals( @@ -162,7 +158,7 @@ def extract_with_quals(self, bases: str, quals: str) -> SubReadWithQuals: def _calculate_end(self, bases: str) -> int: """Checks some requirements and then calculates the end position for the segment for the - given read""" + given read.""" bases_len = len(bases) assert bases_len >= self.offset, f"Read ends before the segment starts: {self}" assert ( diff --git a/fgpyo/sam/builder.py b/fgpyo/sam/builder.py index 69f0db1e..f2e716b3 100755 --- a/fgpyo/sam/builder.py +++ b/fgpyo/sam/builder.py @@ -369,13 +369,15 @@ def add_pair( Mapped pairs can be created by specifying both `start1` and `start2` and either `chrom`, for pairs where both reads map to the same contig, or both `chrom1` and `chrom2`, for pairs where reads map to different contigs. i.e.: + - `add_pair(chrom, start1, start2)` will create a mapped pair where both reads map to - the same contig (`chrom`). + the same contig (`chrom`). - `add_pair(chrom1, start1, chrom2, start2)` will create a mapped pair where the reads - map to different contigs (`chrom1` and `chrom2`). + map to different contigs (`chrom1` and `chrom2`). A pair with only one of the two reads mapped can be created by setting only one start position. Flags will automatically be set correctly for the unmapped mate. + - `add_pair(chrom, start1)` - `add_pair(chrom1, start1)` - `add_pair(chrom, start2)` diff --git a/fgpyo/sam/clipping.py b/fgpyo/sam/clipping.py index 8b47b226..908dcc65 100755 --- a/fgpyo/sam/clipping.py +++ b/fgpyo/sam/clipping.py @@ -69,15 +69,13 @@ class ClippingInfo(NamedTuple): - """Named tuple holding the number of bases clipped on the query and reference respectively. - - Attributes: - query_bases_clipped (int): the number of query bases in the alignment that were clipped. - ref_bases_clipped (int): the number of reference bases in the alignment that were clipped. - """ + """Named tuple holding the number of bases clipped on the query and reference respectively.""" query_bases_clipped: int + """The number of query bases in the alignment that were clipped.""" + ref_bases_clipped: int + """The number of reference bases in the alignment that were clipped.""" def softclip_start_of_alignment_by_query( diff --git a/fgpyo/util/inspect.py b/fgpyo/util/inspect.py index 5ab8a4e8..944ece77 100644 --- a/fgpyo/util/inspect.py +++ b/fgpyo/util/inspect.py @@ -263,6 +263,7 @@ def attr_from( cls: the attr class to be built kwargs: a dictionary of keyword arguments parsers: a dictionary of parser functions to apply to specific types + """ return_values: Dict[str, Any] = {} for attribute in attr.fields(cls): diff --git a/fgpyo/vcf/__init__.py b/fgpyo/vcf/__init__.py index d23fc06c..f6f837a0 100644 --- a/fgpyo/vcf/__init__.py +++ b/fgpyo/vcf/__init__.py @@ -7,17 +7,17 @@ The module contains the following public classes: - - :class:`~VariantBuilder` -- A builder class that allows the + - :class:`~fgpyo.vcf.builder.VariantBuilder` -- A builder class that allows the accumulation of variant records and access as a list and writing to file. Examples ~~~~~~~~ Typically, we have :class:`~pysam.VariantRecord` records obtained from reading from a VCF file. -The :class:`~VariantBuilder` class builds such records. +The :class:`~fgpyo.vcf.builder.VariantBuilder` class builds such records. -Variants are added with the :func:`~VariantBuilder.add()` method, which -returns a `Variant`. +Variants are added with the :func:`~fgpyo.vcf.builder.VariantBuilder.add()` method, which +returns a :class:`pysam.VariantRecord`. >>> import pysam >>> from fgpyo.vcf.builder import VariantBuilder @@ -44,8 +44,8 @@ >>> path_to_vcf: Path = builder.to_path() The variants may also be retrieved in the order they were added via the -:func:`~VariantBuilder.to_unsorted_list()` method and in coordinate sorted -order via the :func:`~VariantBuilder.to_sorted_list()` method. +:func:`~fgpyo.vcf.builder.VariantBuilder.to_unsorted_list()` method and in coordinate sorted +order via the :func:`~fgpyo.vcf.builder.VariantBuilder.to_sorted_list()` method. """ from contextlib import contextmanager @@ -87,6 +87,7 @@ def reader(path: VcfPath) -> Generator[VcfReader, None, None]: @contextmanager def writer(path: VcfPath, header: VariantHeader) -> Generator[VcfWriter, None, None]: """Opens the given path for VCF writing. + Args: path: the path to a VCF, or an open filehandle header: the source for the output VCF header. If you are modifying a VCF file that you are From 93a6ecea857815ae0b85e6bcc49bed04502496d6 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 27 Nov 2023 16:25:45 -0800 Subject: [PATCH 2/3] chore: found a few docs rough edges --- fgpyo/read_structure.py | 42 ++++++++++++++++++++++++++++++----------- fgpyo/util/metric.py | 13 ++++++++----- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/fgpyo/read_structure.py b/fgpyo/read_structure.py index 59ee6414..32b2504a 100644 --- a/fgpyo/read_structure.py +++ b/fgpyo/read_structure.py @@ -15,7 +15,6 @@ Examples ~~~~~~~~ -.. code-block:: python >>> from fgpyo.read_structure import ReadStructure >>> rs = ReadStructure.from_string("75T8B75T") @@ -53,13 +52,14 @@ The module contains the following public classes: - :class:`~fgpyo.read_structure.ReadStructure` -- Describes the structure of a give read - :class:`~fgpyo.read_structure.ReadSegment` -- Describes all the information about a segment - within a read structure + within a read structure - :class:`~fgpyo.read_structure.SegmentType` -- The type of segments that can show up in a read - structure + structure - :class:`~fgpyo.read_structure.SubReadWithoutQuals` -- Contains the bases that correspond to - the given read segment + the given read segment - :class:`~fgpyo.read_structure.SubReadWithQuals` -- Contains the bases and qualities that - correspond to the given read segment + correspond to the given read segment + """ import enum from typing import Iterable @@ -70,8 +70,9 @@ import attr -# A character that can be put in place of a number in a read structure to mean "0 or more bases". + ANY_LENGTH_CHAR: str = "+" +"""A character that can be put in place of a number in a read structure to mean "0 or more bases".""" @enum.unique @@ -79,9 +80,16 @@ class SegmentType(enum.Enum): """The type of segments that can show up in a read structure""" Template = "T" + """The segment type for template bases.""" + SampleBarcode = "B" + """The segment type for sample barcode bases.""" + MolecularBarcode = "M" + """The segment type for molecular barcode bases.""" + Skip = "S" + """The segment type for bases that need to be skipped.""" def __str__(self) -> str: return self.value @@ -89,13 +97,17 @@ def __str__(self) -> str: @attr.s(frozen=True, auto_attribs=True, kw_only=True) class SubReadWithoutQuals: - """Contains the bases that correspond to the given read segment""" + """Contains the bases that correspond to the given read segment.""" bases: str + """The sub-read bases that correspond to the given read segment.""" + segment: "ReadSegment" + """The segment of the read structure that describes this sub-read.""" @property def kind(self) -> SegmentType: + """The kind of read segment that corresponds to this sub-read.""" return self.segment.kind @@ -104,11 +116,17 @@ class SubReadWithQuals: """Contains the bases and qualities that correspond to the given read segment""" bases: str + """The sub-read bases that correspond to the given read segment.""" + quals: str + """The sub-read base qualities that correspond to the given read segment.""" + segment: "ReadSegment" + """The segment of the read structure that describes this sub-read.""" @property def kind(self) -> SegmentType: + """The kind of read segment that corresponds to this sub-read.""" return self.segment.kind @@ -119,9 +137,10 @@ class ReadSegment: (can be any length, 0 or more) in which case length must be None. Attributes: - offset: the offset of the read segment in the read - length: the length of the segment, or None if it is variable length - kind: the kind of read segment + offset: The offset of the read segment in the read. + length: The length of the segment, or None if it is variable length. + kind: The kind of read segment. + """ offset: int @@ -190,7 +209,8 @@ class ReadStructure(Iterable[ReadSegment]): length and some offset from the start of the read. Attributes: - segments: the segments composing the read structure + segments: The segments composing the read structure + """ segments: Tuple[ReadSegment, ...] diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py index 9cda23ee..9cc49143 100644 --- a/fgpyo/util/metric.py +++ b/fgpyo/util/metric.py @@ -129,8 +129,8 @@ class Metric(ABC, Generic[MetricType]): makes it easy for them to be read in languages like `R`. Sub-classes of :class:`~fgpyo.util.metric.Metric` can support parsing and formatting custom - types with :func::`~fgpyo.util.metric.Metric._parsers` and - :func::`~fgpyo.util.metric.Metric.format_value`. + types with :func:`~fgpyo.util.metric.Metric._parsers` and + :func:`~fgpyo.util.metric.Metric.format_value`. """ def values(self) -> Iterator[Any]: @@ -219,7 +219,9 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]: @classmethod def parse(cls, fields: List[str]) -> Any: """Parses the string-representation of this metric. One string per attribute should be - given.""" + given. + + """ parsers = cls._parsers() header = cls.header() assert len(fields) == len(header) @@ -232,8 +234,9 @@ def write(cls, path: Path, *values: MetricType) -> None: The header will always be written. Args: - path: path to the output file - values: zero or more metrics. + path: Path to the output file. + values: Zero or more metrics. + """ with io.to_writer(path) as writer: writer.write("\t".join(cls.header())) From b0f9ba74b792f868468ca46625baf5a1adcec907 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 27 Nov 2023 16:27:33 -0800 Subject: [PATCH 3/3] chore: fixup one lint bug --- fgpyo/read_structure.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fgpyo/read_structure.py b/fgpyo/read_structure.py index 32b2504a..6b2a3d9b 100644 --- a/fgpyo/read_structure.py +++ b/fgpyo/read_structure.py @@ -72,7 +72,8 @@ ANY_LENGTH_CHAR: str = "+" -"""A character that can be put in place of a number in a read structure to mean "0 or more bases".""" +"""A character that can be put in place of a number in a read structure to mean "0 or more bases". +""" @enum.unique