From 8c69815f2ceb0d25307bfdad948bd2abd8457723 Mon Sep 17 00:00:00 2001 From: clintval Date: Thu, 26 Dec 2024 19:38:49 -0500 Subject: [PATCH] Add methods to fix mate info on non-primaries and templates --- fgpyo/sam/__init__.py | 101 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/fgpyo/sam/__init__.py b/fgpyo/sam/__init__.py index 698b1ea..b0600a0 100644 --- a/fgpyo/sam/__init__.py +++ b/fgpyo/sam/__init__.py @@ -876,6 +876,78 @@ def set_mate_info( r2.is_proper_pair = proper_pair +def set_mate_info_on_secondary( + primary: AlignedSegment, + secondary: AlignedSegment, + is_proper_pair: Callable[[AlignedSegment, AlignedSegment], bool] = is_proper_pair, +) -> None: + """Set mate info on a secondary alignment to the next read ordinal's primary alignment. + + Args: + primary: The primary alignment of the secondary's mate. + secondary: The secondary alignment to set mate information upon. + is_proper_pair: A function that takes the two alignments and determines proper pair status. + + Raises: + ValueError: If primary and secondary are of the same read ordinal. + ValueError: If the primary is marked as either secondary or supplementary. + ValueError: If the secondary is not marked as secondary. + """ + if primary.is_read1 == secondary.is_read1 or primary.is_secondary or primary.is_supplementary: + raise ValueError("Secondary mate info must be set from a primary of the next ordinal!") + if not secondary.is_secondary: + raise ValueError("Cannot set mate info on an alignment not marked as secondary!") + if primary.query_name != secondary.query_name: + raise ValueError("Cannot set mate info on alignments with different query names!") + + secondary.next_reference_id = primary.reference_id + secondary.next_reference_name = primary.reference_name + secondary.next_reference_start = primary.reference_start + secondary.mate_is_forward = primary.is_forward + secondary.mate_is_mapped = primary.is_mapped + secondary.set_tag("MC", primary.cigarstring) + secondary.set_tag("MQ", primary.mapping_quality) + secondary.set_tag("ms", sum_of_base_qualities(primary)) + + # NB: calculate isize and proper pair as if this secondary alignment was the primary alignment. + secondary.is_proper_pair = is_proper_pair(primary, secondary) + secondary.template_length = isize(primary, secondary) + + +def set_mate_info_on_supplementary(primary: AlignedSegment, supp: AlignedSegment) -> None: + """Set mate info on a supplementary alignment to the next read ordinal's primary alignment. + + Args: + primary: The primary alignment of the supplementary's mate. + supp: The supplementary alignment to set mate information upon. + + Raises: + ValueError: If primary and secondary are of the same read ordinal. + ValueError: If the primary is marked as either secondary or supplementary. + ValueError: If the secondary is not marked as secondary. + """ + if primary.is_read1 == supp.is_read1 or primary.is_secondary or primary.is_supplementary: + raise ValueError("Supplementary mate info must be set from a primary of the next ordinal!") + if not supp.is_supplementary: + raise ValueError("Cannot set mate info on an alignment not marked as supplementary!") + if primary.query_name != supp.query_name: + raise ValueError("Cannot set mate info on alignments with different query names!") + + supp.next_reference_id = primary.reference_id + supp.next_reference_name = primary.reference_name + supp.next_reference_start = primary.reference_start + supp.mate_is_forward = primary.is_forward + supp.mate_is_mapped = primary.is_mapped + supp.set_tag("MC", primary.cigarstring) + supp.set_tag("MQ", primary.mapping_quality) + supp.set_tag("ms", sum_of_base_qualities(primary)) + + # NB: for a non-secondary supplemental alignment, set the following to the same as the primary. + if not supp.is_secondary: + supp.is_proper_pair = primary.is_proper_pair + supp.template_length = -primary.template_length + + def set_as_pairs( r1: AlignedSegment, r2: AlignedSegment, @@ -1157,6 +1229,10 @@ def all_recs(self) -> Iterator[AlignedSegment]: for rec in recs: yield rec + def set_mate_info(self) -> "Template": + """Reset all mate information on every record in a template.""" + return set_mate_info_for_template(self) + def write_to( self, writer: SamFile, @@ -1215,6 +1291,31 @@ def __next__(self) -> Template: return Template.build(recs, validate=False) +def set_mate_info_for_template( + template: Template, + is_proper_pair: Callable[[AlignedSegment, AlignedSegment], bool] = is_proper_pair, +) -> Template: + """Reset all mate information on every record in a template. + + Args: + template: The template of alignments to reset all mate information on. + is_proper_pair: A function that takes two alignments and determines proper pair status. + """ + if template.r1 is not None and template.r2 is not None: + set_mate_info(template.r1, template.r2, is_proper_pair=is_proper_pair) + if template.r1 is not None: + for rec in template.r2_secondaries: + set_mate_info_on_secondary(template.r1, rec, is_proper_pair=is_proper_pair) + for rec in template.r2_supplementals: + set_mate_info_on_supplementary(template.r1, rec) + if template.r2 is not None: + for rec in template.r1_secondaries: + set_mate_info_on_secondary(template.r2, rec, is_proper_pair=is_proper_pair) + for rec in template.r1_supplementals: + set_mate_info_on_supplementary(template.r2, rec) + return template + + class SamOrder(enum.Enum): """ Enumerations of possible sort orders for a SAM file.