Skip to content

Commit

Permalink
refactor: Primer -> Oligo, PrimerLike -> OligoLike; corresponding upd…
Browse files Browse the repository at this point in the history
…ates to imports and tests
  • Loading branch information
emmcauley committed Sep 26, 2024
1 parent 3a416d6 commit 31c6c3f
Show file tree
Hide file tree
Showing 14 changed files with 414 additions and 324 deletions.
8 changes: 4 additions & 4 deletions prymer/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from prymer.api.clustering import ClusteredIntervals
from prymer.api.clustering import cluster_intervals
from prymer.api.minoptmax import MinOptMax
from prymer.api.oligo import Oligo
from prymer.api.oligo_like import OligoLike
from prymer.api.picking import FilteringParams
from prymer.api.picking import build_and_pick_primer_pairs
from prymer.api.picking import build_primer_pairs
from prymer.api.picking import pick_top_primer_pairs
from prymer.api.primer import Primer
from prymer.api.primer_like import PrimerLike
from prymer.api.primer_pair import PrimerPair
from prymer.api.span import BedLikeCoords
from prymer.api.span import Span
Expand All @@ -27,8 +27,8 @@
"build_primer_pairs",
"pick_top_primer_pairs",
"build_and_pick_primer_pairs",
"PrimerLike",
"Primer",
"OligoLike",
"Oligo",
"PrimerPair",
"Span",
"Strand",
Expand Down
217 changes: 217 additions & 0 deletions prymer/api/oligo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""
# Oligo Class and Methods
This module contains a class and class methods to represent an oligo designed by Primer3.
Oligos can represent single primer and/or internal probe designs.
Class attributes include the primer sequence, melting temperature, and the score of the oligo. The
mapping of the oligo to the genome is also stored.
Optional attributes include naming information and a tail sequence to attach to the 5' end of the
oligo (if applicable). Optional attributes also include the thermodynamic results from Primer3.
## Examples of interacting with the `Oligo` class
```python
>>> from prymer.api.span import Span, Strand
>>> oligo_span = Span(refname="chr1", start=1, end=20)
>>> oligo = Oligo(tm=70.0, penalty=-123.0, span=oligo_span)
>>> oligo.longest_hp_length()
0
>>> oligo.length
20
>>> oligo.name is None
True
>>> oligo = Oligo(tm=70.0, penalty=-123.0, span=oligo_span, bases="GACGG"*4)
>>> oligo.longest_hp_length()
3
>>> oligo.untailed_length()
20
>>> oligo.tailed_length()
20
>>> primer = oligo.with_tail(tail="GATTACA")
>>> primer.untailed_length()
20
>>> primer.tailed_length()
27
>>> primer = primer.with_name(name="fwd_primer")
>>> primer.name
'fwd_primer'
```
Oligos may also be written to a file and subsequently read back in, as the `Oligo` class is an
`fgpyo` `Metric` class:
```python
>>> from pathlib import Path
>>> left_span = Span(refname="chr1", start=1, end=20)
>>> left = Oligo(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20)
>>> right_span = Span(refname="chr1", start=101, end=120)
>>> right = Oligo(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20)
>>> path = Path("/tmp/path/to/primers.txt")
>>> Oligo.write(path, left, right) # doctest: +SKIP
>>> primers = Oligo.read(path) # doctest: +SKIP
>>> list(primers) # doctest: +SKIP
[
Oligo(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20),
Oligo(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20)
]
```
"""

from dataclasses import dataclass
from dataclasses import replace
from typing import Any
from typing import Callable
from typing import Dict
from typing import Optional

from fgpyo.fasta.sequence_dictionary import SequenceDictionary
from fgpyo.sequence import longest_dinucleotide_run_length
from fgpyo.sequence import longest_homopolymer_length
from fgpyo.util.metric import Metric

from prymer.api.oligo_like import MISSING_BASES_STRING
from prymer.api.oligo_like import OligoLike
from prymer.api.span import Span


@dataclass(frozen=True, init=True, kw_only=True, slots=True)
class Oligo(OligoLike, Metric["Oligo"]):
    """Stores the properties of the designed oligo.

    Oligos can include both single primer and internal probe designs. The penalty score of the
    design is emitted by Primer3 and controlled by the corresponding design parameters.
    The penalty for a primer is set by the combination of `PrimerAndAmpliconParameters` and
    `PrimerWeights`, whereas a probe penalty is set by `ProbeParameters` and `ProbeWeights`.

    Attributes:
        tm: the calculated melting temperature of the oligo
        penalty: the penalty or score for the oligo
        span: the mapping of the oligo to the genome
        self_any_th: self-complementarity throughout the probe as calculated by Primer3
        self_end_th: 3' end complementarity of the probe as calculated by Primer3
        hairpin_th: hairpin formation thermodynamics of the probe as calculated by Primer3
        bases: the base sequence of the oligo (excluding any tail)
        tail: an optional tail sequence to put on the 5' end of the oligo
        name: an optional name to use for the oligo
    """

    tm: float
    penalty: float
    span: Span
    self_any_th: Optional[float] = None
    self_end_th: Optional[float] = None
    hairpin_th: Optional[float] = None
    bases: Optional[str] = None
    tail: Optional[str] = None

    def __post_init__(self) -> None:
        """Delegates to the parent class's `__post_init__`."""
        # NOTE(review): the explicit two-argument `super(Oligo, self)` form is kept on purpose;
        # zero-argument `super()` is known to be problematic inside `slots=True` dataclasses on
        # some Python versions -- confirm before simplifying.
        super(Oligo, self).__post_init__()

    def longest_hp_length(self) -> int:
        """Length of longest homopolymer in the oligo, or 0 if `bases` is None."""
        if self.bases is None:
            return 0
        return longest_homopolymer_length(self.bases)

    @property
    def length(self) -> int:
        """Length of un-tailed oligo."""
        return self.span.length

    def untailed_length(self) -> int:
        """Length of un-tailed oligo."""
        return self.span.length

    def tailed_length(self) -> int:
        """Length of tailed oligo (the span length plus the tail length, if a tail is set)."""
        if self.tail is None:
            return self.span.length
        return self.span.length + len(self.tail)

    def longest_dinucleotide_run_length(self) -> int:
        """Number of bases in the longest dinucleotide run in an oligo.

        A dinucleotide run is when length two repeat-unit is repeated. For example,
        TCTC (length = 4) or ACACACACAC (length = 10). If there are no such runs, returns 2
        (or 0 if there are fewer than 2 bases).

        Returns 0 when `bases` is None, mirroring `longest_hp_length()`.
        """
        # `bases` is optional; without this guard None would be handed to the fgpyo helper,
        # which expects a string.
        if self.bases is None:
            return 0
        # Inside the method body this name resolves to the module-level fgpyo function, not to
        # this method (class attributes are not in a method's name-lookup scope).
        return longest_dinucleotide_run_length(self.bases)

    def with_tail(self, tail: str) -> "Oligo":
        """Returns a copy of the oligo with the tail sequence attached."""
        return replace(self, tail=tail)

    def with_name(self, name: str) -> "Oligo":
        """Returns a copy of oligo object with the given name."""
        return replace(self, name=name)

    def bases_with_tail(self) -> Optional[str]:
        """
        Returns the sequence of the oligo prepended by the tail.

        If either `bases` or `tail` are None, they shall be excluded. Return None if both are None.
        """
        if self.bases is None:
            # Either the tail alone, or None when the tail is missing as well.
            return self.tail
        if self.tail is None:
            return self.bases
        return f"{self.tail}{self.bases}"

    def to_bed12_row(self) -> str:
        """Returns the oligo as 12 tab-separated BED fields.

        See the BED detail format view:
        https://genome.ucsc.edu/FAQ/FAQformat.html#format1.7
        """
        bed_coord = self.span.get_bedlike_coords()
        fields = [
            self.span.refname,  # contig
            bed_coord.start,  # start
            bed_coord.end,  # end
            self.id,  # name
            500,  # score
            self.span.strand.value,  # strand
            bed_coord.start,  # thick start
            bed_coord.end,  # thick end
            "100,100,100",  # color
            1,  # block count
            self.length,  # block sizes (a single block covering the whole oligo)
            "0",  # block starts (relative to `start`)
        ]
        return "\t".join(str(field) for field in fields)

    def __str__(self) -> str:
        """
        Returns a string representation of this oligo
        """
        # If the bases field is None, replace with MISSING_BASES_STRING
        bases: str = self.bases if self.bases is not None else MISSING_BASES_STRING
        return f"{bases}\t{self.tm}\t{self.penalty}\t{self.span}"

    @classmethod
    def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
        """Returns the extra per-type string parsers for this class: `Span` values are parsed
        with `Span.from_string`."""
        return {
            Span: Span.from_string,
        }

    @staticmethod
    def compare(this: "Oligo", that: "Oligo", seq_dict: SequenceDictionary) -> int:
        """Compares this oligo to that oligo by their span, ordering references using the given
        sequence dictionary.

        Args:
            this: the first oligo
            that: the second oligo
            seq_dict: the sequence dictionary used to order references

        Returns:
            -1 if this oligo is less than the that oligo, 0 if equal, 1 otherwise
        """
        return Span.compare(this=this.span, that=that.span, seq_dict=seq_dict)
34 changes: 17 additions & 17 deletions prymer/api/primer_like.py → prymer/api/oligo_like.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
"""
# Class and Methods for primer-like objects
# Class and Methods for oligo-like objects
The `PrimerLike` class is an abstract base class designed to represent primer-like objects,
such as individual primers or primer pairs. This class encapsulates common attributes and
The `OligoLike` class is an abstract base class designed to represent oligo-like objects,
such as individual primers and probes or primer pairs. This class encapsulates common attributes and
provides a foundation for more specialized implementations.
In particular, the following methods/attributes need to be implemented:
- [`span()`][prymer.api.primer_like.PrimerLike.span] -- the mapping of the primer-like
- [`span()`][prymer.api.oligo_like.OligoLike.span] -- the mapping of the oligo-like
object to the genome.
- [`bases()`][prymer.api.primer_like.PrimerLike.bases] -- the bases of the primer-like
- [`bases()`][prymer.api.oligo_like.OligoLike.bases] -- the bases of the oligo-like
object, or `None` if not available.
- [`to_bed12_row()`][prymer.api.primer_like.PrimerLike.to_bed12_row] -- the 12-field BED
representation of this primer-like object.
- [`to_bed12_row()`][prymer.api.oligo_like.OligoLike.to_bed12_row] -- the 12-field BED
representation of this oligo-like object.
See the following concrete implementations:
- [`Primer`][prymer.api.primer.Primer] -- a class to store an individual primer
- [`Oligo`][prymer.api.oligo.Oligo] -- a class to store an individual oligo
- [`PrimerPair`][prymer.api.primer_pair.PrimerPair] -- a class to store a primer pair
"""
Expand All @@ -38,9 +38,9 @@


@dataclass(frozen=True, init=True, slots=True)
class PrimerLike(ABC):
class OligoLike(ABC):
"""
An abstract base class for primer-like objects, such as individual primers or primer pairs.
An abstract base class for oligo-like objects, such as individual primers or primer pairs.
Attributes:
name: an optional name to use for the primer
Expand All @@ -67,12 +67,12 @@ def __post_init__(self) -> None:
@property
@abstractmethod
def span(self) -> Span:
"""Returns the mapping of the primer-like object to a genome."""
"""Returns the mapping of the oligo-like object to a genome."""

@property
@abstractmethod
def bases(self) -> Optional[str]:
"""Returns the base sequence of the primer-like object."""
"""Returns the base sequence of the oligo-like object."""

@property
def percent_gc_content(self) -> float:
Expand All @@ -88,7 +88,7 @@ def percent_gc_content(self) -> float:
@property
def id(self) -> str:
"""
Returns the identifier for the primer-like object. This shall be the `name`
Returns the identifier for the oligo-like object. This shall be the `name`
if one exists, otherwise a generated value based on the location of the object.
"""
if self.name is not None:
Expand All @@ -98,7 +98,7 @@ def id(self) -> str:

@property
def location_string(self) -> str:
"""Returns a string representation of the location of the primer-like object."""
"""Returns a string representation of the location of the oligo-like object."""
return (
f"{self.span.refname}_{self.span.start}_"
+ f"{self.span.end}_{self._strand_to_location_string()}"
Expand All @@ -107,7 +107,7 @@ def location_string(self) -> str:
@abstractmethod
def to_bed12_row(self) -> str:
"""
Formats the primer-like into 12 tab-separated fields matching the BED 12-column spec.
Formats the oligo-like into 12 tab-separated fields matching the BED 12-column spec.
See: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
"""

Expand All @@ -126,5 +126,5 @@ def _strand_to_location_string(self) -> str:
assert_never(f"Encountered unhandled Strand value: {self.span.strand}")


PrimerLikeType = TypeVar("PrimerLikeType", bound=PrimerLike)
"""Type variable for classes generic over `PrimerLike` types."""
OligoLikeType = TypeVar("OligoLikeType", bound=OligoLike)
"""Type variable for classes generic over `OligoLike` types."""
14 changes: 7 additions & 7 deletions prymer/api/picking.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

from prymer.api.melting import calculate_long_seq_tm
from prymer.api.minoptmax import MinOptMax
from prymer.api.primer import Primer
from prymer.api.oligo import Oligo
from prymer.api.primer_pair import PrimerPair
from prymer.api.span import Span
from prymer.ntthal import NtThermoAlign
Expand Down Expand Up @@ -146,8 +146,8 @@ def _seq_penalty(start: int, end: int, params: FilteringParams) -> float:


def score(
left: Primer,
right: Primer,
left: Oligo,
right: Oligo,
target: Span,
amplicon: Span,
amplicon_seq_or_tm: str | float,
Expand Down Expand Up @@ -279,8 +279,8 @@ def is_acceptable_primer_pair(primer_pair: PrimerPair, params: FilteringParams)


def build_primer_pairs(
lefts: Iterable[Primer],
rights: Iterable[Primer],
lefts: Iterable[Oligo],
rights: Iterable[Oligo],
target: Span,
params: FilteringParams,
fasta: FastaFile,
Expand Down Expand Up @@ -411,8 +411,8 @@ def pick_top_primer_pairs(


def build_and_pick_primer_pairs(
lefts: Iterable[Primer],
rights: Iterable[Primer],
lefts: Iterable[Oligo],
rights: Iterable[Oligo],
target: Span,
num_primers: int,
min_difference: int,
Expand Down
Loading

0 comments on commit 31c6c3f

Please sign in to comment.