diff --git a/docs/overview.md b/docs/overview.md index 1e92398..42759d2 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -3,8 +3,8 @@ The `prymer` Python library is intended to be used for three main purposes: 1. [Clustering targets](#clustering-targets) into larger amplicons prior to designing primers. -2. [Designing primers](#designing-primers) (left or right) or primer pairs using Primer3 for each target from (1). -3. [Build and Picking a set of primer pairs](#build-and-picking-primer-pairs) from the designed primer pairs produced in (2). +2. [Designing](#designing-primers) primers (single and paired) and internal hybridization probes using Primer3 for each target from (1). +3. [Build and Picking a set of primer pairs](#build-and-picking-primer-pairs) from the design candidates produced in (2). ## Clustering Targets @@ -18,22 +18,24 @@ amplicons prior to primer design. Designing primers (left or right) or primer pairs using Primer3 is primarily performed using the [`Primer3`][prymer.primer3.primer3.Primer3] class, which wraps the [`primer3` command line tool](https://github.com/primer3-org/primer3). The -[`design_primers()`][prymer.primer3.primer3.Primer3.design_primers] facilitates the design of single and paired primers +[`design()`][prymer.primer3.primer3.Primer3.design] method facilitates the design of primers (single and paired) and internal hybridization probes for a single target. The `Primer3` instance is intended to be re-used to design primers across multiple targets, or re-design (after changing parameters) for the same target, or both! 
-Common input parameters are specified in [`Primer3Parameters()`][prymer.primer3.primer3_parameters.Primer3Parameters] and -[`Primer3Weights()`][prymer.primer3.primer3_weights.Primer3Weights], while the task type (left primer, +Common input parameters for designing primers are specified in [`PrimerAndAmpliconParameters()`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters] and +[`PrimerAndAmpliconWeights()`][prymer.primer3.primer3_weights.PrimerAndAmpliconWeights], while the task type (left primer, right primer, or primer pair design) is specified with the corresponding -[`Primer3Task`][prymer.primer3.primer3_task.Primer3Task]. +[`Primer3Task`][prymer.primer3.primer3_task.Primer3Task]. +Design specifications for designing probes are stored in [`ProbeParameters()`][prymer.primer3.primer3_parameters.ProbeParameters]. +Penalty weights for designing internal probes are specified in [`ProbeWeights()`][prymer.primer3.primer3_weights.ProbeWeights]. The result of a primer design is encapsulated in the [`Primer3Result`][prymer.primer3.primer3.Primer3Result] class. It -provides the primers (or primer pairs) that were designed, as well as a list of reasons some primers were not returned, +provides the primers, probes, or primer pairs that were designed, as well as a list of reasons some primers were not returned, for example exceeding the melting temperature threshold, too high GC content, and so on. These failures are encapsulated in the [`Primer3Failures`][prymer.primer3.primer3.Primer3Failure] class. The [`Primer3Result`][prymer.primer3.primer3.Primer3Result] returned by the primer design contains either a list of -[`Primer`][prymer.api.primer.Primer]s or [`PrimerPair`][prymer.api.primer_pair.PrimerPair]s, depending on the +[`Oligo`][prymer.api.oligo.Oligo]s or [`PrimerPair`][prymer.api.primer_pair.PrimerPair]s, depending on the [`Primer3Task`][prymer.primer3.primer3_task.Primer3Task] specified in the input parameters. 
These can be subsequently filtered or examined. diff --git a/prymer/api/__init__.py b/prymer/api/__init__.py index bccf36c..9cc352d 100644 --- a/prymer/api/__init__.py +++ b/prymer/api/__init__.py @@ -1,12 +1,12 @@ from prymer.api.clustering import ClusteredIntervals from prymer.api.clustering import cluster_intervals from prymer.api.minoptmax import MinOptMax +from prymer.api.oligo import Oligo +from prymer.api.oligo_like import OligoLike from prymer.api.picking import FilteringParams from prymer.api.picking import build_and_pick_primer_pairs from prymer.api.picking import build_primer_pairs from prymer.api.picking import pick_top_primer_pairs -from prymer.api.primer import Primer -from prymer.api.primer_like import PrimerLike from prymer.api.primer_pair import PrimerPair from prymer.api.span import BedLikeCoords from prymer.api.span import Span @@ -27,8 +27,8 @@ "build_primer_pairs", "pick_top_primer_pairs", "build_and_pick_primer_pairs", - "PrimerLike", - "Primer", + "OligoLike", + "Oligo", "PrimerPair", "Span", "Strand", diff --git a/prymer/api/oligo.py b/prymer/api/oligo.py new file mode 100644 index 0000000..4a003de --- /dev/null +++ b/prymer/api/oligo.py @@ -0,0 +1,207 @@ +""" +# Oligo Class and Methods + +This module contains a class and class methods to represent an oligo (e.g., designed by Primer3). + +Oligos can represent single primer and/or internal probe designs. + +Class attributes include the base sequence, melting temperature, and the score of the oligo. The +mapping of the oligo to the genome is also stored. + +Optional attributes include naming information and a tail sequence to attach to the 5' end of the +oligo (if applicable). Optional attributes also include the thermodynamic results from Primer3. 
+ +## Examples of interacting with the `Oligo` class + +```python +>>> from prymer.api.span import Span, Strand +>>> oligo_span = Span(refname="chr1", start=1, end=20) +>>> oligo = Oligo(tm=70.0, penalty=-123.0, span=oligo_span, bases="AGCT" * 5) +>>> oligo.longest_hp_length() +1 +>>> oligo.length +20 +>>> oligo.name is None +True +>>> oligo = Oligo(tm=70.0, penalty=-123.0, span=oligo_span, bases="GACGG"*4) +>>> oligo.longest_hp_length() +3 +>>> oligo.untailed_length() +20 +>>> oligo.tailed_length() +20 +>>> primer = oligo.with_tail(tail="GATTACA") +>>> primer.untailed_length() +20 +>>> primer.tailed_length() +27 +>>> primer = primer.with_name(name="fwd_primer") +>>> primer.name +'fwd_primer' + +``` + +Oligos may also be written to a file and subsequently read back in, as the `Oligo` class is an +`fgpyo` `Metric` class: + +```python +>>> from pathlib import Path +>>> left_span = Span(refname="chr1", start=1, end=20) +>>> left = Oligo(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20) +>>> right_span = Span(refname="chr1", start=101, end=120) +>>> right = Oligo(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20) +>>> path = Path("/tmp/path/to/primers.txt") +>>> Oligo.write(path, left, right) # doctest: +SKIP +>>> primers = Oligo.read(path) # doctest: +SKIP +>>> list(primers) # doctest: +SKIP +[ + Oligo(tm=70.0, penalty=-123.0, span=amplicon_span, bases="G"*20), + Oligo(tm=70.0, penalty=-123.0, span=amplicon_span, bases="T"*20) +] + +``` +""" + +from dataclasses import dataclass +from dataclasses import replace +from typing import Any +from typing import Callable +from typing import Dict +from typing import Optional + +from fgpyo.fasta.sequence_dictionary import SequenceDictionary +from fgpyo.sequence import longest_dinucleotide_run_length +from fgpyo.sequence import longest_homopolymer_length +from fgpyo.util.metric import Metric + +from prymer.api.oligo_like import MISSING_BASES_STRING +from prymer.api.oligo_like import OligoLike +from prymer.api.span 
import Span + + +@dataclass(frozen=True, init=True, kw_only=True, slots=True) +class Oligo(OligoLike, Metric["Oligo"]): + """Stores the properties of the designed oligo. + + Oligos can include both single primer and internal probe designs. The penalty score of the + design is emitted by Primer3 and controlled by the corresponding design parameters. + The penalty for a primer is set by `PrimerAndAmpliconParameters` and + `PrimerAndAmpliconWeights`; a probe penalty is set by `ProbeParameters` and `ProbeWeights`. + + Attributes: + tm: the calculated melting temperature of the oligo + penalty: the penalty or score for the oligo + span: the mapping of the oligo to the genome + bases: the base sequence of the oligo (excluding any tail) + tail: an optional tail sequence to put on the 5' end of the oligo + name: an optional name to use for the oligo + + """ + + tm: float + penalty: float + span: Span + bases: Optional[str] = None + tail: Optional[str] = None + + def __post_init__(self) -> None: + super(Oligo, self).__post_init__() + + def longest_hp_length(self) -> int: + """Length of longest homopolymer in the oligo.""" + if self.bases is None: + return 0 + else: + return longest_homopolymer_length(self.bases) + + @property + def length(self) -> int: + """Length of un-tailed oligo.""" + return self.span.length + + def untailed_length(self) -> int: + """Length of un-tailed oligo.""" + return self.span.length + + def tailed_length(self) -> int: + """Length of tailed oligo.""" + return self.span.length if self.tail is None else self.span.length + len(self.tail) + + def longest_dinucleotide_run_length(self) -> int: + """Number of bases in the longest dinucleotide run in an oligo. + + A dinucleotide run is when length two repeat-unit is repeated. For example, + TCTC (length = 4) or ACACACACAC (length = 10). 
If there are no such runs, returns 2 + (or 0 if there are fewer than 2 bases).""" + return longest_dinucleotide_run_length(self.bases) + + def with_tail(self, tail: str) -> "Oligo": + """Returns a copy of the oligo with the tail sequence attached.""" + return replace(self, tail=tail) + + def with_name(self, name: str) -> "Oligo": + """Returns a copy of oligo object with the given name.""" + return replace(self, name=name) + + def bases_with_tail(self) -> Optional[str]: + """ + Returns the sequence of the oligo prepended by the tail. + + If `tail` is None, only return `bases`. + """ + if self.tail is None: + return self.bases + return f"{self.tail}{self.bases}" + + def to_bed12_row(self) -> str: + """Returns the BED detail format view: + https://genome.ucsc.edu/FAQ/FAQformat.html#format1.7""" + bed_coord = self.span.get_bedlike_coords() + return "\t".join( + map( + str, + [ + self.span.refname, # contig + bed_coord.start, # start + bed_coord.end, # end + self.id, # name + 500, # score + self.span.strand.value, # strand + bed_coord.start, # thick start + bed_coord.end, # thick end + "100,100,100", # color + 1, # block count + f"{self.length}", # block sizes + "0", # block starts (relative to `start`) + ], + ) + ) + + def __str__(self) -> str: + """ + Returns a string representation of this oligo + """ + # If the bases field is None, replace with MISSING_BASES_STRING + bases: str = self.bases if self.bases is not None else MISSING_BASES_STRING + return f"{bases}\t{self.tm}\t{self.penalty}\t{self.span}" + + @classmethod + def _parsers(cls) -> Dict[type, Callable[[str], Any]]: + return { + Span: lambda value: Span.from_string(value), + } + + @staticmethod + def compare(this: "Oligo", that: "Oligo", seq_dict: SequenceDictionary) -> int: + """Compares this oligo to that oligo by their span, ordering references using the given + sequence dictionary. 
+ + Args: + this: the first oligo + that: the second oligo + seq_dict: the sequence dictionary used to order references + + Returns: + -1 if this oligo is less than the that oligo, 0 if equal, 1 otherwise + """ + return Span.compare(this=this.span, that=that.span, seq_dict=seq_dict) diff --git a/prymer/api/primer_like.py b/prymer/api/oligo_like.py similarity index 72% rename from prymer/api/primer_like.py rename to prymer/api/oligo_like.py index 075fa82..2797b3f 100644 --- a/prymer/api/primer_like.py +++ b/prymer/api/oligo_like.py @@ -1,22 +1,22 @@ """ -# Class and Methods for primer-like objects +# Class and Methods for oligo-like objects -The `PrimerLike` class is an abstract base class designed to represent primer-like objects, -such as individual primers or primer pairs. This class encapsulates common attributes and +The `OligoLike` class is an abstract base class designed to represent oligo-like objects, +such as individual primers and probes or primer pairs. This class encapsulates common attributes and provides a foundation for more specialized implementations. In particular, the following methods/attributes need to be implemented: -- [`span()`][prymer.api.primer_like.PrimerLike.span] -- the mapping of the primer-like +- [`span()`][prymer.api.oligo_like.OligoLike.span] -- the mapping of the oligo-like object to the genome. -- [`bases()`][prymer.api.primer_like.PrimerLike.bases] -- the bases of the primer-like +- [`bases()`][prymer.api.oligo_like.OligoLike.bases] -- the bases of the oligo-like object, or `None` if not available. -- [`to_bed12_row()`][prymer.api.primer_like.PrimerLike.to_bed12_row] -- the 12-field BED - representation of this primer-like object. +- [`to_bed12_row()`][prymer.api.oligo_like.OligoLike.to_bed12_row] -- the 12-field BED + representation of this oligo-like object. 
See the following concrete implementations: -- [`Primer`][prymer.api.primer.Primer] -- a class to store an individual primer +- [`Oligo`][prymer.api.oligo.Oligo] -- a class to store an individual oligo - [`PrimerPair`][prymer.api.primer_pair.PrimerPair] -- a class to store a primer pair """ @@ -25,7 +25,6 @@ from abc import abstractmethod from dataclasses import dataclass from typing import Optional -from typing import TypeVar from typing import assert_never from fgpyo.sequence import gc_content @@ -38,9 +37,9 @@ @dataclass(frozen=True, init=True, slots=True) -class PrimerLike(ABC): +class OligoLike(ABC): """ - An abstract base class for primer-like objects, such as individual primers or primer pairs. + An abstract base class for oligo-like objects, such as individual primers or primer pairs. Attributes: name: an optional name to use for the primer @@ -67,12 +66,12 @@ def __post_init__(self) -> None: @property @abstractmethod def span(self) -> Span: - """Returns the mapping of the primer-like object to a genome.""" + """Returns the mapping of the oligo-like object to a genome.""" @property @abstractmethod def bases(self) -> Optional[str]: - """Returns the base sequence of the primer-like object.""" + """Returns the base sequence of the oligo-like object.""" @property def percent_gc_content(self) -> float: @@ -88,7 +87,7 @@ def percent_gc_content(self) -> float: @property def id(self) -> str: """ - Returns the identifier for the primer-like object. This shall be the `name` + Returns the identifier for the oligo-like object. This shall be the `name` if one exists, otherwise a generated value based on the location of the object. 
""" if self.name is not None: @@ -98,7 +97,7 @@ def id(self) -> str: @property def location_string(self) -> str: - """Returns a string representation of the location of the primer-like object.""" + """Returns a string representation of the location of the oligo-like object.""" return ( f"{self.span.refname}_{self.span.start}_" + f"{self.span.end}_{self._strand_to_location_string()}" @@ -107,7 +106,7 @@ def location_string(self) -> str: @abstractmethod def to_bed12_row(self) -> str: """ - Formats the primer-like into 12 tab-separated fields matching the BED 12-column spec. + Formats the oligo-like into 12 tab-separated fields matching the BED 12-column spec. See: https://genome.ucsc.edu/FAQ/FAQformat.html#format1 """ @@ -124,7 +123,3 @@ def _strand_to_location_string(self) -> str: case _: # pragma: no cover # Not calculating coverage on this line as it should be impossible to reach assert_never(f"Encountered unhandled Strand value: {self.span.strand}") - - -PrimerLikeType = TypeVar("PrimerLikeType", bound=PrimerLike) -"""Type variable for classes generic over `PrimerLike` types.""" diff --git a/prymer/api/picking.py b/prymer/api/picking.py index ad8572c..aa20ce9 100644 --- a/prymer/api/picking.py +++ b/prymer/api/picking.py @@ -35,7 +35,7 @@ - [`pick_top_primer_pairs()`][prymer.api.picking.pick_top_primer_pairs] -- Selects up to the given number of primer pairs from the given list of primer pairs. - [`build_and_pick_primer_pairs()`][prymer.api.picking.build_and_pick_primer_pairs] -- - Builds primer pairs from individual left and primers and selects up to the given number of + Builds primer pairs from individual left and right primers and selects up to the given number of primer pairs from the given list of primer pairs. 
""" @@ -50,7 +50,7 @@ from prymer.api.melting import calculate_long_seq_tm from prymer.api.minoptmax import MinOptMax -from prymer.api.primer import Primer +from prymer.api.oligo import Oligo from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span from prymer.ntthal import NtThermoAlign @@ -146,8 +146,8 @@ def _seq_penalty(start: int, end: int, params: FilteringParams) -> float: def score( - left: Primer, - right: Primer, + left_primer: Oligo, + right_primer: Oligo, target: Span, amplicon: Span, amplicon_seq_or_tm: str | float, @@ -168,8 +168,8 @@ def score( by the corresponding weight. Is zero when the amplicon is at most the read length. Args: - left: the left primer - right: the right primer + left_primer: the left primer + right_primer: the right primer target: the target mapping amplicon: the amplicon mapping amplicon_seq_or_tm: either the melting temperature of the amplicon, or the amplicon sequence @@ -208,18 +208,26 @@ def score( tm_penalty = (params.amplicon_tms.opt - tm) * params.product_tm_lt # Penalize primers whose innermost base is closer than some minimum distance from the target - left_dist_penalty: float = _dist_penalty(start=left.span.end, end=target.start, params=params) - right_dist_penalty: float = _dist_penalty(start=target.end, end=right.span.start, params=params) + left_dist_penalty: float = _dist_penalty( + start=left_primer.span.end, end=target.start, params=params + ) + right_dist_penalty: float = _dist_penalty( + start=target.end, end=right_primer.span.start, params=params + ) # Penalize amplicons where the target cannot be fully sequenced at the given read length # starting from both ends of the amplicon. 
- left_seq_penalty: float = _seq_penalty(start=left.span.start, end=target.end, params=params) - right_seq_penalty: float = _seq_penalty(start=target.start, end=right.span.end, params=params) + left_seq_penalty: float = _seq_penalty( + start=left_primer.span.start, end=target.end, params=params + ) + right_seq_penalty: float = _seq_penalty( + start=target.start, end=right_primer.span.end, params=params + ) # Put it all together return ( - left.penalty - + right.penalty + left_primer.penalty + + right_primer.penalty + size_penalty + tm_penalty + left_dist_penalty @@ -279,8 +287,8 @@ def is_acceptable_primer_pair(primer_pair: PrimerPair, params: FilteringParams) def build_primer_pairs( - lefts: Iterable[Primer], - rights: Iterable[Primer], + left_primers: Iterable[Oligo], + right_primers: Iterable[Oligo], target: Span, params: FilteringParams, fasta: FastaFile, @@ -288,8 +296,8 @@ def build_primer_pairs( """Builds primer pairs from individual left and primers. Args: - lefts: the left primers - rights: the right primers + left_primers: the left primers + right_primers: the right primers target: the genome mapping for the target params: the parameters used for filtering fasta: the FASTA file from which the amplicon sequence will be retrieved. 
@@ -299,31 +307,32 @@ def build_primer_pairs( """ # generate all the primer pairs primer_pairs: list[PrimerPair] = [] - for left in lefts: - for right in rights: - if left.span.refname != right.span.refname: + for left_primer in left_primers: + for right_primer in right_primers: + if left_primer.span.refname != right_primer.span.refname: raise ValueError( "Cannot create a primer pair from left and right primers on different" - f"references: left: '{left.span.refname}' right: '{right.span.refname}'" + f" references: left: '{left_primer.span.refname}'" + f" right: '{right_primer.span.refname}'" ) amplicon_mapping = Span( - refname=target.refname, start=left.span.start, end=right.span.end + refname=target.refname, start=left_primer.span.start, end=right_primer.span.end ) amplicon_bed = amplicon_mapping.get_bedlike_coords() # since fasta.fetch is 0-based amplicon_sequence = fasta.fetch( reference=target.refname, start=amplicon_bed.start, end=amplicon_bed.end ) amplicon_penalty = score( - left=left, - right=right, + left_primer=left_primer, + right_primer=right_primer, target=target, amplicon=amplicon_mapping, amplicon_seq_or_tm=amplicon_sequence, params=params, ) pp = PrimerPair( - left_primer=left, - right_primer=right, + left_primer=left_primer, + right_primer=right_primer, amplicon_sequence=amplicon_sequence, amplicon_tm=calculate_long_seq_tm(amplicon_sequence), penalty=amplicon_penalty, @@ -411,8 +420,8 @@ def pick_top_primer_pairs( def build_and_pick_primer_pairs( - lefts: Iterable[Primer], - rights: Iterable[Primer], + left_primers: Iterable[Oligo], + right_primers: Iterable[Oligo], target: Span, num_primers: int, min_difference: int, @@ -424,8 +433,8 @@ def build_and_pick_primer_pairs( """Picks up to `num_primers` primer pairs. Args: - lefts: the left primers - rights: the right primers + left_primers: the left primers + right_primers: the right primers target: the genome mapping for the target num_primers: the number of primer pairs to return for the target. 
min_difference: the minimum base difference between two primers that we will tolerate. @@ -439,7 +448,11 @@ def build_and_pick_primer_pairs( """ # build the list of primer pairs primer_pairs = build_primer_pairs( - lefts=lefts, rights=rights, target=target, params=params, fasta=fasta + left_primers=left_primers, + right_primers=right_primers, + target=target, + params=params, + fasta=fasta, ) # select the primer pairs diff --git a/prymer/api/primer.py b/prymer/api/primer.py index 8ceb5b6..aa1ac31 100644 --- a/prymer/api/primer.py +++ b/prymer/api/primer.py @@ -1,205 +1,22 @@ -""" -# Primer Class and Methods - -This module contains a class and class methods to represent a primer (e.g. designed by Primer3) - -Class attributes include the primer sequence, melting temperature, and the score of the primer. The -mapping of the primer to the genome is also stored. - -Optional attributes include naming information and a tail sequence to attach to the 5' end of the -primer (if applicable). - -## Examples of interacting with the `Primer` class - -```python ->>> from prymer.api.span import Span, Strand ->>> primer_span = Span(refname="chr1", start=1, end=20) ->>> primer = Primer(tm=70.0, penalty=-123.0, span=primer_span) ->>> primer.longest_hp_length() -0 ->>> primer.length -20 ->>> primer.name is None -True ->>> primer = Primer(tm=70.0, penalty=-123.0, span=primer_span, bases="GACGG"*4) ->>> primer.longest_hp_length() -3 ->>> primer.untailed_length() -20 ->>> primer.tailed_length() -20 ->>> primer = primer.with_tail(tail="GATTACA") ->>> primer.untailed_length() -20 ->>> primer.tailed_length() -27 ->>> primer = primer.with_name(name="foobar") ->>> primer.name -'foobar' - -``` - -Primers may also be written to a file and subsequently read back in, as the `Primer` class is an -`fgpyo` `Metric` class: - -```python ->>> from pathlib import Path ->>> left_span = Span(refname="chr1", start=1, end=20) ->>> left = Primer(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20) ->>> 
right_span = Span(refname="chr1", start=101, end=120) ->>> right = Primer(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20) ->>> path = Path("/tmp/path/to/primers.txt") ->>> Primer.write(path, left, right) # doctest: +SKIP ->>> primers = Primer.read(path) # doctest: +SKIP ->>> list(primers) # doctest: +SKIP -[ - Primer(tm=70.0, penalty=-123.0, span=amplicon_span, bases="G"*20), - Primer(tm=70.0, penalty=-123.0, span=amplicon_span, bases="T"*20) -] - -``` -""" +"""This module is deprecated - see prymer/api/oligo.py""" +import warnings from dataclasses import dataclass -from dataclasses import replace -from typing import Any -from typing import Callable -from typing import Dict -from typing import Optional - -from fgpyo.fasta.sequence_dictionary import SequenceDictionary -from fgpyo.sequence import longest_dinucleotide_run_length -from fgpyo.sequence import longest_homopolymer_length -from fgpyo.util.metric import Metric - -from prymer.api.primer_like import MISSING_BASES_STRING -from prymer.api.primer_like import PrimerLike -from prymer.api.span import Span - -@dataclass(frozen=True, init=True, kw_only=True, slots=True) -class Primer(PrimerLike, Metric["Primer"]): - """Stores the properties of the designed Primer. +from prymer.api.oligo import Oligo - Attributes: - bases: the base sequence of the primer (excluding any tail) - tm: the calculated melting temperature of the primer - penalty: the penalty or score for the primer - span: the mapping of the primer to the genome - name: an optional name to use for the primer - tail: an optional tail sequence to put on the 5' end of the primer - Example: - #TODO - <.....> +@dataclass(frozen=True, init=True, slots=True) +class Primer(Oligo): """ + A deprecated alias for `Oligo`. 
- tm: float - penalty: float - span: Span - bases: Optional[str] = None - tail: Optional[str] = None - - def __post_init__(self) -> None: - super(Primer, self).__post_init__() - - def longest_hp_length(self) -> int: - """Length of longest homopolymer in the primer.""" - if self.bases is None: - return 0 - else: - return longest_homopolymer_length(self.bases) - - @property - def length(self) -> int: - """Length of un-tailed primer.""" - return self.span.length - - def untailed_length(self) -> int: - """Length of un-tailed primer.""" - return self.span.length - - def tailed_length(self) -> int: - """Length of tailed primer.""" - return self.span.length if self.tail is None else self.span.length + len(self.tail) - - def longest_dinucleotide_run_length(self) -> int: - """Number of bases in the longest dinucleotide run in a primer. - - A dinucleotide run is when length two repeat-unit is repeated. For example, - TCTC (length = 4) or ACACACACAC (length = 10). If there are no such runs, returns 2 - (or 0 if there are fewer than 2 bases).""" - return longest_dinucleotide_run_length(self.bases) - - def with_tail(self, tail: str) -> "Primer": - """Returns a copy of the primer with the tail sequence attached.""" - return replace(self, tail=tail) - - def with_name(self, name: str) -> "Primer": - """Returns copy of primer object with the given name.""" - return replace(self, name=name) - - def bases_with_tail(self) -> Optional[str]: - """ - Returns the sequence of the primer prepended by the tail. - - If either `bases` or `tail` are None, they shall be excluded. Return None if both are None. 
- """ - if self.bases is None: - return None if self.tail is None else self.tail - if self.tail is None: - return self.bases - return f"{self.tail}{self.bases}" - - def to_bed12_row(self) -> str: - """Returns the BED detail format view: - https://genome.ucsc.edu/FAQ/FAQformat.html#format1.7""" - bed_coord = self.span.get_bedlike_coords() - return "\t".join( - map( - str, - [ - self.span.refname, # contig - bed_coord.start, # start - bed_coord.end, # end - self.id, # name - 500, # score - self.span.strand.value, # strand - bed_coord.start, # thick start - bed_coord.end, # thick end - "100,100,100", # color - 1, # block count - f"{self.length}", # block sizes - "0", # block starts (relative to `start`) - ], - ) - ) - - def __str__(self) -> str: - """ - Returns a string representation of this primer - """ - # If the bases field is None, replace with MISSING_BASES_STRING - bases: str = self.bases if self.bases is not None else MISSING_BASES_STRING - return f"{bases}\t{self.tm}\t{self.penalty}\t{self.span}" - - @classmethod - def _parsers(cls) -> Dict[type, Callable[[str], Any]]: - return { - Span: lambda value: Span.from_string(value), - } - - @staticmethod - def compare(this: "Primer", that: "Primer", seq_dict: SequenceDictionary) -> int: - """Compares this primer to that primer by their span, ordering references using the given - sequence dictionary. - - Args: - this: the first primer - that: the second primer - seq_dict: the sequence dictionary used to order references + This class exists to maintain backwards compatibility with earlier releases of `prymer` + and may be removed in a future version. 
+ """ - Returns: - -1 if this primer is less than the that primer, 0 if equal, 1 otherwise - """ - return Span.compare(this=this.span, that=that.span, seq_dict=seq_dict) + warnings.warn( + "The Primer class was deprecated, use Oligo instead", + DeprecationWarning, + stacklevel=2, + ) diff --git a/prymer/api/primer_pair.py b/prymer/api/primer_pair.py index a10074a..ccbff9e 100644 --- a/prymer/api/primer_pair.py +++ b/prymer/api/primer_pair.py @@ -5,8 +5,8 @@ class methods to represent a primer pair. The primer pair is comprised of a left and right primer that work together to amplify an amplicon. -Class attributes include each of the primers (represented by a -[`Primer`][prymer.api.primer.Primer] object), information about the expected amplicon +Class attributes include each of the primers (represented by an +[`Oligo`][prymer.api.primer.Oligo] object), information about the expected amplicon (positional information about how the amplicon maps to the genome, the sequence, and its melting temperature), as well as a score of the primer pair (e.g. as emitted by Primer3). @@ -18,9 +18,9 @@ class methods to represent a primer pair. The primer pair is comprised of a lef ```python >>> from prymer.api.span import Strand >>> left_span = Span(refname="chr1", start=1, end=20) ->>> left_primer = Primer(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20) +>>> left_primer = Oligo(tm=70.0, penalty=-123.0, span=left_span, bases="G"*20) >>> right_span = Span(refname="chr1", start=101, end=120, strand=Strand.NEGATIVE) ->>> right_primer = Primer(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20) +>>> right_primer = Oligo(tm=70.0, penalty=-123.0, span=right_span, bases="T"*20) >>> primer_pair = PrimerPair( \ left_primer=left_primer, \ right_primer=right_primer, \ @@ -37,7 +37,7 @@ class methods to represent a primer pair. 
The primer pair is comprised of a lef Span(refname='chr1', start=21, end=100, strand=) >>> list(primer_pair) -[Primer(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=1, end=20, strand=), bases='GGGGGGGGGGGGGGGGGGGG', tail=None), Primer(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=101, end=120, strand=), bases='TTTTTTTTTTTTTTTTTTTT', tail=None)] +[Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=1, end=20, strand=), bases='GGGGGGGGGGGGGGGGGGGG', tail=None), Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=101, end=120, strand=), bases='TTTTTTTTTTTTTTTTTTTT', tail=None)] ``` """ # noqa: E501 @@ -50,14 +50,14 @@ class methods to represent a primer pair. The primer pair is comprised of a lef from fgpyo.fasta.sequence_dictionary import SequenceDictionary -from prymer.api.primer import Primer -from prymer.api.primer_like import MISSING_BASES_STRING -from prymer.api.primer_like import PrimerLike +from prymer.api.oligo import Oligo +from prymer.api.oligo_like import MISSING_BASES_STRING +from prymer.api.oligo_like import OligoLike from prymer.api.span import Span @dataclass(frozen=True, init=True, kw_only=True, slots=True) -class PrimerPair(PrimerLike): +class PrimerPair(OligoLike): """ Represents a pair of primers that work together to amplify an amplicon. 
The coordinates of the amplicon are determined to span from the start of the left @@ -75,8 +75,8 @@ class PrimerPair(PrimerLike): ValueError: if the chromosomes of the left and right primers are not the same """ - left_primer: Primer - right_primer: Primer + left_primer: Oligo + right_primer: Oligo amplicon_tm: float penalty: float amplicon_sequence: Optional[str] = None @@ -215,7 +215,7 @@ def to_bed12_row(self) -> str: ) ) - def __iter__(self) -> Iterator[Primer]: + def __iter__(self) -> Iterator[Oligo]: """Returns an iterator of left and right primers""" return iter([self.left_primer, self.right_primer]) @@ -274,9 +274,9 @@ def compare( if by_amplicon: return Span.compare(this=this.amplicon, that=that.amplicon, seq_dict=seq_dict) else: - retval = Primer.compare(this=this.left_primer, that=that.left_primer, seq_dict=seq_dict) + retval = Oligo.compare(this=this.left_primer, that=that.left_primer, seq_dict=seq_dict) if retval == 0: - retval = Primer.compare( + retval = Oligo.compare( this=this.right_primer, that=that.right_primer, seq_dict=seq_dict ) return retval diff --git a/prymer/offtarget/offtarget_detector.py b/prymer/offtarget/offtarget_detector.py index d19d6e2..025ca8a 100644 --- a/prymer/offtarget/offtarget_detector.py +++ b/prymer/offtarget/offtarget_detector.py @@ -13,8 +13,8 @@ >>> from pathlib import Path >>> from prymer.api.span import Strand >>> ref_fasta = Path("./tests/offtarget/data/miniref.fa") ->>> left_primer = Primer(bases="AAAAA", tm=37, penalty=0, span=Span("chr1", start=67, end=71)) ->>> right_primer = Primer(bases="TTTTT", tm=37, penalty=0, span=Span("chr1", start=75, end=79, strand=Strand.NEGATIVE)) +>>> left_primer = Oligo(bases="AAAAA", tm=37, penalty=0, span=Span("chr1", start=67, end=71)) +>>> right_primer = Oligo(bases="TTTTT", tm=37, penalty=0, span=Span("chr1", start=75, end=79, strand=Strand.NEGATIVE)) >>> detector = OffTargetDetector(ref=ref_fasta, max_primer_hits=204, max_primer_pair_hits=1, three_prime_region_length=20, 
max_mismatches_in_three_prime_region=0, max_mismatches=0, max_amplicon_size=250) >>> len(detector.filter(primers=[left_primer, right_primer])) # keep all 2 @@ -55,8 +55,8 @@ method maps individual primers (`Primer`s). ```python ->>> p1: Primer = Primer(tm=37, penalty=0, span=Span(refname="chr1", start=1, end=30), bases="CAGGTGGATCATGAGGTCAGGAGTTCAAGA") ->>> p2: Primer = Primer(tm=37, penalty=0, span=Span(refname="chr1", start=61, end=93, strand=Strand.NEGATIVE), bases="CATGCCCAGCTAATTTTTTGTATTTTTAGTAGA") +>>> p1: Oligo = Oligo(tm=37, penalty=0, span=Span(refname="chr1", start=1, end=30), bases="CAGGTGGATCATGAGGTCAGGAGTTCAAGA") +>>> p2: Oligo = Oligo(tm=37, penalty=0, span=Span(refname="chr1", start=61, end=93, strand=Strand.NEGATIVE), bases="CATGCCCAGCTAATTTTTTGTATTTTTAGTAGA") >>> results_dict: dict[str, BwaResult] = detector.mappings_of(primers=[p1, p2]) >>> list(results_dict.keys()) ['CAGGTGGATCATGAGGTCAGGAGTTCAAGA', 'CATGCCCAGCTAATTTTTTGTATTTTTAGTAGA'] @@ -87,7 +87,7 @@ from ordered_set import OrderedSet -from prymer.api.primer import Primer +from prymer.api.oligo import Oligo from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span from prymer.offtarget.bwa import BwaAlnInteractive @@ -95,7 +95,7 @@ from prymer.offtarget.bwa import BwaResult from prymer.offtarget.bwa import Query -PrimerType = TypeVar("PrimerType", bound=Primer) +PrimerType = TypeVar("PrimerType", bound=Oligo) @dataclass(init=True, frozen=True) diff --git a/prymer/primer3/__init__.py b/prymer/primer3/__init__.py index beec032..1381a43 100644 --- a/prymer/primer3/__init__.py +++ b/prymer/primer3/__init__.py @@ -4,11 +4,13 @@ from prymer.primer3.primer3_failure_reason import Primer3FailureReason from prymer.primer3.primer3_input import Primer3Input from prymer.primer3.primer3_input_tag import Primer3InputTag -from prymer.primer3.primer3_parameters import Primer3Parameters +from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from 
prymer.primer3.primer3_parameters import ProbeParameters from prymer.primer3.primer3_task import DesignLeftPrimersTask from prymer.primer3.primer3_task import DesignPrimerPairsTask from prymer.primer3.primer3_task import DesignRightPrimersTask -from prymer.primer3.primer3_weights import Primer3Weights +from prymer.primer3.primer3_weights import PrimerAndAmpliconWeights +from prymer.primer3.primer3_weights import ProbeWeights __all__ = [ "Primer3", @@ -20,6 +22,8 @@ "DesignLeftPrimersTask", "DesignPrimerPairsTask", "DesignRightPrimersTask", - "Primer3Parameters", - "Primer3Weights", + "PrimerAndAmpliconParameters", + "ProbeParameters", + "ProbeWeights", + "PrimerAndAmpliconWeights", ] diff --git a/prymer/primer3/primer3.py b/prymer/primer3/primer3.py index 0bd0748..e1d62e4 100644 --- a/prymer/primer3/primer3.py +++ b/prymer/primer3/primer3.py @@ -43,15 +43,15 @@ ``` -The `design_primers()` method on `Primer3` is used to design the primers given a +The `design()` method on `Primer3` is used to design the primers given a [`Primer3Input`][prymer.primer3.primer3_input.Primer3Input]. The latter includes all the parameters and target region. 
```python ->>> from prymer.primer3 import Primer3Parameters +>>> from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters >>> from prymer.api import MinOptMax >>> target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) ->>> params = Primer3Parameters( \ +>>> params = PrimerAndAmpliconParameters( \ amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \ amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \ primer_sizes=MinOptMax(min=29, max=31, opt=30), \ @@ -60,16 +60,16 @@ ) >>> design_input = Primer3Input( \ target=target, \ - params=params, \ + primer_and_amplicon_params=params, \ task=DesignLeftPrimersTask(), \ ) ->>> left_result = designer.design_primers(design_input=design_input) +>>> left_result = designer.design(design_input=design_input) ``` The `left_result` returns the [`Primer3Result`][prymer.primer3.primer3.Primer3Result] container class. It contains two attributes: -1. `filtered_designs`: filtered and ordered (by objective function score) list of primer pairs or +1. `designs`: filtered and ordered (by objective function score) list of primer pairs or single primers that were returned by Primer3. 2. `failures`: ordered list of [`Primer3Failures`][prymer.primer3.primer3.Primer3Failure] detailing design failure reasons and corresponding count. @@ -84,7 +84,7 @@ ``` -While`filtered_designs` attribute on `Primer3Result` may be used to access the list of primers or +While the `designs` attribute on `Primer3Result` may be used to access the list of primers or primer pairs, it is more convenient to use the `primers()` and `primer_pairs()` methods to return the designed primers or primer pairs (use the method corresponding to the input task) so that the proper type is returned (i.e. 
[`Primer`][prymer.api.primer.Primer] or @@ -139,8 +139,8 @@ from fgpyo.sequence import reverse_complement from fgpyo.util.metric import Metric -from prymer.api.primer import Primer -from prymer.api.primer_like import PrimerLike +from prymer.api.oligo import Oligo +from prymer.api.oligo_like import OligoLike from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span from prymer.api.span import Strand @@ -152,6 +152,7 @@ from prymer.primer3.primer3_task import DesignLeftPrimersTask from prymer.primer3.primer3_task import DesignPrimerPairsTask from prymer.primer3.primer3_task import DesignRightPrimersTask +from prymer.primer3.primer3_task import PickHybProbeOnly from prymer.util.executable_runner import ExecutableRunner @@ -170,49 +171,49 @@ class Primer3Failure(Metric["Primer3Failure"]): count: int -PrimerLikeType = TypeVar("PrimerLikeType", bound=PrimerLike) -"""Type variable for a `Primer3Result`, which must implement `PrimerLike`""" +OligoLikeType = TypeVar("OligoLikeType", bound=OligoLike) +"""Type variable for a `Primer3Result`, which must implement `OligoLike`""" @dataclass(init=True, slots=True, frozen=True) -class Primer3Result(Generic[PrimerLikeType]): +class Primer3Result(Generic[OligoLikeType]): """Encapsulates Primer3 design results (both valid designs and failures). 
Attributes: - filtered_designs: filtered and ordered (by objective function score) list of primer - pairs or single primers that were returned by Primer3 + designs: filtered for out-of-spec characteristics and ordered (by objective function score) + list of primer pairs or single oligos that were returned by Primer3 failures: ordered list of Primer3Failures detailing design failure reasons and corresponding count """ - filtered_designs: list[PrimerLikeType] + designs: list[OligoLikeType] failures: list[Primer3Failure] - def as_primer_result(self) -> "Primer3Result[Primer]": + def as_primer_result(self) -> "Primer3Result[Oligo]": """Returns this Primer3Result assuming the design results are of type `Primer`.""" - if len(self.filtered_designs) > 0 and not isinstance(self.filtered_designs[0], Primer): + if len(self.designs) > 0 and not isinstance(self.designs[0], Oligo): raise ValueError("Cannot call `as_primer_result` on `PrimerPair` results") - return typing.cast(Primer3Result[Primer], self) + return typing.cast(Primer3Result[Oligo], self) def as_primer_pair_result(self) -> "Primer3Result[PrimerPair]": """Returns this Primer3Result assuming the design results are of type `PrimerPair`.""" - if len(self.filtered_designs) > 0 and not isinstance(self.filtered_designs[0], PrimerPair): - raise ValueError("Cannot call `as_primer_pair_result` on `Primer` results") + if len(self.designs) > 0 and not isinstance(self.designs[0], PrimerPair): + raise ValueError("Cannot call `as_primer_pair_result` on `Oligo` results") return typing.cast(Primer3Result[PrimerPair], self) - def primers(self) -> list[Primer]: + def primers(self) -> list[Oligo]: """Returns the design results as a list `Primer`s""" try: - return self.as_primer_result().filtered_designs + return self.as_primer_result().designs except ValueError as ex: raise ValueError("Cannot call `primers` on `PrimerPair` results") from ex def primer_pairs(self) -> list[PrimerPair]: """Returns the design results as a list 
`PrimerPair`s""" try: - return self.as_primer_pair_result().filtered_designs + return self.as_primer_pair_result().designs except ValueError as ex: - raise ValueError("Cannot call `primer_pairs` on `Primer` results") from ex + raise ValueError("Cannot call `primer_pairs` on `Oligo` results") from ex class Primer3(ExecutableRunner): @@ -308,17 +309,10 @@ def get_design_sequences(self, region: Span) -> tuple[str, str]: hard_masked = "".join(soft_masked_list) return soft_masked, hard_masked - @staticmethod - def _is_valid_primer(design_input: Primer3Input, primer_design: Primer) -> bool: - return ( - primer_design.longest_dinucleotide_run_length() - <= design_input.params.primer_max_dinuc_bases - ) - @staticmethod def _screen_pair_results( design_input: Primer3Input, designed_primer_pairs: list[PrimerPair] - ) -> tuple[list[PrimerPair], list[Primer]]: + ) -> tuple[list[PrimerPair], list[Oligo]]: """Screens primer pair designs emitted by Primer3 for dinucleotide run length. Args: @@ -330,18 +324,18 @@ def _screen_pair_results( dinuc_pair_failures: single primer designs that failed the `max_dinuc_bases` threshold """ valid_primer_pair_designs: list[PrimerPair] = [] - dinuc_pair_failures: list[Primer] = [] + dinuc_pair_failures: list[Oligo] = [] for primer_pair in designed_primer_pairs: valid: bool = True if ( primer_pair.left_primer.longest_dinucleotide_run_length() - > design_input.params.primer_max_dinuc_bases + > design_input.primer_and_amplicon_params.primer_max_dinuc_bases ): # if the left primer has too many dinucleotide bases, fail it dinuc_pair_failures.append(primer_pair.left_primer) valid = False if ( primer_pair.right_primer.longest_dinucleotide_run_length() - > design_input.params.primer_max_dinuc_bases + > design_input.primer_and_amplicon_params.primer_max_dinuc_bases ): # if the right primer has too many dinucleotide bases, fail it dinuc_pair_failures.append(primer_pair.right_primer) valid = False @@ -349,8 +343,8 @@ def _screen_pair_results( 
valid_primer_pair_designs.append(primer_pair) return valid_primer_pair_designs, dinuc_pair_failures - def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 - """Designs primers or primer pairs given a target region. + def design(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 + """Designs primers, primer pairs, and/or internal probes given a target region. Args: design_input: encapsulates the target region, design task, specifications, and scoring @@ -371,12 +365,25 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: f"Error, trying to use a subprocess that has already been " f"terminated, return code {self._subprocess.returncode}" ) - - design_region: Span = self._create_design_region( - target_region=design_input.target, - max_amplicon_length=design_input.params.max_amplicon_length, - min_primer_length=design_input.params.min_primer_length, - ) + design_region: Span + match design_input.task: + case PickHybProbeOnly(): + if design_input.target.length < design_input.probe_params.probe_sizes.min: + raise ValueError( + "Target region required to be at least as large as the" + " minimal probe size: " + f"target length: {design_input.target.length}, " + f"minimal probe size: {design_input.probe_params.probe_sizes.min}" + ) + design_region = design_input.target + case DesignRightPrimersTask() | DesignLeftPrimersTask() | DesignPrimerPairsTask(): + design_region = self._create_design_region( + target_region=design_input.target, + max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length, + min_primer_length=design_input.primer_and_amplicon_params.min_primer_length, + ) + case _ as unreachable: + assert_never(unreachable) # pragma: no cover soft_masked, hard_masked = self.get_design_sequences(design_region) global_primer3_params = { @@ -389,7 +396,6 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: **global_primer3_params, 
**design_input.to_input_tags(design_region=design_region), } - # Submit inputs to primer3 for tag, value in assembled_primer3_tags.items(): self._subprocess.stdin.write(f"{tag}={value}") @@ -454,15 +460,16 @@ def primer3_error(message: str) -> None: unfiltered_designs=all_pair_results, ) - case DesignLeftPrimersTask() | DesignRightPrimersTask(): # Single primer design - all_single_results = Primer3._build_primers( + case DesignLeftPrimersTask() | DesignRightPrimersTask() | PickHybProbeOnly(): + # Single primer or probe design + all_single_results: list[Oligo] = Primer3._build_oligos( design_input=design_input, design_results=primer3_results, design_region=design_region, design_task=design_input.task, unmasked_design_seq=soft_masked, ) - return Primer3._assemble_primers( + return Primer3._assemble_single_designs( design_input=design_input, design_results=primer3_results, unfiltered_designs=all_single_results, @@ -472,48 +479,39 @@ def primer3_error(message: str) -> None: assert_never(unreachable) @staticmethod - def _build_primers( + def _build_oligos( design_input: Primer3Input, design_results: dict[str, str], design_region: Span, - design_task: Union[DesignLeftPrimersTask, DesignRightPrimersTask], + design_task: Union[DesignLeftPrimersTask, DesignRightPrimersTask, PickHybProbeOnly], unmasked_design_seq: str, - ) -> list[Primer]: + ) -> list[Oligo]: """ - Builds a list of left or right primers from Primer3 output. + Builds a list of single oligos from Primer3 output. 
Args: design_input: the target region, design task, specifications, and scoring penalties - design_results: design results emitted by Primer3 and captured by design_primers() + design_results: design results emitted by Primer3 and captured by design() design_region: the padded design region design_task: the design task unmasked_design_seq: the reference sequence corresponding to the target region Returns: - primers: a list of unsorted and unfiltered primer designs emitted by Primer3 + oligos: a list of unsorted and unfiltered primer designs emitted by Primer3 Raises: ValueError: if Primer3 does not return primer designs """ - count_tag = design_input.task.count_tag - - maybe_count: Optional[str] = design_results.get(count_tag) - if maybe_count is None: # no count tag was found - if "PRIMER_ERROR" in design_results: - primer_error = design_results["PRIMER_ERROR"] - raise ValueError(f"Primer3 returned an error: {primer_error}") - else: - raise ValueError(f"Primer3 did not return the count tag: {count_tag}") - count: int = int(maybe_count) - - primers = [] + count: int = _check_design_results(design_input, design_results) + + primers: list[Oligo] = [] for idx in range(count): key = f"PRIMER_{design_task.task_type}_{idx}" str_position, str_length = design_results[key].split(",", maxsplit=1) position, length = int(str_position), int(str_length) # position is 1-based match design_task: - case DesignLeftPrimersTask(): + case DesignLeftPrimersTask() | PickHybProbeOnly(): span = design_region.get_subspan( offset=position - 1, subspan_length=length, strand=Strand.POSITIVE ) @@ -534,7 +532,7 @@ def _build_primers( bases = reverse_complement(bases) primers.append( - Primer( + Oligo( bases=bases, tm=float(design_results[f"PRIMER_{design_task.task_type}_{idx}_TM"]), penalty=float(design_results[f"PRIMER_{design_task.task_type}_{idx}_PENALTY"]), @@ -544,41 +542,29 @@ def _build_primers( return primers @staticmethod - def _assemble_primers( - design_input: Primer3Input, 
design_results: dict[str, str], unfiltered_designs: list[Primer] + def _assemble_single_designs( + design_input: Primer3Input, + design_results: dict[str, str], + unfiltered_designs: list[Oligo], ) -> Primer3Result: - """Helper function to organize primer designs into valid and failed designs. - - Wraps `Primer3._is_valid_primer()` and `Primer3._build_failures()` to filter out designs - with dinucleotide runs that are too long and extract additional failure reasons emitted by - Primer3. - - Args: - design_input: encapsulates the target region, design task, specifications, - and scoring penalties - unfiltered_designs: list of primers emitted from Primer3 - design_results: key-value pairs of results reported by Primer3 + """Screens oligo designs (primers or probes) emitted by Primer3 for acceptable dinucleotide + runs and extracts failure reasons for failed designs.""" - Returns: - primer_designs: a `Primer3Result` that encapsulates valid and failed designs - """ - valid_primer_designs = [ + valid_designs = [ design for design in unfiltered_designs - if Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] dinuc_failures = [ design for design in unfiltered_designs - if not Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if not _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] failure_strings = [design_results[f"PRIMER_{design_input.task.task_type}_EXPLAIN"]] failures = Primer3._build_failures(dinuc_failures, failure_strings) - primer_designs: Primer3Result = Primer3Result( - filtered_designs=valid_primer_designs, failures=failures - ) - return primer_designs + design_candidates: Primer3Result = Primer3Result(designs=valid_designs, failures=failures) + return design_candidates @staticmethod def _build_primer_pairs( @@ -592,7 +578,7 @@ def _build_primer_pairs( Args: design_input: the target region, design task, 
specifications, and scoring penalties - design_results: design results emitted by Primer3 and captured by design_primers() + design_results: design results emitted by Primer3 and captured by design() design_region: the padded design region unmasked_design_seq: the reference sequence corresponding to the target region @@ -602,7 +588,7 @@ def _build_primer_pairs( Raises: ValueError: if Primer3 does not return the same number of left and right designs """ - left_primers = Primer3._build_primers( + left_primers = Primer3._build_oligos( design_input=design_input, design_results=design_results, design_region=design_region, @@ -610,7 +596,7 @@ def _build_primer_pairs( unmasked_design_seq=unmasked_design_seq, ) - right_primers = Primer3._build_primers( + right_primers = Primer3._build_oligos( design_input=design_input, design_results=design_results, design_region=design_region, @@ -618,7 +604,7 @@ def _build_primer_pairs( unmasked_design_seq=unmasked_design_seq, ) - def _build_primer_pair(num: int, primer_pair: tuple[Primer, Primer]) -> PrimerPair: + def _build_primer_pair(num: int, primer_pair: tuple[Oligo, Oligo]) -> PrimerPair: """Builds the `PrimerPair` object from input left and right primers.""" left_primer = primer_pair[0] right_primer = primer_pair[1] @@ -665,7 +651,7 @@ def _assemble_primer_pairs( primer_designs: a `Primer3Result` that encapsulates valid and failed designs """ valid_primer_pair_designs: list[PrimerPair] - dinuc_pair_failures: list[Primer] + dinuc_pair_failures: list[Oligo] valid_primer_pair_designs, dinuc_pair_failures = Primer3._screen_pair_results( design_input=design_input, designed_primer_pairs=unfiltered_designs ) @@ -676,15 +662,13 @@ def _assemble_primer_pairs( design_results["PRIMER_RIGHT_EXPLAIN"], ] pair_failures = Primer3._build_failures(dinuc_pair_failures, failure_strings) - primer_designs = Primer3Result( - filtered_designs=valid_primer_pair_designs, failures=pair_failures - ) + primer_designs = 
Primer3Result(designs=valid_primer_pair_designs, failures=pair_failures) return primer_designs @staticmethod def _build_failures( - dinuc_failures: list[Primer], + dinuc_failures: list[Oligo], failure_strings: list[str], ) -> list[Primer3Failure]: """Extracts the reasons why designs that were considered by Primer3 failed @@ -760,3 +744,44 @@ def _create_design_region( ) return design_region + + +def _check_design_results(design_input: Primer3Input, design_results: dict[str, str]) -> int: + """Checks for any additional Primer3 errors and reports out the count of emitted designs.""" + count_tag = design_input.task.count_tag + maybe_count: Optional[str] = design_results.get(count_tag) + if maybe_count is None: # no count tag was found + if "PRIMER_ERROR" in design_results: + primer_error = design_results["PRIMER_ERROR"] + raise ValueError(f"Primer3 returned an error: {primer_error}") + else: + raise ValueError(f"Primer3 did not return the count tag: {count_tag}") + count: int = int(maybe_count) + + return count + + +def _has_acceptable_dinuc_run(design_input: Primer3Input, oligo_design: Oligo) -> bool: + """ + True if the design's longest dinucleotide run is no more than the stipulated maximum. + + For primer designs, the maximum is recorded in the input's + `PrimerAndAmpliconParameters.primer_max_dinuc_bases`. + + For probe designs, the maximum is recorded in the input's + `ProbeParameters.probe_max_dinuc_bases`. 
+ + Args: + design_input: the Primer3Input object that wraps task-specific and design-specific params + oligo_design: the design candidate + + Returns: + True if the longest dinucleotide run is within the applicable maximum, False otherwise. + """ + max_dinuc_bases: int + if design_input.task.requires_primer_amplicon_params: + max_dinuc_bases = design_input.primer_and_amplicon_params.primer_max_dinuc_bases + elif design_input.task.requires_probe_params: + max_dinuc_bases = design_input.probe_params.probe_max_dinuc_bases + + return oligo_design.longest_dinucleotide_run_length() <= max_dinuc_bases diff --git a/prymer/primer3/primer3_input.py b/prymer/primer3/primer3_input.py index 6ac6076..aaa29ca 100644 --- a/prymer/primer3/primer3_input.py +++ b/prymer/primer3/primer3_input.py @@ -7,13 +7,17 @@ The module uses: -1. [`Primer3Parameters`][prymer.primer3.primer3_parameters.Primer3Parameters] -to specify user-specified criteria for primer design -2. [`Primer3Weights`][prymer.primer3.primer3_weights.Primer3Weights] to establish penalties -based on those criteria -3. [`Primer3Task`][prymer.primer3.primer3_task.Primer3Task] to organize task-specific +1. [`PrimerAndAmpliconParameters`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters] + to specify user-specified criteria for primer design +2. [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters] + to specify user-specified criteria for probe design +3. [`PrimerAndAmpliconWeights`][prymer.primer3.primer3_weights.PrimerAndAmpliconWeights] + to establish penalties based on those criteria +4. [`ProbeWeights`][prymer.primer3.primer3_weights.ProbeWeights] to specify penalties based on probe + design criteria +5. [`Primer3Task`][prymer.primer3.primer3_task.Primer3Task] to organize task-specific logic. -4. [`Span`](index.md#prymer.api.span.Span] to specify the target region. +6. [`Span`][prymer.api.span.Span] to specify the target region. 
The `Primer3Input.to_input_tags(]` method The main purpose of this class is to generate the @@ -29,14 +33,18 @@ >>> from prymer.primer3 import DesignLeftPrimersTask >>> target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) >>> design_region = Span(refname="chr1", start=150, end=300, strand=Strand.POSITIVE) ->>> params = Primer3Parameters( \ +>>> params = PrimerAndAmpliconParameters( \ amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \ amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \ primer_sizes=MinOptMax(min=29, max=31, opt=30), \ primer_tms=MinOptMax(min=63.0, max=67.0, opt=65.0), \ primer_gcs=MinOptMax(min=30.0, max=65.0, opt=45.0), \ -) ->>> design_input = Primer3Input(target=target, params=params, task=DesignLeftPrimersTask()) + ) +>>> design_input = Primer3Input(target=target, \ + primer_and_amplicon_params=params, \ + task=DesignLeftPrimersTask() \ + ) + >>> for tag, value in design_input.to_input_tags(design_region=design_region).items(): \ print(f"{tag.value} -> {value}") PRIMER_TASK -> pick_primer_list @@ -44,7 +52,6 @@ PRIMER_PICK_RIGHT_PRIMER -> 0 PRIMER_PICK_INTERNAL_OLIGO -> 0 SEQUENCE_INCLUDED_REGION -> 1,51 -PRIMER_NUM_RETURN -> 5 PRIMER_PRODUCT_OPT_SIZE -> 200 PRIMER_PRODUCT_SIZE_RANGE -> 100-250 PRIMER_PRODUCT_MIN_TM -> 55.0 @@ -64,6 +71,7 @@ PRIMER_MAX_POLY_X -> 5 PRIMER_MAX_NS_ACCEPTED -> 1 PRIMER_LOWERCASE_MASKING -> 1 +PRIMER_NUM_RETURN -> 5 PRIMER_PAIR_WT_PRODUCT_SIZE_LT -> 1.0 PRIMER_PAIR_WT_PRODUCT_SIZE_GT -> 1.0 PRIMER_PAIR_WT_PRODUCT_TM_LT -> 0.0 @@ -79,24 +87,63 @@ PRIMER_WT_TM_GT -> 1.0 """ +from dataclasses import MISSING from dataclasses import dataclass +from dataclasses import fields from typing import Any +from typing import Optional from prymer.api.span import Span from prymer.primer3.primer3_input_tag import Primer3InputTag -from prymer.primer3.primer3_parameters import Primer3Parameters +from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from 
prymer.primer3.primer3_parameters import ProbeParameters from prymer.primer3.primer3_task import Primer3TaskType -from prymer.primer3.primer3_weights import Primer3Weights +from prymer.primer3.primer3_weights import PrimerAndAmpliconWeights +from prymer.primer3.primer3_weights import ProbeWeights @dataclass(frozen=True, init=True, slots=True) class Primer3Input: - """Assembles necessary inputs for Primer3 to orchestrate primer and/or primer pair design.""" + """Assembles necessary inputs for Primer3 to orchestrate primer, primer pair, and/or internal + probe design. + + At least one set of design parameters (either `PrimerAndAmpliconParameters` + or `ProbeParameters`) must be specified. + + If `PrimerAndAmpliconParameters` is provided but `PrimerAndAmpliconWeights` is not provided, + default `PrimerAndAmpliconWeights` will be used. + + Similarly, if `ProbeParameters` is provided but `ProbeWeights` is not provided, default + `ProbeWeights` will be used. + + Please see primer3_parameters.py for details on the defaults. 
+ + + Raises: + ValueError: if neither the primer nor the probe parameters are specified + """ target: Span task: Primer3TaskType - params: Primer3Parameters - weights: Primer3Weights = Primer3Weights() + primer_and_amplicon_params: Optional[PrimerAndAmpliconParameters] = None + probe_params: Optional[ProbeParameters] = None + primer_weights: Optional[PrimerAndAmpliconWeights] = None + probe_weights: Optional[ProbeWeights] = None + + def __post_init__(self) -> None: + # check for at least one set of params + # for the set of params given, check that weights were given; use defaults if not given + if self.primer_and_amplicon_params is None and self.probe_params is None: + raise ValueError( + "Primer3 requires at least one set of parameters" + " for either primer or probe design" + ) + + if self.primer_and_amplicon_params is not None and self.primer_weights is None: + object.__setattr__(self, "primer_weights", PrimerAndAmpliconWeights()) + + if self.probe_params is not None and self.probe_weights is None: + object.__setattr__(self, "probe_weights", ProbeWeights()) def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]: """Assembles `Primer3InputTag` and values for input to `Primer3` @@ -113,9 +160,15 @@ def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]: primer3_task_params = self.task.to_input_tags( design_region=design_region, target=self.target ) - assembled_tags = { - **primer3_task_params, - **self.params.to_input_tags(), - **self.weights.to_input_tags(), + assembled_tags: dict[Primer3InputTag, Any] = {**primer3_task_params} + + optional_attributes = { + field.name: getattr(self, field.name) + for field in fields(self) + if field.default is not MISSING } + for settings in optional_attributes.values(): + if settings is not None: + assembled_tags.update(settings.to_input_tags()) + return assembled_tags diff --git a/prymer/primer3/primer3_parameters.py b/prymer/primer3/primer3_parameters.py index 3438ef7..2d427fb 100644 --- 
a/prymer/primer3/primer3_parameters.py +++ b/prymer/primer3/primer3_parameters.py @@ -1,26 +1,33 @@ """ -# Primer3Parameters Class and Methods +# PrimerAndAmpliconParameters and ProbeParameters: Classes and Methods -The [`Primer3Parameters`][prymer.primer3.primer3_parameters.Primer3Parameters] class stores -user input and maps it to the correct Primer3 fields. +The [`PrimerAndAmpliconParameters`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters] +class stores user input for primer design and maps it to the correct Primer3 fields. Primer3 considers many criteria for primer design, including characteristics of candidate primers and the resultant amplicon product, as well as potential complications (off-target priming, primer dimer formation). Users can specify many of these constraints in Primer3, some of which are used to quantify a "score" for each primer design. -The Primer3Parameters class stores commonly used constraints for primer design: GC content, melting -temperature, and size of both primers and expected amplicon. Additional criteria include the maximum -homopolymer length, ambiguous bases, and bases in a dinucleotide run within a primer. By default, -primer design avoids masked bases, returns 5 primers, and sets the GC clamp to be no larger than 5. +The `PrimerAndAmpliconParameters` class stores commonly used constraints for primer design: +GC content, melting temperature, and size of both primers and expected amplicon. +Additional criteria include the maximum homopolymer length, ambiguous bases, and bases in a +dinucleotide run within a primer. By default, primer design avoids masked bases, returns 5 primers, +and sets the GC clamp to be no larger than 5. -The `to_input_tags()` method in `Primer3Parameters` converts these parameters into tag-values pairs -for use when executing `Primer3`. +The `to_input_tags()` method in `PrimerAndAmpliconParameters` converts these parameters into +tag-values pairs for use when executing `Primer3`. 
+ +The [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters] +class stores user input for internal probe design and maps it to the correct Primer3 fields. + +Similar to the `PrimerAndAmpliconParameters` class, the `ProbeParameters` class can be used to +specify the acceptable ranges of probe sizes, melting temperatures, and GC content. ## Examples ```python ->>> params = Primer3Parameters( \ +>>> params = PrimerAndAmpliconParameters( \ amplicon_sizes=MinOptMax(min=100, max=250, opt=200), \ amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), \ primer_sizes=MinOptMax(min=29, max=31, opt=30), \ @@ -29,7 +36,6 @@ ) >>> for tag, value in params.to_input_tags().items(): \ print(f"{tag.value} -> {value}") -PRIMER_NUM_RETURN -> 5 PRIMER_PRODUCT_OPT_SIZE -> 200 PRIMER_PRODUCT_SIZE_RANGE -> 100-250 PRIMER_PRODUCT_MIN_TM -> 55.0 @@ -49,10 +55,12 @@ PRIMER_MAX_POLY_X -> 5 PRIMER_MAX_NS_ACCEPTED -> 1 PRIMER_LOWERCASE_MASKING -> 1 +PRIMER_NUM_RETURN -> 5 ``` """ +import warnings from dataclasses import dataclass from typing import Any @@ -61,8 +69,8 @@ @dataclass(frozen=True, init=True, slots=True) -class Primer3Parameters: - """Holds common primer design options that Primer3 uses to inform primer design. +class PrimerAndAmpliconParameters: + """Holds common primer and amplicon design options that Primer3 uses to inform primer design. 
Attributes: amplicon_sizes: the min, optimal, and max amplicon size @@ -105,8 +113,7 @@ def __post_init__(self) -> None: def to_input_tags(self) -> dict[Primer3InputTag, Any]: """Converts input params to Primer3InputTag to feed directly into Primer3.""" - mapped_dict = { - Primer3InputTag.PRIMER_NUM_RETURN: self.number_primers_return, + mapped_dict: dict[Primer3InputTag, Any] = { Primer3InputTag.PRIMER_PRODUCT_OPT_SIZE: self.amplicon_sizes.opt, Primer3InputTag.PRIMER_PRODUCT_SIZE_RANGE: ( f"{self.amplicon_sizes.min}-{self.amplicon_sizes.max}" @@ -128,7 +135,9 @@ def to_input_tags(self) -> dict[Primer3InputTag, Any]: Primer3InputTag.PRIMER_MAX_POLY_X: self.primer_max_polyX, Primer3InputTag.PRIMER_MAX_NS_ACCEPTED: self.primer_max_Ns, Primer3InputTag.PRIMER_LOWERCASE_MASKING: 1 if self.avoid_masked_bases else 0, + Primer3InputTag.PRIMER_NUM_RETURN: self.number_primers_return, } + return mapped_dict @property @@ -145,3 +154,71 @@ def max_primer_length(self) -> int: def min_primer_length(self) -> int: """Minimum primer length.""" return int(self.primer_sizes.min) + + +@dataclass(frozen=True, init=True, slots=True) +class Primer3Parameters(PrimerAndAmpliconParameters): + """A deprecated alias for `PrimerAndAmpliconParameters` intended to maintain backwards + compatibility with earlier releases of `prymer`.""" + + warnings.warn( + "The Primer3Parameters class was deprecated, use PrimerAndAmpliconParameters instead", + DeprecationWarning, + stacklevel=2, + ) + + +@dataclass(frozen=True, init=True, slots=True) +class ProbeParameters: + """Holds common probe design options that Primer3 uses to inform internal probe design. 
+ + Attributes: + probe_sizes: the min, optimal, and max probe size + probe_tms: the min, optimal, and max probe melting temperatures + probe_gcs: the min and max GC content for individual probes + number_probes_return: the number of probes to return + probe_max_dinuc_bases: the max number of bases in a dinucleotide run in a probe + probe_max_polyX: the max homopolymer length acceptable within a probe + probe_max_Ns: the max number of ambiguous bases acceptable within a probe + + The attributes that have default values specified take their default values from the + Primer3 manual. + + Please see the Primer3 manual for additional details: https://primer3.org/manual.html#globalTags + + + """ + + probe_sizes: MinOptMax[int] + probe_tms: MinOptMax[float] + probe_gcs: MinOptMax[float] + number_probes_return: int = 5 + probe_max_dinuc_bases: int = 4 + probe_max_polyX: int = 5 + probe_max_Ns: int = 0 + + def __post_init__(self) -> None: + if not isinstance(self.probe_sizes.min, int): + raise TypeError("Probe sizes must be integers") + if not isinstance(self.probe_tms.min, float) or not isinstance(self.probe_gcs.min, float): + raise TypeError("Probe melting temperatures and GC content must be floats") + if self.probe_max_dinuc_bases % 2 == 1: + raise ValueError("Max threshold for dinucleotide bases must be an even number of bases") + + def to_input_tags(self) -> dict[Primer3InputTag, Any]: + """Converts input params to Primer3InputTag to feed directly into Primer3.""" + mapped_dict: dict[Primer3InputTag, Any] = { + Primer3InputTag.PRIMER_INTERNAL_MIN_SIZE: self.probe_sizes.min, + Primer3InputTag.PRIMER_INTERNAL_OPT_SIZE: self.probe_sizes.opt, + Primer3InputTag.PRIMER_INTERNAL_MAX_SIZE: self.probe_sizes.max, + Primer3InputTag.PRIMER_INTERNAL_MIN_TM: self.probe_tms.min, + Primer3InputTag.PRIMER_INTERNAL_OPT_TM: self.probe_tms.opt, + Primer3InputTag.PRIMER_INTERNAL_MAX_TM: self.probe_tms.max, + Primer3InputTag.PRIMER_INTERNAL_MIN_GC: self.probe_gcs.min, + 
Primer3InputTag.PRIMER_INTERNAL_OPT_GC_PERCENT: self.probe_gcs.opt, + Primer3InputTag.PRIMER_INTERNAL_MAX_GC: self.probe_gcs.max, + Primer3InputTag.PRIMER_INTERNAL_MAX_POLY_X: self.probe_max_polyX, + Primer3InputTag.PRIMER_INTERNAL_MAX_NS_ACCEPTED: self.probe_max_Ns, + } + + return mapped_dict diff --git a/prymer/primer3/primer3_task.py b/prymer/primer3/primer3_task.py index e09a769..0de198d 100644 --- a/prymer/primer3/primer3_task.py +++ b/prymer/primer3/primer3_task.py @@ -8,7 +8,7 @@ The design task "type" dictates which type of primers to pick and informs the design region. These parameters are aligned to the correct Primer3 settings and fed directly into Primer3. -Three types of tasks are available: +Four types of tasks are available: 1. [`DesignPrimerPairsTask`][prymer.primer3.primer3_task.DesignPrimerPairsTask] -- task for designing _primer pairs_. @@ -16,6 +16,8 @@ for designing primers to the _left_ (5') of the design region on the top/positive strand. 3. [`DesignRightPrimersTask`][prymer.primer3.primer3_task.DesignRightPrimersTask] -- task for designing primers to the _right_ (3') of the design region on the bottom/negative strand. +4. 
[`PickHybProbeOnly`][prymer.primer3.primer3_task.PickHybProbeOnly] -- task for designing an + internal probe for hybridization-based technologies. The main purpose of these classes are to generate the [`Primer3InputTag`s][prymer.primer3.primer3_input_tag.Primer3InputTag]s required by @@ -103,15 +105,15 @@ from prymer.primer3.primer3_input_tag import Primer3InputTag Primer3TaskType: TypeAlias = Union[ - "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask" + "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask", "PickHybProbeOnly" ] """Type alias for all `Primer3Task`s, to enable exhaustiveness checking.""" @unique class TaskType(UppercaseStrEnum): - """Represents the type of design task, either design primer pairs, or individual primers - (left or right).""" + """Represents the type of design task: design primer pairs, individual primers + (left or right), or an internal hybridization probe.""" # Developer Note: the names of this enum are important, as they are used as-is for the # count_tag in `Primer3Task`.
@@ -119,6 +121,7 @@ class TaskType(UppercaseStrEnum): PAIR = auto() LEFT = auto() RIGHT = auto() + INTERNAL = auto() class Primer3Task(ABC): @@ -194,6 +197,14 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT f"{target.length}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignLeftPrimersTask(Primer3Task, task_type=TaskType.LEFT): """Stores task-specific characteristics for designing left primers.""" @@ -208,6 +219,14 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"1,{target.start - design_region.start}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignRightPrimersTask(Primer3Task, task_type=TaskType.RIGHT): """Stores task-specific characteristics for designing right primers""" @@ -223,3 +242,32 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 0, Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"{start},{length}", } + + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + + +class PickHybProbeOnly(Primer3Task, task_type=TaskType.INTERNAL): + """Stores task-specific characteristics for designing an internal hybridization probe.""" + + @classmethod + def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputTag, Any]: + return { + Primer3InputTag.PRIMER_TASK: "generic", + Primer3InputTag.PRIMER_PICK_LEFT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_RIGHT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 1, + } + + @property + def requires_primer_amplicon_params(self) -> bool: + return False + + @property + def 
requires_probe_params(self) -> bool: + return True diff --git a/prymer/primer3/primer3_weights.py b/prymer/primer3/primer3_weights.py index 0468e52..4668090 100644 --- a/prymer/primer3/primer3_weights.py +++ b/prymer/primer3/primer3_weights.py @@ -1,7 +1,10 @@ """ # Primer3Weights Class and Methods -The Primer3Weights class holds the penalty weights that Primer3 uses to score primer designs. +The PrimerAndAmpliconWeights class holds the penalty weights that Primer3 uses to score +primer designs. + +The ProbeWeights class holds the penalty weights that Primer3 uses to score internal probe designs. Primer3 considers the differential between user input (e.g., constraining the optimal primer size to be 18 bp) and the characteristics of a specific primer design (e.g., if the primer @@ -11,17 +14,14 @@ By modifying these weights, users can prioritize specific primer design characteristics. Each of the defaults provided here are derived from the Primer3 manual: https://primer3.org/manual.html -## Examples of interacting with the `Primer3Weights` class - +## Examples of interacting with the `PrimerAndAmpliconWeights` class -```python ->>> Primer3Weights(product_size_lt=1, product_size_gt=1) -Primer3Weights(product_size_lt=1, product_size_gt=1, ...) ->>> Primer3Weights(product_size_lt=5, product_size_gt=1) -Primer3Weights(product_size_lt=5, product_size_gt=1, ...) 
- -``` -""" +Example: +>>> PrimerAndAmpliconWeights() # default implementation +PrimerAndAmpliconWeights(product_size_lt=1.0, product_size_gt=1.0, product_tm_lt=0.0, product_tm_gt=0.0, primer_end_stability=0.25, primer_gc_lt=0.25, primer_gc_gt=0.25, primer_self_any=0.1, primer_self_end=0.1, primer_size_lt=0.5, primer_size_gt=0.1, primer_tm_lt=1.0, primer_tm_gt=1.0) +>>> PrimerAndAmpliconWeights(product_size_lt=5.0) +PrimerAndAmpliconWeights(product_size_lt=5.0, product_size_gt=1.0, product_tm_lt=0.0, product_tm_gt=0.0, primer_end_stability=0.25, primer_gc_lt=0.25, primer_gc_gt=0.25, primer_self_any=0.1, primer_self_end=0.1, primer_size_lt=0.5, primer_size_gt=0.1, primer_tm_lt=1.0, primer_tm_gt=1.0) +""" # noqa: E501 from dataclasses import dataclass from typing import Any @@ -30,23 +30,32 @@ @dataclass(frozen=True, init=True, slots=True) -class Primer3Weights: - """Holds the weights that Primer3 uses to adjust penalties - that originate from the designed primer(s). +class PrimerAndAmpliconWeights: + """Holds the primer-specific weights that Primer3 uses to adjust design penalties. The weights that Primer3 uses when a parameter is less than optimal are labeled with "_lt". "_gt" weights are penalties applied when a parameter is greater than optimal. + Some of these settings depart from the default settings enumerated in the Primer3 manual. 
Please see the Primer3 manual for additional details: https://primer3.org/manual.html#globalTags - Example: - >>> Primer3Weights() #default implementation - Primer3Weights(product_size_lt=1.0, product_size_gt=1.0, product_tm_lt=0.0, product_tm_gt=0.0, primer_end_stability=0.25, primer_gc_lt=0.25, primer_gc_gt=0.25, primer_self_any=0.1, primer_self_end=0.1, primer_size_lt=0.5, primer_size_gt=0.1, primer_tm_lt=1.0, primer_tm_gt=1.0) - - >>> Primer3Weights(product_size_lt=5.0) - Primer3Weights(product_size_lt=5.0, product_size_gt=1.0, product_tm_lt=0.0, product_tm_gt=0.0, primer_end_stability=0.25, primer_gc_lt=0.25, primer_gc_gt=0.25, primer_self_any=0.1, primer_self_end=0.1, primer_size_lt=0.5, primer_size_gt=0.1, primer_tm_lt=1.0, primer_tm_gt=1.0) - """ # noqa: E501 + Attributes: + product_size_lt: weight for products shorter than + `PrimerAndAmpliconParameters.amplicon_sizes.opt` + product_size_gt: weight for products longer than + `PrimerAndAmpliconParameters.amplicon_sizes.opt` + product_tm_lt: weight for products with a Tm lower than + `PrimerAndAmpliconParameters.amplicon_tms.opt` + product_tm_gt: weight for products with a Tm greater than + `PrimerAndAmpliconParameters.amplicon_tms.opt` + primer_end_stability: penalty for the calculated maximum stability + for the last five 3' bases of primer + primer_gc_lt: penalty for primers with GC percent lower than + `PrimerAndAmpliconParameters.primer_gcs.opt` + primer_gc_gt: penalty weight for primers with GC percent higher than + `PrimerAndAmpliconParameters.primer_gcs.opt` + """ product_size_lt: float = 1.0 product_size_gt: float = 1.0 @@ -80,3 +89,37 @@ def to_input_tags(self) -> dict[Primer3InputTag, Any]: Primer3InputTag.PRIMER_WT_TM_GT: self.primer_tm_gt, } return mapped_dict + + +@dataclass(frozen=True, init=True, slots=True) +class ProbeWeights: + """Holds the probe-specific weights that Primer3 uses to adjust design penalties. 
+ + Attributes: + probe_size_lt: penalty for probes shorter than `ProbeParameters.probe_sizes.opt` + probe_size_gt: penalty for probes longer than `ProbeParameters.probe_sizes.opt` + probe_tm_lt: penalty for probes with a Tm lower than `ProbeParameters.probe_tms.opt` + probe_tm_gt: penalty for probes with a Tm greater than `ProbeParameters.probe_tms.opt` + probe_gc_lt: penalty for probes with GC content lower than `ProbeParameters.probe_gcs.opt` + probe_gc_gt: penalty for probes with GC content greater than `ProbeParameters.probe_gcs.opt` + + """ + + probe_size_lt: float = 0.25 + probe_size_gt: float = 0.25 + probe_tm_lt: float = 1.0 + probe_tm_gt: float = 1.0 + probe_gc_lt: float = 0.5 + probe_gc_gt: float = 0.5 + + def to_input_tags(self) -> dict[Primer3InputTag, Any]: + """Maps weights to Primer3InputTag to feed directly into Primer3.""" + mapped_dict = { + Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_LT: self.probe_size_lt, + Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_GT: self.probe_size_gt, + Primer3InputTag.PRIMER_INTERNAL_WT_TM_LT: self.probe_tm_lt, + Primer3InputTag.PRIMER_INTERNAL_WT_TM_GT: self.probe_tm_gt, + Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_LT: self.probe_gc_lt, + Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_GT: self.probe_gc_gt, + } + return mapped_dict diff --git a/tests/api/test_primer.py b/tests/api/test_oligo.py similarity index 80% rename from tests/api/test_primer.py rename to tests/api/test_oligo.py index b7e4c1d..09e9984 100644 --- a/tests/api/test_primer.py +++ b/tests/api/test_oligo.py @@ -6,7 +6,7 @@ import pytest from fgpyo.fasta.sequence_dictionary import SequenceDictionary -from prymer.api.primer import Primer +from prymer.api.oligo import Oligo from prymer.api.span import Span from prymer.api.span import Strand @@ -24,14 +24,14 @@ ], ) def test_valid_primer_config(bases: str, tm: float, penalty: float, test_span: Span) -> None: - """Test Primer construction with valid input and ensure reported lengths match""" - test_primer = 
Primer(bases=bases, tm=tm, penalty=penalty, span=test_span) + """Test Oligo construction with valid input and ensure reported lengths match""" + test_primer = Oligo(bases=bases, tm=tm, penalty=penalty, span=test_span) assert test_primer.length == test_primer.span.length def test_span_returns_span(test_span: Span) -> None: """Test that the mapping property returns the span object.""" - test_primer = Primer( + test_primer = Oligo( bases="AGCTAGCTAA", tm=1.0, penalty=2.0, @@ -41,9 +41,9 @@ def test_span_returns_span(test_span: Span) -> None: def test_invalid_primer_config_raises() -> None: - """Test Primer construction with invalid input raises ValueError""" + """Test Oligo construction with invalid input raises ValueError""" with pytest.raises(ValueError, match="Bases must not be an empty string"): - Primer( + Oligo( bases="", tm=1.0, penalty=2.0, @@ -53,7 +53,7 @@ def test_invalid_primer_config_raises() -> None: with pytest.raises( ValueError, match="Conflicting lengths: span length=1000, sequence length=4" ): - Primer( + Oligo( bases="ACGT", tm=1.0, penalty=2.0, @@ -62,30 +62,30 @@ def test_invalid_primer_config_raises() -> None: @dataclass(init=True, frozen=True) -class PrimerTestCase: - """Test case for a `Primer`. +class OligoTestCase: + """Test case for an `Oligo`. Attributes: primer: the primer to test - gc_pct: the expected value for the `Primer.percent_gc_content` method - longest_hp: the expected value for the `Primer.longest_homopolymer` method - longest_dinuc: the expected value for the `Primer.longest_dinucleotide_run` method + gc_pct: the expected value for the `Oligo.percent_gc_content` method + longest_hp: the expected value for the `Oligo.longest_homopolymer` method + longest_dinuc: the expected value for the `Oligo.longest_dinucleotide_run` method str_fields: the fields, that when tab-delimited, are the expected string for the - `Primer.__str__` method. + `Oligo.__str__` method. 
""" - primer: Primer + primer: Oligo gc_pct: float longest_hp: int longest_dinuc: int str_fields: list[str] -def build_primer_test_cases() -> list[PrimerTestCase]: +def build_primer_test_cases() -> list[OligoTestCase]: """Builds a set of test cases for `Primer` methods.""" return [ - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ATAT", tm=1.0, penalty=2.0, @@ -96,8 +96,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=4, str_fields=["ATAT", "1.0", "2.0", "chr1:1-4:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ACGTAAAAAATT", tm=1.0, penalty=2.0, @@ -108,8 +108,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=6, str_fields=["ACGTAAAAAATT", "1.0", "2.0", "chr1:1-12:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ATAC", tm=1.0, penalty=2.0, @@ -120,8 +120,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=2, str_fields=["ATAC", "1.0", "2.0", "chr1:1-4:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ATATCC", tm=1.0, penalty=2.0, @@ -132,8 +132,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=4, str_fields=["ATATCC", "1.0", "2.0", "chr1:1-6:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="AGCT", tm=1.0, penalty=2.0, @@ -144,8 +144,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=2, str_fields=["AGCT", "1.0", "2.0", "chr1:1-4:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="GGGGG", tm=1.0, penalty=2.0, @@ -156,8 +156,8 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=4, str_fields=["GGGGG", "1.0", "2.0", "chr1:1-5:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ccgTATGC", tm=1.0, penalty=2.0, @@ -168,20 +168,20 @@ def build_primer_test_cases() -> list[PrimerTestCase]: longest_dinuc=2, 
str_fields=["ccgTATGC", "1.0", "2.0", "chr1:1-8:+"], ), - PrimerTestCase( - primer=Primer( - bases=None, + OligoTestCase( + primer=Oligo( + bases="ACGT", tm=1.0, penalty=2.0, span=Span(refname="chr1", start=1, end=4, strand=Strand.POSITIVE), ), - gc_pct=0.0, - longest_hp=0, + gc_pct=50.0, + longest_hp=1, longest_dinuc=0, - str_fields=["*", "1.0", "2.0", "chr1:1-4:+"], + str_fields=["ACGT", "1.0", "2.0", "chr1:1-4:+"], ), - PrimerTestCase( - primer=Primer( + OligoTestCase( + primer=Oligo( bases="ACACACTCTCTCT", tm=1.0, penalty=2.0, @@ -195,17 +195,17 @@ def build_primer_test_cases() -> list[PrimerTestCase]: ] -PRIMER_TEST_CASES: list[PrimerTestCase] = build_primer_test_cases() +OLIGO_TEST_CASES: list[OligoTestCase] = build_primer_test_cases() -@pytest.mark.parametrize("test_case", PRIMER_TEST_CASES) -def test_gc_content_calc(test_case: PrimerTestCase) -> None: +@pytest.mark.parametrize("test_case", OLIGO_TEST_CASES) +def test_gc_content_calc(test_case: OligoTestCase) -> None: """Test that percent GC content is calculated correctly.""" assert test_case.primer.percent_gc_content == pytest.approx(test_case.gc_pct) -@pytest.mark.parametrize("test_case", PRIMER_TEST_CASES) -def test_longest_homopolymer_len_calc(test_case: PrimerTestCase) -> None: +@pytest.mark.parametrize("test_case", OLIGO_TEST_CASES) +def test_longest_homopolymer_len_calc(test_case: OligoTestCase) -> None: """Test that longest homopolymer run is calculated correctly.""" assert test_case.primer.longest_hp_length() == test_case.longest_hp @@ -227,7 +227,7 @@ def test_with_tail(init: Optional[str], value: str, expected: Optional[str]) -> """Tests the `with_tail` method, setting the initial value to `init`, updating the tail using the `with_tail()` method with value `value`, and testing for the execpted value `expected`.""" - test_primer = Primer( + test_primer = Oligo( bases="AGCT", tm=1.0, penalty=2.0, @@ -244,17 +244,15 @@ def test_with_tail(init: Optional[str], value: str, expected: Optional[str]) -> [ 
("TTTT", "AGCT", "TTTTAGCT"), ("", "AGCT", "AGCT"), - ("AAA", None, "AAA"), (None, "AGCT", "AGCT"), ("NNNNNNNNNN", "AGCT", "NNNNNNNNNNAGCT"), ("GATTACA", "AGCT", "GATTACAAGCT"), - (None, None, None), ], ) def test_bases_with_tail( - tail_seq: Optional[str], bases: Optional[str], expected_result: Optional[str] + tail_seq: Optional[str], bases: str, expected_result: Optional[str] ) -> None: - test_primer = Primer( + test_primer = Oligo( bases=bases, tm=1.0, penalty=2.0, @@ -278,7 +276,7 @@ def test_bases_with_tail( ], ) def test_with_name(init: Optional[str], value: str, expected: Optional[str]) -> None: - test_primer = Primer( + test_primer = Oligo( bases="AGCT", tm=1.0, penalty=2.0, @@ -306,7 +304,7 @@ def test_id_generation( # For each scenario, generate a Primer object and assert that the generated ID # matches the expected ID. - primer = Primer( + primer = Oligo( name=name, span=test_span, bases="AAAAAAAAAA", @@ -319,7 +317,7 @@ def test_id_generation( def test_to_bed12_row(test_span: Span) -> None: """Asserts that the to_bed12_row method exists and returns the expected value.""" - primer = Primer( + primer = Oligo( name="test", span=test_span, bases="AAAAAAAAAA", @@ -345,8 +343,8 @@ def test_to_bed12_row(test_span: Span) -> None: ) -@pytest.mark.parametrize("test_case", PRIMER_TEST_CASES) -def test_untailed_length(test_case: PrimerTestCase) -> None: +@pytest.mark.parametrize("test_case", OLIGO_TEST_CASES) +def test_untailed_length(test_case: OligoTestCase) -> None: assert test_case.primer.length == test_case.primer.untailed_length() @@ -361,7 +359,7 @@ def test_untailed_length(test_case: PrimerTestCase) -> None: ], ) def test_tailed_length(tail_seq: str, expected_length: int) -> None: - test_primer = Primer( + test_primer = Oligo( bases="AGCT", tm=1.0, penalty=2.0, @@ -371,8 +369,8 @@ def test_tailed_length(tail_seq: str, expected_length: int) -> None: assert test_primer.tailed_length() == expected_length -@pytest.mark.parametrize("test_case", 
PRIMER_TEST_CASES) -def test_primer_str(test_case: PrimerTestCase) -> None: +@pytest.mark.parametrize("test_case", OLIGO_TEST_CASES) +def test_primer_str(test_case: OligoTestCase) -> None: """Test whether the __str__ method returns the expected string representation""" # For each of the primer objects supplied, look up the expected set of string values & join @@ -381,16 +379,16 @@ def test_primer_str(test_case: PrimerTestCase) -> None: def test_primer_serialization_roundtrip() -> None: - input_primers: list[Primer] = [test_case.primer for test_case in PRIMER_TEST_CASES] + input_primers: list[Oligo] = [test_case.primer for test_case in OLIGO_TEST_CASES] with NamedTemporaryFile(suffix=".txt", mode="r", delete=True) as write_file: path = Path(write_file.name) # write them to a file - Primer.write(path, *input_primers) + Oligo.write(path, *input_primers) # read them back in again - output_primers = list(Primer.read(path=path)) + output_primers = list(Oligo.read(path=path)) # make sure they're the same! 
assert input_primers == output_primers @@ -401,13 +399,13 @@ def test_primer_serialization_roundtrip() -> None: [ # same primer ( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, span=Span(refname="chr1", start=100, end=106, strand=Strand.POSITIVE), ), - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -417,13 +415,13 @@ def test_primer_serialization_roundtrip() -> None: ), # different primer (chromosome) ( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, span=Span(refname="chr1", start=100, end=106, strand=Strand.POSITIVE), ), - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -434,7 +432,7 @@ def test_primer_serialization_roundtrip() -> None: ], ) def test_primer_compare( - this: Primer, that: Primer, expected: int, seq_dict: SequenceDictionary + this: Oligo, that: Oligo, expected: int, seq_dict: SequenceDictionary ) -> None: - assert expected == Primer.compare(this=this, that=that, seq_dict=seq_dict) - assert -expected == Primer.compare(this=that, that=this, seq_dict=seq_dict) + assert expected == Oligo.compare(this=this, that=that, seq_dict=seq_dict) + assert -expected == Oligo.compare(this=that, that=this, seq_dict=seq_dict) diff --git a/tests/api/test_primer_like.py b/tests/api/test_oligo_like.py similarity index 83% rename from tests/api/test_primer_like.py rename to tests/api/test_oligo_like.py index d83f356..d85c364 100644 --- a/tests/api/test_primer_like.py +++ b/tests/api/test_oligo_like.py @@ -3,12 +3,12 @@ import pytest -from prymer.api.primer_like import PrimerLike +from prymer.api.oligo_like import OligoLike from prymer.api.span import Span @dataclass(frozen=True, init=True, kw_only=True, slots=True) -class PrimerLikeTester(PrimerLike): +class OligoLikeTester(OligoLike): """A simple class that inherits from PrimerLike for testing purposes.""" span: Span @@ -31,13 +31,13 @@ def test_id_generation( expected_id: str, ) -> None: """Asserts that the id field is correctly generated based on the name and name_prefix 
fields.""" - test_primer = PrimerLikeTester(name=name, bases="AATCGATCCA", span=test_span) + test_primer = OligoLikeTester(name=name, bases="AATCGATCCA", span=test_span) assert test_primer.id == expected_id def test_to_bed12_row_exists(test_span: Span) -> None: """Asserts that the to_bed12_row method exists and returns the expected value.""" - test_primer = PrimerLikeTester( + test_primer = OligoLikeTester( name="test", bases="AATCGATCCA", span=test_span, diff --git a/tests/api/test_picking.py b/tests/api/test_picking.py index 3018155..8b4ad4d 100644 --- a/tests/api/test_picking.py +++ b/tests/api/test_picking.py @@ -12,7 +12,7 @@ from prymer.api import FilteringParams from prymer.api import MinOptMax -from prymer.api import Primer +from prymer.api import Oligo from prymer.api import PrimerPair from prymer.api import Span from prymer.api import Strand @@ -98,12 +98,12 @@ def test_seq_penalty( def build_primer_pair(amplicon_length: int, tm: float) -> PrimerPair: - left_primer = Primer( + left_primer = Oligo( tm=0, penalty=0, span=Span(refname="1", start=1, end=max(1, amplicon_length // 4)), ) - right_primer = Primer( + right_primer = Oligo( tm=0, penalty=0, span=Span( @@ -246,8 +246,8 @@ def test_is_acceptable_primer_pair(pair: PrimerPair, expected: bool) -> None: @dataclass(init=True, frozen=True) class ScoreInput: - left: Primer - right: Primer + left: Oligo + right: Oligo target: Span amplicon: Span amplicon_sequence: str @@ -268,8 +268,8 @@ def _score_input() -> ScoreInput: amplicon = Span(refname="1", start=l_mapping.end, end=r_mapping.start) target = Span(refname="1", start=l_mapping.end + 10, end=r_mapping.start - 20) return ScoreInput( - left=Primer(penalty=0, tm=0, span=l_mapping), - right=Primer(penalty=0, tm=0, span=r_mapping), + left=Oligo(penalty=0, tm=0, span=l_mapping), + right=Oligo(penalty=0, tm=0, span=r_mapping), target=target, amplicon=amplicon, amplicon_sequence="A" * amplicon.length, @@ -321,8 +321,8 @@ def test_zero_score( ) -> None: assert ( 
picking_score( - left=score_input.left, - right=score_input.right, + left_primer=score_input.left, + right_primer=score_input.right, target=score_input.target, amplicon=score_input.amplicon, amplicon_seq_or_tm=score_input.amplicon_sequence, @@ -339,8 +339,8 @@ def test_zero_score_with_amplicon_tm( amplicon_tm: float = calculate_long_seq_tm(score_input.amplicon_sequence) assert ( picking_score( - left=score_input.left, - right=score_input.right, + left_primer=score_input.left, + right_primer=score_input.right, target=score_input.target, amplicon=score_input.amplicon, amplicon_seq_or_tm=amplicon_tm, @@ -398,8 +398,8 @@ def test_score( params = replace(zero_score_filtering_params, **kwargs) assert ( picking_score( - left=score_input.left, - right=score_input.right, + left_primer=score_input.left, + right_primer=score_input.right, target=score_input.target, amplicon=score_input.amplicon, amplicon_seq_or_tm=score_input.amplicon_sequence, @@ -421,8 +421,8 @@ def test_score_primer_primer_penalties( left = replace(score_input.left, penalty=left_penalty) right = replace(score_input.right, penalty=right_penalty) assert picking_score( - left=left, - right=right, + left_primer=left, + right_primer=right, target=score_input.target, amplicon=score_input.amplicon, amplicon_seq_or_tm=score_input.amplicon_sequence, @@ -437,43 +437,43 @@ def test_primer_pairs( target = Span(refname="chr1", start=100, end=250) # tile some left primers - lefts = [] - rights = [] + left_primers = [] + right_primers = [] for offset in range(0, 50, 5): # left left_end = target.start - offset left_start = left_end - primer_length - left = Primer( + left = Oligo( penalty=-offset, tm=0, span=Span(refname=target.refname, start=left_start, end=left_end) ) - lefts.append(left) + left_primers.append(left) # right right_start = target.end + offset right_end = right_start + primer_length - right = Primer( + right = Oligo( penalty=offset, tm=0, span=Span(refname=target.refname, start=right_start, end=right_end), ) 
- rights.append(right) + right_primers.append(right) with pysam.FastaFile(f"{genome_ref}") as fasta: primer_pairs = build_primer_pairs( - lefts=lefts, - rights=rights, + left_primers=left_primers, + right_primers=right_primers, target=target, params=zero_score_filtering_params, fasta=fasta, ) - assert len(primer_pairs) == len(lefts) * len(rights) + assert len(primer_pairs) == len(left_primers) * len(right_primers) last_penalty = primer_pairs[0].penalty - primer_counter: Counter[Primer] = Counter() + primer_counter: Counter[Oligo] = Counter() for pp in primer_pairs: - assert pp.left_primer in lefts - assert pp.right_primer in rights + assert pp.left_primer in left_primers + assert pp.right_primer in right_primers primer_counter[pp.left_primer] += 1 primer_counter[pp.right_primer] += 1 - # by design, only the left/right penalties contribute to the primer pair penlaty + # by design, only the left/right penalties contribute to the primer pair penalty assert pp.penalty == pp.left_primer.penalty + pp.right_primer.penalty # at least check that the amplicon Tm is non-zero assert pp.amplicon_tm > 0 @@ -484,7 +484,9 @@ def test_primer_pairs( last_penalty = pp.penalty # make sure we see all the primers the same # of times! 
items = primer_counter.items() - assert len(set(i[0] for i in items)) == len(lefts) + len(rights) # same primers + assert len(set(i[0] for i in items)) == len(left_primers) + len( + right_primers + ) # same primers assert len(set(i[1] for i in items)) == 1 # same counts for each primer @@ -498,8 +500,8 @@ def test_primer_pairs_except_different_references( # change the reference for the right primer right = replace(score_input.right, span=Span(refname="Y", start=195, end=225)) build_primer_pairs( - lefts=[score_input.left], - rights=[right], + left_primers=[score_input.left], + right_primers=[right], target=score_input.target, params=zero_score_filtering_params, fasta=fasta, @@ -558,8 +560,8 @@ def _primer_pair( right_bases = reverse_complement(right_bases) fasta.close() return PrimerPair( - left_primer=Primer(bases=left_bases, penalty=0, tm=0, span=left_span), - right_primer=Primer(bases=right_bases, penalty=0, tm=0, span=right_span), + left_primer=Oligo(bases=left_bases, penalty=0, tm=0, span=left_span), + right_primer=Oligo(bases=right_bases, penalty=0, tm=0, span=right_span), amplicon_tm=amplicon_tm, penalty=penalty, ) @@ -894,8 +896,8 @@ def test_and_pick_primer_pairs( with pysam.FastaFile(f"{picking_ref}") as fasta: designed_primer_pairs = build_and_pick_primer_pairs( - lefts=[pp.left_primer], - rights=[pp.right_primer], + left_primers=[pp.left_primer], + right_primers=[pp.right_primer], target=target, num_primers=1, min_difference=1, diff --git a/tests/api/test_primer_pair.py b/tests/api/test_primer_pair.py index f14a040..1cd94cf 100644 --- a/tests/api/test_primer_pair.py +++ b/tests/api/test_primer_pair.py @@ -6,7 +6,7 @@ from fgpyo.fasta.sequence_dictionary import SequenceDictionary from fgpyo.sequence import reverse_complement -from prymer.api.primer import Primer +from prymer.api.oligo import Oligo from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span from prymer.api.span import Strand @@ -35,13 +35,13 @@ class PrimerPairTestCase: 
str_fields: list[str] @staticmethod - def primer_pair_from_left_primer(left: Primer, right_offset: int = 50) -> PrimerPair: + def primer_pair_from_left_primer(left: Oligo, right_offset: int = 50) -> PrimerPair: """ Generates a PrimerPair for use in unit tests. Will first generate a right primer using a standard formula, and then calculates the PrimerPair fields based on the left & right primers. """ - right: Primer = PrimerPairTestCase.right_primer_from_left_primer( + right: Oligo = PrimerPairTestCase.right_primer_from_left_primer( left=left, right_offset=right_offset ) @@ -63,7 +63,7 @@ def primer_pair_from_left_primer(left: Primer, right_offset: int = 50) -> Primer ) @staticmethod - def right_primer_from_left_primer(left: Primer, right_offset: int) -> Primer: + def right_primer_from_left_primer(left: Oligo, right_offset: int) -> Oligo: """ Provides a standard conversion for a left primer to a right primer for use in tests of PrimerPair. @@ -110,7 +110,7 @@ def build_primer_pair_test_cases() -> list[PrimerPairTestCase]: return [ PrimerPairTestCase( primer_pair=PrimerPairTestCase.primer_pair_from_left_primer( - left=Primer( + left=Oligo( bases="GATTACA", tm=12.34, penalty=56.78, @@ -150,7 +150,7 @@ def build_primer_pair_test_cases() -> list[PrimerPairTestCase]: ), PrimerPairTestCase( primer_pair=PrimerPairTestCase.primer_pair_from_left_primer( - left=Primer( + left=Oligo( bases="TGTAATC", tm=87.65, penalty=43.21, @@ -190,7 +190,7 @@ def build_primer_pair_test_cases() -> list[PrimerPairTestCase]: ), PrimerPairTestCase( primer_pair=PrimerPairTestCase.primer_pair_from_left_primer( - left=Primer( + left=Oligo( bases=None, tm=12.34, penalty=56.78, @@ -230,7 +230,7 @@ def build_primer_pair_test_cases() -> list[PrimerPairTestCase]: ), PrimerPairTestCase( primer_pair=PrimerPairTestCase.primer_pair_from_left_primer( - left=Primer( + left=Oligo( bases="GGGGGGG", tm=12.34, penalty=56.78, @@ -273,13 +273,13 @@ def build_primer_pair_test_cases() -> list[PrimerPairTestCase]: # 
overlap PrimerPairTestCase( primer_pair=PrimerPair( - left_primer=Primer( + left_primer=Oligo( bases="GATTACA", tm=12.34, penalty=56.78, span=Span(refname="chr1", start=1, end=7, strand=Strand.POSITIVE), ), - right_primer=Primer( + right_primer=Oligo( bases="TGTAATC", tm=87.65, penalty=43.21, @@ -471,7 +471,7 @@ def test_iter() -> None: # same primer ( PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -479,7 +479,7 @@ def test_iter() -> None: ) ), PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -492,7 +492,7 @@ def test_iter() -> None: # different primer (chromosome) ( PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -500,7 +500,7 @@ def test_iter() -> None: ) ), PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, @@ -513,7 +513,7 @@ def test_iter() -> None: # same primer when by amplicon, but different by primer ( PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTAC", tm=1.0, penalty=2.0, @@ -522,7 +522,7 @@ def test_iter() -> None: right_offset=51, ), PrimerPairTestCase.primer_pair_from_left_primer( - Primer( + Oligo( bases="GATTACA", tm=1.0, penalty=2.0, diff --git a/tests/offtarget/test_offtarget.py b/tests/offtarget/test_offtarget.py index aabd2c2..153880a 100644 --- a/tests/offtarget/test_offtarget.py +++ b/tests/offtarget/test_offtarget.py @@ -4,14 +4,14 @@ import pytest from fgpyo.sam import Cigar -from prymer.api import Primer -from prymer.api import PrimerPair -from prymer.api import Span -from prymer.api import Strand -from prymer.offtarget import BwaHit -from prymer.offtarget import BwaResult -from prymer.offtarget import OffTargetDetector -from prymer.offtarget import OffTargetResult +from prymer.api.oligo import Oligo +from prymer.api.primer_pair import PrimerPair +from prymer.api.span import 
Span +from prymer.api.span import Strand +from prymer.offtarget.bwa import BwaHit +from prymer.offtarget.bwa import BwaResult +from prymer.offtarget.offtarget_detector import OffTargetDetector +from prymer.offtarget.offtarget_detector import OffTargetResult def _build_detector( @@ -43,13 +43,13 @@ def _build_detector( def multimap_primer_pair() -> PrimerPair: """A primer pair that maps to many locations (204 for each primer, 856 as a pair)""" return PrimerPair( - left_primer=Primer( + left_primer=Oligo( bases="AAAAA", tm=37, penalty=0, span=Span("chr1", start=67, end=71), ), - right_primer=Primer( + right_primer=Oligo( bases="TTTTT", tm=37, penalty=0, @@ -137,7 +137,7 @@ def test_check_too_many_primer_pair_hits( @pytest.mark.parametrize("cache_results", [True, False]) def test_mappings_of(ref_fasta: Path, cache_results: bool) -> None: with _build_detector(ref_fasta=ref_fasta, cache_results=cache_results) as detector: - p1: Primer = Primer( + p1: Oligo = Oligo( tm=37, penalty=0, span=Span(refname="chr1", start=1, end=30), @@ -148,7 +148,7 @@ def test_mappings_of(ref_fasta: Path, cache_results: bool) -> None: refname="chr1", start=1, negative=False, cigar=Cigar.from_cigarstring("30M"), edits=0 ) - p2: Primer = Primer( + p2: Oligo = Oligo( tm=37, penalty=0, span=Span(refname="chr1", start=61, end=93, strand=Strand.NEGATIVE), @@ -239,7 +239,7 @@ def test_generic_filter(ref_fasta: Path) -> None: """ @dataclass(frozen=True) - class CustomPrimer(Primer): + class CustomPrimer(Oligo): foo: str = "foo" # fmt: off diff --git a/tests/primer3/test_primer3.py b/tests/primer3/test_primer3.py index de3747e..091af50 100644 --- a/tests/primer3/test_primer3.py +++ b/tests/primer3/test_primer3.py @@ -6,20 +6,23 @@ import pytest from fgpyo.sequence import reverse_complement -from prymer.api import MinOptMax -from prymer.api import Primer -from prymer.api import PrimerPair -from prymer.api import Span -from prymer.api import Strand -from prymer.api import cached -from prymer.primer3 
import DesignLeftPrimersTask -from prymer.primer3 import DesignPrimerPairsTask -from prymer.primer3 import DesignRightPrimersTask -from prymer.primer3 import Primer3 -from prymer.primer3 import Primer3Failure -from prymer.primer3 import Primer3Input -from prymer.primer3 import Primer3Parameters -from prymer.primer3 import Primer3Result +from prymer.api.minoptmax import MinOptMax +from prymer.api.oligo import Oligo +from prymer.api.primer_pair import PrimerPair +from prymer.api.span import Span +from prymer.api.span import Strand +from prymer.api.variant_lookup import cached +from prymer.primer3.primer3 import Primer3 +from prymer.primer3.primer3 import Primer3Failure +from prymer.primer3.primer3 import Primer3Result +from prymer.primer3.primer3 import _has_acceptable_dinuc_run +from prymer.primer3.primer3_input import Primer3Input +from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from prymer.primer3.primer3_parameters import ProbeParameters +from prymer.primer3.primer3_task import DesignLeftPrimersTask +from prymer.primer3.primer3_task import DesignPrimerPairsTask +from prymer.primer3.primer3_task import DesignRightPrimersTask +from prymer.primer3.primer3_task import PickHybProbeOnly @pytest.fixture(scope="session") @@ -33,8 +36,8 @@ def vcf_path() -> Path: @pytest.fixture -def single_primer_params() -> Primer3Parameters: - return Primer3Parameters( +def single_primer_params() -> PrimerAndAmpliconParameters: + return PrimerAndAmpliconParameters( amplicon_sizes=MinOptMax(min=100, max=250, opt=200), amplicon_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), primer_sizes=MinOptMax(min=29, max=31, opt=30), @@ -46,8 +49,8 @@ def single_primer_params() -> Primer3Parameters: @pytest.fixture -def pair_primer_params() -> Primer3Parameters: - return Primer3Parameters( +def pair_primer_params() -> PrimerAndAmpliconParameters: + return PrimerAndAmpliconParameters( amplicon_sizes=MinOptMax(min=100, max=200, opt=150), amplicon_tms=MinOptMax(min=55.0, 
max=100.0, opt=72.5), primer_sizes=MinOptMax(min=20, max=30, opt=25), @@ -59,8 +62,8 @@ def pair_primer_params() -> Primer3Parameters: @pytest.fixture -def design_fail_gen_primer3_params() -> Primer3Parameters: - return Primer3Parameters( +def design_fail_gen_primer3_params() -> PrimerAndAmpliconParameters: + return PrimerAndAmpliconParameters( amplicon_sizes=MinOptMax(min=200, max=300, opt=250), amplicon_tms=MinOptMax(min=65.0, max=75.0, opt=74.0), primer_sizes=MinOptMax(min=24, max=27, opt=26), @@ -69,8 +72,17 @@ def design_fail_gen_primer3_params() -> Primer3Parameters: ) -def make_primer(bases: str, refname: str, start: int, end: int) -> Primer: - return Primer( +@pytest.fixture +def valid_probe_params() -> ProbeParameters: + return ProbeParameters( + probe_sizes=MinOptMax(min=18, opt=22, max=30), + probe_tms=MinOptMax(min=65.0, opt=70.0, max=75.0), + probe_gcs=MinOptMax(min=45.0, opt=55.0, max=60.0), + ) + + +def make_primer(bases: str, refname: str, start: int, end: int) -> Oligo: + return Oligo( bases=bases, tm=55, penalty=5, @@ -78,7 +90,7 @@ def make_primer(bases: str, refname: str, start: int, end: int) -> Primer: ) -def make_primer_pair(left: Primer, right: Primer, genome_ref: Path) -> PrimerPair: +def make_primer_pair(left: Oligo, right: Oligo, genome_ref: Path) -> PrimerPair: ref = pysam.FastaFile(str(genome_ref)) # pysam expects a str instead of Path amplicon_span = Span( refname=left.span.refname, @@ -98,8 +110,8 @@ def make_primer_pair(left: Primer, right: Primer, genome_ref: Path) -> PrimerPai @pytest.fixture(scope="session") -def valid_left_primers() -> list[Primer]: - lefts: list[Primer] = [ +def valid_left_primers() -> list[Oligo]: + lefts: list[Oligo] = [ make_primer(bases="ACATTTGCTTCTGACACAAC", refname="chr1", start=1, end=20), make_primer(bases="TGTGTTCACTAGCAACCTCA", refname="chr1", start=21, end=40), ] @@ -107,8 +119,8 @@ def valid_left_primers() -> list[Primer]: @pytest.fixture(scope="session") -def valid_right_primers() -> list[Primer]: 
- rights: list[Primer] = [ +def valid_right_primers() -> list[Oligo]: + rights: list[Oligo] = [ make_primer( bases=reverse_complement("TCAAGGTTACAAGACAGGTT"), refname="chr1", start=150, end=169 ), @@ -121,7 +133,7 @@ def valid_right_primers() -> list[Primer]: @pytest.fixture(scope="session") def valid_primer_pairs( - valid_left_primers: list[Primer], valid_right_primers: list[Primer], genome_ref: Path + valid_left_primers: list[Oligo], valid_right_primers: list[Oligo], genome_ref: Path ) -> list[PrimerPair]: primer_pairs = [ make_primer_pair(left=left, right=right, genome_ref=genome_ref) @@ -130,11 +142,11 @@ def valid_primer_pairs( return primer_pairs -def test_design_primers_raises( +def test_design_raises( genome_ref: Path, - single_primer_params: Primer3Parameters, + single_primer_params: PrimerAndAmpliconParameters, ) -> None: - """Test that design_primers() raises when given an invalid argument.""" + """Test that design() raises when given an invalid argument.""" target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) @@ -143,30 +155,32 @@ def test_design_primers_raises( number_primers_return="invalid", # type: ignore ) invalid_design_input = Primer3Input( - target=target, params=illegal_primer3_params, task=DesignLeftPrimersTask() + target=target, + primer_and_amplicon_params=illegal_primer3_params, + task=DesignLeftPrimersTask(), ) with pytest.raises(ValueError, match="Primer3 failed"): - Primer3(genome_fasta=genome_ref).design_primers(design_input=invalid_design_input) + Primer3(genome_fasta=genome_ref).design(design_input=invalid_design_input) # TODO: add other Value Errors def test_left_primer_valid_designs( genome_ref: Path, - single_primer_params: Primer3Parameters, + single_primer_params: PrimerAndAmpliconParameters, ) -> None: """Test that left primer designs are within the specified design specifications.""" target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) design_input = Primer3Input( target=target, - 
params=single_primer_params, + primer_and_amplicon_params=single_primer_params, task=DesignLeftPrimersTask(), ) with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - left_result = designer.design_primers(design_input=design_input) - designed_lefts: list[Primer] = left_result.primers() - assert all(isinstance(design, Primer) for design in designed_lefts) + left_result = designer.design(design_input=design_input) + designed_lefts: list[Oligo] = left_result.primers() + assert all(isinstance(design, Oligo) for design in designed_lefts) for actual_design in designed_lefts: assert ( actual_design.longest_dinucleotide_run_length() @@ -200,20 +214,20 @@ def test_left_primer_valid_designs( def test_right_primer_valid_designs( genome_ref: Path, - single_primer_params: Primer3Parameters, + single_primer_params: PrimerAndAmpliconParameters, ) -> None: """Test that right primer designs are within the specified design specifications.""" target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) design_input = Primer3Input( target=target, - params=single_primer_params, + primer_and_amplicon_params=single_primer_params, task=DesignRightPrimersTask(), ) with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - right_result: Primer3Result = designer.design_primers(design_input=design_input) - designed_rights: list[Primer] = right_result.primers() - assert all(isinstance(design, Primer) for design in designed_rights) + right_result: Primer3Result = designer.design(design_input=design_input) + designed_rights: list[Oligo] = right_result.primers() + assert all(isinstance(design, Oligo) for design in designed_rights) for actual_design in designed_rights: assert ( @@ -247,17 +261,19 @@ def test_right_primer_valid_designs( assert designer.is_alive -def test_primer_pair_design(genome_ref: Path, pair_primer_params: Primer3Parameters) -> None: 
+def test_primer_pair_design( + genome_ref: Path, pair_primer_params: PrimerAndAmpliconParameters +) -> None: """Test that paired primer design produces left and right primers within design constraints. Additionally, assert that `PrimerPair.amplicon_sequence()` matches reference sequence.""" target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) design_input = Primer3Input( target=target, - params=pair_primer_params, + primer_and_amplicon_params=pair_primer_params, task=DesignPrimerPairsTask(), ) with Primer3(genome_fasta=genome_ref) as designer: - pair_result: Primer3Result = designer.design_primers(design_input=design_input) + pair_result: Primer3Result = designer.design(design_input=design_input) designed_pairs: list[PrimerPair] = pair_result.primer_pairs() assert all(isinstance(design, PrimerPair) for design in designed_pairs) lefts = [primer_pair.left_primer for primer_pair in designed_pairs] @@ -328,7 +344,9 @@ def test_primer_pair_design(genome_ref: Path, pair_primer_params: Primer3Paramet assert pair_design.right_primer.bases.upper() == right_from_ref.upper() -def test_fasta_close_valid(genome_ref: Path, single_primer_params: Primer3Parameters) -> None: +def test_fasta_close_valid( + genome_ref: Path, single_primer_params: PrimerAndAmpliconParameters +) -> None: """Test that fasta file is closed when underlying subprocess is terminated.""" designer = Primer3(genome_fasta=genome_ref) assert designer._fasta.is_open() @@ -338,14 +356,14 @@ def test_fasta_close_valid(genome_ref: Path, single_primer_params: Primer3Parame target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) design_input = Primer3Input( target=target, - params=single_primer_params, + primer_and_amplicon_params=single_primer_params, task=DesignLeftPrimersTask(), ) with pytest.raises( RuntimeError, match="Error, trying to use a subprocess that has already been terminated" ): - designer.design_primers(design_input=design_input) + 
designer.design(design_input=design_input) @pytest.mark.parametrize( @@ -396,9 +414,11 @@ def test_variant_lookup( def test_screen_pair_results( - valid_primer_pairs: list[PrimerPair], genome_ref: Path, pair_primer_params: Primer3Parameters + valid_primer_pairs: list[PrimerPair], + genome_ref: Path, + pair_primer_params: PrimerAndAmpliconParameters, ) -> None: - """Test that `_is_valid_primer()` and `_screen_pair_results()` use + """Test that `_has_acceptable_dinuc_run()` and `_screen_pair_results()` use `Primer3Parameters.primer_max_dinuc_bases` to disqualify primers when applicable. Create 2 sets of design input, the only difference being the length of allowable dinucleotide run in a primer (high_threshold = 6, low_threshold = 2). @@ -407,14 +427,14 @@ def test_screen_pair_results( target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) design_input = Primer3Input( target=target, - params=pair_primer_params, + primer_and_amplicon_params=pair_primer_params, task=DesignPrimerPairsTask(), ) lower_dinuc_thresh = replace(pair_primer_params, primer_max_dinuc_bases=2) # lower from 6 to 2 altered_design_input = Primer3Input( target=target, - params=lower_dinuc_thresh, + primer_and_amplicon_params=lower_dinuc_thresh, task=DesignPrimerPairsTask(), ) with Primer3(genome_fasta=genome_ref) as designer: @@ -423,20 +443,21 @@ def test_screen_pair_results( design_input=design_input, designed_primer_pairs=valid_primer_pairs ) assert len(base_dinuc_pair_failures) == 0 + assert design_input.primer_and_amplicon_params is not None for primer_pair in base_primer_pair_designs: assert ( primer_pair.left_primer.longest_dinucleotide_run_length() - <= design_input.params.primer_max_dinuc_bases + <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases ) assert ( primer_pair.right_primer.longest_dinucleotide_run_length() - <= design_input.params.primer_max_dinuc_bases + <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases ) - assert 
Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.left_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.left_primer ) - assert Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.right_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.right_primer ) # 1 primer from every pair will fail lowered dinuc threshold of 2 @@ -444,16 +465,19 @@ def test_screen_pair_results( altered_designs, altered_dinuc_failures = designer._screen_pair_results( design_input=altered_design_input, designed_primer_pairs=valid_primer_pairs ) + assert altered_design_input.primer_and_amplicon_params is not None assert [ design.longest_dinucleotide_run_length() - > altered_design_input.params.primer_max_dinuc_bases + > altered_design_input.primer_and_amplicon_params.primer_max_dinuc_bases for design in altered_dinuc_failures ] assert len(altered_designs) == 0 def test_build_failures( - valid_primer_pairs: list[PrimerPair], genome_ref: Path, pair_primer_params: Primer3Parameters + valid_primer_pairs: list[PrimerPair], + genome_ref: Path, + pair_primer_params: PrimerAndAmpliconParameters, ) -> None: """Test that `build_failures()` parses Primer3 `failure_strings` correctly and includes failures related to long dinucleotide runs.""" @@ -462,7 +486,7 @@ def test_build_failures( low_dinuc_thresh = replace(pair_primer_params, primer_max_dinuc_bases=2) # lower from 6 to 2 altered_design_input = Primer3Input( target=target, - params=low_dinuc_thresh, + primer_and_amplicon_params=low_dinuc_thresh, task=DesignPrimerPairsTask(), ) designer = Primer3(genome_fasta=genome_ref) @@ -492,7 +516,7 @@ def test_build_failures( def test_build_failures_debugs( valid_primer_pairs: list[PrimerPair], genome_ref: Path, - pair_primer_params: Primer3Parameters, + pair_primer_params: PrimerAndAmpliconParameters, caplog: pytest.LogCaptureFixture, ) -> None: """Test that we 
log a debug message in the event of an unknown Primer3Failure reason.""" @@ -501,7 +525,7 @@ def test_build_failures_debugs( design_input = Primer3Input( target=target, - params=pair_primer_params, + primer_and_amplicon_params=pair_primer_params, task=DesignPrimerPairsTask(), ) designer = Primer3(genome_fasta=genome_ref) @@ -515,45 +539,45 @@ def test_build_failures_debugs( def test_primer3_result_primers_ok( - valid_left_primers: list[Primer], valid_right_primers: list[Primer] + valid_left_primers: list[Oligo], valid_right_primers: list[Oligo] ) -> None: - primers: list[Primer] = valid_left_primers + valid_right_primers - assert primers == Primer3Result(filtered_designs=primers, failures=[]).primers() + primers: list[Oligo] = valid_left_primers + valid_right_primers + assert primers == Primer3Result(designs=primers, failures=[]).primers() def test_primer3_result_primers_exception(valid_primer_pairs: list[PrimerPair]) -> None: - result = Primer3Result(filtered_designs=valid_primer_pairs, failures=[]) + result = Primer3Result(designs=valid_primer_pairs, failures=[]) with pytest.raises(ValueError, match="Cannot call `primers` on `PrimerPair` results"): result.primers() def test_primer3_result_as_primer_result_exception(valid_primer_pairs: list[PrimerPair]) -> None: - result = Primer3Result(filtered_designs=valid_primer_pairs, failures=[]) + result = Primer3Result(designs=valid_primer_pairs, failures=[]) with pytest.raises(ValueError, match="Cannot call `as_primer_result` on `PrimerPair` results"): result.as_primer_result() def test_primer3_result_primer_pairs_ok(valid_primer_pairs: list[PrimerPair]) -> None: assert valid_primer_pairs == ( - Primer3Result(filtered_designs=valid_primer_pairs, failures=[]).primer_pairs() + Primer3Result(designs=valid_primer_pairs, failures=[]).primer_pairs() ) def test_primer3_result_primer_pairs_exception( - valid_left_primers: list[Primer], valid_right_primers: list[Primer] + valid_left_primers: list[Oligo], valid_right_primers: 
list[Oligo] ) -> None: - primers: list[Primer] = valid_left_primers + valid_right_primers - result = Primer3Result(filtered_designs=primers, failures=[]) - with pytest.raises(ValueError, match="Cannot call `primer_pairs` on `Primer` results"): + primers: list[Oligo] = valid_left_primers + valid_right_primers + result = Primer3Result(designs=primers, failures=[]) + with pytest.raises(ValueError, match="Cannot call `primer_pairs` on `Oligo` results"): result.primer_pairs() def test_primer3_result_as_primer_pair_result_exception( - valid_left_primers: list[Primer], valid_right_primers: list[Primer] + valid_left_primers: list[Oligo], valid_right_primers: list[Oligo] ) -> None: - primers: list[Primer] = valid_left_primers + valid_right_primers - result = Primer3Result(filtered_designs=primers, failures=[]) - with pytest.raises(ValueError, match="Cannot call `as_primer_pair_result` on `Primer` results"): + primers: list[Oligo] = valid_left_primers + valid_right_primers + result = Primer3Result(designs=primers, failures=[]) + with pytest.raises(ValueError, match="Cannot call `as_primer_pair_result` on `Oligo` results"): result.as_primer_pair_result() @@ -603,3 +627,18 @@ def test_create_design_region_raises_when_primers_would_not_fit_in_design_region designer._create_design_region( target_region=target_region, max_amplicon_length=55, min_primer_length=10 ) + + +def test_probe_design_raises(genome_ref: Path, valid_probe_params: ProbeParameters) -> None: + """Test that we raise an error when the target region is smaller than the minimal probe size.""" + target = Span(refname="chr1", start=201, end=217, strand=Strand.POSITIVE) + design_input = Primer3Input( + target=target, + probe_params=valid_probe_params, + task=PickHybProbeOnly(), + ) + with Primer3(genome_fasta=genome_ref) as designer: + with pytest.raises( + ValueError, match="Target region required to be at least as large as the" + ): + designer.design(design_input=design_input) diff --git 
a/tests/primer3/test_primer3_input.py b/tests/primer3/test_primer3_input.py new file mode 100644 index 0000000..54d6c8d --- /dev/null +++ b/tests/primer3/test_primer3_input.py @@ -0,0 +1,181 @@ +import pytest + +from prymer.api import Span +from prymer.api import Strand +from prymer.api.minoptmax import MinOptMax +from prymer.primer3 import DesignLeftPrimersTask +from prymer.primer3 import DesignPrimerPairsTask +from prymer.primer3 import DesignRightPrimersTask +from prymer.primer3 import Primer3Input +from prymer.primer3.primer3_input_tag import Primer3InputTag +from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from prymer.primer3.primer3_parameters import ProbeParameters +from prymer.primer3.primer3_task import PickHybProbeOnly +from prymer.primer3.primer3_task import Primer3TaskType +from prymer.primer3.primer3_weights import PrimerAndAmpliconWeights +from prymer.primer3.primer3_weights import ProbeWeights + + +@pytest.fixture +def valid_primer_amplicon_params() -> PrimerAndAmpliconParameters: + return PrimerAndAmpliconParameters( + amplicon_sizes=MinOptMax(min=200, opt=250, max=300), + amplicon_tms=MinOptMax(min=55.0, opt=60.0, max=65.0), + primer_sizes=MinOptMax(min=18, opt=21, max=27), + primer_tms=MinOptMax(min=55.0, opt=60.0, max=65.0), + primer_gcs=MinOptMax(min=45.0, opt=55.0, max=60.0), + ) + + +@pytest.fixture +def valid_probe_params() -> ProbeParameters: + return ProbeParameters( + probe_sizes=MinOptMax(min=18, opt=22, max=30), + probe_tms=MinOptMax(min=65.0, opt=70.0, max=75.0), + probe_gcs=MinOptMax(min=45.0, opt=55.0, max=60.0), + ) + + +@pytest.fixture +def valid_primer_weights() -> PrimerAndAmpliconWeights: + return PrimerAndAmpliconWeights() + + +@pytest.fixture +def valid_probe_weights() -> ProbeWeights: + return ProbeWeights() + + +@pytest.mark.parametrize( + "task_type", + [ + DesignRightPrimersTask(), + DesignLeftPrimersTask(), + DesignPrimerPairsTask(), + ], +) +def test_primer_design_only_valid( + 
valid_primer_amplicon_params: PrimerAndAmpliconParameters, + task_type: Primer3TaskType, + valid_primer_weights: PrimerAndAmpliconWeights, +) -> None: + test_design_region = Span(refname="chr1", start=1, end=500, strand=Strand.POSITIVE) + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + test_input = Primer3Input( + target=test_target, + primer_weights=valid_primer_weights, + task=task_type, + primer_and_amplicon_params=valid_primer_amplicon_params, + ) + mapped_dict = test_input.to_input_tags(design_region=test_design_region) + assert len(mapped_dict.keys()) == 38 + + +@pytest.mark.parametrize( + "task_type", [DesignRightPrimersTask(), DesignLeftPrimersTask(), DesignPrimerPairsTask()] +) +def test_primer_design_only_raises( + task_type: Primer3TaskType, valid_primer_weights: PrimerAndAmpliconWeights +) -> None: + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + with pytest.raises(ValueError, match="Primer3 requires at least one set of parameters"): + Primer3Input( + target=test_target, + primer_weights=valid_primer_weights, + task=task_type, + primer_and_amplicon_params=None, + ) + + +def test_probe_design_only_valid( + valid_probe_params: ProbeParameters, valid_probe_weights: ProbeWeights +) -> None: + test_design_region = Span(refname="chr1", start=1, end=500, strand=Strand.POSITIVE) + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + test_input = Primer3Input( + target=test_target, + probe_weights=valid_probe_weights, + task=PickHybProbeOnly(), + probe_params=valid_probe_params, + primer_and_amplicon_params=None, + ) + mapped_dict = test_input.to_input_tags(design_region=test_design_region) + assert mapped_dict[Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO] == 1 + + assert len(mapped_dict.keys()) == 21 + + # test instantiation of default `ProbeWeights` when they are not provided + altered_input = Primer3Input( + target=test_target, + probe_weights=None, + 
task=PickHybProbeOnly(), + probe_params=valid_probe_params, + primer_and_amplicon_params=None, + ) + altered_mapped_dict = altered_input.to_input_tags(design_region=test_target) + assert altered_mapped_dict[Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_GT] == 0.5 + + +def test_probe_design_only_raises(valid_probe_weights: ProbeWeights) -> None: + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + with pytest.raises(ValueError, match="Primer3 requires at least one set"): + Primer3Input( + target=test_target, + probe_weights=valid_probe_weights, + task=PickHybProbeOnly(), + primer_and_amplicon_params=None, + ) + + +@pytest.mark.parametrize( + "task_type", + [ + DesignRightPrimersTask(), + DesignLeftPrimersTask(), + DesignPrimerPairsTask(), + PickHybProbeOnly(), + ], +) +def test_no_params_given_raises( + valid_primer_weights: PrimerAndAmpliconWeights, task_type: Primer3TaskType +) -> None: + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + with pytest.raises(ValueError, match="Primer3 requires at least one set"): + Primer3Input( + target=test_target, + primer_weights=valid_primer_weights, + task=task_type, + primer_and_amplicon_params=None, + probe_params=None, + ) + + +@pytest.mark.parametrize( + "task_type, expected_req_primer_amplicon_params, expected_req_probe_params", + [ + (DesignPrimerPairsTask(), True, False), + (DesignRightPrimersTask(), True, False), + (DesignLeftPrimersTask(), True, False), + (PickHybProbeOnly(), False, True), + ], +) +def test_requires_params_sets( + task_type: Primer3TaskType, + valid_probe_params: ProbeParameters, + valid_primer_amplicon_params: PrimerAndAmpliconParameters, + valid_primer_weights: PrimerAndAmpliconWeights, + valid_probe_weights: ProbeWeights, + expected_req_primer_amplicon_params: bool, + expected_req_probe_params: bool, +) -> None: + test_target = Span(refname="chr1", start=200, end=300, strand=Strand.POSITIVE) + test_input = Primer3Input( + 
target=test_target, + primer_weights=valid_primer_weights, + probe_weights=valid_probe_weights, + task=task_type, + probe_params=valid_probe_params, + primer_and_amplicon_params=valid_primer_amplicon_params, + ) + assert test_input.task.requires_probe_params == expected_req_probe_params + assert test_input.task.requires_primer_amplicon_params == expected_req_primer_amplicon_params diff --git a/tests/primer3/test_primer3_parameters.py b/tests/primer3/test_primer3_parameters.py index 82352f4..efd4d24 100644 --- a/tests/primer3/test_primer3_parameters.py +++ b/tests/primer3/test_primer3_parameters.py @@ -2,14 +2,15 @@ import pytest -from prymer.api import MinOptMax -from prymer.primer3 import Primer3InputTag -from prymer.primer3 import Primer3Parameters +from prymer.api.minoptmax import MinOptMax +from prymer.primer3.primer3_input_tag import Primer3InputTag +from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from prymer.primer3.primer3_parameters import ProbeParameters @pytest.fixture -def valid_primer3_params() -> Primer3Parameters: - return Primer3Parameters( +def valid_primer_amplicon_params() -> PrimerAndAmpliconParameters: + return PrimerAndAmpliconParameters( amplicon_sizes=MinOptMax(min=200, opt=250, max=300), amplicon_tms=MinOptMax(min=55.0, opt=60.0, max=65.0), primer_sizes=MinOptMax(min=18, opt=21, max=27), @@ -18,33 +19,82 @@ def valid_primer3_params() -> Primer3Parameters: ) -def test_primer3_param_construction_valid(valid_primer3_params: Primer3Parameters) -> None: - """Test Primer3Parameters class instantiation with valid input""" - assert valid_primer3_params.amplicon_sizes.min == 200 - assert valid_primer3_params.amplicon_sizes.opt == 250 - assert valid_primer3_params.amplicon_sizes.max == 300 - assert valid_primer3_params.primer_gcs.min == 45.0 - assert valid_primer3_params.primer_gcs.opt == 55.0 - assert valid_primer3_params.primer_gcs.max == 60.0 +@pytest.fixture +def valid_probe_params() -> ProbeParameters: + return 
ProbeParameters( + probe_sizes=MinOptMax(min=18, opt=22, max=30), + probe_tms=MinOptMax(min=65.0, opt=70.0, max=75.0), + probe_gcs=MinOptMax(min=45.0, opt=55.0, max=60.0), + ) -def test_primer3_param_construction_raises(valid_primer3_params: Primer3Parameters) -> None: - """Test that Primer3Parameters post_init raises with invalid input.""" +def test_primer_amplicon_param_construction_valid( + valid_primer_amplicon_params: PrimerAndAmpliconParameters, +) -> None: + """Test PrimerAndAmpliconParameters class instantiation with valid input""" + assert valid_primer_amplicon_params.amplicon_sizes.min == 200 + assert valid_primer_amplicon_params.amplicon_sizes.opt == 250 + assert valid_primer_amplicon_params.amplicon_sizes.max == 300 + assert valid_primer_amplicon_params.primer_gcs.min == 45.0 + assert valid_primer_amplicon_params.primer_gcs.opt == 55.0 + assert valid_primer_amplicon_params.primer_gcs.max == 60.0 + + +def test_probe_param_construction_valid( + valid_probe_params: ProbeParameters, +) -> None: + """Test ProbeParameters class instantiation with valid input""" + assert valid_probe_params.probe_sizes.min == 18 + assert valid_probe_params.probe_sizes.opt == 22 + assert valid_probe_params.probe_sizes.max == 30 + assert valid_probe_params.probe_tms.min == 65.0 + assert valid_probe_params.probe_tms.opt == 70.0 + assert valid_probe_params.probe_tms.max == 75.0 + assert valid_probe_params.probe_gcs.min == 45.0 + assert valid_probe_params.probe_gcs.opt == 55.0 + assert valid_probe_params.probe_gcs.max == 60.0 + + +def test_primer_amplicon_param_construction_raises( + valid_primer_amplicon_params: PrimerAndAmpliconParameters, +) -> None: + """Test that PrimerAndAmpliconParameters post_init raises with invalid input.""" # overriding mypy here to test a case that normally would be caught by mypy with pytest.raises(ValueError, match="Primer Max Dinuc Bases must be an even number of bases"): # replace will create a new Primer instance with the provided/modified arguments 
- replace(valid_primer3_params, primer_max_dinuc_bases=5) + replace(valid_primer_amplicon_params, primer_max_dinuc_bases=5) with pytest.raises(TypeError, match="Amplicon sizes and primer sizes must be integers"): - replace(valid_primer3_params, amplicon_sizes=MinOptMax(min=200.0, opt=250.0, max=300.0)) # type: ignore + replace( + valid_primer_amplicon_params, + amplicon_sizes=MinOptMax(min=200.0, opt=250.0, max=300.0), # type: ignore + ) with pytest.raises(TypeError, match="Amplicon sizes and primer sizes must be integers"): - replace(valid_primer3_params, primer_sizes=MinOptMax(min=18.0, opt=21.0, max=27.0)) # type: ignore + replace(valid_primer_amplicon_params, primer_sizes=MinOptMax(min=18.0, opt=21.0, max=27.0)) # type: ignore with pytest.raises(ValueError, match="Min primer GC-clamp must be <= max primer GC-clamp"): - replace(valid_primer3_params, gc_clamp=(5, 0)) + replace(valid_primer_amplicon_params, gc_clamp=(5, 0)) + + +def test_primer_probe_param_construction_raises( + valid_probe_params: ProbeParameters, +) -> None: + """Test that Primer3Parameters post_init raises with invalid input.""" + # overriding mypy here to test a case that normally would be caught by mypy + with pytest.raises(TypeError, match="Probe sizes must be integers"): + # replace will create a new Primer instance with the provided/modified arguments + # we use `type: ignore` here to bypass mypy + replace( + valid_probe_params, + probe_sizes=MinOptMax(min=18.1, opt=22.1, max=30.1), # type: ignore + ) + with pytest.raises(TypeError, match="Probe melting temperatures and GC content must be floats"): + replace(valid_probe_params, probe_tms=MinOptMax(min=55, opt=60, max=65)) -def test_to_input_tags_primer3_params(valid_primer3_params: Primer3Parameters) -> None: +def test_primer_amplicon_params_to_input_tags( + valid_primer_amplicon_params: PrimerAndAmpliconParameters, +) -> None: """Test that to_input_tags() works as expected""" - test_dict = valid_primer3_params.to_input_tags() + test_dict 
= valid_primer_amplicon_params.to_input_tags() assert test_dict[Primer3InputTag.PRIMER_NUM_RETURN] == 5 assert test_dict[Primer3InputTag.PRIMER_PRODUCT_SIZE_RANGE] == "200-300" assert test_dict[Primer3InputTag.PRIMER_PRODUCT_OPT_SIZE] == 250 @@ -65,24 +115,24 @@ def test_to_input_tags_primer3_params(valid_primer3_params: Primer3Parameters) - assert test_dict[Primer3InputTag.PRIMER_MAX_POLY_X] == 5 assert test_dict[Primer3InputTag.PRIMER_MAX_NS_ACCEPTED] == 1 assert test_dict[Primer3InputTag.PRIMER_LOWERCASE_MASKING] == 1 - ambiguous_primer_design = replace(valid_primer3_params, avoid_masked_bases=False) + ambiguous_primer_design = replace(valid_primer_amplicon_params, avoid_masked_bases=False) ambiguous_dict = ambiguous_primer_design.to_input_tags() assert ambiguous_dict[Primer3InputTag.PRIMER_LOWERCASE_MASKING] == 0 -def test_max_ampl_length(valid_primer3_params: Primer3Parameters) -> None: +def test_max_ampl_length(valid_primer_amplicon_params: PrimerAndAmpliconParameters) -> None: """Test that max_amplicon_length() returns expected int""" - assert valid_primer3_params.max_amplicon_length == 300 + assert valid_primer_amplicon_params.max_amplicon_length == 300 change_max_length = replace( - valid_primer3_params, amplicon_sizes=MinOptMax(min=200, opt=500, max=1000) + valid_primer_amplicon_params, amplicon_sizes=MinOptMax(min=200, opt=500, max=1000) ) assert change_max_length.max_amplicon_length == 1000 -def test_max_primer_length(valid_primer3_params: Primer3Parameters) -> None: +def test_max_primer_length(valid_primer_amplicon_params: PrimerAndAmpliconParameters) -> None: """Test that max_primer_length() returns expected int""" - assert valid_primer3_params.max_primer_length == 27 + assert valid_primer_amplicon_params.max_primer_length == 27 change_max_length = replace( - valid_primer3_params, primer_sizes=MinOptMax(min=18, opt=35, max=50) + valid_primer_amplicon_params, primer_sizes=MinOptMax(min=18, opt=35, max=50) ) assert change_max_length.max_primer_length == 
50 diff --git a/tests/primer3/test_primer3_weights.py b/tests/primer3/test_primer3_weights.py index c9dd175..f1a7726 100644 --- a/tests/primer3/test_primer3_weights.py +++ b/tests/primer3/test_primer3_weights.py @@ -1,10 +1,11 @@ -from prymer.primer3 import Primer3InputTag -from prymer.primer3 import Primer3Weights +from prymer.primer3.primer3_input_tag import Primer3InputTag +from prymer.primer3.primer3_weights import PrimerAndAmpliconWeights +from prymer.primer3.primer3_weights import ProbeWeights def test_primer_weights_valid() -> None: - """Test instantiation of Primer3Weights object with valid input""" - test_weights = Primer3Weights() + """Test instantiation of `PrimerAndAmpliconWeights` object with valid input""" + test_weights = PrimerAndAmpliconWeights() test_dict = test_weights.to_input_tags() assert test_dict[Primer3InputTag.PRIMER_PAIR_WT_PRODUCT_SIZE_LT] == 1 assert test_dict[Primer3InputTag.PRIMER_PAIR_WT_PRODUCT_SIZE_GT] == 1 @@ -22,9 +23,21 @@ def test_primer_weights_valid() -> None: assert len((test_dict.values())) == 13 +def test_probe_weights_valid() -> None: + test_weights = ProbeWeights() + test_dict = test_weights.to_input_tags() + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_LT] == 0.25 + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_GT] == 0.25 + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_TM_LT] == 1.0 + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_TM_GT] == 1.0 + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_LT] == 0.5 + assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_GT] == 0.5 + assert len((test_dict.values())) == 6 + + def test_primer_weights_to_input_tags() -> None: """Test results from to_input_tags() with and without default values""" - default_map = Primer3Weights().to_input_tags() + default_map = PrimerAndAmpliconWeights().to_input_tags() assert default_map[Primer3InputTag.PRIMER_PAIR_WT_PRODUCT_SIZE_LT] == 1 - customized_map = 
Primer3Weights(product_size_lt=5).to_input_tags() + customized_map = PrimerAndAmpliconWeights(product_size_lt=5).to_input_tags() assert customized_map[Primer3InputTag.PRIMER_PAIR_WT_PRODUCT_SIZE_LT] == 5