feat: add relevant fields in PrimerParams and PrimerWeights; style: e…

…xpand docstrings, update doctest examples
fulcrumgenomics · Sep 26, 2024 · c3e52d0 · c3e52d0
1 parent e97ca61
commit c3e52d0
Show file tree

Hide file tree

Showing 7 changed files with 175 additions and 37 deletions.
diff --git a/prymer/api/oligo.py b/prymer/api/oligo.py
@@ -88,25 +88,32 @@ class Oligo(OligoLike, Metric["Oligo"]):
     The penalty for a primer is set by the combination of `PrimerAndAmpliconParameters` and
     `PrimerWeights`, whereas a probe penalty is set by `ProbeParameters` and `ProbeWeights`.
 
+    The values for `self_any`, `self_any_th`, `self_end`, `self_end_th`, and `hairpin_th`
+    are emitted by Primer3 as part of oligo design. These attributes are optional to maintain
+    flexibility for reading in and writing `Oligo` objects, especially when design settings are
+    inconsistent.
+
     Attributes:
         tm: the calculated melting temperature of the oligo
         penalty: the penalty or score for the oligo
         span: the mapping of the primer to the genome
-        self_any_th: self-complementarity throughout the probe as calculated by Primer3
-        self_end_th: 3' end complementarity of the probe as calculated by Primer3
-        hairpin_th: hairpin formation thermodynamics of the probe as calculated by Primer3
+        self_any: oligo self-complementarity, expressed as local alignment score
+        self_any_th: oligo self-complementarity, expressed as melting temperature
+        self_end: 3' end complementarity, expressed as local alignment score
+        self_end_th: 3' end complementarity, expressed as melting temperature
+        hairpin_th: hairpin formation thermodynamics of the oligo as calculated by Primer3
         bases: the base sequence of the oligo (excluding any tail)
         tail: an optional tail sequence to put on the 5' end of the primer
         name: an optional name to use for the primer
 
-
-
     """
 
     tm: float
     penalty: float
     span: Span
+    self_any: Optional[float] = None
     self_any_th: Optional[float] = None
+    self_end: Optional[float] = None
     self_end_th: Optional[float] = None
     hairpin_th: Optional[float] = None
     bases: Optional[str] = None

diff --git a/prymer/api/primer_pair.py b/prymer/api/primer_pair.py
@@ -37,7 +37,7 @@ class methods to represent a primer pair.  The primer pair is comprised of a lef
 Span(refname='chr1', start=21, end=100, strand=<Strand.POSITIVE: '+'>)
 
 >>> list(primer_pair)
-[Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=1, end=20, strand=<Strand.POSITIVE: '+'>), self_any_th=None, self_end_th=None, hairpin_th=None, bases='GGGGGGGGGGGGGGGGGGGG', tail=None), Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=101, end=120, strand=<Strand.NEGATIVE: '-'>), self_any_th=None, self_end_th=None, hairpin_th=None, bases='TTTTTTTTTTTTTTTTTTTT', tail=None)]
+[Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=1, end=20, strand=<Strand.POSITIVE: '+'>), self_any=None, self_any_th=None, self_end=None, self_end_th=None, hairpin_th=None, bases='GGGGGGGGGGGGGGGGGGGG', tail=None), Oligo(name=None, tm=70.0, penalty=-123.0, span=Span(refname='chr1', start=101, end=120, strand=<Strand.NEGATIVE: '-'>), self_any=None, self_any_th=None, self_end=None, self_end_th=None, hairpin_th=None, bases='TTTTTTTTTTTTTTTTTTTT', tail=None)]
 
 ```
 """  # noqa: E501

diff --git a/prymer/primer3/primer3_input.py b/prymer/primer3/primer3_input.py
@@ -52,7 +52,6 @@
 PRIMER_PICK_RIGHT_PRIMER -> 0
 PRIMER_PICK_INTERNAL_OLIGO -> 0
 SEQUENCE_INCLUDED_REGION -> 1,51
-PRIMER_NUM_RETURN -> 5
 PRIMER_PRODUCT_OPT_SIZE -> 200
 PRIMER_PRODUCT_SIZE_RANGE -> 100-250
 PRIMER_PRODUCT_MIN_TM -> 55.0
@@ -72,6 +71,12 @@
 PRIMER_MAX_POLY_X -> 5
 PRIMER_MAX_NS_ACCEPTED -> 1
 PRIMER_LOWERCASE_MASKING -> 1
+PRIMER_NUM_RETURN -> 5
+PRIMER_MAX_SELF_ANY -> 8.0
+PRIMER_MAX_SELF_ANY_TH -> 53.0
+PRIMER_MAX_SELF_END -> 3.0
+PRIMER_MAX_SELF_END_TH -> 53.0
+PRIMER_MAX_HAIRPIN_TH -> 53.0
 PRIMER_PAIR_WT_PRODUCT_SIZE_LT -> 1
 PRIMER_PAIR_WT_PRODUCT_SIZE_GT -> 1
 PRIMER_PAIR_WT_PRODUCT_TM_LT -> 0.0
@@ -85,6 +90,9 @@
 PRIMER_WT_SIZE_GT -> 0.1
 PRIMER_WT_TM_LT -> 1.0
 PRIMER_WT_TM_GT -> 1.0
+PRIMER_WT_SELF_ANY_TH -> 0.0
+PRIMER_WT_SELF_END_TH -> 0.0
+PRIMER_WT_HAIRPIN_TH -> 0.0
 """
 
 from dataclasses import MISSING

diff --git a/prymer/primer3/primer3_parameters.py b/prymer/primer3/primer3_parameters.py
@@ -3,8 +3,6 @@
 
 The [`PrimerAndAmpliconParameters`][prymer.primer3.primer3_parameters.PrimerAndAmpliconParameters]
 class stores user input for primer design and maps it to the correct Primer3 fields.
-The [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters]
-class stores user input for internal probe design and maps it to the correct Primer3 fields.
 
 Primer3 considers many criteria for primer design, including characteristics of candidate primers
 and the resultant amplicon product, as well as potential complications (off-target priming,
@@ -15,11 +13,24 @@ class stores user input for internal probe design and maps it to the correct Pri
 GC content, melting temperature, and size of both primers and expected amplicon.
 Additional criteria include the maximum homopolymer length, ambiguous bases, and bases in a
 dinucleotide run within a primer. By default, primer design avoids masked bases, returns 5 primers,
-and sets the GC clamp to be no larger than 5.
+and sets the GC clamp to be no larger than 5. The PrimerAndAmpliconParameters also stores common
+default settings to minimize the tendency of primers and probes to anneal to one another. This
+self-complementarity can make PCR reactions less efficient and potentially yield nonspecific
+amplification. Primer3 supports thermodynamic-based thresholds as well as parameters for maximal
+alignment scores.
 
 The `to_input_tags()` method in `PrimerAndAmpliconParameters` converts these parameters into
 tag-values pairs for use when executing `Primer3`.
 
+The [`ProbeParameters`][prymer.primer3.primer3_parameters.ProbeParameters]
+class stores user input for internal probe design and maps it to the correct Primer3 fields.
+
+Similar to the PrimerAndAmpliconParameters class, the ProbeParameters class can be used to
+specify the acceptable ranges of probe sizes, melting temperatures, and GC content. A region can be
+excluded from internal probe design based on its start and the length of the region to exclude. This
+attribute can help avoid regions that are problematic for oligo design, like low-complexity
+sequence tracts.
+
 ## Examples
 
 ```python
@@ -32,7 +43,6 @@ class stores user input for internal probe design and maps it to the correct Pri
 )
 >>> for tag, value in params.to_input_tags().items(): \
     print(f"{tag.value} -> {value}")
-PRIMER_NUM_RETURN -> 5
 PRIMER_PRODUCT_OPT_SIZE -> 200
 PRIMER_PRODUCT_SIZE_RANGE -> 100-250
 PRIMER_PRODUCT_MIN_TM -> 55.0
@@ -52,12 +62,19 @@ class stores user input for internal probe design and maps it to the correct Pri
 PRIMER_MAX_POLY_X -> 5
 PRIMER_MAX_NS_ACCEPTED -> 1
 PRIMER_LOWERCASE_MASKING -> 1
+PRIMER_NUM_RETURN -> 5
+PRIMER_MAX_SELF_ANY -> 8.0
+PRIMER_MAX_SELF_ANY_TH -> 53.0
+PRIMER_MAX_SELF_END -> 3.0
+PRIMER_MAX_SELF_END_TH -> 53.0
+PRIMER_MAX_HAIRPIN_TH -> 53.0
 
 ```
 """
 
 import warnings
 from dataclasses import dataclass
+from dataclasses import field
 from typing import Any
 from typing import Optional
 
@@ -81,6 +98,30 @@ class PrimerAndAmpliconParameters:
         primer_max_dinuc_bases: the maximal number of bases in a dinucleotide run in a primer
         avoid_masked_bases: whether Primer3 should avoid designing primers in soft-masked regions
         number_primers_return: the number of primers to return
+        primer_max_self_any: the maximal local alignment score of aligning the primer to itself
+        primer_max_self_any_thermo: the maximal melting temperature of the most stable structure
+            resulting from aligning the primer to itself
+        primer_max_self_end: the maximal 3' anchored global alignment score of aligning the primer
+            to itself
+        primer_max_self_end_thermo: the maximal melting temperature of the most stable structure
+            resulting from aligning the 3' end of the primer
+        primer_max_hairpin_thermo: the maximal melting temperature of the most stable hairpin
+            structure of the primer
+
+    Primer3 uses both thermodynamic and alignment-based approaches to quantify primer
+    self-complementarity.
+
+    `primer_max_self_any`, `primer_max_self_any_thermo`, `primer_max_self_end`,
+    `primer_max_self_end_thermo`, and `primer_max_hairpin_thermo` are all set to default values as
+    specified in the Primer3 manual. The default values of the thermodynamic attributes
+    (ending in `_thermo`) are set to 10 degrees less than the minimal melting temperature specified
+    for primer design.
+
+    For `primer_max_self_any` and `primer_max_self_end`, a score of 0.00 indicates that there is no
+    reasonable local alignment across the individual primer under consideration.
+
+    In general, these settings are meant to limit problematic oligo self-complementarity
+    and avoid primer-dimers or other nonspecific binding of oligos to target sequences.
 
     Please see the Primer3 manual for additional details: https://primer3.org/manual.html#globalTags
 
@@ -97,6 +138,11 @@ class PrimerAndAmpliconParameters:
     primer_max_dinuc_bases: int = 6
     avoid_masked_bases: bool = True
     number_primers_return: int = 5
+    primer_max_self_any: float = 8.00
+    primer_max_self_any_thermo: float = field(init=False)
+    primer_max_self_end: float = 3.0
+    primer_max_self_end_thermo: float = field(init=False)
+    primer_max_hairpin_thermo: float = field(init=False)
 
     def __post_init__(self) -> None:
         if self.primer_max_dinuc_bases % 2 == 1:
@@ -108,10 +154,15 @@ def __post_init__(self) -> None:
         if self.gc_clamp[0] > self.gc_clamp[1]:
             raise ValueError("Min primer GC-clamp must be <= max primer GC-clamp")
 
+        # Set melting temperature thresholds to be 10 degrees less than the minimum primer tm
+        default_thermo_tm: float = self.primer_tms.min - 10.0
+        object.__setattr__(self, "primer_max_self_any_thermo", default_thermo_tm)
+        object.__setattr__(self, "primer_max_self_end_thermo", default_thermo_tm)
+        object.__setattr__(self, "primer_max_hairpin_thermo", default_thermo_tm)
+
     def to_input_tags(self) -> dict[Primer3InputTag, Any]:
         """Converts input params to Primer3InputTag to feed directly into Primer3."""
         mapped_dict: dict[Primer3InputTag, Any] = {
-            Primer3InputTag.PRIMER_NUM_RETURN: self.number_primers_return,
             Primer3InputTag.PRIMER_PRODUCT_OPT_SIZE: self.amplicon_sizes.opt,
             Primer3InputTag.PRIMER_PRODUCT_SIZE_RANGE: (
                 f"{self.amplicon_sizes.min}-{self.amplicon_sizes.max}"
@@ -133,7 +184,14 @@ def to_input_tags(self) -> dict[Primer3InputTag, Any]:
             Primer3InputTag.PRIMER_MAX_POLY_X: self.primer_max_polyX,
             Primer3InputTag.PRIMER_MAX_NS_ACCEPTED: self.primer_max_Ns,
             Primer3InputTag.PRIMER_LOWERCASE_MASKING: 1 if self.avoid_masked_bases else 0,
+            Primer3InputTag.PRIMER_NUM_RETURN: self.number_primers_return,
+            Primer3InputTag.PRIMER_MAX_SELF_ANY: self.primer_max_self_any,
+            Primer3InputTag.PRIMER_MAX_SELF_ANY_TH: self.primer_max_self_any_thermo,
+            Primer3InputTag.PRIMER_MAX_SELF_END: self.primer_max_self_end,
+            Primer3InputTag.PRIMER_MAX_SELF_END_TH: self.primer_max_self_end_thermo,
+            Primer3InputTag.PRIMER_MAX_HAIRPIN_TH: self.primer_max_hairpin_thermo,
         }
+
         return mapped_dict
 
     @property
@@ -176,24 +234,40 @@ class ProbeParameters:
         probe_max_dinuc_bases: the max number of bases in a dinucleotide run in a probe
         probe_max_polyX: the max homopolymer length acceptable within a probe
         probe_max_Ns: the max number of ambiguous bases acceptable within a probe
-        probe_max_self_any: max allowable local alignment score when evaluating an individual probe
-            for self-complementarity throughout the probe sequence
-        probe_max_self_any_thermo: max allowable score for self-complementarity of the probe
-            sequence using a thermodynamic approach
-        probe_max_self_end: max allowable 3'-anchored global alignment score when testing a single
+        probe_max_self_any: the maximal local alignment score of aligning the probe to itself
+        probe_max_self_any_thermo: the maximal melting temperature of the most stable structure
+            resulting from aligning the probe to itself
+        probe_max_self_end: max allowable 3'-anchored global alignment score when testing a
             probe for self-complementarity
-        probe_max_self_end_thermo: similar to `probe_max_end_any` but uses a thermodynamic approach
-            to evaluate a probe for self-complementarity
+        probe_max_self_end_thermo: the maximal melting temperature of the most stable structure
+            resulting from aligning the 3' end of the probe
         probe_max_hairpin_thermo: most stable monomer structure as calculated by a thermodynamic
             approach
         probe_excluded_region: the excluded region (start, length) that probes shall not overlap
 
+    The attributes that have default values specified take their default values from the
+    Primer3 manual.
 
-    Defaults in this class are set as recommended by the Primer3 manual.
     Please see the Primer3 manual for additional details: https://primer3.org/manual.html#globalTags
 
+    Primer3 uses both thermodynamic and alignment-based approaches to quantify oligo
+    self-complementarity.
+
+    `probe_max_self_any`, `probe_max_self_any_thermo`, `probe_max_self_end`,
+    `probe_max_self_end_thermo`, and `probe_max_hairpin_thermo` are all set to default values as
+    specified in the Primer3 manual. The default values of the thermodynamic attributes
+    (ending in `_thermo`) are set to 10 degrees less than the minimal melting temperature specified
+    for probe design.
+
+    For `probe_max_self_any` and `probe_max_self_end`, a score of 0.00 indicates that there is no
+    reasonable local alignment across the individual probe under consideration. These scores are
+    always positive.
+
+    In general, these settings are meant to limit problematic oligo self-complementarity
+    and avoid primer-dimers or other nonspecific binding of probes to target sequences.
+
     Note that the Primer3 documentation advises that, while `probe_max_self_end` is meaningless
-    when applied to internal oligos used for hybridization-based detection,
+    when applied to internal probes used for hybridization-based detection,
     `PRIMER_INTERNAL_MAX_SELF_END` should be set at least as high as `PRIMER_INTERNAL_MAX_SELF_ANY`.
     Therefore, both parameters are exposed here.
 
@@ -207,10 +281,10 @@ class ProbeParameters:
     probe_max_polyX: int = 5
     probe_max_Ns: int = 0
     probe_max_self_any: float = 12.0
-    probe_max_self_any_thermo: float = 47.0
+    probe_max_self_any_thermo: float = field(init=False)
     probe_max_self_end: float = 12.0
-    probe_max_self_end_thermo: float = 47.0
-    probe_max_hairpin_thermo: float = 47.0
+    probe_max_self_end_thermo: float = field(init=False)
+    probe_max_hairpin_thermo: float = field(init=False)
     probe_excluded_region: Optional[tuple[int, int]] = None
 
     def __post_init__(self) -> None:
@@ -230,6 +304,11 @@ def __post_init__(self) -> None:
                     "Excluded region for probe design must be given as a tuple[int, int]"
                     "for start and length of region (e.g., (10,20))"
                 )
+        # Set melting temperature thresholds to be 10 degrees less than the minimum probe tm
+        default_thermo_tm: float = self.probe_tms.min - 10.0
+        object.__setattr__(self, "probe_max_self_any_thermo", default_thermo_tm)
+        object.__setattr__(self, "probe_max_self_end_thermo", default_thermo_tm)
+        object.__setattr__(self, "probe_max_hairpin_thermo", default_thermo_tm)
 
     def to_input_tags(self) -> dict[Primer3InputTag, Any]:
         """Converts input params to Primer3InputTag to feed directly into Primer3."""