From e7bf242d3d404a5b4cdcd868a487eec927ed77bf Mon Sep 17 00:00:00 2001 From: Erin McAuley Date: Thu, 26 Sep 2024 18:44:01 -0400 Subject: [PATCH] feat: add primer3 logic --- prymer/primer3/primer3.py | 167 +++++++++++++++++++-------------- prymer/primer3/primer3_task.py | 56 ++++++++++- tests/primer3/test_primer3.py | 31 +++--- 3 files changed, 165 insertions(+), 89 deletions(-) diff --git a/prymer/primer3/primer3.py b/prymer/primer3/primer3.py index 862578e..c69d478 100644 --- a/prymer/primer3/primer3.py +++ b/prymer/primer3/primer3.py @@ -43,7 +43,7 @@ ``` -The `design_primers()` method on `Primer3` is used to design the primers given a +The `design_oligos()` method on `Primer3` is used to design the primers given a [`Primer3Input`][prymer.primer3.primer3_input.Primer3Input]. The latter includes all the parameters and target region. @@ -63,7 +63,7 @@ primer_and_amplicon_params=params, \ task=DesignLeftPrimersTask(), \ ) ->>> left_result = designer.design_primers(design_input=design_input) +>>> left_result = designer.design_oligos(design_input=design_input) ``` @@ -152,6 +152,7 @@ from prymer.primer3.primer3_task import DesignLeftPrimersTask from prymer.primer3.primer3_task import DesignPrimerPairsTask from prymer.primer3.primer3_task import DesignRightPrimersTask +from prymer.primer3.primer3_task import PickHybProbeOnly from prymer.util.executable_runner import ExecutableRunner @@ -170,22 +171,22 @@ class Primer3Failure(Metric["Primer3Failure"]): count: int -PrimerLikeType = TypeVar("PrimerLikeType", bound=OligoLike) -"""Type variable for a `Primer3Result`, which must implement `PrimerLike`""" +OligoLikeType = TypeVar("OligoLikeType", bound=OligoLike) +"""Type variable for a `Primer3Result`, which must implement `OligoLike`""" @dataclass(init=True, slots=True, frozen=True) -class Primer3Result(Generic[PrimerLikeType]): +class Primer3Result(Generic[OligoLikeType]): """Encapsulates Primer3 design results (both valid designs and failures). Attributes: filtered_designs: filtered and ordered (by objective function score) list of primer - pairs or single primers that were returned by Primer3 + pairs or single oligos that were returned by Primer3 failures: ordered list of Primer3Failures detailing design failure reasons and corresponding count """ - filtered_designs: list[PrimerLikeType] + filtered_designs: list[OligoLikeType] failures: list[Primer3Failure] def as_primer_result(self) -> "Primer3Result[Oligo]": @@ -197,7 +198,7 @@ def as_primer_result(self) -> "Primer3Result[Oligo]": def as_primer_pair_result(self) -> "Primer3Result[PrimerPair]": """Returns this Primer3Result assuming the design results are of type `PrimerPair`.""" if len(self.filtered_designs) > 0 and not isinstance(self.filtered_designs[0], PrimerPair): - raise ValueError("Cannot call `as_primer_pair_result` on `Primer` results") + raise ValueError("Cannot call `as_primer_pair_result` on `Oligo` results") return typing.cast(Primer3Result[PrimerPair], self) def primers(self) -> list[Oligo]: @@ -212,7 +213,7 @@ def primer_pairs(self) -> list[PrimerPair]: try: return self.as_primer_pair_result().filtered_designs except ValueError as ex: - raise ValueError("Cannot call `primer_pairs` on `Primer` results") from ex + raise ValueError("Cannot call `primer_pairs` on `Oligo` results") from ex class Primer3(ExecutableRunner): @@ -308,13 +309,6 @@ def get_design_sequences(self, region: Span) -> tuple[str, str]: hard_masked = "".join(soft_masked_list) return soft_masked, hard_masked - @staticmethod - def _is_valid_primer(design_input: Primer3Input, primer_design: Oligo) -> bool: - return ( - primer_design.longest_dinucleotide_run_length() - <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases - ) - @staticmethod def _screen_pair_results( design_input: Primer3Input, designed_primer_pairs: list[PrimerPair] @@ -349,8 +343,8 @@ def _screen_pair_results( valid_primer_pair_designs.append(primer_pair) return valid_primer_pair_designs, dinuc_pair_failures - def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 - """Designs primers or primer pairs given a target region. + def design_oligos(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 + """Designs primers, primer pairs, and/or internal probes given a target region. Args: design_input: encapsulates the target region, design task, specifications, and scoring @@ -371,12 +365,18 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: f"Error, trying to use a subprocess that has already been " f"terminated, return code {self._subprocess.returncode}" ) - - design_region: Span = self._create_design_region( - target_region=design_input.target, - max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length, - min_primer_length=design_input.primer_and_amplicon_params.min_primer_length, - ) + design_region: Span + match design_input.task: + case PickHybProbeOnly(): + design_region = design_input.target + case DesignRightPrimersTask() | DesignLeftPrimersTask() | DesignPrimerPairsTask(): + design_region = self._create_design_region( + target_region=design_input.target, + max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length, + min_primer_length=design_input.primer_and_amplicon_params.min_primer_length, + ) + case _ as unreachable: + assert_never(unreachable) soft_masked, hard_masked = self.get_design_sequences(design_region) global_primer3_params = { @@ -389,7 +389,6 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: **global_primer3_params, **design_input.to_input_tags(design_region=design_region), } - # Submit inputs to primer3 for tag, value in assembled_primer3_tags.items(): self._subprocess.stdin.write(f"{tag}={value}") @@ -454,15 +453,16 @@ def primer3_error(message: str) -> None: unfiltered_designs=all_pair_results, ) - case DesignLeftPrimersTask() | DesignRightPrimersTask(): # Single primer design - all_single_results = Primer3._build_primers( + case DesignLeftPrimersTask() | DesignRightPrimersTask() | PickHybProbeOnly(): + # Single primer or probe design + all_single_results: list[Oligo] = Primer3._build_oligos( design_input=design_input, design_results=primer3_results, design_region=design_region, design_task=design_input.task, unmasked_design_seq=soft_masked, ) - return Primer3._assemble_primers( + return Primer3._assemble_single_designs( design_input=design_input, design_results=primer3_results, unfiltered_designs=all_single_results, @@ -472,48 +472,39 @@ def primer3_error(message: str) -> None: assert_never(unreachable) @staticmethod - def _build_primers( + def _build_oligos( design_input: Primer3Input, design_results: dict[str, str], design_region: Span, - design_task: Union[DesignLeftPrimersTask, DesignRightPrimersTask], + design_task: Union[DesignLeftPrimersTask, DesignRightPrimersTask, PickHybProbeOnly], unmasked_design_seq: str, ) -> list[Oligo]: """ - Builds a list of left or right primers from Primer3 output. + Builds a list of single oligos from Primer3 output. Args: design_input: the target region, design task, specifications, and scoring penalties - design_results: design results emitted by Primer3 and captured by design_primers() + design_results: design results emitted by Primer3 and captured by design_oligos() design_region: the padded design region design_task: the design task unmasked_design_seq: the reference sequence corresponding to the target region Returns: - primers: a list of unsorted and unfiltered primer designs emitted by Primer3 + oligos: a list of unsorted and unfiltered primer designs emitted by Primer3 Raises: ValueError: if Primer3 does not return primer designs """ - count_tag = design_input.task.count_tag - - maybe_count: Optional[str] = design_results.get(count_tag) - if maybe_count is None: # no count tag was found - if "PRIMER_ERROR" in design_results: - primer_error = design_results["PRIMER_ERROR"] - raise ValueError(f"Primer3 returned an error: {primer_error}") - else: - raise ValueError(f"Primer3 did not return the count tag: {count_tag}") - count: int = int(maybe_count) - - primers = [] + count: int = _check_design_results(design_input, design_results) + + primers: list[Oligo] = [] for idx in range(count): key = f"PRIMER_{design_task.task_type}_{idx}" str_position, str_length = design_results[key].split(",", maxsplit=1) position, length = int(str_position), int(str_length) # position is 1-based match design_task: - case DesignLeftPrimersTask(): + case DesignLeftPrimersTask() | PickHybProbeOnly(): span = design_region.get_subspan( offset=position - 1, subspan_length=length, strand=Strand.POSITIVE ) @@ -539,46 +530,39 @@ def _build_primers( tm=float(design_results[f"PRIMER_{design_task.task_type}_{idx}_TM"]), penalty=float(design_results[f"PRIMER_{design_task.task_type}_{idx}_PENALTY"]), span=span, + self_any_th=float(design_results[f"{key}_SELF_ANY_TH"]), + self_end_th=float(design_results[f"{key}_SELF_END_TH"]), + hairpin_th=float(design_results[f"{key}_HAIRPIN_TH"]), ) ) return primers @staticmethod - def _assemble_primers( - design_input: Primer3Input, design_results: dict[str, str], unfiltered_designs: list[Oligo] + def _assemble_single_designs( + design_input: Primer3Input, + design_results: dict[str, str], + unfiltered_designs: list[Oligo], ) -> Primer3Result: - """Helper function to organize primer designs into valid and failed designs. - - Wraps `Primer3._is_valid_primer()` and `Primer3._build_failures()` to filter out designs - with dinucleotide runs that are too long and extract additional failure reasons emitted by - Primer3. - - Args: - design_input: encapsulates the target region, design task, specifications, - and scoring penalties - unfiltered_designs: list of primers emitted from Primer3 - design_results: key-value pairs of results reported by Primer3 + """Screens oligo designs (primers or probes) emitted by Primer3 for acceptable dinucleotide + runs and extracts failure reasons for failed designs.""" - Returns: - primer_designs: a `Primer3Result` that encapsulates valid and failed designs - """ - valid_primer_designs = [ + valid_oligo_designs = [ design for design in unfiltered_designs - if Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] dinuc_failures = [ design for design in unfiltered_designs - if not Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if not _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] failure_strings = [design_results[f"PRIMER_{design_input.task.task_type}_EXPLAIN"]] failures = Primer3._build_failures(dinuc_failures, failure_strings) - primer_designs: Primer3Result = Primer3Result( - filtered_designs=valid_primer_designs, failures=failures + design_candidates: Primer3Result = Primer3Result( + filtered_designs=valid_oligo_designs, failures=failures ) - return primer_designs + return design_candidates @staticmethod def _build_primer_pairs( @@ -592,7 +576,7 @@ def _build_primer_pairs( Args: design_input: the target region, design task, specifications, and scoring penalties - design_results: design results emitted by Primer3 and captured by design_primers() + design_results: design results emitted by Primer3 and captured by design_oligos() design_region: the padded design region unmasked_design_seq: the reference sequence corresponding to the target region @@ -602,7 +586,7 @@ def _build_primer_pairs( Raises: ValueError: if Primer3 does not return the same number of left and right designs """ - left_primers = Primer3._build_primers( + left_primers = Primer3._build_oligos( design_input=design_input, design_results=design_results, design_region=design_region, @@ -610,7 +594,7 @@ def _build_primer_pairs( unmasked_design_seq=unmasked_design_seq, ) - right_primers = Primer3._build_primers( + right_primers = Primer3._build_oligos( design_input=design_input, design_results=design_results, design_region=design_region, @@ -760,3 +744,44 @@ def _create_design_region( ) return design_region + + +def _check_design_results(design_input: Primer3Input, design_results: dict[str, str]) -> int: + """Checks for any additional Primer3 errors and reports out the count of emitted designs.""" + count_tag = design_input.task.count_tag + maybe_count: Optional[str] = design_results.get(count_tag) + if maybe_count is None: # no count tag was found + if "PRIMER_ERROR" in design_results: + primer_error = design_results["PRIMER_ERROR"] + raise ValueError(f"Primer3 returned an error: {primer_error}") + else: + raise ValueError(f"Primer3 did not return the count tag: {count_tag}") + count: int = int(maybe_count) + + return count + + +def _has_acceptable_dinuc_run(design_input: Primer3Input, oligo_design: Oligo) -> bool: + """ + True if the design's longest dinucleotide run is no more than the stipulated maximum. + + For primer designs, the maximum is recorded in the input's + `PrimerAndAmpliconParameters.primer_max_dinuc_bases`. + + For probe designs, the maximum is recorded in the input's + `ProbeParameters.probe_max_dinuc_bases`. + + Args: + design_input: the Primer3Input object that wraps task-specific and design-specific params + oligo_design: the design candidate + + Returns: + + """ + max_dinuc_bases: int + if design_input.task.requires_primer_amplicon_params: + max_dinuc_bases = design_input.primer_and_amplicon_params.primer_max_dinuc_bases + elif design_input.task.requires_probe_params: + max_dinuc_bases = design_input.probe_params.probe_max_dinuc_bases + + return oligo_design.longest_dinucleotide_run_length() <= max_dinuc_bases diff --git a/prymer/primer3/primer3_task.py b/prymer/primer3/primer3_task.py index e09a769..0de198d 100644 --- a/prymer/primer3/primer3_task.py +++ b/prymer/primer3/primer3_task.py @@ -8,7 +8,7 @@ The design task "type" dictates which type of primers to pick and informs the design region. These parameters are aligned to the correct Primer3 settings and fed directly into Primer3. -Three types of tasks are available: +Four types of tasks are available: 1. [`DesignPrimerPairsTask`][prymer.primer3.primer3_task.DesignPrimerPairsTask] -- task for designing _primer pairs_. @@ -16,6 +16,8 @@ for designing primers to the _left_ (5') of the design region on the top/positive strand. 3. [`DesignRightPrimersTask`][prymer.primer3.primer3_task.DesignRightPrimersTask] -- task for designing primers to the _right_ (3') of the design region on the bottom/negative strand. +4. [`PickHybProbeOnly`][prymer.primer3.primer3_task.PickHybProbeOnly] -- task for designing an + internal probe for hybridization-based technologies The main purpose of these classes are to generate the [`Primer3InputTag`s][prymer.primer3.primer3_input_tag.Primer3InputTag]s required by @@ -103,15 +105,15 @@ from prymer.primer3.primer3_input_tag import Primer3InputTag Primer3TaskType: TypeAlias = Union[ - "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask" + "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask", "PickHybProbeOnly" ] """Type alias for all `Primer3Task`s, to enable exhaustiveness checking.""" @unique class TaskType(UppercaseStrEnum): - """Represents the type of design task, either design primer pairs, or individual primers - (left or right).""" + """Represents the type of design task: design primer pairs, individual primers + (left or right), or an internal hybridization probe.""" # Developer Note: the names of this enum are important, as they are used as-is for the # count_tag in `Primer3Task`. @@ -119,6 +121,7 @@ class TaskType(UppercaseStrEnum): PAIR = auto() LEFT = auto() RIGHT = auto() + INTERNAL = auto() class Primer3Task(ABC): @@ -194,6 +197,14 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT f"{target.length}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignLeftPrimersTask(Primer3Task, task_type=TaskType.LEFT): """Stores task-specific characteristics for designing left primers.""" @@ -208,6 +219,14 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"1,{target.start - design_region.start}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignRightPrimersTask(Primer3Task, task_type=TaskType.RIGHT): """Stores task-specific characteristics for designing right primers""" @@ -223,3 +242,32 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 0, Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"{start},{length}", } + + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + + +class PickHybProbeOnly(Primer3Task, task_type=TaskType.INTERNAL): + """Stores task-specific characteristics for designing an internal hybridization probe.""" + + @classmethod + def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputTag, Any]: + return { + Primer3InputTag.PRIMER_TASK: "generic", + Primer3InputTag.PRIMER_PICK_LEFT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_RIGHT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 1, + } + + @property + def requires_primer_amplicon_params(self) -> bool: + return False + + @property + def requires_probe_params(self) -> bool: + return True diff --git a/tests/primer3/test_primer3.py b/tests/primer3/test_primer3.py index e741ca4..9ffe8e7 100644 --- a/tests/primer3/test_primer3.py +++ b/tests/primer3/test_primer3.py @@ -15,6 +15,7 @@ from prymer.primer3.primer3 import Primer3 from prymer.primer3.primer3 import Primer3Failure from prymer.primer3.primer3 import Primer3Result +from prymer.primer3.primer3 import _has_acceptable_dinuc_run from prymer.primer3.primer3_input import Primer3Input from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters from prymer.primer3.primer3_task import DesignLeftPrimersTask @@ -130,11 +131,11 @@ def valid_primer_pairs( return primer_pairs -def test_design_primers_raises( +def test_design_oligos_raises( genome_ref: Path, single_primer_params: PrimerAndAmpliconParameters, ) -> None: - """Test that design_primers() raises when given an invalid argument.""" + """Test that design_oligos() raises when given an invalid argument.""" target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) @@ -148,7 +149,7 @@ def test_design_primers_raises( task=DesignLeftPrimersTask(), ) with pytest.raises(ValueError, match="Primer3 failed"): - Primer3(genome_fasta=genome_ref).design_primers(design_input=invalid_design_input) + Primer3(genome_fasta=genome_ref).design_oligos(design_input=invalid_design_input) # TODO: add other Value Errors @@ -166,7 +167,7 @@ def test_left_primer_valid_designs( with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - left_result = designer.design_primers(design_input=design_input) + left_result = designer.design_oligos(design_input=design_input) designed_lefts: list[Oligo] = left_result.primers() assert all(isinstance(design, Oligo) for design in designed_lefts) for actual_design in designed_lefts: @@ -213,7 +214,7 @@ def test_right_primer_valid_designs( ) with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - right_result: Primer3Result = designer.design_primers(design_input=design_input) + right_result: Primer3Result = designer.design_oligos(design_input=design_input) designed_rights: list[Oligo] = right_result.primers() assert all(isinstance(design, Oligo) for design in designed_rights) @@ -261,7 +262,7 @@ def test_primer_pair_design( task=DesignPrimerPairsTask(), ) with Primer3(genome_fasta=genome_ref) as designer: - pair_result: Primer3Result = designer.design_primers(design_input=design_input) + pair_result: Primer3Result = designer.design_oligos(design_input=design_input) designed_pairs: list[PrimerPair] = pair_result.primer_pairs() assert all(isinstance(design, PrimerPair) for design in designed_pairs) lefts = [primer_pair.left_primer for primer_pair in designed_pairs] @@ -351,7 +352,7 @@ def test_fasta_close_valid( with pytest.raises( RuntimeError, match="Error, trying to use a subprocess that has already been terminated" ): - designer.design_primers(design_input=design_input) + designer.design_oligos(design_input=design_input) @pytest.mark.parametrize( @@ -406,7 +407,7 @@ def test_screen_pair_results( genome_ref: Path, pair_primer_params: PrimerAndAmpliconParameters, ) -> None: - """Test that `_is_valid_primer()` and `_screen_pair_results()` use + """Test that `_has_acceptable_dinuc_run()` and `_screen_pair_results()` use `Primer3Parameters.primer_max_dinuc_bases` to disqualify primers when applicable. Create 2 sets of design input, the only difference being the length of allowable dinucleotide run in a primer (high_threshold = 6, low_threshold = 2). @@ -431,6 +432,7 @@ def test_screen_pair_results( design_input=design_input, designed_primer_pairs=valid_primer_pairs ) assert len(base_dinuc_pair_failures) == 0 + assert design_input.primer_and_amplicon_params is not None for primer_pair in base_primer_pair_designs: assert ( primer_pair.left_primer.longest_dinucleotide_run_length() @@ -440,11 +442,11 @@ def test_screen_pair_results( primer_pair.right_primer.longest_dinucleotide_run_length() <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases ) - assert Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.left_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.left_primer ) - assert Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.right_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.right_primer ) # 1 primer from every pair will fail lowered dinuc threshold of 2 @@ -452,6 +454,7 @@ def test_screen_pair_results( altered_designs, altered_dinuc_failures = designer._screen_pair_results( design_input=altered_design_input, designed_primer_pairs=valid_primer_pairs ) + assert altered_design_input.primer_and_amplicon_params is not None assert [ design.longest_dinucleotide_run_length() > altered_design_input.primer_and_amplicon_params.primer_max_dinuc_bases @@ -554,7 +557,7 @@ def test_primer3_result_primer_pairs_exception( ) -> None: primers: list[Oligo] = valid_left_primers + valid_right_primers result = Primer3Result(filtered_designs=primers, failures=[]) - with pytest.raises(ValueError, match="Cannot call `primer_pairs` on `Primer` results"): + with pytest.raises(ValueError, match="Cannot call `primer_pairs` on `Oligo` results"): result.primer_pairs() @@ -563,7 +566,7 @@ def test_primer3_result_as_primer_pair_result_exception( ) -> None: primers: list[Oligo] = valid_left_primers + valid_right_primers result = Primer3Result(filtered_designs=primers, failures=[]) - with pytest.raises(ValueError, match="Cannot call `as_primer_pair_result` on `Primer` results"): + with pytest.raises(ValueError, match="Cannot call `as_primer_pair_result` on `Oligo` results"): result.as_primer_pair_result()