Skip to content

Commit

Permalink
Ensure SvPileup ignores duplicate records by default
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Dec 10, 2024
1 parent 0f5a127 commit 241c040
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 14 deletions.
42 changes: 28 additions & 14 deletions src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class SvPileup
|side of an aligned segment when merging segments.""") slop: Int = 5,
@arg(flag='t', doc="Optional bed file of target regions") targetsBed: Option[FilePath] = None,
@arg(flag='T', doc="Requirement on if each side of the breakpoint must overlap a target. Will always annotate each side of the breakpoint.")
targetsBedRequirement: TargetBedRequirement.Requirement = TargetBedRequirement.AnnotateOnly
targetsBedRequirement: TargetBedRequirement.Requirement = TargetBedRequirement.AnnotateOnly,
@arg(doc="Whether to include duplicate marked records in the pileup or not.") val includeDuplicates: Boolean = false
) extends SvTool {

import SvPileup._
Expand All @@ -145,7 +146,14 @@ class SvPileup

Bams.templateIterator(source)
.tapEach(t => progress.record(t.allReads.next()))
.flatMap(template => filterTemplate(template, minPrimaryMapq=minPrimaryMappingQuality, minSupplementaryMapq=minSupplementaryMappingQuality))
.flatMap { template =>
filterTemplate(
template,
minPrimaryMapq=minPrimaryMappingQuality,
minSupplementaryMapq=minSupplementaryMappingQuality,
includeDuplicates=includeDuplicates,
)
}
.foreach { template =>
// Find the breakpoints
val evidences = findBreakpoints(
Expand Down Expand Up @@ -297,18 +305,24 @@ object SvPileup extends LazyLogging {
*/
def filterTemplate(t: Template,
minPrimaryMapq: Int,
minSupplementaryMapq: Int): Option[Template] = {
val r1PrimaryOk = t.r1.exists(r => r.mapped && r.mapq >= minPrimaryMapq)
val r2PrimaryOk = t.r2.exists(r => r.mapped && r.mapq >= minPrimaryMapq)

if (!r1PrimaryOk && !r2PrimaryOk) None else Some(
Template(
r1 = if (r1PrimaryOk) t.r1 else None,
r2 = if (r2PrimaryOk) t.r2 else None,
r1Supplementals = if (r1PrimaryOk) t.r1Supplementals.filter(_.mapq >= minSupplementaryMapq) else Nil,
r2Supplementals = if (r2PrimaryOk) t.r2Supplementals.filter(_.mapq >= minSupplementaryMapq) else Nil,
)
)
minSupplementaryMapq: Int,
includeDuplicates: Boolean = false): Option[Template] = {
t.allReads.filter(rec => includeDuplicates || !rec.duplicate).toList match {
case Nil => None
case records =>
val duplicateFiltered = Template(records.iterator)
val r1PrimaryOk = duplicateFiltered.r1.exists(r => r.mapped && r.mapq >= minPrimaryMapq)
val r2PrimaryOk = duplicateFiltered.r2.exists(r => r.mapped && r.mapq >= minPrimaryMapq)

if (!r1PrimaryOk && !r2PrimaryOk) None else Some(
Template(
r1 = if (r1PrimaryOk) duplicateFiltered.r1 else None,
r2 = if (r2PrimaryOk) duplicateFiltered.r2 else None,
r1Supplementals = if (r1PrimaryOk) duplicateFiltered.r1Supplementals.filter(_.mapq >= minSupplementaryMapq) else Nil,
r2Supplementals = if (r2PrimaryOk) duplicateFiltered.r2Supplementals.filter(_.mapq >= minSupplementaryMapq) else Nil,
)
)
}
}


Expand Down
88 changes: 88 additions & 0 deletions src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,94 @@ class SvPileupTest extends UnitSpec {
SvPileup.filterTemplate(template, minPrimaryMapq=30, minSupplementaryMapq=20) shouldBe None
}

it should s"ignore records for the entire ordinal where R1 (true) or R2 (false) are marked as duplicates" in {
val r1 = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = false, mapq = 50)
val r2 = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = false, mapq = 50)
val r1supp = r("chr7", 800, Plus, r=1, cigar="50S50M", supp=true, mapq=50)
val r2supp = r("chr7", 800, Plus, r=2, cigar="50S50M", supp=true, mapq=50)

val template = t(r1, r2, r1supp, r2supp)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe template

r1.duplicate = true
r2.duplicate = false

val expected = template.copy(r1=None, r1Supplementals = Nil, r1Secondaries = Nil)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe expected
}

it should s"ignore records for the entire ordinal where R1 (false) or R2 (true) are marked as duplicates" in {
val r1 = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = false, mapq = 50)
val r2 = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = false, mapq = 50)
val r1supp = r("chr7", 800, Plus, r=1, cigar="50S50M", supp=true, mapq=50)
val r2supp = r("chr7", 800, Plus, r=2, cigar="50S50M", supp=true, mapq=50)

val template = t(r1, r2, r1supp, r2supp)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe template

r1.duplicate = false
r2.duplicate = true

val expected = template.copy(r2=None, r2Supplementals = Nil, r2Secondaries = Nil)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe expected
}

it should s"ignore all records where R1 (true) or R2 (true) are both marked as duplicates" in {
val r1 = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = false, mapq = 50)
val r2 = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = false, mapq = 50)
val r1supp = r("chr7", 800, Plus, r=1, cigar="50S50M", supp=true, mapq=50)
val r2supp = r("chr7", 800, Plus, r=2, cigar="50S50M", supp=true, mapq=50)

val template = t(r1, r2, r1supp, r2supp)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe template

r1.duplicate = true
r2.duplicate = true

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20) shouldBe None
}

it should "ignore duplicate supplementals when asked to" in {
val r1 = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = false, mapq = 50)
val r2 = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = false, mapq = 50)
val r1supp = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = true, mapq = 50)
val r2supp = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = true, mapq = 50)

val template = t(r1, r2, r1supp, r2supp)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe template

r1supp.duplicate = true
r2supp.duplicate = true

val expected = t(r1, r2)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20).value shouldBe expected
}

it should "not ignore duplicate records when asked to" in {
val r1 = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = false, mapq = 50)
val r2 = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = false, mapq = 50)
val r1supp = r("chr1", 100, Plus, r = 1, cigar = "100M", supp = true, mapq = 50)
val r2supp = r("chr1", 300, Plus, r = 2, cigar = "100M", supp = true, mapq = 50)

val template = t(r1, r2, r1supp, r2supp)

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20, includeDuplicates = true).value shouldBe template

r1.duplicate = true
r2.duplicate = true
r1supp.duplicate = true
r2supp.duplicate = true

SvPileup.filterTemplate(template, minPrimaryMapq = 30, minSupplementaryMapq = 20, includeDuplicates = true).value shouldBe template
}

/** Writes the given BAM records to a BAM file */
private def toInput(rec: SamRecord*): PathToBam = {
val path = makeTempFile("input.", ".bam")
Expand Down

0 comments on commit 241c040

Please sign in to comment.