Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for read groups in EstimatePoolingFractions #639

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
add a unit test
nh13 committed Dec 12, 2020
commit 8548c83a63f9a1f1b80db383babde95b8d260f97
Original file line number Diff line number Diff line change
@@ -119,12 +119,13 @@ class EstimatePoolingFractions
regression.setNoIntercept(true) // Intercept should be at 0!

val metrics = observedSamples.flatMap { observedSample =>
logger.info(f"Examining $observedSample")
val (observedFractions, lociExpectedSampleFractions) = loci.flatMap { locus =>
locus.observedFraction.get(observedSample).map { observedFraction =>
(observedFraction, locus.expectedSampleFractions)
}
}.unzip
logger.info(f"Regressing on ${observedFractions.length}%,d of ${loci.length}%,d that met coverage requirements.")
logger.info(f"Regressing on ${observedFractions.length}%,d of ${loci.length}%,d loci that met coverage requirements.")
regression.newSampleData(
observedFractions,
lociExpectedSampleFractions
@@ -162,6 +163,7 @@ class EstimatePoolingFractions
}
}

logger.info("Writing metrics")
Metric.write(output, metrics)
}

Original file line number Diff line number Diff line change
@@ -43,12 +43,15 @@ class EstimatePoolingFractionsTest extends UnitSpec with ParallelTestExecution {
private val Regions = DataDir.resolve("regions.interval_list")

/** Merges one or more BAMs and returns the path to the merged BAM. */
def merge(bams: Seq[PathToBam]): PathToBam = {
def merge(bams: Seq[PathToBam], sample: Option[String] = None): PathToBam = {
val readers = bams.map(bam => SamSource(bam))

// Mangle the library names in the header so that the merger sees duplicate RGs as different RGs.
readers.zipWithIndex.foreach { case (reader, index) =>
reader.header.getReadGroups.foreach(rg => rg.setLibrary(rg.getLibrary + ":" + index))
reader.header.getReadGroups.foreach { rg =>
rg.setLibrary(rg.getLibrary + ":" + index)
sample.foreach(s => rg.setSample(s))
}
}
val headerMerger = new SamFileHeaderMerger(SortOrder.coordinate, readers.iterator.map(_.header).toJavaList, false)
val iterator = new MergingSamRecordIterator(headerMerger, readers.iterator.map(_.toSamReader).toJavaList, true)
@@ -166,4 +169,30 @@ class EstimatePoolingFractionsTest extends UnitSpec with ParallelTestExecution {
expected should (be >= m.ci99_low and be <= m.ci99_high)
}
}

it should "accurately estimate mixes of two samples across multiple input read groups" in {
val samples = Samples.take(2)
val Seq(bam1, bam2) = Bams.take(2)
val inBam1 = merge(Seq(bam1, bam1, bam1, bam2), sample=Some("Sample1")) // 75% bam1, 25% bam2
val inBam2 = merge(Seq(bam1, bam2, bam2, bam2), sample=Some("Sample2")) // 25% bam1, 75% bam2
val bam = merge(Seq(inBam1, inBam2))
val out = makeTempFile("pooling_metrics.", ".txt")
new EstimatePoolingFractions(vcf=Vcf, bam=bam, output=out, samples=samples, bySample=true).execute()
val metrics = Metric.read[PoolingFractionMetric](out)
metrics should have size 4

val metrics1 = metrics.filter(_.observed_sample == "Sample1")
metrics1 should have size 2
metrics1.foreach {m =>
val expected = if (m.pool_sample == samples.head) 0.75 else 0.25
expected should (be >= m.ci99_low and be <= m.ci99_high)
}

val metrics2 = metrics.filter(_.observed_sample == "Sample2")
metrics2 should have size 2
metrics2.foreach {m =>
val expected = if (m.pool_sample == samples.head) 0.25 else 0.75
expected should (be >= m.ci99_low and be <= m.ci99_high)
}
}
}