-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a tool to make IGV-compat. BEDPE files from AggregateSvPileup (#37)
- Loading branch information
Showing
12 changed files
with
296 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- | ||
title: AggregateSvPileupToBedPE | ||
--- | ||
|
||
# AggregateSvPileupToBedPE | ||
|
||
## Overview | ||
**Group:** Utility Tools | ||
|
||
Convert the output of AggregateSvPileup to BEDPE. | ||
|
||
## Arguments | ||
|
||
|Name|Flag|Type|Description|Required?|Max # of Values|Default Value(s)| | ||
|----|----|----|-----------|---------|---------------|----------------| | ||
|input|i|FilePath|Input text file of aggregate pileups generated by AggregateSvPileup|Required|1|| | ||
|output|o|FilePath|Output text file of the aggregate pileups in BEDPE format.|Required|1|| | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
114 changes: 114 additions & 0 deletions
114
src/main/scala/com/fulcrumgenomics/sv/tools/AggregateSvPileupToBedPE.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package com.fulcrumgenomics.sv.tools | ||
|
||
import com.fulcrumgenomics.commons.CommonsDef._ | ||
import com.fulcrumgenomics.commons.io.Writer | ||
import com.fulcrumgenomics.commons.util.LazyLogging | ||
import com.fulcrumgenomics.sopt.{arg, clp} | ||
import com.fulcrumgenomics.sv.cmdline.{ClpGroups, SvTool} | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter | ||
import com.fulcrumgenomics.util.{Io, Metric} | ||
import htsjdk.tribble.annotation.Strand | ||
|
||
import java.io.BufferedWriter | ||
|
||
|
||
/** Command-line tool that converts the output of AggregateSvPileup into a BEDPE file.
  *
  * Every aggregated breakpoint pileup read from the input is converted into one [[BedPE]] record and
  * written as a single tab-delimited line. No header line is written to the output.
  *
  * @param input  the text file of aggregate pileups generated by AggregateSvPileup.
  * @param output the path that the BEDPE records will be written to.
  */
@clp(group=ClpGroups.Utilities, description= "Convert the output of AggregateSvPileup to BEDPE.")
class AggregateSvPileupToBedPE(
  @arg(flag='i', doc="Input text file of aggregate pileups generated by AggregateSvPileup") input: FilePath,
  @arg(flag='o', doc="Output text file of the aggregate pileups in BEDPE format.") output: FilePath,
) extends SvTool with LazyLogging {

  // Fail fast at construction time if the input cannot be read or the output cannot be written.
  Io.assertReadable(input)
  Io.assertCanWriteFile(output)

  /** Reads every aggregated pileup from the input and writes it as a BEDPE record to the output. */
  override def execute(): Unit = {
    val reader = Metric.iterator[AggregatedBreakpointPileup](input)
    val writer = BedPEWriter(output)

    // Close the writer even when reading or converting a record fails, so that the output file handle is
    // always released and whatever was buffered so far is flushed to disk.
    try reader.map(BedPE.apply).foreach(writer.write)
    finally writer.close()
  }
}
|
||
/** Companion object for [[AggregateSvPileupToBedPE]] holding the BEDPE record model and its writer. */
object AggregateSvPileupToBedPE {

  /** The BEDPE file extension supported by IGV. */
  val BedPEExtension: String = ".bedpe"

  /** A simple BEDPE record as defined by
    * [[https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format bedtools]].
    *
    * Support for [[https://github.com/igvteam/igv/wiki/BedPE-Support 10x flavored BEDPE files]] could be
    * added in the future.
    *
    * @param chrom1  the reference sequence name for the first interval.
    * @param start1  the 0-based position for the start of the first interval.
    * @param end1    the 0-based half-open position for the end of the first interval.
    * @param chrom2  the reference sequence name for the second interval.
    * @param start2  the 0-based position for the start of the second interval.
    * @param end2    the 0-based half-open position for the end of the second interval.
    * @param name    the name of the paired interval record.
    * @param score   the score of the paired interval record.
    * @param strand1 the strand for the first interval.
    * @param strand2 the strand for the second interval.
    */
  case class BedPE(
    chrom1: String,
    start1: Int,
    end1: Int,
    chrom2: String,
    start2: Int,
    end2: Int,
    name: String,
    score: Int,
    strand1: Strand,
    strand2: Strand,
  ) extends Metric

  /** Companion object for [[BedPE]]. */
  object BedPE {

    /** Converts an [[AggregatedBreakpointPileup]] into a [[BedPE]] record.
      *
      * The minimum positions of the pileup are used unchanged as the interval starts, the maximum positions
      * plus one are used as the interval ends, the pileup ID becomes the record name, and the pileup total
      * becomes the score.
      *
      * @param pileup the aggregated breakpoint pileup to convert.
      * @return the equivalent BEDPE record.
      */
    def apply(pileup: AggregatedBreakpointPileup): BedPE = {
      val leftStrand  = Strand.decode(pileup.left_strand)
      val rightStrand = Strand.decode(pileup.right_strand)

      new BedPE(
        chrom1  = pileup.left_contig,
        start1  = pileup.left_min_pos,
        end1    = pileup.left_max_pos + 1,
        chrom2  = pileup.right_contig,
        start2  = pileup.right_min_pos,
        end2    = pileup.right_max_pos + 1,
        name    = pileup.id,
        score   = pileup.total,
        strand1 = leftStrand,
        strand2 = rightStrand,
      )
    }

    /** A dedicated writer for [[BedPE]] records, needed because BEDPE files do not by default have a header.
      *
      * @param out the buffered writer that records are written to.
      */
    class BedPEWriter(val out: BufferedWriter) extends Writer[BedPE] {

      /** Writes a single [[BedPE]] record as one tab-delimited line to the underlying writer. */
      override def write(record: BedPE): Unit = {
        val line = record.values.mkString("\t")
        out.write(line)
        out.newLine()
      }

      /** Closes the underlying writer. */
      override def close(): Unit = out.close()
    }

    /** Companion object to [[BedPEWriter]]. */
    object BedPEWriter {

      /** Builds a [[BedPEWriter]] that writes to the file at the provided path. */
      def apply(path: PathToIntervals): BedPEWriter = BedPEWriter(Io.toWriter(path))

      /** Builds a [[BedPEWriter]] from a [[java.io.Writer]], wrapping it in a buffer only if it is not
        * already a [[java.io.BufferedWriter]].
        */
      def apply(writer: java.io.Writer): BedPEWriter = {
        val buffered = writer match {
          case alreadyBuffered: BufferedWriter => alreadyBuffered
          case unbuffered                      => new BufferedWriter(unbuffered)
        }
        new BedPEWriter(buffered)
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
117 changes: 117 additions & 0 deletions
117
src/test/scala/com/fulcrumgenomics/sv/tools/AggregateSvPileupToBedPETest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package com.fulcrumgenomics.sv.tools | ||
|
||
import com.fulcrumgenomics.commons.io.Io | ||
import com.fulcrumgenomics.commons.util.DelimitedDataParser | ||
import com.fulcrumgenomics.sv.UnitSpec | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.{BedPE, BedPEExtension} | ||
import com.fulcrumgenomics.util.Metric | ||
import htsjdk.tribble.annotation.Strand | ||
|
||
|
||
/** Unit tests for [[AggregateSvPileupToBedPE]], its [[BedPE]] record, and its [[BedPEWriter]]. */
class AggregateSvPileupToBedPETest extends UnitSpec {

  /** An aggregate breakpoint pileup fixture shared across tests. */
  private val pileupFixture = AggregatedBreakpointPileup(
    id            = "112",
    category      = "Inter-contig rearrangement",
    left_contig   = "chr1",
    left_min_pos  = 100,
    left_max_pos  = 100,
    left_strand   = '+',
    right_contig  = "chr3",
    right_min_pos = 200,
    right_max_pos = 200,
    right_strand  = '-',
    split_reads   = 1,
    read_pairs    = 1,
    total         = 2,
    left_pileups  = PositionList(100),
    right_pileups = PositionList(200),
  )

  /** The BEDPE record that the pileup fixture is expected to convert into. */
  private val bedPEFixture = BedPE(
    chrom1  = "chr1",
    start1  = 100,
    end1    = 101,
    chrom2  = "chr3",
    start2  = 200,
    end2    = 201,
    name    = "112",
    score   = 2,
    strand1 = Strand.POSITIVE,
    strand2 = Strand.NEGATIVE,
  )

  /** The declared field names of [[BedPE]], supplied as the header when parsing the header-less output. */
  private def bedPEFieldNames: Seq[String] = classOf[BedPE].getDeclaredFields.map(_.getName).toIndexedSeq

  /** Renders the fields of a [[BedPE]] record as the strings expected in a written BEDPE line. */
  private def expectedFieldsOf(record: BedPE): IndexedSeq[String] = IndexedSeq(
    record.chrom1,
    record.start1.toString,
    record.end1.toString,
    record.chrom2,
    record.start2.toString,
    record.end2.toString,
    record.name,
    record.score.toString,
    record.strand1.toString,
    record.strand2.toString,
  )

  "AggregateSvPileupToBedPE.BedPE" should "be instantiated from an AggregateBreakpointPileup" in {
    BedPE(pileupFixture) shouldBe bedPEFixture
  }

  "AggregateSvPileupToBedPE.BedPEWriter" should "write a BedPE record" in {
    val record   = bedPEFixture
    val expected = expectedFieldsOf(record)

    // Write the single record to a temporary BEDPE file.
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    val writer = BedPEWriter(output)
    writer.write(record)
    writer.close()

    // Parse the file back, supplying the header ourselves since the writer emits none.
    val fields  = bedPEFieldNames
    val records = DelimitedDataParser(output, delimiter = '\t', header = fields).toSeq
    records.length shouldBe 1
    val actual = fields.map(field => records.head.get[String](field).value)
    actual should contain theSameElementsInOrderAs expected
  }

  "AggregateSvPileupToBedPE" should "convert an AggregateSvPileup output to a BEDPE file" in {
    val expected = expectedFieldsOf(bedPEFixture)

    // Write the pileup fixture to disk so that the tool can be run end-to-end on it.
    val input  = Io.makeTempFile(this.getClass.getSimpleName, ".txt")
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    Metric.write[AggregatedBreakpointPileup](input, pileupFixture)

    new AggregateSvPileupToBedPE(input = input, output = output).execute()

    // Parse the tool output back, supplying the header ourselves since the output has none.
    val fields  = bedPEFieldNames
    val records = DelimitedDataParser(output, delimiter = '\t', header = fields).toSeq
    records.length shouldBe 1
    val actual = fields.map(field => records.head.get[String](field).value)
    actual should contain theSameElementsInOrderAs expected
  }
}