-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a tool to make IGV-compat. BEDPE files from AggregateSvPileup
- Loading branch information
Showing
6 changed files
with
257 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
src/main/scala/com/fulcrumgenomics/sv/tools/AggregateSvPileupToBedPE.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
package com.fulcrumgenomics.sv.tools | ||
|
||
import com.fulcrumgenomics.commons.CommonsDef._ | ||
import com.fulcrumgenomics.commons.io.Writer | ||
import com.fulcrumgenomics.commons.util.LazyLogging | ||
import com.fulcrumgenomics.sopt.{arg, clp} | ||
import com.fulcrumgenomics.sv.cmdline.{ClpGroups, SvTool} | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter | ||
import com.fulcrumgenomics.util.{Io, Metric} | ||
import htsjdk.tribble.annotation.Strand | ||
|
||
import java.io.BufferedWriter | ||
|
||
|
||
/** Command line tool that converts the text output of `AggregateSvPileup` into a BEDPE file.
  *
  * @param input  path to the aggregate pileup text file to read.
  * @param output path to the BEDPE file to write.
  */
@clp(group=ClpGroups.Utilities, description= "Convert the output of AggregateSvPileup to BEDPE.")
class AggregateSvPileupToBedPE(
  @arg(flag='i', doc="Input text file of aggregate pileups generated by AggregateSvPileup") input: FilePath,
  @arg(flag='o', doc="Output text file of the aggregate pileups in BEDPE format.") output: FilePath,
) extends SvTool with LazyLogging {

  // Validate both paths when the tool is constructed, before any work is attempted.
  Io.assertReadable(input)
  Io.assertCanWriteFile(output)

  /** Reads every aggregated pileup from `input`, converts each one to a [[BedPE]] record, and
    * writes the records to `output`.
    */
  override def execute(): Unit = {
    val reader = Metric.iterator[AggregatedBreakpointPileup](input)
    val writer = BedPEWriter(output)

    // Close the writer even if reading or conversion fails part-way through, so the output
    // handle is never leaked and any buffered records are flushed.
    try {
      reader.map(BedPE.apply).foreach(writer.write)
    } finally {
      writer.close()
    }
  }
}
|
||
/** Companion to [[AggregateSvPileupToBedPE]] that holds the BEDPE record model and its writer. */
object AggregateSvPileupToBedPE {

  /** File extension used for BEDPE files that IGV supports. */
  val BedPEExtension: String = ".bedpe"

  /** A minimal BEDPE record following the `bedtools` definition:
    *
    *   - https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format
    *
    * Support for the 10x flavor of BEDPE could be added in the future:
    *
    *   - https://github.com/igvteam/igv/wiki/BedPE-Support
    *
    * The `score` field is typed as a string because bedtools allows non-numeric scores.
    */
  case class BedPE(
    chrom1: String,
    start1: Int,
    end1: Int,
    chrom2: String,
    start2: Int,
    end2: Int,
    name: String,
    score: String,
    strand1: Strand,
    strand2: Strand,
  ) extends Metric

  /** Factories and I/O helpers for [[BedPE]]. */
  object BedPE {

    /** Converts an [[AggregatedBreakpointPileup]] into a [[BedPE]] record.
      *
      * Start coordinates are the pileup minimum positions unchanged; end coordinates are the
      * pileup maximum positions plus one.
      *
      * NOTE(review): bedtools documents BEDPE starts as 0-based; confirm the coordinate base of
      * the pileup positions against the producer of [[AggregatedBreakpointPileup]].
      */
    def apply(pileup: AggregatedBreakpointPileup): BedPE = {
      val leftEnd     = pileup.left_max_pos + 1
      val rightEnd    = pileup.right_max_pos + 1
      val leftStrand  = Strand.decode(pileup.left_strand)
      val rightStrand = Strand.decode(pileup.right_strand)

      new BedPE(
        chrom1  = pileup.left_contig,
        start1  = pileup.left_min_pos,
        end1    = leftEnd,
        chrom2  = pileup.right_contig,
        start2  = pileup.right_min_pos,
        end2    = rightEnd,
        name    = pileup.id,
        score   = pileup.total.toString,
        strand1 = leftStrand,
        strand2 = rightStrand,
      )
    }

    /** Writes [[BedPE]] records as tab-separated lines to a buffered writer. */
    class BedPEWriter(val out: BufferedWriter) extends Writer[BedPE] {

      /** Emits one record as a single line: the ten columns in BEDPE order, separated by tabs. */
      override def write(record: BedPE): Unit = {
        val columns: Seq[String] = Seq(
          record.chrom1,
          record.start1.toString,
          record.end1.toString,
          record.chrom2,
          record.start2.toString,
          record.end2.toString,
          record.name,
          record.score,
          record.strand1.toString,
          record.strand2.toString,
        )

        // A tab precedes every column except the first, so the line has no trailing delimiter.
        columns.zipWithIndex.foreach { case (column, index) =>
          if (index > 0) out.write('\t')
          out.write(column)
        }
        out.newLine()
      }

      /** Closes the wrapped writer. */
      override def close(): Unit = out.close()
    }

    /** Factories for [[BedPEWriter]]. */
    object BedPEWriter {

      /** Builds a [[BedPEWriter]] that writes to the file at `path`. */
      def apply(path: PathToIntervals): BedPEWriter = BedPEWriter(Io.toWriter(path))

      /** Builds a [[BedPEWriter]] on top of any [[java.io.Writer]], adding buffering only when
        * the supplied writer is not already a [[BufferedWriter]].
        */
      def apply(writer: java.io.Writer): BedPEWriter = {
        val buffered = writer match {
          case alreadyBuffered: BufferedWriter => alreadyBuffered
          case unbuffered                      => new BufferedWriter(unbuffered)
        }
        new BedPEWriter(buffered)
      }
    }
  }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
117 changes: 117 additions & 0 deletions
117
src/test/scala/com/fulcrumgenomics/sv/tools/AggregateSvPileupToBedPETest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package com.fulcrumgenomics.sv.tools | ||
|
||
import com.fulcrumgenomics.commons.io.Io | ||
import com.fulcrumgenomics.commons.util.DelimitedDataParser | ||
import com.fulcrumgenomics.sv.UnitSpec | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter | ||
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.{BedPE, BedPEExtension} | ||
import com.fulcrumgenomics.util.Metric | ||
import htsjdk.tribble.annotation.Strand | ||
|
||
|
||
/** Unit tests for [[AggregateSvPileupToBedPE]]. */
class AggregateSvPileupToBedPETest extends UnitSpec {

  /** The BEDPE column names in file order.
    *
    * These are listed explicitly rather than derived from `classOf[BedPE].getDeclaredFields`
    * because the JDK does not guarantee any particular order for reflected fields.
    */
  private val BedPEColumns: IndexedSeq[String] = IndexedSeq(
    "chrom1", "start1", "end1", "chrom2", "start2", "end2", "name", "score", "strand1", "strand2"
  )

  /** A test aggregate breakpoint pileup. */
  private val testPileup = AggregatedBreakpointPileup(
    id            = "112",
    category      = "Inter-contig rearrangement",
    left_contig   = "chr1",
    left_min_pos  = 100,
    left_max_pos  = 100,
    left_strand   = '+',
    right_contig  = "chr3",
    right_min_pos = 200,
    right_max_pos = 200,
    right_strand  = '-',
    split_reads   = 1,
    read_pairs    = 1,
    total         = 2,
    left_pileups  = PositionList(100),
    right_pileups = PositionList(200),
  )

  /** The BEDPE record that [[testPileup]] is expected to convert to. */
  private val testBedPE = BedPE(
    chrom1  = "chr1",
    start1  = 100,
    end1    = 101,
    chrom2  = "chr3",
    start2  = 200,
    end2    = 201,
    name    = "112",
    score   = "2",
    strand1 = Strand.POSITIVE,
    strand2 = Strand.NEGATIVE,
  )

  /** Returns the column values, in file order, that `record` is expected to be written as. */
  private def expectedColumns(record: BedPE): IndexedSeq[String] = IndexedSeq(
    record.chrom1,
    Integer.toString(record.start1),
    Integer.toString(record.end1),
    record.chrom2,
    Integer.toString(record.start2),
    Integer.toString(record.end2),
    record.name,
    record.score,
    record.strand1.toString,
    record.strand2.toString,
  )

  /** Parses `path` as header-less tab-delimited text, asserts that it holds exactly one row,
    * and returns that row's column values in file order.
    */
  private def readSingleRow(path: java.nio.file.Path): Seq[String] = {
    val records = DelimitedDataParser(path, delimiter = '\t', header = BedPEColumns).toSeq
    records.length shouldBe 1
    BedPEColumns.map(column => records.head.get[String](column).value)
  }

  "AggregateSvPileupToBedPE.BedPE" should "be instantiated from an AggregateBreakpointPileup" in {
    BedPE(testPileup) shouldBe testBedPE
  }

  "AggregateSvPileupToBedPE.BedPEWriter" should "write a BedPE record" in {
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    val writer = BedPEWriter(output)
    writer.write(testBedPE)
    writer.close()

    readSingleRow(output) should contain theSameElementsInOrderAs expectedColumns(testBedPE)
  }

  "AggregateSvPileupToBedPE" should "convert an AggregateSvPileup output to a BEDPE file" in {
    val input  = Io.makeTempFile(this.getClass.getSimpleName, ".txt")
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    Metric.write[AggregatedBreakpointPileup](input, testPileup)

    new AggregateSvPileupToBedPE(input = input, output = output).execute()

    readSingleRow(output) should contain theSameElementsInOrderAs expectedColumns(testBedPE)
  }
}