Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a tool to make IGV-compat. BEDPE files from AggregateSvPileup #37

Merged
merged 8 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/main/scala/com/fulcrumgenomics/sv/cmdline/ClpGroups.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ import com.fulcrumgenomics.sopt.cmdline.ClpGroup
/** Groups for organizing command line programs for display. */
object ClpGroups {

class _All extends ClpGroup {
override val name: String = "All tools"
override val description: String = "All tools."
class _BreakpointAndSv extends ClpGroup {
override val name: String = "Breakpoint and SV Tools"
override val description: String = "Primary tools for calling and transforming breakpoints and SVs."
}

final val All = classOf[_All]
class _Utilities extends ClpGroup {
override val name: String = "Utility Tools"
override val description: String = "Helper tools for working with breakpoint or SV data."
}

final val BreakpointAndSv = classOf[_BreakpointAndSv]
final val Utilities = classOf[_Utilities]
Comment on lines -8 to +19
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a backwards-compatible change and makes the CLI look like:

Screenshot 2024-04-10 at 12 38 05 PM

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import com.fulcrumgenomics.FgBioDef._
import scala.collection.mutable


@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Merges nearby pileups of reads supporting putative breakpoints.
|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package com.fulcrumgenomics.sv.tools

import com.fulcrumgenomics.commons.CommonsDef._
import com.fulcrumgenomics.commons.io.Writer
import com.fulcrumgenomics.commons.util.LazyLogging
import com.fulcrumgenomics.sopt.{arg, clp}
import com.fulcrumgenomics.sv.cmdline.{ClpGroups, SvTool}
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter
import com.fulcrumgenomics.util.{Io, Metric}
import htsjdk.tribble.annotation.Strand

import java.io.BufferedWriter


@clp(group=ClpGroups.Utilities, description= "Convert the output of AggregateSvPileup to BEDPE.")
class AggregateSvPileupToBedPE(
  @arg(flag='i', doc="Input text file of aggregate pileups generated by AggregateSvPileup") input: FilePath,
  @arg(flag='o', doc="Output text file of the aggregate pileups in BEDPE format.") output: FilePath,
) extends SvTool with LazyLogging {

  // Validate both paths at construction time, before `execute()` is ever invoked.
  Io.assertReadable(input)
  Io.assertCanWriteFile(output)

  /** Reads every [[AggregatedBreakpointPileup]] from `input`, converts each one to a [[BedPE]] record and
    * writes it to `output`.
    *
    * The output writer is closed even when reading or converting a record throws, so that the underlying
    * file handle is never leaked and any already-buffered records are flushed.
    */
  override def execute(): Unit = {
    // The reader is created first so that a problem opening the input surfaces before the output is opened.
    val reader = Metric.iterator[AggregatedBreakpointPileup](input)
    val writer = BedPEWriter(output)

    try {
      reader.foreach(pileup => writer.write(BedPE(pileup)))
    } finally {
      writer.close()
    }
  }
}

/** Companion object for [[AggregateSvPileupToBedPE]]. */
object AggregateSvPileupToBedPE {

/** The IGV-supported BEDPE file extension. */
val BedPEExtension: String = ".bedpe"

/** A simple BEDPE record as defined by `bedtools`:
*
- https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format
*
* Future compatibility could be implemented for supporting 10x flavored BEDPE files:
*
* - https://github.com/igvteam/igv/wiki/BedPE-Support
*
* Note that the field `score` is allowed to be a string per bedtools!
clintval marked this conversation as resolved.
Show resolved Hide resolved
*/
case class BedPE(
// Contig name of the first end of the pair.
chrom1: String,
// Start and end coordinates of the first end.
// NOTE(review): bedtools describes BEDPE starts as 0-based and ends as exclusive; confirm that whatever
// builds this record supplies coordinates in that convention.
start1: Int,
end1: Int,
// Contig name of the second end of the pair.
chrom2: String,
// Start and end coordinates of the second end (same convention caveat as for the first end).
start2: Int,
end2: Int,
// Name (identifier) of the record.
name: String,
// Score of the record; deliberately a String rather than a number.
score: String,
// Strand of the first end.
strand1: Strand,
// Strand of the second end.
strand2: Strand,
) extends Metric

/** Companion object for [[BedPE]]. */
object BedPE {

/** Build a [[BedPE]] record from an [[AggregatedBreakpointPileup]]. */
def apply(pileup: AggregatedBreakpointPileup): BedPE = {
new BedPE(
chrom1 = pileup.left_contig,
start1 = pileup.left_min_pos,
end1 = pileup.left_max_pos + 1,
chrom2 = pileup.right_contig,
start2 = pileup.right_min_pos,
end2 = pileup.right_max_pos + 1,
Comment on lines +77 to +80
Copy link
Member Author

@clintval clintval Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Max positions are 0-based so to make them half-open we need to add 1.

EDIT: This was wrong! See #40

name = pileup.id,
score = pileup.total.toString,
strand1 = Strand.decode(pileup.left_strand),
strand2 = Strand.decode(pileup.right_strand),
)
}

/** A writer class for writing [[BedPE]] records. */
clintval marked this conversation as resolved.
Show resolved Hide resolved
class BedPEWriter(val out: BufferedWriter) extends Writer[BedPE] {

  /** Emits `record` as a single line: its ten fields in declaration order, separated by tabs and
    * terminated by the platform line separator of the underlying writer.
    */
  override def write(record: BedPE): Unit = {
    val columns: Seq[String] = IndexedSeq(
      record.chrom1,
      Integer.toString(record.start1),
      Integer.toString(record.end1),
      record.chrom2,
      Integer.toString(record.start2),
      Integer.toString(record.end2),
      record.name,
      record.score,
      record.strand1.toString,
      record.strand2.toString,
    )

    columns.zipWithIndex.foreach { case (column, index) =>
      // A tab goes between columns only, never before the first or after the last.
      if (index > 0) out.write('\t')
      out.write(column)
    }
    out.newLine()
  }

  /** Closes the wrapped writer. */
  override def close(): Unit = out.close()
}

/** Companion object to [[BedPEWriter]]. */
object BedPEWriter {

  /** Builds a [[BedPEWriter]] that writes to the file at `path`. */
  def apply(path: PathToIntervals): BedPEWriter = apply(Io.toWriter(path))

  /** Builds a [[BedPEWriter]] on top of any [[java.io.Writer]].
    *
    * A writer that is already a [[java.io.BufferedWriter]] is used as-is; anything else is wrapped in
    * a new [[java.io.BufferedWriter]] first.
    */
  def apply(writer: java.io.Writer): BedPEWriter = {
    val buffered: BufferedWriter = writer match {
      case alreadyBuffered: BufferedWriter => alreadyBuffered
      case unbuffered                      => new BufferedWriter(unbuffered)
    }
    new BedPEWriter(buffered)
  }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import com.fulcrumgenomics.util.{Io, Metric}

import scala.collection.immutable.IndexedSeq

@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Filters and merges SVPileup output.
""")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ object TargetBedRequirement extends FgBioEnum[TargetBedRequirement] {
}


@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Collates a pileup of putative structural variant supporting reads.
|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package com.fulcrumgenomics.sv.tools

import com.fulcrumgenomics.commons.io.Io
import com.fulcrumgenomics.commons.util.DelimitedDataParser
import com.fulcrumgenomics.sv.UnitSpec
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.{BedPE, BedPEExtension}
import com.fulcrumgenomics.util.Metric
import htsjdk.tribble.annotation.Strand


/** Unit tests for [[AggregateSvPileupToBedPE]]. */
class AggregateSvPileupToBedPETest extends UnitSpec {

  /** A single-position, inter-contig aggregate pileup used as the conversion input. */
  private val testPileup = AggregatedBreakpointPileup(
    id            = "112",
    category      = "Inter-contig rearrangement",
    left_contig   = "chr1",
    left_min_pos  = 100,
    left_max_pos  = 100,
    left_strand   = '+',
    right_contig  = "chr3",
    right_min_pos = 200,
    right_max_pos = 200,
    right_strand  = '-',
    split_reads   = 1,
    read_pairs    = 1,
    total         = 2,
    left_pileups  = PositionList(100),
    right_pileups = PositionList(200),
  )

  /** The BEDPE record that `testPileup` is expected to convert to. */
  private val testBedPe = BedPE(
    chrom1  = "chr1",
    start1  = 100,
    end1    = 101,
    chrom2  = "chr3",
    start2  = 200,
    end2    = 201,
    name    = "112",
    score   = "2",
    strand1 = Strand.POSITIVE,
    strand2 = Strand.NEGATIVE,
  )

  /** Field names of [[BedPE]] as reported by reflection; used as the header when parsing the
    * header-less BEDPE output.
    */
  private val bedPeFields: Seq[String] = classOf[BedPE].getDeclaredFields.map(_.getName).toIndexedSeq

  /** The string values, in column order, that `record` is expected to have on disk. */
  private def expectedFields(record: BedPE): Seq[String] = IndexedSeq(
    record.chrom1,
    Integer.toString(record.start1),
    Integer.toString(record.end1),
    record.chrom2,
    Integer.toString(record.start2),
    Integer.toString(record.end2),
    record.name,
    record.score,
    record.strand1.toString,
    record.strand2.toString,
  )

  /** Parses the tab-delimited file at `path`, asserts that it holds exactly one record, and returns
    * that record's values in column order.
    */
  private def readSingleRecord(path: java.nio.file.Path): Seq[String] = {
    val records = DelimitedDataParser(path, delimiter = '\t', header = bedPeFields).toSeq
    records.length shouldBe 1
    bedPeFields.map(field => records.head.get[String](field).value)
  }

  "AggregateSvPileupToBedPE.BedPE" should "be instantiated from an AggregateBreakpointPileup" in {
    BedPE(testPileup) shouldBe testBedPe
  }

  "AggregateSvPileupToBedPE.BedPEWriter" should "write a BedPE record" in {
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    val writer = BedPEWriter(output)
    writer.write(testBedPe)
    writer.close()

    readSingleRecord(output) should contain theSameElementsInOrderAs expectedFields(testBedPe)
  }

  "AggregateSvPileupToBedPE" should "convert an AggregateSvPileup output to a BEDPE file" in {
    val input  = Io.makeTempFile(this.getClass.getSimpleName, ".txt")
    val output = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    Metric.write[AggregatedBreakpointPileup](input, testPileup)

    new AggregateSvPileupToBedPE(input = input, output = output).execute()

    readSingleRecord(output) should contain theSameElementsInOrderAs expectedFields(testBedPe)
  }
}
Loading