Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into cv_README
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed May 10, 2024
2 parents e3e3dc1 + fbd4861 commit 0ade11f
Show file tree
Hide file tree
Showing 20 changed files with 400 additions and 75 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.java-version
.idea
.idea_modules
.java-version
out
jars
project
Expand All @@ -9,3 +10,4 @@ target
.metals
.vscode
.bsp
*.iml
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[![Build Status][github-badge]][github-link]
[![Language][scala-badge]][scala-link]
[![License][license-badge]][license-link]
[![DOI][doi-badge]][doi-link]

[bioconda-badge-link]: https://img.shields.io/conda/dn/bioconda/fgsv.svg?label=Bioconda
[bioconda-link]: http://bioconda.github.io/recipes/fgsv/README.html
Expand All @@ -13,6 +14,8 @@
[scala-link]: https://www.scala-lang.org/
[license-badge]: https://img.shields.io/badge/license-MIT-blue.svg
[license-link]: https://github.com/fulcrumgenomics/fgsv/blob/main/LICENSE
[doi-badge]: https://zenodo.org/badge/454071954.svg
[doi-link]: https://zenodo.org/doi/10.5281/zenodo.10452647

Tools for calling breakpoints and exploring structural variation.

Expand Down
72 changes: 59 additions & 13 deletions build.sc
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,58 @@ trait CommonModule extends SbtModule {
os.copy(assembly.path, os.pwd / Symbol("jars") / jarName, replaceExisting = true)
}

override def repositories: Seq[coursier.Repository] = super.repositories ++ Seq(
MavenRepository("https://oss.sonatype.org/content/repositories/public"),
MavenRepository("https://oss.sonatype.org/content/repositories/snapshots"),
MavenRepository("https://jcenter.bintray.com/"),
MavenRepository("https://broadinstitute.jfrog.io/artifactory/libs-snapshot/")
)
/** Extends the inherited repository list with additional Maven repositories used for dependency resolution.
  *
  * The repositories from `super.repositoriesTask()` come first and the extra ones are appended after them.
  */
override def repositoriesTask: Task[Seq[coursier.Repository]] = T.task {
super.repositoriesTask() ++ Seq(
MavenRepository("https://oss.sonatype.org/content/repositories/public"),
MavenRepository("https://oss.sonatype.org/content/repositories/snapshots"),
// NOTE(review): JCenter has reportedly been sunset by JFrog -- confirm this repository still resolves.
MavenRepository("https://jcenter.bintray.com/"),
MavenRepository("https://broadinstitute.jfrog.io/artifactory/libs-snapshot/")
)
}

/** All Scala compiler options for this package.
  *
  * The returned flags fall into four groups: inliner configuration restricted to `com.fulcrumgenomics.**`,
  * general warning/lint flags (with `-Xfatal-warnings` turning every warning into a compilation failure),
  * unused-code warnings, and compiler performance settings (backend parallelism and classloader caching).
  */
override def scalacOptions: T[Seq[String]] = T {
Seq(
"-opt:inline:com.fulcrumgenomics.**", // Turn on the inliner.
"-opt-inline-from:com.fulcrumgenomics.**", // Tells the inliner that it is allowed to inline things from these classes.
"-Yopt-log-inline", "_", // Optional, logs the inliner activity so you know it is doing something.
"-Yopt-inline-heuristics:at-inline-annotated", // Tells the inliner to use your `@inline` annotations.
"-opt-warnings:at-inline-failed", // Tells you if methods marked with `@inline` cannot be inlined, so you can remove the tag.
// The following are sourced from https://nathankleyn.com/2019/05/13/recommended-scalac-flags-for-2-13/
"-deprecation", // Emit warning and location for usages of deprecated APIs.
"-explaintypes", // Explain type errors in more detail.
"-feature", // Emit warning and location for usages of features that should be imported explicitly.
"-unchecked", // Enable additional warnings where generated code depends on assumptions.
"-Xcheckinit", // Wrap field accessors to throw an exception on uninitialized access.
"-Xfatal-warnings", // Fail the compilation if there are any warnings.
"-Xlint:adapted-args", // Warn if an argument list is modified to match the receiver.
"-Xlint:constant", // Evaluation of a constant arithmetic expression results in an error.
"-Xlint:delayedinit-select", // Selecting member of DelayedInit.
"-Xlint:doc-detached", // A Scaladoc comment appears to be detached from its element.
"-Xlint:inaccessible", // Warn about inaccessible types in method signatures.
"-Xlint:infer-any", // Warn when a type argument is inferred to be `Any`.
"-Xlint:missing-interpolator", // A string literal appears to be missing an interpolator id.
"-Xlint:nullary-unit", // Warn when nullary methods return Unit.
"-Xlint:option-implicit", // Option.apply used implicit view.
"-Xlint:package-object-classes", // Class or object defined in package object.
"-Xlint:poly-implicit-overload", // Parameterized overloaded implicit methods are not visible as view bounds.
"-Xlint:private-shadow", // A private field (or class parameter) shadows a superclass field.
"-Xlint:stars-align", // Pattern sequence wildcard must align with sequence component.
"-Xlint:type-parameter-shadow", // A local type parameter shadows a type already in scope.
"-Ywarn-dead-code", // Warn when dead code is identified.
"-Ywarn-extra-implicit", // Warn when more than one implicit parameter section is defined.
"-Ywarn-numeric-widen", // Warn when numerics are widened.
"-Ywarn-unused:implicits", // Warn if an implicit parameter is unused.
"-Ywarn-unused:imports", // Warn if an import selector is not referenced.
"-Ywarn-unused:locals", // Warn if a local definition is unused.
"-Ywarn-unused:params", // Warn if a value parameter is unused.
"-Ywarn-unused:patvars", // Warn if a variable bound in a pattern is unused.
"-Ywarn-unused:privates", // Warn if a private member is unused.
// Uses the number of available processors, capped at 8.
"-Ybackend-parallelism", Math.min(Runtime.getRuntime.availableProcessors(), 8).toString, // Enable parallelization — scalac max is 16.
"-Ycache-plugin-class-loader:last-modified", // Enables caching of classloaders for compiler plugins
"-Ycache-macro-class-loader:last-modified", // and macro definitions. This can lead to performance improvements.
)
}
}

/** A base trait for versioning modules. */
Expand Down Expand Up @@ -69,12 +115,12 @@ trait ReleaseModule extends JavaModule {


object tools extends CommonModule with PublishModule with ReleaseModule {
def scalaVersion = "2.13.8"
def scalaVersion = "2.13.11"
override def millSourcePath = super.millSourcePath / os.up
override def mainClass = Some("com.fulcrumgenomics.sv.cmdline.SvMain")
override def artifactName = "fgsv"
def gitHash = Process("git rev-parse --short HEAD").lineStream.head
def publishVersion = s"0.1.0-${gitHash}-SNAPSHOT"
def gitHash = Process("git rev-parse --short HEAD").lazyLines.head
// Snapshot version that embeds the short git hash of HEAD. The `s` interpolator is required:
// without it the literal text `${gitHash}` would be published instead of the hash.
def publishVersion = s"0.2.1-${gitHash}-SNAPSHOT"
def pomSettings = PomSettings(
description = artifactName(),
organization = "com.fulcrumgenomics",
Expand All @@ -96,12 +142,12 @@ object tools extends CommonModule with PublishModule with ReleaseModule {

override def ivyDeps = Agg(
ivy"org.scala-lang:scala-compiler:${scalaVersion()}",
ivy"com.fulcrumgenomics:fgbio_2.13:2.1.0".excludeOrg(orgsToExclude:_*)
ivy"com.fulcrumgenomics:fgbio_2.13:2.2.1".excludeOrg(orgsToExclude:_*)
)

object test extends Tests {
override def ivyDeps = Agg(ivy"org.scalatest::scalatest:3.1.0")
override def testFramework = "org.scalatest.tools.Framework"
object test extends SbtModuleTests {
override def ivyDeps = Agg(ivy"org.scalatest::scalatest:3.2.17")
override def testFramework: Target[String] = T { "org.scalatest.tools.Framework" }

// run mill tools.test.singleTest com.fulcrumgenomics.sv.x.y.x.TestClassName
def singleTest(args: String*) = T.command {
Expand Down
32 changes: 26 additions & 6 deletions docs/04_Metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ the type of each field/column is given, including two commonly used types:
|Metric Type|Description|
|-----------|-----------|
|[AggregatedBreakpointPileup](#aggregatedbreakpointpileup)|Aggregated cluster of breakpoint pileups|
|[BedPE](#bedpe)|A simple BEDPE record as defined by [`bedtools`](https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format)|
|[BreakpointPileup](#breakpointpileup)|Represents a pileup of evidence (reads, read-pairs) for a breakpoint|
|[MergedPileup](#mergedpileup)||

Expand All @@ -28,12 +29,12 @@ Aggregated cluster of breakpoint pileups
|id|String|Combined ID retaining the IDs of all constituent breakpoints|
|category|BreakpointCategory|Breakpoint category|
|left_contig|String|Contig name for left side of breakpoint|
|left_min_pos|Int|Minimum coordinate of left breakends|
|left_max_pos|Int|Maximum coordinate of left breakends|
|left_min_pos|Int|Minimum coordinate of left breakends (1-based)|
|left_max_pos|Int|Maximum coordinate of left breakends (1-based)|
|left_strand|Char|Strand at left breakends|
|right_contig|String|Contig name for right side of breakpoint|
|right_min_pos|Int|Minimum coordinate of right breakends|
|right_max_pos|Int|Maximum coordinate of right breakends|
|right_min_pos|Int|Minimum coordinate of right breakends (1-based)|
|right_max_pos|Int|Maximum coordinate of right breakends (1-based)|
|right_strand|Char|Strand at right breakends|
|split_reads|Int|Total number of split reads supporting the breakpoints in the cluster|
|read_pairs|Int|Total number of read pairs supporting the breakpoints in the cluster|
Expand All @@ -50,6 +51,25 @@ Aggregated cluster of breakpoint pileups
|right_targets|Option[String]|The comma-delimited list of target names overlapping the right breakpoint|


### BedPE

A simple BEDPE record as defined by [`bedtools`](https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format). Future compatibility could be implemented for supporting [10x flavored BEDPE files](https://github.com/igvteam/igv/wiki/BedPE-Support).


|Column|Type|Description|
|------|----|-----------|
|chrom1|String|The reference sequence name for the first interval.|
|start1|Int|The 0-based position for the start of the first interval.|
|end1|Int|The 0-based half-open position for the end of the first interval.|
|chrom2|String|The reference sequence name for the second interval.|
|start2|Int|The 0-based position for the start of the second interval.|
|end2|Int|The 0-based half-open position for the end of the second interval.|
|name|String|The name of the paired interval record.|
|score|Int|The score of the paired interval record.|
|strand1|Strand|The strand for the first interval.|
|strand2|Strand|The strand for the second interval.|


### BreakpointPileup

Represents a pileup of evidence (reads, read-pairs) for a breakpoint. If `split_reads` is greater than
Expand All @@ -62,10 +82,10 @@ the only information comes from read-pairs and the breakpoint information should
|------|----|-----------|
|id|String|An ID assigned to the breakpoint that can be used to lookup supporting reads in the BAM.|
|left_contig|String|The contig of chromosome on which the left hand side of the breakpoint exists.|
|left_pos|Int|The position (possibly imprecise) of the left-hand breakend.|
|left_pos|Int|The position (possibly imprecise) of the left-hand breakend (1-based).|
|left_strand|Char|The strand of the left-hand breakend; sequence reads would traverse this strand in order to arrive at the breakend and transit into the right-hand side of the breakpoint.|
|right_contig|String|The contig of chromosome on which the right hand side of the breakpoint exists.|
|right_pos|Int|The position (possibly imprecise) of the right-hand breakend.|
|right_pos|Int|The position (possibly imprecise) of the right-hand breakend (1-based).|
|right_strand|Char|The strand of the right-hand breakend; sequence reads would continue reading onto this strand after transiting the breakpoint from the left breakend.|
|split_reads|Int|The number of templates/inserts with split-read alignments that identified this breakpoint.|
|read_pairs|Int|The number of templates/inserts with read-pair alignments (and without split-read alignments) that identified this breakpoint.|
Expand Down
4 changes: 2 additions & 2 deletions docs/tools/AggregateSvPileup.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ title: AggregateSvPileup
# AggregateSvPileup

## Overview
**Group:** All tools
**Group:** Breakpoint and SV Tools

Merges nearby pileups of reads supporting putative breakpoints.

Expand Down Expand Up @@ -36,7 +36,7 @@ of the overlapping target regions are copied from the `SvPiluep` input (if prese
The output file is a tab-delimited table with one record per aggregated cluster of pileups. Aggregated
pileups are reported with the minimum and maximum (inclusive) coordinates of all pileups in the cluster, a
possible putative structural variant event type supported by the pileups, and the sum of read support from all
pileups in the cluster.
pileups in the cluster. Positions in this file are 1-based positions.

## Arguments

Expand Down
18 changes: 18 additions & 0 deletions docs/tools/AggregateSvPileupToBedPE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
title: AggregateSvPileupToBedPE
---

# AggregateSvPileupToBedPE

## Overview
**Group:** Utility Tools

Convert the output of AggregateSvPileup to BEDPE.

## Arguments

|Name|Flag|Type|Description|Required?|Max # of Values|Default Value(s)|
|----|----|----|-----------|---------|---------------|----------------|
|input|i|FilePath|Input text file of aggregate pileups generated by AggregateSvPileup|Required|1||
|output|o|FilePath|Output text file of the aggregate pileups in BEDPE format.|Required|1||

3 changes: 1 addition & 2 deletions docs/tools/FilterAndMerge.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ title: FilterAndMerge
# FilterAndMerge

## Overview
**Group:** All tools
**Group:** Breakpoint and SV Tools

Filters and merges SVPileup output.

Expand All @@ -15,7 +15,6 @@ Filters and merges SVPileup output.
|----|----|----|-----------|---------|---------------|----------------|
|input|i|FilePath|The input pileup file from SvPileup|Required|1||
|output|o|FilePath|The output filtered and merged SvPileup file|Required|1||
|dict|d|PathToSequenceDictionary|The path to the reference sequence dictionary.|Required|1||
|min-pre|m|Int|The minimum # of observations to examine an input site|Optional|1|1|
|min-post|M|Int|The minimum # of observations to output a site|Optional|1|1|
|slop|s|Int|The maximum # bases between a breakend across adjacent sites|Optional|1|0|
Expand Down
6 changes: 3 additions & 3 deletions docs/tools/SvPileup.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ title: SvPileup
# SvPileup

## Overview
**Group:** All tools
**Group:** Breakpoint and SV Tools

Collates a pileup of putative structural variant supporting reads.

Expand All @@ -15,9 +15,9 @@ Two output files will be created:

1. `<output-prefix>.txt`: a tab-delimited file describing SV pileups, one line per breakpoint event. The returned
breakpoint will be canonicalized such that the "left" side of the breakpoint will have the lower (or equal to)
position on the genome vs. the "right"s side.
position on the genome vs. the "right" side. Positions in this file are 1-based positions.
2. `<output-prefix>.bam`: a SAM/BAM file containing reads that contain SV breakpoint evidence annotated with SAM
tag.
tag.

The `be` SAM tag contains a comma-delimited list of breakpoints to which a given alignment belongs. Each element is
semi-colon delimited, with four fields:
Expand Down
15 changes: 12 additions & 3 deletions docs/tools/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,24 @@ title: fgsv tools

# fgsv tools

The following tools are available in fgsv version 0.1.1-798a5ac.
## All tools
The following tools are available in fgsv version 0.2.0-d603e95.

All tools.
## Breakpoint and SV Tools

Primary tools for calling and transforming breakpoints and SVs.

|Tool|Description|
|----|-----------|
|[AggregateSvPileup](AggregateSvPileup.md)|Merges nearby pileups of reads supporting putative breakpoints|
|[FilterAndMerge](FilterAndMerge.md)|Filters and merges SVPileup output|
|[SvPileup](SvPileup.md)|Collates a pileup of putative structural variant supporting reads|

## Utility Tools

Helper tools for working with breakpoint or SV data.

|Tool|Description|
|----|-----------|
|[AggregateSvPileupToBedPE](AggregateSvPileupToBedPE.md)|Convert the output of AggregateSvPileup to BEDPE|


2 changes: 1 addition & 1 deletion mill
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# This is a wrapper script, that automatically download mill from GitHub release pages
# You can give the required mill version with MILL_VERSION env variable
# If no version is given, it falls back to the value of DEFAULT_MILL_VERSION
DEFAULT_MILL_VERSION=0.10.4
DEFAULT_MILL_VERSION=0.11.7

set -e

Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/com/fulcrumgenomics/sv/BreakpointPileup.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ import com.fulcrumgenomics.util.Metric
*
* @param id an ID assigned to the breakpoint that can be used to lookup supporting reads in the BAM.
* @param left_contig the contig of chromosome on which the left hand side of the breakpoint exists.
* @param left_pos the position (possibly imprecise) of the left-hand breakend.
* @param left_pos the position (possibly imprecise) of the left-hand breakend (1-based, inclusive).
* @param left_strand the strand of the left-hand breakend; sequence reads would traverse this strand
* in order to arrive at the breakend and transit into the right-hand side of the breakpoint.
* @param right_contig the contig of chromosome on which the right hand side of the breakpoint exists.
* @param right_pos the position (possibly imprecise) of the right-hand breakend.
* @param right_pos the position (possibly imprecise) of the right-hand breakend (1-based, inclusive).
* @param right_strand the strand of the right-hand breakend; sequence reads would continue reading onto
* this strand after transiting the breakpoint from the left breakend
* @param split_reads the number of templates/inserts with split-read alignments that identified this breakpoint.
Expand Down
14 changes: 10 additions & 4 deletions src/main/scala/com/fulcrumgenomics/sv/cmdline/ClpGroups.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ import com.fulcrumgenomics.sopt.cmdline.ClpGroup
/** Groups for organizing command line programs for display. */
object ClpGroups {

class _All extends ClpGroup {
override val name: String = "All tools"
override val description: String = "All tools."
class _BreakpointAndSv extends ClpGroup {
override val name: String = "Breakpoint and SV Tools"
override val description: String = "Primary tools for calling and transforming breakpoints and SVs."
}

final val All = classOf[_All]
class _Utilities extends ClpGroup {
override val name: String = "Utility Tools"
override val description: String = "Helper tools for working with breakpoint or SV data."
}

final val BreakpointAndSv = classOf[_BreakpointAndSv]
final val Utilities = classOf[_Utilities]
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,22 @@ class FgSvMetricsDoclet extends FgMetricsDoclet {

find(universe.rootPackage)
.filter(d => d.isClass && !d.isAbstract)
.filter(d => d.parentTypes.exists { case (template, typ) => template.toString == classOf[Metric].getName })
.filter(d => d.parentTypes.exists { case (template, _) => template.toString == classOf[Metric].getName })
}

/** Take the body of a scaladoc comment and renders it into MarkDown. */
override protected def renderBody(body: Body): String = {
val buffer = new StringBuilder

// Takes a block element and renders it into MarkDown and writes it into the buffer
def renderBlock(block: Block, indent: String): Unit = {
def renderBlock(block: Block): Unit = {
(block: @unchecked) match {
case para: Paragraph => render(para.text)
case dlist: DefinitionList => () // TODO
case hr: HorizontalRule => () // TODO
case olist: OrderedList => () // TODO
case title: Title => buffer.append("#" * title.level).append(" "); render(title.text); buffer.append("\n\n")
case ulist: UnorderedList => () // TODO
case para: Paragraph => render(para.text)
case _: DefinitionList => () // TODO
case _: HorizontalRule => () // TODO
case _: OrderedList => () // TODO
case title: Title => buffer.append("#" * title.level).append(" "); render(title.text); buffer.append("\n\n")
case _: UnorderedList => () // TODO
}
}

Expand All @@ -120,7 +120,7 @@ class FgSvMetricsDoclet extends FgMetricsDoclet {
case under: Underline => buffer.append("__"); render(under.text); buffer.append("__")
}

body.blocks.foreach(renderBlock(_, ""))
body.blocks.foreach(renderBlock)
buffer.toString()
}

Expand Down
Loading

0 comments on commit 0ade11f

Please sign in to comment.