Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Release v15.0.0 #1422

Merged
merged 6 commits into from
Apr 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions BALSAMIC/assets/scripts/igh_dux4_detection.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash
#
# Count reads supporting an IGH::DUX4 rearrangement with samtools and write the
# result as a single-record, bgzip-compressed and tabix-indexed VCF.
#
# Usage: igh_dux4_detection.sh <genome_version> <output_vcf> <tumor_bam> [normal_bam]
#   genome_version  hg19 or hg38; selects the coordinates used below
#   output_vcf      path of the compressed VCF to create (must end in .gz)
#   tumor_bam       tumor alignment file
#   normal_bam      optional normal alignment file; adds a NORMAL sample column

# Stop on failing commands, unset variables and failing pipeline stages, so a
# failed samtools call cannot end up as an empty DV value in the VCF.
set -euo pipefail

# Check if at least 3 arguments are provided
if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <genome_version> <output_vcf> <tumor_bam> [normal_bam]" >&2
    exit 1
fi

# Assign variables
genome_version="$1"
output_vcf="$2"
tumor_bam="$3"
normal_bam="${4:-}"

# The uncompressed VCF is written next to the final output and compressed by
# bgzip, which appends ".gz"; this only yields $output_vcf if it ends in .gz.
case "$output_vcf" in
    *.gz) ;;
    *)
        echo "Output VCF must end in .gz. Given: $output_vcf" >&2
        exit 1
        ;;
esac
output_vcf_tmp="${output_vcf%.gz}"

# Print given arguments
echo "genome_version: $genome_version"
echo "output vcf: $output_vcf"
echo "tumor bam: $tumor_bam"
echo "normal bam: $normal_bam"

# Set chr positions depending on the genome version
# NOTE(review): for hg38 the samtools query region below is chr-prefixed
# (chr14) while the rnext comparison and the VCF record use unprefixed names
# (4, 14) -- confirm this matches the contig naming of the hg38 reference.
if [ "$genome_version" = "hg19" ]; then
    igh_chr="14"
    igh_pos="106032614"
    dux4_chr="4"
    dux4_pos="190988100"
elif [ "$genome_version" = "hg38" ]; then
    igh_chr="14"
    igh_pos="105586437"
    dux4_chr="4"
    dux4_pos="190173000"
else
    echo "Invalid genome version. Accepted values: hg19, hg38. Given: $genome_version" >&2
    exit 1
fi


# Define functions
get_supporting_reads() {
    # Print the number of reads supporting the IGH::DUX4 rearrangement in the
    # BAM file given as $1. Reads are taken from the IGH region, duplicates are
    # excluded (-F 1024), and a read counts when its mate (rnext/pnext) or a
    # supplementary alignment (SA tag) matches one of the target regions in the
    # filter expression. The function's exit status is that of samtools, so a
    # failure propagates to the caller.
    local bam="$1"
    if [ "$genome_version" = "hg19" ]; then
        samtools view -F 1024 -c \
            -e '(rnext == "4" && pnext > 190988100 && pnext < 191007000) || (rnext == "10" && pnext > 135477000 && pnext < 135500000) || (rnext == "GL000228.1" && pnext > 70000 && pnext < 115000) || ([SA] =~ "10,1354[789][0-9]{4}") || ([SA] =~ "4,19(09[8-9][0-9]|100[0-7])[0-9]{3}" || [SA] =~ "GL000228.1,([7-9][0-9]{4}|1[0-1][0-5][0-9]{3})")' \
            "$bam" 14:106032614-107288051
    elif [ "$genome_version" = "hg38" ]; then
        samtools view -F 1024 -c \
            -e '(rnext == "4" && pnext > 190173000 && pnext < 190176000) || ([SA] =~ "4,19017[345][0-9]{3}")' \
            "$bam" chr14:105586437-106879844
    fi
}

# Set information for tumor and normal
supporting_reads_tumor=$(get_supporting_reads "$tumor_bam")
supporting_reads_normal=""
samples_header="TUMOR"
samples_field="${supporting_reads_tumor}"
if [ -n "$normal_bam" ]; then
    supporting_reads_normal=$(get_supporting_reads "$normal_bam")
    # The literal \t sequences are expanded by "echo -e" when the VCF is written
    samples_header="NORMAL\tTUMOR"
    samples_field="${supporting_reads_normal}\t${supporting_reads_tumor}"
fi


# If supporting reads are found in the tumor, set filter to PASS. Otherwise add: no_supporting_reads
if [ "$supporting_reads_tumor" -gt 0 ]; then
    vcf_filter="PASS"
else
    vcf_filter="no_supporting_reads"
fi

echo "supporting reads tumor: $supporting_reads_tumor"
echo "supporting reads normal: $supporting_reads_normal"
echo "vcf filter: $vcf_filter"

# Write vcf entry. The file is truncated rather than appended to, so a
# leftover temporary file from an earlier run cannot produce a second header.
{
    echo '##fileformat=VCFv4.2'
    echo '##ALT=<ID=BND,Description="Break end">'
    echo '##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">'
    echo '##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">'
    echo '##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of paired-ends that support the event">'
    echo -e "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t${samples_header}"
    echo -e "${igh_chr}\t${igh_pos}\tsamtools_igh_dux4\tN\tN[${dux4_chr}:${dux4_pos}[\t.\t${vcf_filter}\tSVTYPE=BND;IMPRECISE;\tDV\t${samples_field}"
} > "$output_vcf_tmp"

# -f lets a rerun replace an existing compressed file and index
bgzip -f "$output_vcf_tmp"
tabix -f -p vcf "$output_vcf"
4 changes: 4 additions & 0 deletions BALSAMIC/commands/config/case.py
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@
OPTION_CASE_ID,
OPTION_CLINICAL_SNV_OBSERVATIONS,
OPTION_CLINICAL_SV_OBSERVATIONS,
OPTION_EXOME,
OPTION_FASTQ_PATH,
OPTION_GENDER,
OPTION_GENOME_INTERVAL,
@@ -70,6 +71,7 @@
@OPTION_CASE_ID
@OPTION_CLINICAL_SNV_OBSERVATIONS
@OPTION_CLINICAL_SV_OBSERVATIONS
@OPTION_EXOME
@OPTION_FASTQ_PATH
@OPTION_GENDER
@OPTION_GENOME_VERSION
@@ -101,6 +103,7 @@ def case_config(
case_id: str,
clinical_snv_observations: Path,
clinical_sv_observations: Path,
exome: bool,
fastq_path: Path,
gender: Gender,
genome_version: GenomeVersion,
@@ -219,6 +222,7 @@ def case_config(
container_conda_env_path=CONTAINERS_DIR,
),
panel={
"exome": exome,
"capture_kit": panel_bed,
"chrom": get_panel_chrom(panel_bed),
"pon_cnn": pon_cnn,
10 changes: 9 additions & 1 deletion BALSAMIC/commands/options.py
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@
from BALSAMIC.constants.constants import LOG_LEVELS, LogLevel
from BALSAMIC.constants.rules import DELIVERY_RULES
from BALSAMIC.constants.workflow_params import VCF_DICT
from BALSAMIC.utils.cli import validate_cache_version
from BALSAMIC.utils.cli import validate_cache_version, validate_exome_option

OPTION_ADAPTER_TRIM = click.option(
"--adapter-trim/--no-adapter-trim",
@@ -189,6 +189,14 @@
help="Enable dragen variant caller",
)

# Boolean CLI flag (off by default) that assigns exome parameters to the TGA
# workflow; the value is passed through validate_exome_option by click.
OPTION_EXOME = click.option(
    "--exome",
    callback=validate_exome_option,
    default=False,
    is_flag=True,
    help="Assign exome parameters to TGA workflow",
)

OPTION_FASTQ_PATH = click.option(
"--fastq-path",
type=click.Path(exists=True, resolve_path=True),
4 changes: 4 additions & 0 deletions BALSAMIC/constants/cluster_analysis.json
Original file line number Diff line number Diff line change
@@ -411,5 +411,9 @@
"samtools_qc": {
"time": "04:00:00",
"n": 16
},
"igh_dux4_detection": {
"time": "02:00:00",
"n": 1
}
}
42 changes: 31 additions & 11 deletions BALSAMIC/constants/variant_filters.py
Original file line number Diff line number Diff line change
@@ -10,16 +10,8 @@
"description": "General purpose filters used for filtering any variant caller",
}

# Configuration of VARDICT settings
VARDICT_SETTINGS = {
"AD": {"tag_value": 5, "filter_name": "balsamic_low_tumor_ad", "field": "INFO"},
"DP": {
"tag_value": 100,
"filter_name": "balsamic_low_tumor_dp",
"field": "INFO",
},
"MQ": {"tag_value": 40, "filter_name": "balsamic_low_mq", "field": "INFO"},
"AF_min": {"tag_value": 0.007, "filter_name": "balsamic_low_af", "field": "INFO"},
# Configuration of common VARDICT settings
VARDICT_SETTINGS_COMMON = {
"pop_freq": {
"tag_value": 0.005,
"filter_name": "balsamic_high_pop_freq",
@@ -35,12 +27,35 @@
"filter_name": "Frq",
"field": "INFO",
},
"MQ": {"tag_value": 30, "filter_name": "balsamic_low_mq", "field": "INFO"},
"AF_min": {"tag_value": 0.007, "filter_name": "balsamic_low_af", "field": "INFO"},
"AD": {"tag_value": 5, "filter_name": "balsamic_low_tumor_ad", "field": "INFO"},
"varcaller_name": "VarDict",
"filter_type": "general",
"analysis_type": "tumor_only",
"analysis_type": "tumor_only,tumor_normal",
"description": "General purpose filters used for filtering VarDict",
}

# Configuration of VARDICT settings for smaller panels: the common VarDict
# settings extended with a read depth (DP) threshold of 100.
VARDICT_SETTINGS_PANEL = dict(
    VARDICT_SETTINGS_COMMON,
    DP={
        "tag_value": 100,
        "filter_name": "balsamic_low_tumor_dp",
        "field": "INFO",
    },
)

# Configuration of VARDICT settings for exomes: the common VarDict settings
# extended with a read depth (DP) threshold of 20.
VARDICT_SETTINGS_EXOME = dict(
    VARDICT_SETTINGS_COMMON,
    DP={
        "tag_value": 20,
        "filter_name": "balsamic_low_tumor_dp",
        "field": "INFO",
    },
)

# Configuration for SENTIEON settings:
SENTIEON_VARCALL_SETTINGS = {
"AD": {"tag_value": 3, "filter_name": "balsamic_low_tumor_ad", "field": "FORMAT"},
@@ -50,6 +65,11 @@
"field": "FORMAT",
},
"AF_min": {"tag_value": 0.05, "filter_name": "balsamic_low_af", "field": "FORMAT"},
"high_normal_tumor_af_frac": {
"tag_value": 0.3,
"filter_name": "high_normal_tumor_af_frac",
"field": "FORMAT",
},
"pop_freq": {
"tag_value": 0.001,
"filter_name": "balsamic_high_pop_freq",
7 changes: 7 additions & 0 deletions BALSAMIC/constants/workflow_params.py
Original file line number Diff line number Diff line change
@@ -99,6 +99,13 @@
"sequencing_type": ["wgs"],
"workflow_solution": ["BALSAMIC"],
},
"igh_dux4": {
"mutation": "somatic",
"mutation_type": "SV",
"analysis_type": ["single", "paired"],
"sequencing_type": ["wgs"],
"workflow_solution": ["BALSAMIC"],
},
"svdb": {
"mutation": "somatic",
"mutation_type": "SV",
3 changes: 3 additions & 0 deletions BALSAMIC/models/config.py
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ class SampleInstanceModel(BaseModel):
class PanelModel(BaseModel):
"""Holds attributes of PANEL BED file if provided
Attributes:
exome: (bool); optional parameter for targeted analyses to use exome parameters
capture_kit : Field(str(Path)); string representation of path to PANEL BED file
chrom : Field(list(str)); list of chromosomes in PANEL BED
pon_cnn: Field(optional); Path where PON reference .cnn file is stored
@@ -59,6 +60,7 @@ class PanelModel(BaseModel):

"""

exome: Optional[bool] = False
capture_kit: Annotated[Optional[str], AfterValidator(is_file)] = None
chrom: Optional[List[str]] = None
pon_cnn: Annotated[Optional[str], AfterValidator(is_file)] = None
@@ -102,6 +104,7 @@ class VCFModel(BaseModel):
dellycnv: VarcallerAttribute
tiddit: VarcallerAttribute
cnvpytor: VarcallerAttribute
igh_dux4: VarcallerAttribute
svdb: VarcallerAttribute


2 changes: 2 additions & 0 deletions BALSAMIC/models/params.py
Original file line number Diff line number Diff line change
@@ -218,6 +218,7 @@ class VarCallerFilter(BaseModel):
Attributes:
AD: VCFAttributes (required); minimum allelic depth
AF_min: VCFAttributes (optional); minimum allelic fraction
high_normal_tumor_af_frac: VCFAttributes (optional); maximum normal allele frequency / tumor allele frequency
MQ: VCFAttributes (optional); minimum mapping quality
DP: VCFAttributes (optional); minimum read depth
pop_freq: VCFAttributes (optional); maximum gnomad allele frequency
@@ -238,6 +239,7 @@ class VarCallerFilter(BaseModel):

AD: Optional[VCFAttributes] = None
AF_min: Optional[VCFAttributes] = None
high_normal_tumor_af_frac: Optional[VCFAttributes] = None
MQ: Optional[VCFAttributes] = None
DP: Optional[VCFAttributes] = None
pop_freq: Optional[VCFAttributes] = None
Original file line number Diff line number Diff line change
@@ -58,17 +58,21 @@ elif config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["anal
AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name],
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name],
high_normal_tumor_af_frac_filter_name=SENTIEON_CALLER.high_normal_tumor_af_frac.filter_name,
high_normal_tumor_af_frac_value=SENTIEON_CALLER.high_normal_tumor_af_frac.tag_value,
case_name = config["analysis"]["case_id"],
threads:
get_threads(cluster_config, 'bcftools_quality_filter_tnscope_tumor_normal')
message:
"Quality filtering WGS tumor-normal tnscope variants using bcftools for {params.case_name}"
shell:
"""
bcftools view -f PASS,triallelic_site {input.vcf_snv} \
bcftools view {input.vcf_snv} \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \
| bcftools annotate -x FILTER/alt_allele_in_normal \
| bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.high_normal_tumor_af_frac_value}' --soft-filter '{params.high_normal_tumor_af_frac_filter_name}' --mode '+' \
| bcftools view -f PASS,triallelic_site -O z -o {output.vcf_snv_research};

tabix -p vcf -f {output.vcf_snv_research};
@@ -179,14 +183,18 @@ tabix -p vcf -f {output.vcf_filtered};
Path(singularity_image,config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
case_name = config["analysis"]["case_id"],
high_normal_tumor_af_frac_filter_name=SENTIEON_CALLER.high_normal_tumor_af_frac.filter_name,
high_normal_tumor_af_frac_value=SENTIEON_CALLER.high_normal_tumor_af_frac.tag_value,
threads:
get_threads(cluster_config,'bcftools_quality_filter_TNscope_umi_tumor_normal')
message:
"Quality filtering TNscope_umi tumor-normal annotated variants using bcftools for {params.case_name} "
shell:
"""
bcftools view {input.vcf} | \
bcftools view -f PASS,triallelic_site -o {output.vcf_filtered} -O z;
bcftools view {input.vcf} \
| bcftools annotate -x FILTER/alt_allele_in_normal \
| bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.high_normal_tumor_af_frac_value}' --soft-filter '{params.high_normal_tumor_af_frac_filter_name}' --mode '+' \
| bcftools view -f PASS,triallelic_site -o {output.vcf_filtered} -O z;

tabix -p vcf -f {output.vcf_filtered};
"""
Original file line number Diff line number Diff line change
@@ -402,6 +402,33 @@ else:
rm {input.ascat_cnv};
"""


# Tumor-normal IGH::DUX4 detection: runs igh_dux4_detection.sh in the samtools
# container with the genome version, the output VCF path, and the tumor and
# normal BAM files, producing SV.somatic.<case_id>.igh_dux4.vcf.gz.
rule igh_dux4_detection_tumor_normal:
input:
# fa is declared as an input but is not referenced in the shell command below
fa = config["reference"]["reference_genome"],
bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample),
bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name=normal_sample),
output:
vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".igh_dux4.vcf.gz",
benchmark:
benchmark_dir + 'igh_dux4_detection_tumor_normal_' + config["analysis"]["case_id"] + ".tsv"
singularity:
Path(singularity_image, config["bioinfo_tools"].get("samtools") + ".sif").as_posix()
params:
genome_version = config["reference"]["genome_version"],
custom_sv_detection_script = get_script_path("igh_dux4_detection.sh"),
case_name = config["analysis"]["case_id"],
threads:
# Thread count is looked up under the shared "igh_dux4_detection" cluster key
get_threads(cluster_config, "igh_dux4_detection")
message:
"Detecting IGH::DUX4 rearrangement for {params.case_name} using samtools."
shell:
"""
bash {params.custom_sv_detection_script} {params.genome_version} {output.vcf} {input.bamT} {input.bamN}
"""



rule svdb_merge_tumor_normal:
input:
vcf = expand(
Original file line number Diff line number Diff line change
@@ -288,6 +288,31 @@ tabix -p vcf -f {output.delly_sv};
tabix -p vcf -f {output.delly_cnv};
"""


# Tumor-only IGH::DUX4 detection: runs igh_dux4_detection.sh in the samtools
# container with the genome version, the output VCF path and the tumor BAM
# file, producing SV.somatic.<case_id>.igh_dux4.vcf.gz.
rule igh_dux4_detection_tumor_only:
input:
# fa is declared as an input but is not referenced in the shell command below
fa = config["reference"]["reference_genome"],
bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample)
output:
vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".igh_dux4.vcf.gz",
benchmark:
benchmark_dir + 'igh_dux4_detection_tumor_only_' + config["analysis"]["case_id"] + ".tsv"
singularity:
Path(singularity_image, config["bioinfo_tools"].get("samtools") + ".sif").as_posix()
params:
genome_version = config["reference"]["genome_version"],
custom_sv_detection_script = get_script_path("igh_dux4_detection.sh"),
case_name = config["analysis"]["case_id"],
threads:
# Thread count is looked up under the shared "igh_dux4_detection" cluster key
get_threads(cluster_config, "igh_dux4_detection")
message:
"Detecting IGH::DUX4 rearrangement for {params.case_name} using samtools."
shell:
"""
bash {params.custom_sv_detection_script} {params.genome_version} {output.vcf} {input.bamT}
"""


rule svdb_merge_tumor_only:
input:
vcf = expand(
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ mkdir -p {params.tmpdir};
export TMPDIR={params.tmpdir};
export VAR_DICT_OPTS='\"-Djava.io.tmpdir={params.tmpdir}\" \"-Xmx90G\"';

vardict-java -U -u -I 600 -G {input.fa} -f {params.af} -N {params.case_name} \
vardict-java -I 600 -G {input.fa} -f {params.af} -N {params.case_name} \
-b \"{input.bamT}|{input.bamN}\" \
-th {threads} \
{params.col_info} {input.bed} \
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ mkdir -p {params.tmpdir};
export TMPDIR={params.tmpdir};
export VAR_DICT_OPTS='\"-Djava.io.tmpdir={params.tmpdir}\" \"-Xmx45G\"';
vardict-java -u -I 600 \
vardict-java -I 600 \
-G {input.fa} \
-f {params.af} \
-N {params.case_name} \
9 changes: 9 additions & 0 deletions BALSAMIC/utils/cli.py
Original file line number Diff line number Diff line change
@@ -488,6 +488,15 @@ def get_analysis_fastq_files_directory(case_dir: str, fastq_path: str) -> str:
return Path(fastq_path).as_posix()


def validate_exome_option(ctx: click.Context, _param: click.Parameter, exome: bool):
    """Click callback ensuring ``--exome`` is only used together with a panel BED.

    Returns ``exome`` unchanged when it is falsy, or when the context holds a
    truthy ``panel_bed`` parameter; otherwise raises ``click.BadParameter``.

    NOTE(review): the check only sees what is in ``ctx.params`` when the
    callback runs -- confirm that ``panel_bed`` is always processed before
    ``--exome``, whatever order the options are given in.
    """
    if not exome:
        return exome
    panel_bed = ctx.params.get("panel_bed")
    if panel_bed:
        return exome
    raise click.BadParameter(
        "If --exome is provided, --panel-bed must also be provided."
    )


def validate_cache_version(
_ctx: click.Context, _param: click.Parameter, version: str
) -> str:
12 changes: 10 additions & 2 deletions BALSAMIC/workflows/balsamic.smk
Original file line number Diff line number Diff line change
@@ -16,7 +16,9 @@ from BALSAMIC.constants.variant_filters import (
COMMON_SETTINGS,
SENTIEON_VARCALL_SETTINGS,
SVDB_FILTER_SETTINGS,
VARDICT_SETTINGS,
VARDICT_SETTINGS_PANEL,
VARDICT_SETTINGS_EXOME,
VARDICT_SETTINGS_COMMON,
MANTA_FILTER_SETTINGS,
)
from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS, SLEEP_BEFORE_START
@@ -115,7 +117,13 @@ else:

# Varcaller filter settings
COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS)
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS)

# Set VarDict settings depending on if panel is exome or not
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS_PANEL)
if config_model.panel and config_model.panel.exome:
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS_EXOME)


SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS)
SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS)
MANTA_FILTERS = VarCallerFilter.model_validate(MANTA_FILTER_SETTINGS)
25 changes: 25 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
[15.0.0]
-------

Added:
^^^^^^
* high_normal_tumor_af_frac filter in bcftools for TNscope T+N filtering out more than 30% TINC https://github.com/Clinical-Genomics/BALSAMIC/pull/1289
* New option for exome samples `--exome` with modified bcftools filters compared to standard targeted workflow https://github.com/Clinical-Genomics/BALSAMIC/pull/1414
* Custom samtools script for the detection of IGH::DUX4 rearrangements https://github.com/Clinical-Genomics/BALSAMIC/pull/1397

Changed:
^^^^^^^^
* Reduced stringency of minimum MQ for all TGA to 30 from 40 https://github.com/Clinical-Genomics/BALSAMIC/pull/1414
* Removed -u flag from VarDict T+N and T only rules to remove calling only in reverse reads of overlapping mates https://github.com/Clinical-Genomics/BALSAMIC/pull/1414
* Removed -U flag from VarDict T+N rule to start calling SVs https://github.com/Clinical-Genomics/BALSAMIC/pull/1414

Removed:
^^^^^^^^
* alt_allele_in_normal filter from TNscope T+N workflows https://github.com/Clinical-Genomics/BALSAMIC/pull/1289

Fixed:
^^^^^^
* initial filter keeping only PASS or triallelic-site from T+N bcftools quality filter rule has been removed https://github.com/Clinical-Genomics/BALSAMIC/pull/1424


[14.0.1]
-------

@@ -36,6 +60,7 @@ Fixed:
^^^^^^
* Missing `__init__.py` in `snakemake_rules` folders https://github.com/Clinical-Genomics/BALSAMIC/pull/1383


[13.0.0]
-------

255 changes: 187 additions & 68 deletions docs/balsamic_filters.rst

Large diffs are not rendered by default.

16 changes: 15 additions & 1 deletion docs/balsamic_sv_cnv.rst
Original file line number Diff line number Diff line change
@@ -44,11 +44,21 @@ Depending on the sequencing type, BALSAMIC is currently running the following st
- tumor-only
- somatic
- CNV
* - igh_dux4 (see note below)
- WGS
- tumor-normal, tumor-only
- somatic
- SV

Further details about a specific caller can be found in the links for the repositories containing the documentation for SV and CNV callers along with the links for the articles are listed in `bioinfo softwares <https://balsamic.readthedocs.io/en/latest/bioinfo_softwares.html>`_.

Note that igh_dux4 is not a variant caller itself. This is a custom script that uses samtools to detect read pairs supporting IGH::DUX4 rearrangements. In short, the command identifies discordant reads mapping to the IGH region and to either DUX4 or its homologous DUX4-like regions (see references for details). The inclusion of this feature aims to alleviate the failure of callers to detect this rearrangement. It is important to note, however, that the reported breakpoints are fixed to the IGH and DUX4 coordinates and are, therefore, imprecise and uncertain. We advise caution when interpreting this information.


From BALSAMIC version 10.0.0 onwards, it is mandatory to provide the gender of the sample for CNV analysis.



**Pre-merge Filtrations**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -82,6 +92,9 @@ Manta calls are filtered using bcftools to only keep variants that have evidence
* - Manta
- low_pr_sr_count
- SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < 4.0
* - igh_dux4
- samtools_igh_dux4
- DV < 1


Further information regarding the TIDDIT tumor normal filtration: As translocation variants are represented by 2 BNDs in the VCF which allows for mixed assignment of soft-filters, a requirement for assigning soft-filters to translocations is that neither BND is PASS.
@@ -117,12 +130,13 @@ Further information regarding the TIDDIT tumor normal filtration: As translocati
| 3. ascat
| 4. dellycnv
| 5. tiddit
| 6. igh_dux4
- | 1. manta
| 2. dellysv
| 3. dellycnv
| 4. tiddit
| 5. cnvpytor
| 6. igh_dux4

The merged `SV.somatic.<CASE_ID>.svdb.vcf.gz` file retains all the information for the variants from the caller in which the variants are identified, which are then annotated using `ensembl-vep`.
2 changes: 2 additions & 0 deletions docs/resources.rst
Original file line number Diff line number Diff line change
@@ -58,6 +58,8 @@ Relevant publications
#. **Two case studies and a pipeline (unpublished)**: Noll, A. C., Miller, N. A., Smith, L. D., Yoo, B., Fiedler, S., Cooley, L. D., … Kingsmore, S. F. (2016). Clinical detection of deletion structural variants in whole-genome sequences. Npj Genomic Medicine, 1(1), 16026. https://doi.org/10.1038/npjgenmed.2016.26
#. **Review on driver gene methods**: Tokheim, C. J., Papadopoulos, N., Kinzler, K. W., Vogelstein, B., & Karchin, R. (2016). Evaluating the evaluation of cancer driver genes. Proceedings of the National Academy of Sciences, 113(50), 14330–14335. https://doi.org/10.1073/pnas.1616440113

#. **Detection of IGH::DUX4 rearrangement**: Rezayee, F., Eisfeldt, J., Skaftason, A., Öfverholm, I., Sayyab, S., Syvänen, A. C., … & Barbany, G. (2023). Feasibility to use whole-genome sequencing as a sole diagnostic method to detect genomic aberrations in pediatric B-cell acute lymphoblastic leukemia. Frontiers in Oncology, 13, 1217712. https://doi.org/10.3389/fonc.2023.1217712

*Resource, or general notable papers including resource and KB papers related to cancer genomics*

#. **GIAB**: Zook, J. M., Catoe, D., McDaniel, J., Vang, L., Spies, N., Sidow, A., … Salit, M. (2016). Extensive sequencing of seven human genomes to characterize benchmark reference materials. Scientific Data, 3, 160025. https://doi.org/10.1038/sdata.2016.25
77 changes: 76 additions & 1 deletion tests/commands/config/test_config_sample.py
Original file line number Diff line number Diff line change
@@ -458,8 +458,83 @@ def test_config_with_gens_arguments_for_tga(
panel_bed_file,
],
)
# THEN a config should be created and exist
# THEN config should fail with error message
assert result.exit_code == 2
assert (
"GENS is currently not compatible with TGA analysis, only WGS." in result.output
)


def test_config_wgs_with_exome(
    invoke_cli,
    tumor_sample_name: str,
    analysis_dir: str,
    balsamic_cache: str,
    fastq_dir_tumor_only: str,
    case_id_tumor_only: str,
):
    """Test balsamic config case with --exome argument for WGS."""

    # GIVEN CLI arguments with the --exome flag but without a panel bed file
    cli_args = [
        "config",
        "case",
        "--case-id",
        case_id_tumor_only,
        "--analysis-dir",
        analysis_dir,
        "--fastq-path",
        fastq_dir_tumor_only,
        "--balsamic-cache",
        balsamic_cache,
        "--tumor-sample-name",
        tumor_sample_name,
        "--exome",
    ]

    # WHEN invoking the config case command
    result = invoke_cli(cli_args)

    # THEN config should fail with error message
    assert result.exit_code == 2
    assert "If --exome is provided, --panel-bed must also be provided." in result.output


def test_config_tga_with_exome(
    invoke_cli,
    tumor_sample_name: str,
    analysis_dir: str,
    balsamic_cache: str,
    fastq_dir_tumor_only: str,
    case_id_tumor_only: str,
    panel_bed_file: str,
):
    """Test balsamic config case with --exome argument for TGA."""

    # GIVEN CLI arguments with both a panel bed file and the --exome flag
    cli_args = [
        "config",
        "case",
        "--case-id",
        case_id_tumor_only,
        "--analysis-dir",
        analysis_dir,
        "--fastq-path",
        fastq_dir_tumor_only,
        "--balsamic-cache",
        balsamic_cache,
        "--tumor-sample-name",
        tumor_sample_name,
        "-p",
        panel_bed_file,
        "--exome",
    ]

    # WHEN invoking the config case command
    result = invoke_cli(cli_args)

    # THEN a config should be created and exist
    assert result.exit_code == 0
    assert Path(
        analysis_dir, case_id_tumor_only, f"{case_id_tumor_only}.{FileType.JSON}"
    ).exists()
14 changes: 14 additions & 0 deletions tests/test_data/config.json
Original file line number Diff line number Diff line change
@@ -181,6 +181,20 @@
"BALSAMIC"
]
},
"igh_dux4": {
"mutation": "somatic",
"mutation_type": "SV",
"analysis_type": [
"single",
"paired"
],
"sequencing_type": [
"wgs"
],
"workflow_solution": [
"BALSAMIC"
]
},
"svdb": {
"mutation": "somatic",
"mutation_type": "SV",
37 changes: 33 additions & 4 deletions tests/test_workflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest import mock
import logging
import snakemake

from BALSAMIC.constants.analysis import AnalysisWorkflow
@@ -8,13 +9,17 @@


def test_workflow_tumor_only_tga_hg19(
tumor_only_config, sentieon_install_dir, sentieon_license
tumor_only_config,
sentieon_install_dir,
sentieon_license,
caplog,
):
# GIVEN a sample config dict and a snakefile
analysis_type = "single"
analysis_workflow = "balsamic"
snakefile = get_snakefile(analysis_type, analysis_workflow)
config_json = tumor_only_config
caplog.set_level(logging.INFO)

# WHEN invoking snakemake module with dry run option
# THEN the snakemake workflow for TGA, hg19-tumor-only should run successfully.
@@ -27,15 +32,22 @@ def test_workflow_tumor_only_tga_hg19(
):
assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True)

# THEN the following rules should not be included
assert "igh_dux4_detection_tumor_only" not in caplog.text


def test_workflow_tumor_normal_tga_hg19(
tumor_normal_config, sentieon_install_dir, sentieon_license
tumor_normal_config,
sentieon_install_dir,
sentieon_license,
caplog,
):
# GIVEN a sample config dict and a snakefile
analysis_type = "paired"
analysis_workflow = "balsamic"
snakefile = get_snakefile(analysis_type, analysis_workflow)
config_json = tumor_normal_config
caplog.set_level(logging.INFO)

# WHEN invoking snakemake module with dry run option
# THEN the snakemake workflow for TGA, hg19-tumor-normal should run successfully.
@@ -48,15 +60,22 @@ def test_workflow_tumor_normal_tga_hg19(
):
assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True)

# THEN the following rules should not be included
assert "igh_dux4_detection_tumor_normal" not in caplog.text


def test_workflow_tumor_only_wgs_hg19(
tumor_only_wgs_config, sentieon_install_dir, sentieon_license
tumor_only_wgs_config,
sentieon_install_dir,
sentieon_license,
caplog,
):
# GIVEN a sample config dict and a snakefile
analysis_type = "single"
analysis_workflow = "balsamic"
snakefile = get_snakefile(analysis_type, analysis_workflow)
config_json = tumor_only_wgs_config
caplog.set_level(logging.INFO)

# WHEN invoking snakemake module with dry run option
# THEN the snakemake workflow for WGS, hg19-tumor-only should run successfully.
@@ -69,15 +88,22 @@ def test_workflow_tumor_only_wgs_hg19(
):
assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True)

# THEN the following rules should be included
assert "igh_dux4_detection_tumor_only" in caplog.text


def test_workflow_tumor_normal_wgs_hg19(
tumor_normal_wgs_config, sentieon_install_dir, sentieon_license
tumor_normal_wgs_config,
sentieon_install_dir,
sentieon_license,
caplog,
):
# GIVEN a sample config dict and a snakefile
analysis_type = "paired"
analysis_workflow = "balsamic"
snakefile = get_snakefile(analysis_type, analysis_workflow)
config_json = tumor_normal_wgs_config
caplog.set_level(logging.INFO)

# WHEN invoking snakemake module with dry run option
# THEN the snakemake workflow for WGS, hg19-tumor-normal should run successfully.
@@ -90,6 +116,9 @@ def test_workflow_tumor_normal_wgs_hg19(
):
assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True)

# THEN the following rules should be included
assert "igh_dux4_detection_tumor_normal" in caplog.text


def test_workflow_qc_tumor_only_hg19(
tumor_only_config_qc, sentieon_install_dir, sentieon_license
19 changes: 19 additions & 0 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
from _pytest.logging import LogCaptureFixture
from _pytest.tmpdir import TempPathFactory

from BALSAMIC.commands.config.case import case_config
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, SampleType, SequencingType
from BALSAMIC.constants.cache import CacheVersion
from BALSAMIC.constants.cluster import ClusterConfigType
@@ -38,6 +39,7 @@
get_snakefile,
job_id_dump_to_yaml,
validate_cache_version,
validate_exome_option,
)
from BALSAMIC.utils.exc import BalsamicError, WorkflowRunError
from BALSAMIC.utils.io import (
@@ -1001,6 +1003,23 @@ def test_get_fastp_parameters(balsamic_model: ConfigModel):
assert "--disable_adapter_trimming" in fastp_params_tga["fastp_trim_adapter"]


def test_validate_exome_option(panel_bed_file: str):
    """Test exome option validation with and without a supplied panel bed file."""

    # GIVEN that a panel bedfile has been supplied and exome parameter set to true
    ctx = click.Context(case_config)
    ctx.params["panel_bed"] = panel_bed_file

    # WHEN validating exome option
    # THEN exome argument should be correctly set
    assert validate_exome_option(ctx, click.Parameter, True) is True

    # GIVEN that a panel bedfile has NOT been supplied and exome parameter set to true
    ctx.params["panel_bed"] = None

    # WHEN validating exome option
    # THEN a bad parameter error should be raised
    with pytest.raises(click.BadParameter):
        validate_exome_option(ctx, click.Parameter, True)


def test_validate_cache_version_develop():
"""Test develop cache version validation."""


Unchanged files with check annotations Beta

Here a short tutorial is provided for BALSAMIC (**version** = 14.0.1).
Regarding fastq-inputs
----------------------

Check warning on line 8 in docs/user_guide.rst

GitHub Actions / docs

Title underline too short.
Previous versions of BALSAMIC only accepted one fastq-pair per sample, which required concatenation of fastq-pairs if multiple existed.