Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VCF input option to Vapor WDL #771

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/.dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,15 @@ workflows:
tags:
- /.*/

- subclass: WDL
name: Vapor
primaryDescriptorPath: /wdl/Vapor.wdl
filters:
branches:
- main
tags:
- /.*/

- subclass: WDL
name: VisualizeCnvs
primaryDescriptorPath: /wdl/VisualizeCnvs.wdl
Expand Down
1 change: 0 additions & 1 deletion inputs/templates/test/Vapor/Vapor.json.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"Vapor.ref_dict": {{ reference_resources.reference_dict | tojson }},
"Vapor.save_plots": "false",

"Vapor.prefix": {{ test_batch.example_pacbio_sample_id | tojson }},
"Vapor.sample_id": {{ test_batch.example_pacbio_sample_id | tojson }},
"Vapor.bam_or_cram_file": {{ test_batch.example_pacbio_cram | tojson }},
"Vapor.bam_or_cram_index": {{ test_batch.example_pacbio_cram_index | tojson }},
Expand Down
66 changes: 66 additions & 0 deletions wdl/Utils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,72 @@ task SubsetVcfBySamplesList {
}
}

# Subset a VCF to a specific sample.
# Uses `bcftools view -s` to keep (or, with remove_sample=true, exclude) a single
# sample, optionally dropping sites left private to the excluded samples, then
# re-indexes the result with tabix.
task SubsetVcfToSample {
    input {
        File vcf                  # Input VCF, bgzipped (.vcf.gz)
        File? vcf_idx             # Optional index; not read by the command itself, only localized alongside the VCF
        String sample             # Sample ID to keep (or to remove, when remove_sample is true)
        String? outfile_name      # Output filename; defaults to "<input basename>.subset.vcf.gz"
        Boolean remove_sample = false # If false (default), keep the sample. If true, remove it.
        Boolean remove_private_sites = true # If true (default), remove sites that are private to excluded samples. If false, keep sites even if no remaining samples are non-ref.
        Boolean keep_af = true # If true (default), do not recalculate allele frequencies (AC/AF/AN)
        String sv_base_mini_docker # Docker image providing bcftools and tabix
        RuntimeAttr? runtime_attr_override
    }

    String vcf_subset_filename = select_first([outfile_name, basename(vcf, ".vcf.gz") + ".subset.vcf.gz"])
    String vcf_subset_idx_filename = vcf_subset_filename + ".tbi"

    # When enabled, this expands to a literal pipe into a second bcftools view that
    # excludes non-CNV records with zero ALT genotypes among the remaining samples;
    # when disabled it is empty and the first bcftools view writes the output directly.
    String remove_private_sites_flag = if remove_private_sites then " | bcftools view -e 'SVTYPE!=\"CNV\" && COUNT(GT=\"alt\")==0' " else ""
    # --no-update tells bcftools not to recompute INFO fields (AC/AF/AN) after subsetting.
    String keep_af_flag = if keep_af then "--no-update" else ""
    # A "^" prefix on the -s argument inverts the selection (exclude the listed sample).
    String complement_flag = if remove_sample then "^" else ""

    # Disk must be scaled proportionally to the size of the VCF
    Float input_size = size(vcf, "GiB")
    RuntimeAttr default_attr = object {
        mem_gb: 3.75,
        disk_gb: ceil(10.0 + (input_size * 2)),
        cpu_cores: 1,
        preemptible_tries: 3,
        max_retries: 1,
        boot_disk_gb: 10
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

    command <<<

        set -euo pipefail

        # --force-samples: tolerate sample IDs absent from the VCF header (bcftools
        # warns instead of erroring). The private-sites flag, when set, inserts a
        # pipe stage here, so -O z / -o always apply to the last command in the
        # pipeline either way.
        bcftools view \
            -s ~{complement_flag}~{sample} \
            --force-samples \
            ~{keep_af_flag} \
            ~{vcf} \
            ~{remove_private_sites_flag} \
            -O z \
            -o ~{vcf_subset_filename}

        # -f: overwrite any stale index; -p vcf: treat input as VCF coordinates.
        tabix -f -p vcf ~{vcf_subset_filename}

    >>>

    output {
        File vcf_subset = vcf_subset_filename       # Subset, bgzipped VCF
        File vcf_subset_index = vcf_subset_idx_filename # Matching .tbi index
    }

    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        docker: sv_base_mini_docker
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
    }
}

task VcfToBed {

input {
Expand Down
53 changes: 42 additions & 11 deletions wdl/Vapor.wdl
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
version 1.0

import "Utils.wdl" as utils
import "Structs.wdl"

workflow Vapor {
input {
String prefix
String sample_id
File bam_or_cram_file
File bam_or_cram_index
File bed_file
String sample_id

# One of the following must be specified. May be single- or multi-sample.
File? bed_file
File? vcf_file

Boolean save_plots # Control whether plots are final output

Expand All @@ -21,31 +24,52 @@ workflow Vapor {
String sv_base_mini_docker
String sv_pipeline_docker

RuntimeAttr? runtime_attr_subset_sample
RuntimeAttr? runtime_attr_vcf_to_bed
RuntimeAttr? runtime_attr_vapor
RuntimeAttr? runtime_attr_bcf2vcf
RuntimeAttr? runtime_attr_vcf2bed
RuntimeAttr? runtime_attr_split_vcf
RuntimeAttr? runtime_attr_concat_beds
RuntimeAttr? runtime_attr_LocalizeCram

File? NONE_FILE_ # Create a null file - do not use this input
}

# Convert vcf to bed if provided
if (defined(vcf_file) && !defined(bed_file)) {

call utils.SubsetVcfToSample {
input:
vcf=select_first([vcf_file]),
vcf_idx=select_first([vcf_file]) + ".tbi",
sample=sample_id,
outfile_name=sample_id,
sv_base_mini_docker=sv_base_mini_docker,
runtime_attr_override = runtime_attr_subset_sample
}

call utils.VcfToBed {
input:
vcf_file = SubsetVcfToSample.vcf_subset,
variant_interpretation_docker = sv_pipeline_docker,
runtime_attr_override = runtime_attr_vcf_to_bed
}

}

scatter (contig in read_lines(contigs)) {

call PreprocessBedForVapor {
input:
prefix = "~{prefix}.~{contig}.preprocess",
prefix = "~{sample_id}.~{contig}.preprocess",
contig = contig,
sample_to_extract = sample_id,
bed_file = bed_file,
bed_file = select_first([bed_file, VcfToBed.bed_output]),
sv_pipeline_docker = sv_pipeline_docker,
runtime_attr_override = runtime_attr_split_vcf
}

call RunVaporWithCram {
input:
prefix = "~{prefix}.~{contig}",
prefix = "~{sample_id}.~{contig}",
contig = contig,
bam_or_cram_file = bam_or_cram_file,
bam_or_cram_index = bam_or_cram_index,
Expand All @@ -62,7 +86,7 @@ workflow Vapor {
input:
shard_bed_files = RunVaporWithCram.vapor,
shard_plots = RunVaporWithCram.vapor_plot,
prefix = prefix,
prefix = sample_id,
sv_base_mini_docker = sv_base_mini_docker,
runtime_attr_override = runtime_attr_concat_beds
}
Expand Down Expand Up @@ -101,9 +125,16 @@ task PreprocessBedForVapor {

command <<<
set -euo pipefail

if [[ ~{bed_file} == *.gz ]]; then
gunzip -c ~{bed_file} > in.bed
else
cp ~{bed_file} in.bed
fi

python /opt/sv-pipeline/scripts/preprocess_bed_for_vapor.py \
--contig ~{contig} \
--bed-in ~{bed_file} \
--bed-in in.bed \
--bed-out ~{prefix}.bed \
~{"-s " + sample_to_extract}
>>>
Expand Down
9 changes: 4 additions & 5 deletions wdl/VaporBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,16 @@ workflow VaporBatch {

Boolean save_plots

RuntimeAttr? runtime_attr_subset_sample
RuntimeAttr? runtime_attr_vcf_to_bed
RuntimeAttr? runtime_attr_vapor
RuntimeAttr? runtime_attr_bcf2vcf
RuntimeAttr? runtime_attr_vcf2bed
RuntimeAttr? runtime_attr_split_vcf
RuntimeAttr? runtime_attr_concat_beds
}

scatter (i in range(length(bam_or_cram_files))) {
call vapor_bed.Vapor {
input:
prefix = samples[i],
bam_or_cram_file = bam_or_cram_files[i],
bam_or_cram_index = bam_or_cram_indexes[i],
bed_file = bed_file,
Expand All @@ -45,8 +44,8 @@ workflow VaporBatch {
sv_base_mini_docker = sv_base_mini_docker,
sv_pipeline_docker = sv_pipeline_docker,
runtime_attr_vapor = runtime_attr_vapor,
runtime_attr_bcf2vcf = runtime_attr_bcf2vcf,
runtime_attr_vcf2bed = runtime_attr_vcf2bed,
runtime_attr_subset_sample = runtime_attr_subset_sample,
runtime_attr_vcf_to_bed = runtime_attr_vcf_to_bed,
runtime_attr_split_vcf = runtime_attr_split_vcf,
runtime_attr_concat_beds = runtime_attr_concat_beds
}
Expand Down
Loading