Skip to content

Commit

Permalink
📝 added doc to wf
Browse files Browse the repository at this point in the history
  • Loading branch information
migbro committed Oct 17, 2024
1 parent 9aae9f5 commit ba85e1b
Showing 1 changed file with 52 additions and 27 deletions.
79 changes: 52 additions & 27 deletions workflows/kfdrc-gatk-haplotypecaller-ploidy-mod-wf.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,65 @@ cwlVersion: v1.2
class: Workflow
id: kfdrc-gatk-haplotypecaller-ploidy-mod-workflow
label: Kids First DRC GATK HaplotypeCaller Modified Ploidy BETA Workflow
doc: "This workflow re-runs a subset of regions with a different expected ploidy and re-integrates those results into existing results"
doc: |
# Kids First DRC GATK HaplotypeCaller Modified Ploidy BETA Workflow
This is a research workflow for users wishing to modify the ploidy of certain
regions of their existing GVCF calls.

## Inputs

- input_cram: Input CRAM file
- input_gvcf: GVCF generated in standard workflow
- biospecimen_name: String name of biospecimen
- output_basename: String to use as the base for output filenames
- reference_fasta: FASTA file that was used during alignment. The
corresponding `.fai` and `.dict` files are also required.
- region: Specific region to pull, in format 'chr21' or 'chr3:1-1000'
- dbsnp_vcf: dbSNP vcf file
- dbsnp_idx: dbSNP vcf index file
- contamination: Precalculated contamination value. Providing the value here
will skip the run of VerifyBAMID and use the provided value as ground truth.
- contamination_sites_bed: BED file for markers used in this
analysis, in the format `chr\tpos-1\tpos\trefAllele\taltAllele`
- contamination_sites_mu: .mu matrix file of genotype matrix
- contamination_sites_ud: .UD matrix file from SVD result of genotype matrix
- re_calling_interval_list: Interval list to re-call
- wgs_evaluation_interval_list: Target intervals to restrict GVCF metric
analysis (for VariantCallingMetrics)
- sample_ploidy: If sample/interval is expected to not have ploidy=2, enter expected ploidy

## Outputs

- mixed_ploidy_gvcf: Updated complete GVCF in which the desired region has had its ploidy updated


requirements:
- class: ScatterFeatureRequirement
- class: MultipleInputFeatureRequirement
- class: SubworkflowFeatureRequirement
inputs:
input_cram: {type: 'File', doc: "Input CRAM file"}
input_gvcf: { type: File, secondaryFiles: ['.tbi'], doc: "gVCF generated in standard workflow"}
input_gvcf: {type: File, secondaryFiles: ['.tbi'], doc: "gVCF generated in standard workflow"}
biospecimen_name: {type: 'string', doc: "String name of biospecimen"}
output_basename: {type: 'string', doc: "String to use as the base for output filenames"}
reference_fasta: {type: 'File', "sbg:suggestedValue": {class: File, path: 60639014357c3a53540ca7a3,
name: Homo_sapiens_assembly38.fasta, secondaryFiles: [{class: File, path: 60639016357c3a53540ca7af, name: Homo_sapiens_assembly38.fasta.fai}, {class: File, path: 60639019357c3a53540ca7e7,
name: Homo_sapiens_assembly38.dict}]},
secondaryFiles: ['.fai', '^.dict']}
region: { type: 'string?', doc: "Specific region to pull, in format 'chr21' or 'chr3:1-1000'" }
dbsnp_vcf: {type: 'File', doc: "dbSNP vcf file", "sbg:suggestedValue": {class: File,
path: 6063901f357c3a53540ca84b, name: Homo_sapiens_assembly38.dbsnp138.vcf}}
dbsnp_idx: {type: 'File?', doc: "dbSNP vcf index file", "sbg:suggestedValue": {
class: File, path: 6063901e357c3a53540ca834, name: Homo_sapiens_assembly38.dbsnp138.vcf.idx}}
contamination: {type: 'float?', doc: "Precalculated contamination value. Providing\
\ the value here will skip the run of VerifyBAMID and use the provided value\
\ as ground truth."}
contamination_sites_bed: {type: 'File?', doc: ".Bed file for markers used in this\
\ analysis,format(chr\tpos-1\tpos\trefAllele\taltAllele)", "sbg:suggestedValue": {
class: File, path: 6063901e357c3a53540ca833, name: Homo_sapiens_assembly38.contam.bed}}
contamination_sites_mu: {type: 'File?', doc: ".mu matrix file of genotype matrix",
"sbg:suggestedValue": {class: File, path: 60639017357c3a53540ca7cd, name: Homo_sapiens_assembly38.contam.mu}}
contamination_sites_ud: {type: 'File?', doc: ".UD matrix file from SVD result of\
\ genotype matrix", "sbg:suggestedValue": {class: File, path: 6063901f357c3a53540ca84f,
name: Homo_sapiens_assembly38.contam.UD}}
re_calling_interval_list: {type: 'File', doc: "Interval list to re-call" }
wgs_evaluation_interval_list: {type: 'File', doc: "Target intervals to restrict\
\ gvcf metric analysis (for VariantCallingMetrics)", "sbg:suggestedValue": {
class: File, path: 60639017357c3a53540ca7d3, name: wgs_evaluation_regions.hg38.interval_list}}
sample_ploidy: { type: 'int?', doc: "If sample/interval is expected to not have ploidy=2, enter expected ploidy" }
reference_fasta: {type: 'File', "sbg:suggestedValue": {class: File, path: 60639014357c3a53540ca7a3, name: Homo_sapiens_assembly38.fasta,
secondaryFiles: [{class: File, path: 60639016357c3a53540ca7af, name: Homo_sapiens_assembly38.fasta.fai}, {class: File, path: 60639019357c3a53540ca7e7,
name: Homo_sapiens_assembly38.dict}]}, secondaryFiles: ['.fai', '^.dict']}
region: {type: 'string?', doc: "Specific region to pull, in format 'chr21' or 'chr3:1-1000'"}
dbsnp_vcf: {type: 'File', doc: "dbSNP vcf file", "sbg:suggestedValue": {class: File, path: 6063901f357c3a53540ca84b, name: Homo_sapiens_assembly38.dbsnp138.vcf}}
dbsnp_idx: {type: 'File?', doc: "dbSNP vcf index file", "sbg:suggestedValue": {class: File, path: 6063901e357c3a53540ca834, name: Homo_sapiens_assembly38.dbsnp138.vcf.idx}}
contamination: {type: 'float?', doc: "Precalculated contamination value. Providing the value here will skip the run of VerifyBAMID
and use the provided value as ground truth."}
contamination_sites_bed: {type: 'File?', doc: ".Bed file for markers used in this analysis,format(chr\tpos-1\tpos\trefAllele\taltAllele)",
"sbg:suggestedValue": {class: File, path: 6063901e357c3a53540ca833, name: Homo_sapiens_assembly38.contam.bed}}
contamination_sites_mu: {type: 'File?', doc: ".mu matrix file of genotype matrix", "sbg:suggestedValue": {class: File, path: 60639017357c3a53540ca7cd,
name: Homo_sapiens_assembly38.contam.mu}}
contamination_sites_ud: {type: 'File?', doc: ".UD matrix file from SVD result of genotype matrix", "sbg:suggestedValue": {class: File,
path: 6063901f357c3a53540ca84f, name: Homo_sapiens_assembly38.contam.UD}}
re_calling_interval_list: {type: 'File', doc: "Interval list to re-call"}
wgs_evaluation_interval_list: {type: 'File', doc: "Target intervals to restrict gvcf metric analysis (for VariantCallingMetrics)",
"sbg:suggestedValue": {class: File, path: 60639017357c3a53540ca7d3, name: wgs_evaluation_regions.hg38.interval_list}}
sample_ploidy: {type: 'int?', doc: "If sample/interval is expected to not have ploidy=2, enter expected ploidy"}

outputs:
mixed_ploidy_gvcf: {type: File, outputSource: picard_mergevcfs_python_renamesample/output}
Expand Down Expand Up @@ -103,3 +126,5 @@ $namespaces:
hints:
- class: 'sbg:maxNumberOfParallelInstances'
value: 4
sbg:license: Apache License 2.0
sbg:publisher: KFDRC

0 comments on commit ba85e1b

Please sign in to comment.