From 9aae9f56992a537642be29220404672b80d4d3a9 Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Wed, 16 Oct 2024 15:19:07 -0400 Subject: [PATCH] :broom: cleared out a premature prod wf update and removed extra input :pencil: added readme --- docs/KFDRC_GATK_HC_MOD_PLOIDY_README.md | 29 +++++++++++++++++++ ...drc-gatk-haplotypecaller-ploidy-mod-wf.cwl | 8 ++--- workflows/kfdrc-gatk-haplotypecaller-wf.cwl | 2 -- 3 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 docs/KFDRC_GATK_HC_MOD_PLOIDY_README.md diff --git a/docs/KFDRC_GATK_HC_MOD_PLOIDY_README.md b/docs/KFDRC_GATK_HC_MOD_PLOIDY_README.md new file mode 100644 index 0000000..8195762 --- /dev/null +++ b/docs/KFDRC_GATK_HC_MOD_PLOIDY_README.md @@ -0,0 +1,29 @@ +# Kids First DRC GATK HaplotypeCaller Modified Ploidy BETA Workflow +This is a research workflow for users wishing to modify the ploidy of certain +regions of their existing GVCF calls. + +## Inputs + +- input_cram: Input CRAM file +- input_gvcf: GVCF generated in standard workflow +- biospecimen_name: String name of biospecimen +- output_basename: String to use as the base for output filenames +- reference_fasta: FASTA file that was used during alignment. Also need + corresponding `.fai` and `.dict` files. +- region: Specific region to pull, in format 'chr21' or 'chr3:1-1000' +- dbsnp_vcf: dbSNP vcf file +- dbsnp_idx: dbSNP vcf index file +- contamination: Precalculated contamination value. Providing the value here + will skip the run of VerifyBAMID and use the provided value as ground truth. 
+- contamination_sites_bed: .Bed file for markers used in this + analysis, format(chr\tpos-1\tpos\trefAllele\taltAllele) +- contamination_sites_mu: .mu matrix file of genotype matrix +- contamination_sites_ud: .UD matrix file from SVD result of genotype matrix +- re_calling_interval_list: Interval list to re-call +- wgs_evaluation_interval_list: Target intervals to restrict GVCF metric + analysis (for VariantCallingMetrics) +- sample_ploidy: If sample/interval is expected to not have ploidy=2, enter expected ploidy + +## Outputs + +- mixed_ploidy_gvcf: Updated complete GVCF in which the desired region has had its ploidy updated diff --git a/workflows/kfdrc-gatk-haplotypecaller-ploidy-mod-wf.cwl b/workflows/kfdrc-gatk-haplotypecaller-ploidy-mod-wf.cwl index 70a74ab..37263f3 100644 --- a/workflows/kfdrc-gatk-haplotypecaller-ploidy-mod-wf.cwl +++ b/workflows/kfdrc-gatk-haplotypecaller-ploidy-mod-wf.cwl @@ -1,7 +1,7 @@ cwlVersion: v1.2 class: Workflow id: kfdrc-gatk-haplotypecaller-ploidy-mod-workflow -label: Kids First DRC GATK HaplotypeCaller Modified Ploidy Workflow +label: Kids First DRC GATK HaplotypeCaller Modified Ploidy BETA Workflow doc: "This workflow re-runs a subset of regions with a different expected ploidy and re-integrates those results into existing results" requirements: @@ -17,8 +17,6 @@ inputs: name: Homo_sapiens_assembly38.fasta, secondaryFiles: [{class: File, path: 60639016357c3a53540ca7af, name: Homo_sapiens_assembly38.fasta.fai}, {class: File, path: 60639019357c3a53540ca7e7, name: Homo_sapiens_assembly38.dict}]}, secondaryFiles: ['.fai', '^.dict']} - reference_dict: {type: 'File?', "sbg:suggestedValue": {class: File, path: 60639019357c3a53540ca7e7, - name: Homo_sapiens_assembly38.dict}} region: { type: 'string?', doc: "Specific region to pull, in format 'chr21' or 'chr3:1-1000'" } dbsnp_vcf: {type: 'File', doc: "dbSNP vcf file", "sbg:suggestedValue": {class: File, path: 6063901f357c3a53540ca84b, name: Homo_sapiens_assembly38.dbsnp138.vcf}} @@ 
-80,7 +78,9 @@ steps: output_basename: output_basename dbsnp_vcf: dbsnp_vcf dbsnp_idx: dbsnp_idx - reference_dict: reference_dict + reference_dict: + source: reference_fasta + valueFrom: "$(self.secondaryFiles.filter(function(e) {return e.nameext == '.dict'})[0])" wgs_calling_interval_list: re_calling_interval_list wgs_evaluation_interval_list: wgs_evaluation_interval_list conditional_run: diff --git a/workflows/kfdrc-gatk-haplotypecaller-wf.cwl b/workflows/kfdrc-gatk-haplotypecaller-wf.cwl index b7d61be..97c282d 100644 --- a/workflows/kfdrc-gatk-haplotypecaller-wf.cwl +++ b/workflows/kfdrc-gatk-haplotypecaller-wf.cwl @@ -113,7 +113,6 @@ inputs: class: File, path: 60639017357c3a53540ca7d3, name: wgs_evaluation_regions.hg38.interval_list}} run_sex_metrics: {type: 'boolean?', default: false, doc: "idxstats will be collected\ \ and X/Y ratios calculated"} - sample_ploidy: { type: 'int?', doc: "If sample/interval is expected to not have ploidy=2, enter expected ploidy" } outputs: gvcf: {type: File, outputSource: generate_gvcf/gvcf} gvcf_calling_metrics: {type: 'File[]', outputSource: generate_gvcf/gvcf_calling_metrics} @@ -159,7 +158,6 @@ steps: valueFrom: $(1) contamination: contamination biospecimen_name: biospecimen_name - sample_ploidy: sample_ploidy out: [verifybamid_output, gvcf, gvcf_calling_metrics] $namespaces: sbg: https://sevenbridges.com