<!-- gatk4_auto/gatk4_genotypegvcfs.xml -->

<tool id="gatk4_genotypegvcfs" name="GATK4 GenotypeGVCFs" profile="17.09" version="@WRAPPER_VERSION@0">
  <description>- Perform joint genotyping on one or more samples pre-called with HaplotypeCaller</description>
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="version_cmd"/>
  <command detect_errors="exit_code"><![CDATA[#include source=$gatk_gvcf_tabix#
#include source=$pre_gatk_ints_chth#
#include source=$pre_gatk_excl_ints_chth#
## Stage the optional dbSNP file under a name whose extension tells GATK the format;
## bgzipped input is additionally tabix-indexed.
#if $dbsnp:
#set datatype = $dbsnp.datatype
#if $dbsnp.is_of_type("vcf_bgzip"):
ln -s '$dbsnp' dbsnp.vcf.gz &&
tabix dbsnp.vcf.gz &&
#else
ln -s '$dbsnp' dbsnp.vcf &&
#end if
#end if
## Same staging for the optional population callset.
#if $population_callset:
#set datatype = $population_callset.datatype
#if $population_callset.is_of_type("vcf_bgzip"):
ln -s '$population_callset' population_callset.vcf.gz &&
tabix population_callset.vcf.gz &&
#else
ln -s '$population_callset' population_callset.vcf &&
#end if
#end if
@CMD_BEGIN@ GenotypeGVCFs
## Boolean parameters expand to their flag (truevalue) or to an empty string.
## Optional numeric parameters are compared against the empty string rather than
## tested for truthiness, so that an explicit 0 or 0.0 is still passed to GATK.
## Dataset paths and free-text values are single-quoted for the shell.
#if $add_output_sam_program_record:
$add_output_sam_program_record
#end if
#if $add_output_vcf_command_line:
$add_output_vcf_command_line
#end if
#if $annotate_with_num_discovered_alleles:
$annotate_with_num_discovered_alleles
#end if
#if str($cloud_index_prefetch_buffer) != "":
--cloud-index-prefetch-buffer $cloud_index_prefetch_buffer
#end if
#if str($cloud_prefetch_buffer) != "":
--cloud-prefetch-buffer $cloud_prefetch_buffer
#end if
#if $create_output_bam_index:
$create_output_bam_index
#end if
#if $create_output_bam_md5:
$create_output_bam_md5
#end if
#if $create_output_variant_index:
$create_output_variant_index
#end if
#if $create_output_variant_md5:
$create_output_variant_md5
#end if
#if $dbsnp:
#if $dbsnp.is_of_type("vcf_bgzip"):
--dbsnp dbsnp.vcf.gz
#else
--dbsnp dbsnp.vcf
#end if
#end if
#if $disable_bam_index_caching:
$disable_bam_index_caching
#end if
#if $disable_read_filter:
--disable-read-filter '$disable_read_filter'
#end if
#if $disable_sequence_dictionary_validation:
$disable_sequence_dictionary_validation
#end if
#if $disable_tool_default_annotations:
$disable_tool_default_annotations
#end if
#if $disable_tool_default_read_filters:
$disable_tool_default_read_filters
#end if
#if $enable_all_annotations:
$enable_all_annotations
#end if
#if $founder_id:
--founder-id '$founder_id'
#end if
#if $gatk_config_file:
--gatk-config-file '$gatk_config_file'
#end if
#if str($gcs_max_retries) != "":
--gcs-max-retries $gcs_max_retries
#end if
#if str($heterozygosity) != "":
--heterozygosity $heterozygosity
#end if
#if str($heterozygosity_stdev) != "":
--heterozygosity-stdev $heterozygosity_stdev
#end if
#if str($indel_heterozygosity) != "":
--indel-heterozygosity $indel_heterozygosity
#end if
#if $input_prior:
--input-prior '$input_prior'
#end if
#if $interval_merging_rule:
--interval-merging-rule $interval_merging_rule
#end if
#if $interval_set_rule:
--interval-set-rule $interval_set_rule
#end if
#if $lenient:
$lenient
#end if
#if str($max_alternate_alleles) != "":
--max-alternate-alleles $max_alternate_alleles
#end if
#if str($max_genotype_count) != "":
--max-genotype-count $max_genotype_count
#end if
#if str($num_reference_samples_if_no_call) != "":
--num-reference-samples-if-no-call $num_reference_samples_if_no_call
#end if
#if $only_output_calls_starting_in_intervals:
$only_output_calls_starting_in_intervals
#end if
#if $pedigree:
--pedigree '$pedigree'
#end if
#if $population_callset:
#if $population_callset.is_of_type("vcf_bgzip"):
--population-callset population_callset.vcf.gz
#else
--population-callset population_callset.vcf
#end if
#end if
#if $read_filter:
--read-filter '$read_filter'
#end if
#if $read_validation_stringency:
--read-validation-stringency $read_validation_stringency
#end if
#if str($sample_ploidy) != "":
--sample-ploidy $sample_ploidy
#end if
#if str($seconds_between_progress_updates) != "":
--seconds-between-progress-updates $seconds_between_progress_updates
#end if
#if $sites_only_vcf_output:
$sites_only_vcf_output
#end if
#if str($standard_min_confidence_threshold_for_calling) != "":
--standard-min-confidence-threshold-for-calling $standard_min_confidence_threshold_for_calling
#end if
#if $use_jdk_deflater:
$use_jdk_deflater
#end if
#if $use_jdk_inflater:
$use_jdk_inflater
#end if
#if $use_new_qual_calculator:
$use_new_qual_calculator
#end if
#if $verbosity:
--verbosity $verbosity
#end if
#include source=$ref_opts#
#include source=$vcf_output_opts#
#include source=$gatk_gvcf_input#
#include source=$gatk_ints_chth#
#include source=$gatk_excl_ints_chth#]]></command>
  <inputs>
    <!-- Shared reference / gVCF / interval inputs are expanded from macros; the
         remaining params map one-to-one onto GenotypeGVCFs command-line arguments
         (see each param's "argument" attribute). -->
    <expand macro="ref_sel"/>
    <expand macro="gzip_vcf_params"/>
    <expand macro="gatk_ints"/>
    <expand macro="gatk_gvcf_input_params"/>
    <expand macro="gatk_excl_ints"/>
    <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
    <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
    <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/>
    <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/>
    <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/>
    <param name="create_output_bam_index" argument="--create-output-bam-index" type="boolean" truevalue="--create-output-bam-index" falsevalue="" optional="true" checked="true" label="Create Output Bam Index" help="If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file."/>
    <param name="create_output_bam_md5" argument="--create-output-bam-md5" type="boolean" truevalue="--create-output-bam-md5" falsevalue="" optional="true" checked="false" label="Create Output Bam Md5" help="If true, create a MD5 digest for any BAM/SAM/CRAM file created"/>
    <param name="create_output_variant_index" argument="--create-output-variant-index" type="boolean" truevalue="--create-output-variant-index" falsevalue="" optional="true" checked="true" label="Create Output Variant Index" help="If true, create a VCF index when writing a coordinate-sorted VCF file."/>
    <param name="create_output_variant_md5" argument="--create-output-variant-md5" type="boolean" truevalue="--create-output-variant-md5" falsevalue="" optional="true" checked="false" label="Create Output Variant Md5" help="If true, create a MD5 digest for any VCF file created."/>
    <param name="dbsnp" argument="--dbsnp" type="data" optional="true" format="vcf,vcf_bgzip" label="Dbsnp" help="dbSNP file"/>
    <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don't cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified.  Caching is automatically disabled if there are no intervals specified."/>
    <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
    <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/>
    <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/>
    <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/>
    <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/>
    <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/>
    <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="txt" label="Gatk Config File" help="A configuration file to use with the GATK."/>
    <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/>
    <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus.  See the GATKDocs for full details on the meaning of this population genetics concept"/>
    <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/>
    <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling.  See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/>
    <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/>
    <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals">
      <option selected="true" value="ALL">ALL</option>
      <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
    </param>
    <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs">
      <option selected="true" value="UNION">UNION</option>
      <option selected="false" value="INTERSECTION">INTERSECTION</option>
    </param>
    <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/>
    <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/>
    <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/>
    <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/>
    <param name="only_output_calls_starting_in_intervals" argument="--only-output-calls-starting-in-intervals" type="boolean" truevalue="--only-output-calls-starting-in-intervals" falsevalue="" optional="true" checked="false" label="Only Output Calls Starting In Intervals" help="Restrict variant output to sites that start within provided intervals"/>
    <param name="pedigree" argument="--pedigree" type="data" optional="true" format="tabular" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/>
    <param name="population_callset" argument="--population-callset" type="data" optional="true" format="vcf,vcf_bgzip" label="Population Callset" help="Callset to use in calculating genotype priors"/>
    <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/>
    <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program.  The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
      <option selected="false" value="STRICT">STRICT</option>
      <option selected="false" value="LENIENT">LENIENT</option>
      <option selected="true" value="SILENT">SILENT</option>
    </param>
    <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/>
    <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/>
    <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don't emit genotype fields when writing vcf file output."/>
    <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
    <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/>
    <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/>
    <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/>
    <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging.">
      <option selected="false" value="ERROR">ERROR</option>
      <option selected="false" value="WARNING">WARNING</option>
      <option selected="true" value="INFO">INFO</option>
      <option selected="false" value="DEBUG">DEBUG</option>
    </param>
  </inputs>
  <!-- Output datasets are declared by the gzip_vcf_output_params macro; its
       definition is not visible in this file (presumably in the imported macros.xml). -->
  <outputs>
    <expand macro="gzip_vcf_output_params"/>
  </outputs>
  <!-- NOTE(review): the tests element is empty, so no functional tests are defined for this tool. -->
  <tests/>
  <help><![CDATA[Perform joint genotyping on one or more samples pre-called with
HaplotypeCaller

This tool is designed to perform joint genotyping on a single input,
which may contain one or many samples. In any case, the input samples
must possess genotype likelihoods produced by HaplotypeCaller with
``-ERC GVCF`` or ``-ERC BP_RESOLUTION``.

Input
~~~~~

The GATK4 GenotypeGVCFs tool can take only one input track. Options are
1) a single single-sample GVCF 2) a single multi-sample GVCF created by
CombineGVCFs or 3) a GenomicsDB workspace created by GenomicsDBImport. A
sample-level GVCF is produced by HaplotypeCaller with the ``-ERC GVCF``
setting.

Output
~~~~~~

A final VCF in which all samples have been jointly genotyped.

Usage example
~~~~~~~~~~~~~

Perform joint genotyping on a singular sample by providing a single-sample GVCF or on a cohort by providing a combined multi-sample GVCF
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    gatk --java-options "-Xmx4g" GenotypeGVCFs \
      -R Homo_sapiens_assembly38.fasta \
      -V input.g.vcf.gz \
      -O output.vcf.gz
    

Perform joint genotyping on GenomicsDB workspace created with GenomicsDBImport
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    gatk --java-options "-Xmx4g" GenotypeGVCFs \
      -R Homo_sapiens_assembly38.fasta \
      -V gendb://my_database \
      -O output.vcf.gz
    

Caveats
~~~~~~~

-  Only GVCF files produced by HaplotypeCaller (or CombineGVCFs) can be
   used as input for this tool. Some other programs produce files that
   they call GVCFs but those lack some important information (accurate
   genotype likelihoods for every position) that GenotypeGVCFs requires
   for its operation.
-  Cannot take multiple GVCF files in one command.

Special note on ploidy
~~~~~~~~~~~~~~~~~~~~~~

This tool is able to handle any ploidy (or mix of ploidies)
intelligently; there is no need to specify ploidy for non-diploid
organisms.
]]></help>
  <citations>
    <expand macro="citations"/>
  </citations>
</tool>