-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathgatk4_genotypegvcfs.xml
304 lines (286 loc) · 18 KB
/
gatk4_genotypegvcfs.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
<tool id="gatk4_genotypegvcfs" name="GATK4 GenotypeGVCFs" profile="17.09" version="@WRAPPER_VERSION@0">
<description>- Perform joint genotyping on one or more samples pre-called with HaplotypeCaller</description>
<!-- macros.xml is imported here; the macros expanded throughout this tool
     ("requirements", "version_cmd", and the input/output/citation macros below)
     are presumably defined there - confirm against macros.xml. -->
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<expand macro="version_cmd"/>
<command detect_errors="exit_code"><![CDATA[#include source=$gatk_gvcf_tabix#
#include source=$pre_gatk_ints_chth#
#include source=$pre_gatk_excl_ints_chth#
## ---------------------------------------------------------------------------
## Stage the optional VCF resources in the working directory under fixed names
## so the file extension tells GATK whether the input is bgzip-compressed.
## Compressed inputs are indexed with tabix. Dataset paths are single-quoted
## so that unusual characters in a path cannot split the shell word.
## ---------------------------------------------------------------------------
#if $dbsnp:
    #set datatype = $dbsnp.datatype
    #if $dbsnp.is_of_type("vcf_bgzip"):
        ln -s '$dbsnp' dbsnp.vcf.gz &&
        tabix dbsnp.vcf.gz &&
    #else
        ln -s '$dbsnp' dbsnp.vcf &&
    #end if
#end if
#if $population_callset:
    #set datatype = $population_callset.datatype
    #if $population_callset.is_of_type("vcf_bgzip"):
        ln -s '$population_callset' population_callset.vcf.gz &&
        tabix population_callset.vcf.gz &&
    #else
        ln -s '$population_callset' population_callset.vcf &&
    #end if
#end if
@CMD_BEGIN@ GenotypeGVCFs
## ---------------------------------------------------------------------------
## Conventions used for the options below:
##  * Flags that default to true in GATK get an explicit "false" when the box
##    is unchecked; emitting nothing would leave the GATK default switched on.
##  * Numeric options are tested with a string comparison against the empty
##    string, so an explicit 0 or 0.0 is passed through instead of being
##    treated as unset.
##  * Free-text values and dataset paths are single-quoted.
## ---------------------------------------------------------------------------
#if $add_output_sam_program_record:
    $add_output_sam_program_record
#else
    --add-output-sam-program-record false
#end if
#if $add_output_vcf_command_line:
    $add_output_vcf_command_line
#else
    --add-output-vcf-command-line false
#end if
#if $annotate_with_num_discovered_alleles:
    $annotate_with_num_discovered_alleles
#end if
#if str($cloud_index_prefetch_buffer) != '':
    --cloud-index-prefetch-buffer $cloud_index_prefetch_buffer
#end if
#if str($cloud_prefetch_buffer) != '':
    --cloud-prefetch-buffer $cloud_prefetch_buffer
#end if
#if $create_output_bam_index:
    $create_output_bam_index
#else
    --create-output-bam-index false
#end if
#if $create_output_bam_md5:
    $create_output_bam_md5
#end if
#if $create_output_variant_index:
    $create_output_variant_index
#else
    --create-output-variant-index false
#end if
#if $create_output_variant_md5:
    $create_output_variant_md5
#end if
#if $dbsnp:
    #if $dbsnp.is_of_type("vcf_bgzip"):
        --dbsnp dbsnp.vcf.gz
    #else
        --dbsnp dbsnp.vcf
    #end if
#end if
#if $disable_bam_index_caching:
    $disable_bam_index_caching
#end if
#if $disable_read_filter:
    --disable-read-filter '$disable_read_filter'
#end if
#if $disable_sequence_dictionary_validation:
    $disable_sequence_dictionary_validation
#end if
#if $disable_tool_default_annotations:
    $disable_tool_default_annotations
#end if
#if $disable_tool_default_read_filters:
    $disable_tool_default_read_filters
#end if
#if $enable_all_annotations:
    $enable_all_annotations
#end if
#if $founder_id:
    --founder-id '$founder_id'
#end if
#if $gatk_config_file:
    --gatk-config-file '$gatk_config_file'
#end if
#if str($gcs_max_retries) != '':
    --gcs-max-retries $gcs_max_retries
#end if
#if str($heterozygosity) != '':
    --heterozygosity $heterozygosity
#end if
#if str($heterozygosity_stdev) != '':
    --heterozygosity-stdev $heterozygosity_stdev
#end if
#if str($indel_heterozygosity) != '':
    --indel-heterozygosity $indel_heterozygosity
#end if
#if $input_prior:
    --input-prior '$input_prior'
#end if
#if $interval_merging_rule:
    --interval-merging-rule $interval_merging_rule
#end if
#if $interval_set_rule:
    --interval-set-rule $interval_set_rule
#end if
#if $lenient:
    $lenient
#end if
#if str($max_alternate_alleles) != '':
    --max-alternate-alleles $max_alternate_alleles
#end if
#if str($max_genotype_count) != '':
    --max-genotype-count $max_genotype_count
#end if
#if str($num_reference_samples_if_no_call) != '':
    --num-reference-samples-if-no-call $num_reference_samples_if_no_call
#end if
#if $only_output_calls_starting_in_intervals:
    $only_output_calls_starting_in_intervals
#end if
#if $pedigree:
    --pedigree '$pedigree'
#end if
#if $population_callset:
    #if $population_callset.is_of_type("vcf_bgzip"):
        --population-callset population_callset.vcf.gz
    #else
        --population-callset population_callset.vcf
    #end if
#end if
#if $read_filter:
    --read-filter '$read_filter'
#end if
#if $read_validation_stringency:
    --read-validation-stringency $read_validation_stringency
#end if
#if str($sample_ploidy) != '':
    --sample-ploidy $sample_ploidy
#end if
#if str($seconds_between_progress_updates) != '':
    --seconds-between-progress-updates $seconds_between_progress_updates
#end if
#if $sites_only_vcf_output:
    $sites_only_vcf_output
#end if
#if str($standard_min_confidence_threshold_for_calling) != '':
    --standard-min-confidence-threshold-for-calling $standard_min_confidence_threshold_for_calling
#end if
#if $use_jdk_deflater:
    $use_jdk_deflater
#end if
#if $use_jdk_inflater:
    $use_jdk_inflater
#end if
#if $use_new_qual_calculator:
    $use_new_qual_calculator
#end if
#if $verbosity:
    --verbosity $verbosity
#end if
## Reference, output, gVCF input and interval arguments come from shared
## template fragments; their content is not visible in this file.
#include source=$ref_opts#
#include source=$vcf_output_opts#
#include source=$gatk_gvcf_input#
#include source=$gatk_ints_chth#
#include source=$gatk_excl_ints_chth#]]></command>
<inputs>
    <!-- Shared input groups (reference selection, output compression, intervals,
         gVCF input, excluded intervals) are expanded from imported macros. -->
    <expand macro="ref_sel"/>
    <expand macro="gzip_vcf_params"/>
    <expand macro="gatk_ints"/>
    <expand macro="gatk_gvcf_input_params"/>
    <expand macro="gatk_excl_ints"/>
    <!-- Tool options, in alphabetical order. Each param's "argument" is the GATK
         flag that the command template emits when the parameter is set; boolean
         params carry the flag itself in "truevalue". -->
    <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
    <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
    <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/>
    <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/>
    <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/>
    <param name="create_output_bam_index" argument="--create-output-bam-index" type="boolean" truevalue="--create-output-bam-index" falsevalue="" optional="true" checked="true" label="Create Output Bam Index" help="If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file."/>
    <param name="create_output_bam_md5" argument="--create-output-bam-md5" type="boolean" truevalue="--create-output-bam-md5" falsevalue="" optional="true" checked="false" label="Create Output Bam Md5" help="If true, create a MD5 digest for any BAM/SAM/CRAM file created"/>
    <param name="create_output_variant_index" argument="--create-output-variant-index" type="boolean" truevalue="--create-output-variant-index" falsevalue="" optional="true" checked="true" label="Create Output Variant Index" help="If true, create a VCF index when writing a coordinate-sorted VCF file."/>
    <param name="create_output_variant_md5" argument="--create-output-variant-md5" type="boolean" truevalue="--create-output-variant-md5" falsevalue="" optional="true" checked="false" label="Create Output Variant Md5" help="If true, create a MD5 digest for any VCF file created."/>
    <param name="dbsnp" argument="--dbsnp" type="data" optional="true" format="vcf,vcf_bgzip" label="Dbsnp" help="dbSNP file"/>
    <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/>
    <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
    <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/>
    <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/>
    <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/>
    <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/>
    <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/>
    <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="txt" label="Gatk Config File" help="A configuration file to use with the GATK."/>
    <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/>
    <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept"/>
    <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/>
    <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/>
    <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/>
    <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals">
        <option selected="true" value="ALL">ALL</option>
        <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
    </param>
    <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs">
        <option selected="true" value="UNION">UNION</option>
        <option selected="false" value="INTERSECTION">INTERSECTION</option>
    </param>
    <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/>
    <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/>
    <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/>
    <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/>
    <param name="only_output_calls_starting_in_intervals" argument="--only-output-calls-starting-in-intervals" type="boolean" truevalue="--only-output-calls-starting-in-intervals" falsevalue="" optional="true" checked="false" label="Only Output Calls Starting In Intervals" help="Restrict variant output to sites that start within provided intervals"/>
    <param name="pedigree" argument="--pedigree" type="data" optional="true" format="tabular" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/>
    <param name="population_callset" argument="--population-callset" type="data" optional="true" format="vcf,vcf_bgzip" label="Population Callset" help="Callset to use in calculating genotype priors"/>
    <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/>
    <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
        <option selected="false" value="STRICT">STRICT</option>
        <option selected="false" value="LENIENT">LENIENT</option>
        <option selected="true" value="SILENT">SILENT</option>
    </param>
    <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/>
    <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/>
    <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&apos;t emit genotype fields when writing vcf file output."/>
    <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
    <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/>
    <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/>
    <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/>
    <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging.">
        <option selected="false" value="ERROR">ERROR</option>
        <option selected="false" value="WARNING">WARNING</option>
        <option selected="true" value="INFO">INFO</option>
        <option selected="false" value="DEBUG">DEBUG</option>
    </param>
</inputs>
<!-- The output dataset definition is expanded from the "gzip_vcf_output_params"
     macro; it is not declared inline in this file. -->
<outputs>
<expand macro="gzip_vcf_output_params"/>
</outputs>
<!-- NOTE(review): no functional tests are defined for this tool yet. -->
<tests/>
<help><![CDATA[Perform joint genotyping on one or more samples pre-called with
HaplotypeCaller
This tool is designed to perform joint genotyping on a single input,
which may contain one or many samples. In any case, the input samples
must possess genotype likelihoods produced by HaplotypeCaller with
\`-ERC GVCF\` or \`-ERC BP_RESOLUTION\`.
Input
~~~~~
The GATK4 GenotypeGVCFs tool can take only one input track. Options are
1) a single single-sample GVCF 2) a single multi-sample GVCF created by
CombineGVCFs or 3) a GenomicsDB workspace created by GenomicsDBImport. A
sample-level GVCF is produced by HaplotypeCaller with the \`-ERC GVCF\`
setting.
Output
~~~~~~
A final VCF in which all samples have been jointly genotyped.
Usage example
~~~~~~~~~~~~~
Perform joint genotyping on a singular sample by providing a single-sample GVCF or on a cohort by providing a combined multi-sample GVCF
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
gatk --java-options "-Xmx4g" GenotypeGVCFs \
-R Homo_sapiens_assembly38.fasta \
-V input.g.vcf.gz \
-O output.vcf.gz
Perform joint genotyping on GenomicsDB workspace created with GenomicsDBImport
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
gatk --java-options "-Xmx4g" GenotypeGVCFs \
-R Homo_sapiens_assembly38.fasta \
-V gendb://my_database \
-O output.vcf.gz
Caveats
~~~~~~~
- Only GVCF files produced by HaplotypeCaller (or CombineGVCFs) can be
used as input for this tool. Some other programs produce files that
they call GVCFs but those lack some important information (accurate
genotype likelihoods for every position) that GenotypeGVCFs requires
for its operation.
- Cannot take multiple GVCF files in one command.
Special note on ploidy
~~~~~~~~~~~~~~~~~~~~~~
This tool is able to handle any ploidy (or mix of ploidies)
intelligently; there is no need to specify ploidy for non-diploid
organisms.
]]></help>
<!-- Citation entries are expanded from the "citations" macro. -->
<citations>
<expand macro="citations"/>
</citations>
</tool>