diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/AnnotationPipeline.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/AnnotationPipeline.java index 281796f9..5264666d 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/AnnotationPipeline.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/AnnotationPipeline.java @@ -60,23 +60,30 @@ public class AnnotationPipeline { private static final Logger LOG = LoggerFactory.getLogger(AnnotationPipeline.class); private static void annotateJob(String[] args, String filename, String outputFilename, String outputFormat, String isoformOverride, - String errorReportLocation, boolean replace, Integer postIntervalSize) throws Exception { + String errorReportLocation, boolean replace, String postIntervalSize, boolean splitOutput) throws Exception { SpringApplication app = new SpringApplication(AnnotationPipeline.class); app.setWebApplicationType(WebApplicationType.NONE); app.setAllowBeanDefinitionOverriding(Boolean.TRUE); ConfigurableApplicationContext ctx = app.run(args); JobLauncher jobLauncher = ctx.getBean(JobLauncher.class); - + String failedOutputFilename = ""; + String successfulOutputFilename = ""; + if(splitOutput) { + failedOutputFilename = outputFilename + ".FAILED"; + successfulOutputFilename = outputFilename + ".SUCCESS"; + } Job annotationJob = ctx.getBean(BatchConfiguration.ANNOTATION_JOB, Job.class); JobParameters jobParameters = new JobParametersBuilder() - .addString("filename", filename) - .addString("outputFilename", outputFilename) - .addString("outputFormat", outputFormat) - .addString("replace", String.valueOf(replace)) - .addString("isoformOverride", isoformOverride) - .addString("errorReportLocation", errorReportLocation) - .addString("postIntervalSize", String.valueOf(postIntervalSize)) - .toJobParameters(); + .addString("filename", filename) + .addString("outputFilename", outputFilename) + .addString("failedOutputFilename", 
failedOutputFilename) + .addString("successfulOutputFilename", successfulOutputFilename) + .addString("outputFormat", outputFormat) + .addString("replace", String.valueOf(replace)) + .addString("isoformOverride", isoformOverride) + .addString("errorReportLocation", errorReportLocation) + .addString("postIntervalSize", postIntervalSize) + .toJobParameters(); JobExecution jobExecution = jobLauncher.run(annotationJob, jobParameters); if (!jobExecution.getExitStatus().equals(ExitStatus.COMPLETED)) { System.exit(2); @@ -176,7 +183,7 @@ private static void annotate(Subcommand subcommand, String[] args) throws Annota subcommand.getOptionValue("isoform-override"), subcommand.hasOption("error-report-location") ? subcommand.getOptionValue("error-report-location") : null, subcommand.hasOption("replace-symbol-entrez"), - subcommand.hasOption("post-interval-size") ? Integer.parseInt(subcommand.getOptionValue("post-interval-size")) : -1); + subcommand.hasOption("post-interval-size") ? subcommand.getOptionValue("post-interval-size") : "-1", subcommand.hasOption("split-output")); } catch (Exception e) { throw new AnnotationFailedException(e); } diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/cli/AnnotateSubcommand.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/cli/AnnotateSubcommand.java index ea09e133..5cc85da3 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/cli/AnnotateSubcommand.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/cli/AnnotateSubcommand.java @@ -36,6 +36,7 @@ private static Options getOptions() { .addOption("i", "isoform-override", true, "Isoform Overrides (mskcc or uniprot)") .addOption("e", "error-report-location", true, "Error report filename (including path)") .addOption("r", "replace-symbol-entrez", false, "Replace gene symbols and entrez id with what is provided by annotator") + .addOption("s", "split-output", false, "Output will be split based on Annotation_Status. 
3 files will be created: , .SUCCESS, .FAILED" ) .addOption("p", "post-interval-size", true, "Number of records to make POST requests to Genome Nexus with at a time"); return gnuOptions; } diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/BatchConfiguration.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/BatchConfiguration.java index 84379651..aeb927ef 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/BatchConfiguration.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/BatchConfiguration.java @@ -33,6 +33,7 @@ package org.cbioportal.annotation.pipeline; import java.net.MalformedURLException; +import java.util.Arrays; import javax.sql.DataSource; import org.cbioportal.annotator.util.AnnotationUtil; import org.cbioportal.models.AnnotatedRecord; @@ -40,6 +41,7 @@ import org.springframework.batch.core.*; import org.springframework.batch.item.*; import org.springframework.batch.core.configuration.annotation.*; +import org.springframework.batch.item.support.CompositeItemWriter; import org.springframework.context.annotation.*; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.batch.core.configuration.annotation.StepScope; @@ -94,7 +96,7 @@ public Step step() . 
chunk(Integer.parseInt(chunk)) .reader(reader()) .processor(processor()) - .writer(writer()) + .writer(compositeItemWriter()) .build(); } @@ -114,11 +116,31 @@ public MutationRecordProcessor processor() @Bean @StepScope - public ItemStreamWriter writer() + public ItemStreamWriter mainWriter() { return new MutationRecordWriter(); } + @Bean + @StepScope + public ItemStreamWriter failedItemWriter() + { + return new FailedMutationRecordWriter(); + } + + @Bean + @StepScope + public ItemStreamWriter successfulItemWriter() + { + return new SuccessfulMutationRecordWriter(); + } + + public CompositeItemWriter compositeItemWriter(){ + CompositeItemWriter writer = new CompositeItemWriter(); + writer.setDelegates(Arrays.asList(mainWriter(), successfulItemWriter(), failedItemWriter())); + return writer; + } + // general spring batch configuration @Value("org/springframework/batch/core/schema-drop-sqlite.sql") private Resource dropRepositoryTables; diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultFlatFileHeaderCallback.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultFlatFileHeaderCallback.java new file mode 100644 index 00000000..dcb3bf07 --- /dev/null +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultFlatFileHeaderCallback.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. 
In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal CMO-Pipelines. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package org.cbioportal.annotation.pipeline; + +import org.apache.commons.lang.StringUtils; +import org.springframework.batch.item.file.FlatFileHeaderCallback; + +import java.io.IOException; +import java.io.Writer; +import java.util.List; + +/** + * @author Mete Ozguz + */ +public class DefaultFlatFileHeaderCallback implements FlatFileHeaderCallback { + private final List header; + private final List commentLines; + + public DefaultFlatFileHeaderCallback(List header, List commentLines) { + this.header = header; + this.commentLines = commentLines; + } + + @Override + public void writeHeader(Writer writer) throws IOException { + // first write out the comment lines, then write the actual header + for (String comment : commentLines) { + writer.write(comment + "\n"); + } + writer.write(StringUtils.join(header, "\t")); + } +} diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultLineCallbackHandler.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultLineCallbackHandler.java index 006fb9f0..b489284e 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultLineCallbackHandler.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/DefaultLineCallbackHandler.java @@ -1,3 +1,35 @@ +/* + * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. 
In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal CMO-Pipelines. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + package org.cbioportal.annotation.pipeline; import org.springframework.batch.item.file.LineCallbackHandler; diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/FailedMutationRecordWriter.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/FailedMutationRecordWriter.java new file mode 100644 index 00000000..50544902 --- /dev/null +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/FailedMutationRecordWriter.java @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. 
The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal CMO-Pipelines. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package org.cbioportal.annotation.pipeline; + +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.ItemStreamWriter; +import org.springframework.batch.item.file.FlatFileItemWriter; +import org.springframework.batch.item.file.transform.PassThroughLineAggregator; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.FileSystemResource; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +/** + * @author Mete Ozguz + */ +public class FailedMutationRecordWriter implements ItemStreamWriter { + + @Value("#{jobParameters[failedOutputFilename]}") + private String failedOutputFilename; + + @Value("#{stepExecutionContext['commentLines']}") + private List commentLines; + + @Value("#{stepExecutionContext['mutation_header']}") + private List header; + + @Value("#{stepExecutionContext['failedRecordsCount']}") + private Integer failedRecordsCount; + + private FlatFileItemWriter flatFileItemWriter = new FlatFileItemWriter<>(); + + // Set up the writer and print the json from CVR to a file + @Override + public void open(ExecutionContext ec) throws ItemStreamException { + if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && failedRecordsCount > 0) { + Path stagingFile = Paths.get(failedOutputFilename); + PassThroughLineAggregator aggr = new PassThroughLineAggregator(); + flatFileItemWriter.setLineAggregator(aggr); + flatFileItemWriter.setResource(new FileSystemResource(stagingFile.toString())); + flatFileItemWriter.setHeaderCallback(new DefaultFlatFileHeaderCallback(header, commentLines)); + flatFileItemWriter.open(ec); + } + } + + @Override + public void update(ExecutionContext ec) throws ItemStreamException { + } + + @Override + public void close() throws ItemStreamException { + if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && 
failedRecordsCount > 0) { + flatFileItemWriter.close(); + } + } + + @Override + public void write(List items) throws Exception { + if (failedOutputFilename != null && !failedOutputFilename.isEmpty() && failedRecordsCount > 0) { + List failedItems = new ArrayList<>(); + for (String item : items) { + if (!item.endsWith("SUCCESS")) { + failedItems.add(item); + } + } + flatFileItemWriter.write(failedItems); + } + } +} diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordReader.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordReader.java index 93c68f18..a1f46515 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordReader.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordReader.java @@ -131,9 +131,18 @@ public void open(ExecutionContext ec) throws ItemStreamException { } } } else { - for (AnnotatedRecord ar : allAnnotatedRecords) { + int failedRecordCount = 0; + int successfulRecordCount = 0; + for (AnnotatedRecord ar : this.allAnnotatedRecords) { header.addAll(ar.getHeaderWithAdditionalFields()); + if(ar.getANNOTATION_STATUS().equals("SUCCESS")) { + successfulRecordCount++; + } else { + failedRecordCount++; + } } + ec.put("failedRecordsCount", failedRecordCount); + ec.put("successfulRecordsCount", successfulRecordCount); } // add 'Annotation_Status' to header if not already present if (!header.contains("Annotation_Status")) { diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordWriter.java b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordWriter.java index 969f6034..3a636a1a 100644 --- a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordWriter.java +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/MutationRecordWriter.java @@ -32,19 +32,17 @@ package 
org.cbioportal.annotation.pipeline; -import java.io.*; import java.util.*; import java.nio.file.*; -import org.apache.commons.lang.StringUtils; import org.springframework.batch.item.*; import org.springframework.batch.item.file.*; import org.springframework.batch.item.file.transform.PassThroughLineAggregator; -import org.cbioportal.models.AnnotatedRecord; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.FileSystemResource; /** * + * @author Mete Ozguz * @author Zachary Heins */ public class MutationRecordWriter implements ItemStreamWriter { @@ -61,28 +59,17 @@ public class MutationRecordWriter implements ItemStreamWriter { @Value("#{stepExecutionContext['records_to_write_count']}") private Integer recordsToWriteCount; - private Path stagingFile; private FlatFileItemWriter flatFileItemWriter = new FlatFileItemWriter<>(); // Set up the writer and print the json from CVR to a file @Override public void open(ExecutionContext ec) throws ItemStreamException { if (recordsToWriteCount > 0) { - stagingFile = Paths.get(outputFilename); - + Path stagingFile = Paths.get(outputFilename); PassThroughLineAggregator aggr = new PassThroughLineAggregator(); flatFileItemWriter.setLineAggregator(aggr); flatFileItemWriter.setResource( new FileSystemResource(stagingFile.toString())); - flatFileItemWriter.setHeaderCallback(new FlatFileHeaderCallback() { - @Override - public void writeHeader(Writer writer) throws IOException { - // first write out the comment lines, then write the actual header - for (String comment : commentLines) { - writer.write(comment + "\n"); - } - writer.write(StringUtils.join(header, "\t")); - } - }); + flatFileItemWriter.setHeaderCallback(new DefaultFlatFileHeaderCallback(header, commentLines)); flatFileItemWriter.open(ec); } } diff --git a/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/SuccessfulMutationRecordWriter.java 
b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/SuccessfulMutationRecordWriter.java new file mode 100644 index 00000000..f3db6311 --- /dev/null +++ b/annotationPipeline/src/main/java/org/cbioportal/annotation/pipeline/SuccessfulMutationRecordWriter.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2016 Memorial Sloan-Kettering Cancer Center. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS + * FOR A PARTICULAR PURPOSE. The software and documentation provided hereunder + * is on an "as is" basis, and Memorial Sloan-Kettering Cancer Center has no + * obligations to provide maintenance, support, updates, enhancements or + * modifications. In no event shall Memorial Sloan-Kettering Cancer Center be + * liable to any party for direct, indirect, special, incidental or + * consequential damages, including lost profits, arising out of the use of this + * software and its documentation, even if Memorial Sloan-Kettering Cancer + * Center has been advised of the possibility of such damage. + */ + +/* + * This file is part of cBioPortal CMO-Pipelines. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package org.cbioportal.annotation.pipeline; + +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.ItemStreamWriter; +import org.springframework.batch.item.file.FlatFileItemWriter; +import org.springframework.batch.item.file.transform.PassThroughLineAggregator; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.FileSystemResource; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +/** + * @author Mete Ozguz + */ +public class SuccessfulMutationRecordWriter implements ItemStreamWriter { + + @Value("#{jobParameters[successfulOutputFilename]}") + private String successfulOutputFilename; + + @Value("#{stepExecutionContext['commentLines']}") + private List commentLines; + + @Value("#{stepExecutionContext['mutation_header']}") + private List header; + + @Value("#{stepExecutionContext['successfulRecordsCount']}") + private Integer successfulRecordsCount; + private FlatFileItemWriter flatFileItemWriter = new FlatFileItemWriter<>(); + + // Set up the writer and print the json from CVR to a file + @Override + public void open(ExecutionContext ec) throws ItemStreamException { + if (successfulOutputFilename != null && !successfulOutputFilename.isEmpty() && successfulRecordsCount > 0) { + Path stagingFile = Paths.get(successfulOutputFilename); + PassThroughLineAggregator aggr = new PassThroughLineAggregator(); + flatFileItemWriter.setLineAggregator(aggr); + flatFileItemWriter.setResource(new FileSystemResource(stagingFile.toString())); + flatFileItemWriter.setHeaderCallback(new DefaultFlatFileHeaderCallback(header, commentLines)); + flatFileItemWriter.open(ec); + } + } + + @Override + public void update(ExecutionContext ec) throws ItemStreamException { + } + + @Override + public void close() throws ItemStreamException { + if (successfulOutputFilename != null && 
!successfulOutputFilename.isEmpty() && successfulRecordsCount > 0) { + flatFileItemWriter.close(); + } + } + + @Override + public void write(List items) throws Exception { + if (successfulOutputFilename != null && !successfulOutputFilename.isEmpty() && successfulRecordsCount > 0) { + List failedItems = new ArrayList<>(); + for (String item : items) { + if (item.endsWith("SUCCESS")) { + failedItems.add(item); + } + } + flatFileItemWriter.write(failedItems); + } + } +} diff --git a/annotationPipeline/src/test/java/org/cbioportal/annotation/SpringBatchIntegrationTest.java b/annotationPipeline/src/test/java/org/cbioportal/annotation/SpringBatchIntegrationTest.java index a65cd20d..d9b2c56e 100644 --- a/annotationPipeline/src/test/java/org/cbioportal/annotation/SpringBatchIntegrationTest.java +++ b/annotationPipeline/src/test/java/org/cbioportal/annotation/SpringBatchIntegrationTest.java @@ -25,7 +25,7 @@ import static org.junit.Assert.assertEquals; /** - * Unit test version of .circleci/config.yml + * Unit test version of .circleci/config.yml ( and much more ) * ReflectionTestUtils.setField used in following tests change other tests' outcomes * Tests: check_if_nucleotide_context_provides_Ref_Tri_and_Var_Tri_columns, and * check_if_my_variant_info_provides_gnomad_annotations @@ -323,6 +323,36 @@ public void test_output_format_with_formatFile() throws Exception { testWith(jobParameters, expectedFile, actualFile); } + @Test + @DisplayName("Check if option split-output is working") + public void check_if_option_split_output_is_working() throws Exception { + ReflectionTestUtils.setField(annotator, "enrichmentFields", "annotation_summary"); + String inputFile = IN + "data_mutations.txt"; + String expectedFile = EXPECTED + "data_mutations.txt"; + String expectedFailedFile = EXPECTED + "data_mutations.txt.FAILED"; + String expectedSuccessfulFile = EXPECTED + "data_mutations.txt.SUCCESS"; + String actualFile = ACTUAL + "data_mutations.txt"; + String actualFailedFile = ACTUAL + 
"data_mutations.txt.FAILED"; + String actualSuccessfulFile = ACTUAL + "data_mutations.txt.SUCCESS"; + JobParameters jobParameters = new JobParametersBuilder() + .addString("filename", inputFile) + .addString("outputFilename", actualFile) + .addString("failedOutputFilename", actualFailedFile) + .addString("successfulOutputFilename", actualSuccessfulFile) + .addString("replace", String.valueOf(true)) + .addString("isoformOverride", "uniprot") + .addString("errorReportLocation", null) + .addString("postIntervalSize", String.valueOf(-1)) + .toJobParameters(); + testWith(jobParameters, expectedFile, actualFile); + FileSystemResource expectedFailedResult = new FileSystemResource(expectedFailedFile); + FileSystemResource actualFailedResult = new FileSystemResource(actualFailedFile); + AssertFile.assertFileEquals(expectedFailedResult, actualFailedResult); + FileSystemResource expectedSuccessfulResult = new FileSystemResource(expectedSuccessfulFile); + FileSystemResource actualSuccessfulResult = new FileSystemResource(actualSuccessfulFile); + AssertFile.assertFileEquals(expectedSuccessfulResult, actualSuccessfulResult); + } + private void testWith(JobParameters jobParameters, String expectedPath, String actualPath) throws Exception { FileSystemResource expectedResult = new FileSystemResource(expectedPath); FileSystemResource actualResult = new FileSystemResource(actualPath); diff --git a/annotationPipeline/src/test/resources/expected/data_mutations.txt b/annotationPipeline/src/test/resources/expected/data_mutations.txt new file mode 100644 index 00000000..8a61026b --- /dev/null +++ b/annotationPipeline/src/test/resources/expected/data_mutations.txt @@ -0,0 +1,9 @@ +#genome_nexus_version: 0-unknown-version-SNAPSHOT +#isoform: uniprot +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Consequence Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status 
Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer t_ref_count t_alt_count n_ref_count n_alt_count HGVSc HGVSp HGVSp_Short Transcript_ID RefSeq Protein_position Codons Exon_Number AA_AF AF AFR_AF ALLELE_NUM AMR_AF ASN_AF Allele Amino_acids BIOTYPE CANONICAL CCDS CDS_position CLIN_SIG DISTANCE DOMAINS EAS_AF EA_AF ENSP EUR_AF EXON ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_Adj ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_Adj ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS ExAC_FILTER Existing_variation FILTER Feature Feature_type GENE_PHENO Gene HGNC_ID HGVS_OFFSET HIGH_INF_POS IMPACT INTRON MINIMISED MOTIF_NAME MOTIF_POS MOTIF_SCORE_CHANGE PHENO PICK PUBMED PolyPhen SAS_AF SIFT SOMATIC STRAND_VEP SWISSPROT SYMBOL SYMBOL_SOURCE TREMBL TSL UNIPARC VARIANT_CLASS all_effects cDNA_position flanking_bps gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF n_depth t_depth vcf_id vcf_pos vcf_qual Annotation_Status +AL645728.3 . 
GRCh37 1 1517171 1517171 + downstream_gene_variant 3'Flank SNP G G A rs753543553 7316-918 BS_84QXT7FR G G 34 7 17 0 ENST00000426195 96/634 0.1779 0.2156 1 0.1282 A R/Q protein_coding YES CCDS31.1 287/1905 Coiled-coils_(Ncoils):Coil,hmmpanther:PTHR23075,hmmpanther:PTHR23075:SF4 0.3323 ENSP00000368030 0.004 3/16 rs1619925,COSM4596874 PASS ENST00000378755 Transcript 1 ENSG00000197785 HGNC:25567 MODERATE 0,1 1 benign(0) 0.182 tolerated_low_confidence(0.58) 0,1 1 Q9NVI7 ATAD3A HGNC 2 UPI000013D456 SNV ATAD3A,missense_variant,p.Arg96Gln,ENST00000378755,NM_018188.3;ATAD3A,intron_variant,,ENST00000339113,;ATAD3A,intron_variant,,ENST00000378756,NM_001170535.1;ATAD3A,intron_variant,,ENST00000536055,NM_001170536.1;ATAD3A,non_coding_transcript_exon_variant,,ENST00000439513,;ATAD3A,upstream_gene_variant,,ENST00000429957,; 381/2612 CGG 0.03914 0.1307 0.04243 0.0006101 0.2592 0.01968 0.0008531 0.02897 0.04709 17 41 . 1517171 . SUCCESS +CA6 . GRCh38 chr1 8958917 8958917 + missense_variant Missense_Mutation SNP T T C novel 7316-918 BS_84QXT7FR T T 59 11 22 1 c.416T>C p.Ile139Thr p.I139T ENST00000377436 NM_001270500.1 139/313 aTt/aCt 1 C I/T protein_coding YES CCDS57970.1 416/942 PROSITE_profiles:PS51144,cd03125,hmmpanther:PTHR18952,hmmpanther:PTHR18952:SF110,PROSITE_patterns:PS00162,Gene3D:3.10.200.10,Pfam_domain:PF00194,SMART_domains:SM01057,Superfamily_domains:SSF51069 ENSP00000366654 4/8 PASS ENST00000377436 Transcript ENSG00000131686 HGNC:1380 MODERATE 1 benign(0.368) deleterious(0) 1 P23280 CA6 HGNC 1 UPI00004CA0CB SNV CA6,missense_variant,p.Ile139Thr,ENST00000377443,NM_001215.3;CA6,missense_variant,p.Ile139Thr,ENST00000377436,NM_001270500.1;CA6,missense_variant,p.Ile79Thr,ENST00000377442,NM_001270501.1,NM_001270502.1;CA6,missense_variant,p.Ile107Thr,ENST00000549778,;CA6,non_coding_transcript_exon_variant,,ENST00000476083,; 416/942 ATT 23 70 . 8958917 . FAILED +MAP3K6 . 
GRCh38 chr1 27361345 27361345 + splice_donor_variant Splice_Site SNP C C A rs771659002 7316-918 BS_84QXT7FR C C 59 19 28 0 c.1736+1G>T p.X579_splice ENST00000493901 NM_004672.4 1 A protein_coding YES CCDS299.1 ENSP00000419591 rs771659002 PASS ENST00000493901 Transcript 1 ENSG00000142733 HGNC:6858 HIGH 13/29 1 -1 O95382 MAP3K6 HGNC 5 UPI0000205587 SNV MAP3K6,splice_donor_variant,,ENST00000357582,;MAP3K6,splice_donor_variant,,ENST00000374040,NM_001297609.1;MAP3K6,splice_donor_variant,,ENST00000472410,;MAP3K6,splice_donor_variant,,ENST00000493901,NM_004672.4;MAP3K6,upstream_gene_variant,,ENST00000486046,;MAP3K6,upstream_gene_variant,,ENST00000470890,;MAP3K6,upstream_gene_variant,,ENST00000476509,;MAP3K6,upstream_gene_variant,,ENST00000495230,;,regulatory_region_variant,,ENSR00000003702,; ACC 4.073e-06 8.989e-06 28 78 . 27361345 . FAILED +SPAG17 . GRCh38 chr1 118066887 118066887 + missense_variant Missense_Mutation SNP C C T novel 7316-918 BS_84QXT7FR C C 55 15 29 0 c.2398G>A p.Ala800Thr p.A800T ENST00000336338 NM_206996.2 800/2223 Gcc/Acc 1 T A/T protein_coding YES CCDS899.1 2398/6672 hmmpanther:PTHR21963 ENSP00000337804 18/49 PASS ENST00000336338 Transcript ENSG00000155761 HGNC:26620 MODERATE 1 probably_damaging(0.998) deleterious(0) -1 Q6Q759 SPAG17 HGNC 1 UPI00001601FD SNV SPAG17,missense_variant,p.Ala800Thr,ENST00000336338,NM_206996.2;SPAG17,intron_variant,,ENST00000477444,; 2464/6924 GCT 29 70 . 118066887 . FAILED +IGFN1 . 
GRCh38 chr1 201208915 201208915 + missense_variant Missense_Mutation SNP A A G rs1247701351 7316-918 BS_84QXT7FR A A 53 13 24 0 c.4022A>G p.Asp1341Gly p.D1341G ENST00000335211 NM_001164586.1 1341/3708 gAt/gGt 1 G D/G protein_coding YES CCDS53455.1 4022/11127 mobidb-lite ENSP00000334714 12/24 rs1247701351 PASS ENST00000335211 Transcript ENSG00000163395 HGNC:24607 MODERATE 1 possibly_damaging(0.737) tolerated(1) 1 Q86VF2 IGFN1 HGNC 5 UPI0001B300F4 SNV IGFN1,missense_variant,p.Asp1341Gly,ENST00000335211,NM_001164586.1;IGFN1,intron_variant,,ENST00000295591,;IGFN1,upstream_gene_variant,,ENST00000412892,;IGFN1,intron_variant,,ENST00000437879,;IGFN1,downstream_gene_variant,,ENST00000444705,;IGFN1,upstream_gene_variant,,ENST00000473483,; 4152/11810 GAT 27 73 . 201208915 . FAILED diff --git a/annotationPipeline/src/test/resources/expected/data_mutations.txt.FAILED b/annotationPipeline/src/test/resources/expected/data_mutations.txt.FAILED new file mode 100644 index 00000000..dfd410dc --- /dev/null +++ b/annotationPipeline/src/test/resources/expected/data_mutations.txt.FAILED @@ -0,0 +1,8 @@ +#genome_nexus_version: 0-unknown-version-SNAPSHOT +#isoform: uniprot +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Consequence Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer t_ref_count t_alt_count n_ref_count n_alt_count HGVSc HGVSp HGVSp_Short Transcript_ID RefSeq Protein_position Codons Exon_Number AA_AF AF AFR_AF ALLELE_NUM AMR_AF ASN_AF Allele Amino_acids BIOTYPE CANONICAL CCDS CDS_position CLIN_SIG DISTANCE DOMAINS EAS_AF EA_AF 
ENSP EUR_AF EXON ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_Adj ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_Adj ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS ExAC_FILTER Existing_variation FILTER Feature Feature_type GENE_PHENO Gene HGNC_ID HGVS_OFFSET HIGH_INF_POS IMPACT INTRON MINIMISED MOTIF_NAME MOTIF_POS MOTIF_SCORE_CHANGE PHENO PICK PUBMED PolyPhen SAS_AF SIFT SOMATIC STRAND_VEP SWISSPROT SYMBOL SYMBOL_SOURCE TREMBL TSL UNIPARC VARIANT_CLASS all_effects cDNA_position flanking_bps gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF n_depth t_depth vcf_id vcf_pos vcf_qual Annotation_Status +CA6 . GRCh38 chr1 8958917 8958917 + missense_variant Missense_Mutation SNP T T C novel 7316-918 BS_84QXT7FR T T 59 11 22 1 c.416T>C p.Ile139Thr p.I139T ENST00000377436 NM_001270500.1 139/313 aTt/aCt 1 C I/T protein_coding YES CCDS57970.1 416/942 PROSITE_profiles:PS51144,cd03125,hmmpanther:PTHR18952,hmmpanther:PTHR18952:SF110,PROSITE_patterns:PS00162,Gene3D:3.10.200.10,Pfam_domain:PF00194,SMART_domains:SM01057,Superfamily_domains:SSF51069 ENSP00000366654 4/8 PASS ENST00000377436 Transcript ENSG00000131686 HGNC:1380 MODERATE 1 benign(0.368) deleterious(0) 1 P23280 CA6 HGNC 1 UPI00004CA0CB SNV CA6,missense_variant,p.Ile139Thr,ENST00000377443,NM_001215.3;CA6,missense_variant,p.Ile139Thr,ENST00000377436,NM_001270500.1;CA6,missense_variant,p.Ile79Thr,ENST00000377442,NM_001270501.1,NM_001270502.1;CA6,missense_variant,p.Ile107Thr,ENST00000549778,;CA6,non_coding_transcript_exon_variant,,ENST00000476083,; 416/942 ATT 23 70 . 8958917 . FAILED +MAP3K6 . 
GRCh38 chr1 27361345 27361345 + splice_donor_variant Splice_Site SNP C C A rs771659002 7316-918 BS_84QXT7FR C C 59 19 28 0 c.1736+1G>T p.X579_splice ENST00000493901 NM_004672.4 1 A protein_coding YES CCDS299.1 ENSP00000419591 rs771659002 PASS ENST00000493901 Transcript 1 ENSG00000142733 HGNC:6858 HIGH 13/29 1 -1 O95382 MAP3K6 HGNC 5 UPI0000205587 SNV MAP3K6,splice_donor_variant,,ENST00000357582,;MAP3K6,splice_donor_variant,,ENST00000374040,NM_001297609.1;MAP3K6,splice_donor_variant,,ENST00000472410,;MAP3K6,splice_donor_variant,,ENST00000493901,NM_004672.4;MAP3K6,upstream_gene_variant,,ENST00000486046,;MAP3K6,upstream_gene_variant,,ENST00000470890,;MAP3K6,upstream_gene_variant,,ENST00000476509,;MAP3K6,upstream_gene_variant,,ENST00000495230,;,regulatory_region_variant,,ENSR00000003702,; ACC 4.073e-06 8.989e-06 28 78 . 27361345 . FAILED +SPAG17 . GRCh38 chr1 118066887 118066887 + missense_variant Missense_Mutation SNP C C T novel 7316-918 BS_84QXT7FR C C 55 15 29 0 c.2398G>A p.Ala800Thr p.A800T ENST00000336338 NM_206996.2 800/2223 Gcc/Acc 1 T A/T protein_coding YES CCDS899.1 2398/6672 hmmpanther:PTHR21963 ENSP00000337804 18/49 PASS ENST00000336338 Transcript ENSG00000155761 HGNC:26620 MODERATE 1 probably_damaging(0.998) deleterious(0) -1 Q6Q759 SPAG17 HGNC 1 UPI00001601FD SNV SPAG17,missense_variant,p.Ala800Thr,ENST00000336338,NM_206996.2;SPAG17,intron_variant,,ENST00000477444,; 2464/6924 GCT 29 70 . 118066887 . FAILED +IGFN1 . 
GRCh38 chr1 201208915 201208915 + missense_variant Missense_Mutation SNP A A G rs1247701351 7316-918 BS_84QXT7FR A A 53 13 24 0 c.4022A>G p.Asp1341Gly p.D1341G ENST00000335211 NM_001164586.1 1341/3708 gAt/gGt 1 G D/G protein_coding YES CCDS53455.1 4022/11127 mobidb-lite ENSP00000334714 12/24 rs1247701351 PASS ENST00000335211 Transcript ENSG00000163395 HGNC:24607 MODERATE 1 possibly_damaging(0.737) tolerated(1) 1 Q86VF2 IGFN1 HGNC 5 UPI0001B300F4 SNV IGFN1,missense_variant,p.Asp1341Gly,ENST00000335211,NM_001164586.1;IGFN1,intron_variant,,ENST00000295591,;IGFN1,upstream_gene_variant,,ENST00000412892,;IGFN1,intron_variant,,ENST00000437879,;IGFN1,downstream_gene_variant,,ENST00000444705,;IGFN1,upstream_gene_variant,,ENST00000473483,; 4152/11810 GAT 27 73 . 201208915 . FAILED diff --git a/annotationPipeline/src/test/resources/expected/data_mutations.txt.SUCCESS b/annotationPipeline/src/test/resources/expected/data_mutations.txt.SUCCESS new file mode 100644 index 00000000..ab7b3d63 --- /dev/null +++ b/annotationPipeline/src/test/resources/expected/data_mutations.txt.SUCCESS @@ -0,0 +1,5 @@ +#genome_nexus_version: 0-unknown-version-SNAPSHOT +#isoform: uniprot +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Consequence Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer t_ref_count t_alt_count n_ref_count n_alt_count HGVSc HGVSp HGVSp_Short Transcript_ID RefSeq Protein_position Codons Exon_Number AA_AF AF AFR_AF ALLELE_NUM AMR_AF ASN_AF Allele Amino_acids BIOTYPE CANONICAL CCDS CDS_position CLIN_SIG DISTANCE DOMAINS EAS_AF 
EA_AF ENSP EUR_AF EXON ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_Adj ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_Adj ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS ExAC_FILTER Existing_variation FILTER Feature Feature_type GENE_PHENO Gene HGNC_ID HGVS_OFFSET HIGH_INF_POS IMPACT INTRON MINIMISED MOTIF_NAME MOTIF_POS MOTIF_SCORE_CHANGE PHENO PICK PUBMED PolyPhen SAS_AF SIFT SOMATIC STRAND_VEP SWISSPROT SYMBOL SYMBOL_SOURCE TREMBL TSL UNIPARC VARIANT_CLASS all_effects cDNA_position flanking_bps gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF n_depth t_depth vcf_id vcf_pos vcf_qual Annotation_Status +AL645728.3 . GRCh37 1 1517171 1517171 + downstream_gene_variant 3'Flank SNP G G A rs753543553 7316-918 BS_84QXT7FR G G 34 7 17 0 ENST00000426195 96/634 0.1779 0.2156 1 0.1282 A R/Q protein_coding YES CCDS31.1 287/1905 Coiled-coils_(Ncoils):Coil,hmmpanther:PTHR23075,hmmpanther:PTHR23075:SF4 0.3323 ENSP00000368030 0.004 3/16 rs1619925,COSM4596874 PASS ENST00000378755 Transcript 1 ENSG00000197785 HGNC:25567 MODERATE 0,1 1 benign(0) 0.182 tolerated_low_confidence(0.58) 0,1 1 Q9NVI7 ATAD3A HGNC 2 UPI000013D456 SNV ATAD3A,missense_variant,p.Arg96Gln,ENST00000378755,NM_018188.3;ATAD3A,intron_variant,,ENST00000339113,;ATAD3A,intron_variant,,ENST00000378756,NM_001170535.1;ATAD3A,intron_variant,,ENST00000536055,NM_001170536.1;ATAD3A,non_coding_transcript_exon_variant,,ENST00000439513,;ATAD3A,upstream_gene_variant,,ENST00000429957,; 381/2612 CGG 0.03914 0.1307 0.04243 0.0006101 0.2592 0.01968 0.0008531 0.02897 0.04709 17 41 . 1517171 . 
SUCCESS diff --git a/annotationPipeline/src/test/resources/input/data_mutations.txt b/annotationPipeline/src/test/resources/input/data_mutations.txt new file mode 100644 index 00000000..58490608 --- /dev/null +++ b/annotationPipeline/src/test/resources/input/data_mutations.txt @@ -0,0 +1,7 @@ +#version 2.4 +Hugo_Symbol Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED ExAC_AF ExAC_AF_AFR ExAC_AF_AMR ExAC_AF_EAS ExAC_AF_FIN ExAC_AF_NFE ExAC_AF_OTH ExAC_AF_SAS GENE_PHENO FILTER flanking_bps vcf_id vcf_qual ExAC_AF_Adj ExAC_AC_AN_Adj ExAC_AC_AN ExAC_AC_AN_AFR ExAC_AC_AN_AMR ExAC_AC_AN_EAS ExAC_AC_AN_FIN ExAC_AC_AN_NFE ExAC_AC_AN_OTH ExAC_AC_AN_SAS ExAC_FILTER gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos +ATAD3A . 
GRCh38 chr1 1517171 1517171 + Missense_Mutation SNP G G A rs1619925 7316-918 BS_84QXT7FR G G c.287G>A p.Arg96Gln p.R96Q ENST00000378755 3/16 41 34 7 17 17 0 ATAD3A,missense_variant,p.Arg96Gln,ENST00000378755,NM_018188.3;ATAD3A,intron_variant,,ENST00000339113,;ATAD3A,intron_variant,,ENST00000378756,NM_001170535.1;ATAD3A,intron_variant,,ENST00000536055,NM_001170536.1;ATAD3A,non_coding_transcript_exon_variant,,ENST00000439513,;ATAD3A,upstream_gene_variant,,ENST00000429957,; A ENSG00000197785 ENST00000378755 Transcript missense_variant 381/2612 287/1905 96/634 R/Q cGg/cAg rs1619925,COSM4596874 1 1 ATAD3A HGNC HGNC:25567 protein_coding YES CCDS31.1 ENSP00000368030 Q9NVI7 UPI000013D456 NM_018188.3 tolerated_low_confidence(0.58) benign(0) 3/16 Coiled-coils_(Ncoils):Coil,hmmpanther:PTHR23075,hmmpanther:PTHR23075:SF4 0.1779 0.2156 0.1282 0.3323 0.004 0.182 0,1 MODERATE 1 SNV 2 0,1 1 PASS CGG . . 0.03914 0.1307 0.04243 0.0006101 0.2592 0.01968 0.0008531 0.02897 0.04709 1517171 +CA6 . GRCh38 chr1 8958917 8958917 + Missense_Mutation SNP T T C novel 7316-918 BS_84QXT7FR T T c.416T>C p.Ile139Thr p.I139T ENST00000377436 4/8 70 59 11 23 22 1 CA6,missense_variant,p.Ile139Thr,ENST00000377443,NM_001215.3;CA6,missense_variant,p.Ile139Thr,ENST00000377436,NM_001270500.1;CA6,missense_variant,p.Ile79Thr,ENST00000377442,NM_001270501.1,NM_001270502.1;CA6,missense_variant,p.Ile107Thr,ENST00000549778,;CA6,non_coding_transcript_exon_variant,,ENST00000476083,; C ENSG00000131686 ENST00000377436 Transcript missense_variant 416/942 416/942 139/313 I/T aTt/aCt 1 1 CA6 HGNC HGNC:1380 protein_coding YES CCDS57970.1 ENSP00000366654 P23280 UPI00004CA0CB NM_001270500.1 deleterious(0) benign(0.368) 4/8 PROSITE_profiles:PS51144,cd03125,hmmpanther:PTHR18952,hmmpanther:PTHR18952:SF110,PROSITE_patterns:PS00162,Gene3D:3.10.200.10,Pfam_domain:PF00194,SMART_domains:SM01057,Superfamily_domains:SSF51069 MODERATE 1 SNV 1 PASS ATT . . 8958917 +MAP3K6 . 
GRCh38 chr1 27361345 27361345 + Splice_Site SNP C C A rs771659002 7316-918 BS_84QXT7FR C C c.1736+1G>T p.X579_splice ENST00000493901 78 59 19 28 28 0 MAP3K6,splice_donor_variant,,ENST00000357582,;MAP3K6,splice_donor_variant,,ENST00000374040,NM_001297609.1;MAP3K6,splice_donor_variant,,ENST00000472410,;MAP3K6,splice_donor_variant,,ENST00000493901,NM_004672.4;MAP3K6,upstream_gene_variant,,ENST00000486046,;MAP3K6,upstream_gene_variant,,ENST00000470890,;MAP3K6,upstream_gene_variant,,ENST00000476509,;MAP3K6,upstream_gene_variant,,ENST00000495230,;,regulatory_region_variant,,ENSR00000003702,; A ENSG00000142733 ENST00000493901 Transcript splice_donor_variant rs771659002 1 -1 MAP3K6 HGNC HGNC:6858 protein_coding YES CCDS299.1 ENSP00000419591 O95382 UPI0000205587 NM_004672.4 13/29 HIGH 1 SNV 5 1 PASS ACC . . 4.073e-06 8.989e-06 27361345 +SPAG17 . GRCh38 chr1 118066887 118066887 + Missense_Mutation SNP C C T novel 7316-918 BS_84QXT7FR C C c.2398G>A p.Ala800Thr p.A800T ENST00000336338 18/49 70 55 15 29 29 0 SPAG17,missense_variant,p.Ala800Thr,ENST00000336338,NM_206996.2;SPAG17,intron_variant,,ENST00000477444,; T ENSG00000155761 ENST00000336338 Transcript missense_variant 2464/6924 2398/6672 800/2223 A/T Gcc/Acc 1 -1 SPAG17 HGNC HGNC:26620 protein_coding YES CCDS899.1 ENSP00000337804 Q6Q759 UPI00001601FD NM_206996.2 deleterious(0) probably_damaging(0.998) 18/49 hmmpanther:PTHR21963 MODERATE 1 SNV 1 PASS GCT . . 118066887 +IGFN1 . 
GRCh38 chr1 201208915 201208915 + Missense_Mutation SNP A A G rs1247701351 7316-918 BS_84QXT7FR A A c.4022A>G p.Asp1341Gly p.D1341G ENST00000335211 12/24 73 53 13 27 24 0 IGFN1,missense_variant,p.Asp1341Gly,ENST00000335211,NM_001164586.1;IGFN1,intron_variant,,ENST00000295591,;IGFN1,upstream_gene_variant,,ENST00000412892,;IGFN1,intron_variant,,ENST00000437879,;IGFN1,downstream_gene_variant,,ENST00000444705,;IGFN1,upstream_gene_variant,,ENST00000473483,; G ENSG00000163395 ENST00000335211 Transcript missense_variant 4152/11810 4022/11127 1341/3708 D/G gAt/gGt rs1247701351 1 1 IGFN1 HGNC HGNC:24607 protein_coding YES CCDS53455.1 ENSP00000334714 Q86VF2 UPI0001B300F4 NM_001164586.1 tolerated(1) possibly_damaging(0.737) 12/24 mobidb-lite MODERATE 1 SNV 5 PASS GAT . . 201208915