diff --git a/src/main/cliapp/src/service/DataLoadService.js b/src/main/cliapp/src/service/DataLoadService.js index dc60d520d..ebd513c14 100644 --- a/src/main/cliapp/src/service/DataLoadService.js +++ b/src/main/cliapp/src/service/DataLoadService.js @@ -81,8 +81,8 @@ export class DataLoadService extends BaseAuthService { getBackendBulkLoadTypes(loadType) { const bulkLoadTypes = { BulkFMSLoad: [ + 'BIOGRID-ORCS', 'GFF', // This needs to be removed at some point - 'GFF_EXON', 'GFF_CDS', 'GFF_TRANSCRIPT', @@ -95,8 +95,9 @@ export class DataLoadService extends BaseAuthService { 'PHENOTYPE', 'PARALOGY', 'SEQUENCE_TARGETING_REAGENT', - // 'VARIATION', - 'BIOGRID-ORCS', + 'VARIATION', + 'VEPGENE', + 'VEPTRANSCRIPT', ], BulkURLLoad: [ 'ONTOLOGY', diff --git a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java index 9ecf6976c..b613cf23b 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/Gff3Constants.java @@ -12,7 +12,7 @@ private Gff3Constants() { "mRNA", "ncRNA", "piRNA", "lincRNA", "miRNA", "pre_miRNA", "snoRNA", "lncRNA", "tRNA", "snRNA", "rRNA", "antisense_RNA", "C_gene_segment", "V_gene_segment", "pseudogene_attribute", "pseudogenic_transcript", "lnc_RNA", "nc_primary_transcript", - "circular_ncRNA" + "circular_ncRNA", "transcript" ); public static final List STRANDS = List.of("+", "-"); diff --git a/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java index 0e879c431..2ca7244d2 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/ValidationConstants.java @@ -15,5 +15,6 @@ private ValidationConstants() { public static final String DUPLICATE_MESSAGE = "Duplicate 
entries found"; public static final String DUPLICATE_RELATION_PREFIX = "Entries found with same relation field - "; public static final String UNRECOGNIZED_MESSAGE = "Unrecognized entry"; // To be used instead of INVALID_MESSAGE when entry to be skipped instead of failed + public static final String AMBIGUOUS_MESSAGE = "Could not be unambiguously resolved"; } \ No newline at end of file diff --git a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java index 9f20f49fb..d7a3f03b8 100644 --- a/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java +++ b/src/main/java/org/alliancegenome/curation_api/constants/VocabularyConstants.java @@ -87,5 +87,11 @@ private VocabularyConstants() { public static final String HTP_DATASET_CATEGORY_TAGS_VOCABULARY = "data_set_category_tags"; public static final String HTP_DATASET_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_note_type"; public static final String HTP_DATASET_SAMPLE_NOTE_TYPE_VOCABULARY_TERM_SET = "htp_expression_dataset_sample_note_type"; + + public static final String VEP_IMPACT_VOCABULARY = "vep_impact"; + public static final String VEP_CONSEQUENCE_VOCABULARY = "vep_consequence"; + public static final String SIFT_PREDICTION_VOCABULARY = "sift_prediction"; + public static final String POLYPHEN_PREDICTION_VOCABULARY = "polyphen_prediction"; + public static final String HTP_DATASET_SAMPLE_SEQUENCE_FORMAT_VOCABULARY = "htp_data_sample_sequencing_format"; } diff --git a/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java b/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java new file mode 100644 index 000000000..1a709cce6 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/controllers/crud/PredictedVariantConsequenceCrudController.java @@ -0,0 +1,40 @@ 
+package org.alliancegenome.curation_api.controllers.crud; + +import java.util.List; + +import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController; +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.interfaces.crud.PredictedVariantConsequenceCrudInterface; +import org.alliancegenome.curation_api.jobs.executors.VepGeneExecutor; +import org.alliancegenome.curation_api.jobs.executors.VepTranscriptExecutor; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; + +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class PredictedVariantConsequenceCrudController extends BaseEntityCrudController + implements PredictedVariantConsequenceCrudInterface { + + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + @Inject VepTranscriptExecutor vepTranscriptExecutor; + @Inject VepGeneExecutor vepGeneExecutor; + + @Override + @PostConstruct + protected void init() { + setService(predictedVariantConsequenceService); + } + + public APIResponse updateTranscriptLevelConsequences(String dataProvider, List consequenceData) { + return vepTranscriptExecutor.runLoadApi(predictedVariantConsequenceService, dataProvider, consequenceData); + } + + public APIResponse updateGeneLevelConsequences(String dataProvider, List consequenceData) { + return vepGeneExecutor.runLoadApi(dataProvider, consequenceData); + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java b/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java new file mode 100644 index 000000000..e602925cf 
--- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/dao/PredictedVariantConsequenceDAO.java @@ -0,0 +1,15 @@ +package org.alliancegenome.curation_api.dao; + +import org.alliancegenome.curation_api.dao.base.BaseSQLDAO; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; + +import jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped +public class PredictedVariantConsequenceDAO extends BaseSQLDAO { + + protected PredictedVariantConsequenceDAO() { + super(PredictedVariantConsequence.class); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java index 11b17cb8a..c06d1b845 100644 --- a/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java +++ b/src/main/java/org/alliancegenome/curation_api/enums/BackendBulkLoadType.java @@ -19,6 +19,8 @@ public enum BackendBulkLoadType { CONSTRUCT_ASSOCIATION("json"), VARIANT("json"), VARIATION("json"), // FMS variants as opposed to direct submission for VARIANT + VEPTRANSCRIPT("tsv"), + VEPGENE("tsv"), // GFF all from the same file but split out GFF("gff"), // For Database entries diff --git a/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java new file mode 100644 index 000000000..25139fde7 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/interfaces/crud/PredictedVariantConsequenceCrudInterface.java @@ -0,0 +1,37 @@ +package org.alliancegenome.curation_api.interfaces.crud; + +import java.util.List; + +import org.alliancegenome.curation_api.interfaces.base.BaseIdCrudInterface; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import 
org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; + +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.MediaType; + +@Path("predictedvariantconsequence") +@Tag(name = "CRUD - Predicted Variant Consequence") +@Produces(MediaType.APPLICATION_JSON) +@Consumes(MediaType.APPLICATION_JSON) +public interface PredictedVariantConsequenceCrudInterface extends BaseIdCrudInterface { + + @POST + @Path("/bulk/{dataProvider}/transcriptConsequenceFile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateTranscriptLevelConsequences(@PathParam("dataProvider") String dataProvider, List consequenceData); + + @POST + @Path("/bulk/{dataProvider}/geneConsequenceFile") + @JsonView(View.FieldsAndLists.class) + APIResponse updateGeneLevelConsequences(@PathParam("dataProvider") String dataProvider, List consequenceData); + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java index ae56ef352..ceb7e70e5 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/BulkLoadJobExecutor.java @@ -61,6 +61,8 @@ public class BulkLoadJobExecutor { @Inject Gff3ExonExecutor gff3ExonExecutor; @Inject Gff3CDSExecutor gff3CDSExecutor; @Inject Gff3TranscriptExecutor gff3TranscriptExecutor; + @Inject VepTranscriptExecutor vepTranscriptExecutor; + @Inject VepGeneExecutor vepGeneExecutor; @Inject ExpressionAtlasExecutor expressionAtlasExecutor; @@ -140,6 +142,10 @@ public void process(BulkLoadFileHistory bulkLoadFileHistory, Boolean cleanUp) th 
expressionAtlasExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.BIOGRID_ORCS) { biogridOrcExecutor.execLoad(bulkLoadFileHistory); + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.VEPTRANSCRIPT) { + vepTranscriptExecutor.execLoad(bulkLoadFileHistory); + } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.VEPGENE) { + vepGeneExecutor.execLoad(bulkLoadFileHistory); } else if (bulkLoadFileHistory.getBulkLoad().getBackendBulkLoadType() == BackendBulkLoadType.HTPDATASAMPLE) { htpExpressionDatasetSampleAnnotationExecutor.execLoad(bulkLoadFileHistory); } else { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java index b8bbb4e25..d5e36f8e9 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/ExpressionAtlasExecutor.java @@ -1,10 +1,13 @@ package org.alliancegenome.curation_api.jobs.executors; -import com.fasterxml.jackson.dataformat.xml.XmlMapper; -import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; -import jakarta.enterprise.context.ApplicationScoped; -import jakarta.inject.Inject; -import lombok.extern.jbosslog.JBossLog; +import static org.alliancegenome.curation_api.services.DataProviderService.RESOURCE_DESCRIPTOR_PREFIX; + +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + import org.alliancegenome.curation_api.model.entities.CrossReference; import org.alliancegenome.curation_api.model.entities.DataProvider; import org.alliancegenome.curation_api.model.entities.Organization; @@ -17,15 +20,12 @@ import 
org.alliancegenome.curation_api.util.ProcessDisplayHelper; import org.jetbrains.annotations.NotNull; -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; -import static org.alliancegenome.curation_api.services.DataProviderService.RESOURCE_DESCRIPTOR_PREFIX; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; -@JBossLog @ApplicationScoped public class ExpressionAtlasExecutor extends LoadFileExecutor { diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java index d7b6b8b52..451177395 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/LoadFileExecutor.java @@ -218,14 +218,13 @@ protected boolean runLoad(BaseUpser idsAdded.add(dbObject.getId()); } } catch (ObjectUpdateException e) { - // e.printStackTrace(); history.incrementFailed(); addException(history, e.getData()); } catch (KnownIssueValidationException e) { Log.debug(e.getMessage()); history.incrementSkipped(); } catch (Exception e) { - // e.printStackTrace(); + e.printStackTrace(); history.incrementFailed(); addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); } diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java new file mode 100644 index 000000000..ac4659bc3 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepGeneExecutor.java @@ -0,0 +1,169 @@ +package org.alliancegenome.curation_api.jobs.executors; + +import 
java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.KnownIssueValidationException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException; +import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.APIResponse; +import org.alliancegenome.curation_api.response.LoadHistoryResponce; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; +import org.alliancegenome.curation_api.util.ProcessDisplayHelper; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections4.ListUtils; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import io.quarkus.logging.Log; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class VepGeneExecutor extends LoadFileExecutor { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + + CsvSchema vepTxtSchema = CsvSchemaBuilder.vepTxtSchema(); + CsvMapper csvMapper = new 
CsvMapper(); + MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(VepTxtDTO.class).with(vepTxtSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); + List vepData = it.readAll(); + + + BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); + + List consequenceIdsLoaded = new ArrayList<>(); + List consequenceIdsBefore = predictedVariantConsequenceService.getGeneLevelIdsByDataProvider(dataProvider); + + bulkLoadFileHistory.setCount(vepData.size()); + updateHistory(bulkLoadFileHistory); + + boolean success = runLoad(bulkLoadFileHistory, dataProvider, vepData, consequenceIdsLoaded); + if (success) { + runCleanup(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider.name(), consequenceIdsBefore, consequenceIdsLoaded, "gene-level predicted variant consequences"); + } + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); + e.printStackTrace(); + } + } + + protected boolean runLoad(BulkLoadFileHistory history, BackendBulkDataProvider dataProvider, List objectList, List idsUpdated) { + ProcessDisplayHelper ph = new ProcessDisplayHelper(); + ph.addDisplayHandler(loadProcessDisplayService); + if (CollectionUtils.isNotEmpty(objectList)) { + String loadMessage = objectList.get(0).getClass().getSimpleName() + " update"; + if (dataProvider != null) { + loadMessage = loadMessage + " for " + dataProvider.name(); + } + ph.startProcess(loadMessage, objectList.size()); + + updateHistory(history); + for (VepTxtDTO dtoObject : objectList) { + try { + Long idUpdated = predictedVariantConsequenceService.updateGeneLevelConsequence(dtoObject); + history.incrementCompleted(); + if (idsUpdated != null) { + 
idsUpdated.add(idUpdated); + } + } catch (ObjectUpdateException e) { + history.incrementFailed(); + addException(history, e.getData()); + } catch (KnownIssueValidationException e) { + Log.debug(e.getMessage()); + history.incrementSkipped(); + } catch (Exception e) { + e.printStackTrace(); + history.incrementFailed(); + addException(history, new ObjectUpdateExceptionData(dtoObject, e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate() > 0.25) { + Log.error("Failure Rate > 25% aborting load"); + updateHistory(history); + updateExceptions(history); + failLoadAboveErrorRateCutoff(history); + return false; + } + ph.progressProcess(); + } + updateHistory(history); + updateExceptions(history); + ph.finishProcess(); + } + return true; + } + + protected void runCleanup(BulkLoadFileHistory history, String dataProviderName, List annotationIdsBefore, List annotationIdsAfter, String loadTypeString, Boolean deprecate) { + Log.debug("runLoad: After: " + dataProviderName + " " + annotationIdsAfter.size()); + + List distinctAfter = annotationIdsAfter.stream().distinct().collect(Collectors.toList()); + Log.debug("runLoad: Distinct: " + dataProviderName + " " + distinctAfter.size()); + + List idsToReset = ListUtils.subtract(annotationIdsBefore, distinctAfter); + Log.debug("runLoad: Reset: " + dataProviderName + " " + idsToReset.size()); + + String countType = loadTypeString + " reset"; + + long existingResets = history.getCount(countType).getTotal() == null ? 
0 : history.getCount(countType).getTotal(); + history.setCount(countType, idsToReset.size() + existingResets); + + String loadDescription = dataProviderName + " " + loadTypeString + " bulk load (" + history.getBulkLoadFile().getMd5Sum() + ")"; + + ProcessDisplayHelper ph = new ProcessDisplayHelper(10000); + ph.startProcess("Deletion/deprecation of: " + dataProviderName + " " + loadTypeString, idsToReset.size()); + + for (Long id : idsToReset) { + try { + predictedVariantConsequenceService.resetGeneLevelConsequence(id, loadDescription); + history.incrementCompleted(countType); + } catch (Exception e) { + history.incrementFailed(countType); + addException(history, new ObjectUpdateExceptionData("{ \"id\": " + id + "}", e.getMessage(), e.getStackTrace())); + } + if (history.getErrorRate(countType) > 0.25) { + Log.error(countType + " failure rate > 25% aborting load"); + failLoadAboveErrorRateCutoff(history); + break; + } + ph.progressProcess(); + } + updateHistory(history); + updateExceptions(history); + ph.finishProcess(); + } + + public APIResponse runLoadApi(String dataProviderName, List consequenceData) { + List idsLoaded = new ArrayList<>(); + BulkLoadFileHistory history = new BulkLoadFileHistory(consequenceData.size()); + history = bulkLoadFileHistoryDAO.persist(history); + BackendBulkDataProvider dataProvider = null; + if (dataProviderName != null) { + dataProvider = BackendBulkDataProvider.valueOf(dataProviderName); + } + runLoad(history, dataProvider, consequenceData, idsLoaded); + history.finishLoad(); + return new LoadHistoryResponce(history); + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java new file mode 100644 index 000000000..dfa877ad4 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/jobs/executors/VepTranscriptExecutor.java @@ -0,0 +1,62 @@ +package 
org.alliancegenome.curation_api.jobs.executors; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.GZIPInputStream; + +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad; +import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.services.PredictedVariantConsequenceService; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; +import com.fasterxml.jackson.dataformat.csv.CsvSchema; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; + +@ApplicationScoped +public class VepTranscriptExecutor extends LoadFileExecutor { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject PredictedVariantConsequenceService predictedVariantConsequenceService; + + public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) { + try { + + CsvSchema vepTxtSchema = CsvSchemaBuilder.vepTxtSchema(); + CsvMapper csvMapper = new CsvMapper(); + MappingIterator it = csvMapper.enable(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS).readerFor(VepTxtDTO.class).with(vepTxtSchema).readValues(new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()))); + List vepData = it.readAll(); + + + BulkFMSLoad fmsLoad = (BulkFMSLoad) bulkLoadFileHistory.getBulkLoad(); + BackendBulkDataProvider dataProvider = BackendBulkDataProvider.valueOf(fmsLoad.getFmsDataSubType()); + + List consequenceIdsLoaded = new ArrayList<>(); + List consequenceIdsBefore = 
predictedVariantConsequenceService.getIdsByDataProvider(dataProvider); + + bulkLoadFileHistory.setCount(vepData.size()); + updateHistory(bulkLoadFileHistory); + + boolean success = runLoad(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider, vepData, consequenceIdsLoaded); + if (success) { + runCleanup(predictedVariantConsequenceService, bulkLoadFileHistory, dataProvider.name(), consequenceIdsBefore, consequenceIdsLoaded, "predicted variant consequences"); + } + bulkLoadFileHistory.finishLoad(); + updateHistory(bulkLoadFileHistory); + updateExceptions(bulkLoadFileHistory); + + } catch (Exception e) { + failLoad(bulkLoadFileHistory, e); + e.printStackTrace(); + } + } + +} diff --git a/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java b/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java index 28c4bfca0..8320586b8 100644 --- a/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java +++ b/src/main/java/org/alliancegenome/curation_api/jobs/util/CsvSchemaBuilder.java @@ -105,4 +105,30 @@ public static CsvSchema gff3Schema() { return schema; } + + public static CsvSchema vepTxtSchema() { + CsvSchema schema = CsvSchema.builder() + .setColumnSeparator('\t') + .setArrayElementSeparator(";") + .setAllowComments(true) + .setNullValue("-") + .disableQuoteChar() + .addColumn("uploadedVariation") + .addColumn("location") + .addColumn("allele") + .addColumn("gene") + .addColumn("feature") + .addColumn("featureType") + .addColumn("consequence") + .addColumn("cdnaPosition") + .addColumn("cdsPosition") + .addColumn("proteinPosition") + .addColumn("aminoAcids") + .addColumn("codons") + .addColumn("existingVariation") + .addColumn("extra") + .build(); + + return schema; + } } diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java new file mode 
100644 index 000000000..7920ede56 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/PredictedVariantConsequence.java @@ -0,0 +1,170 @@ +package org.alliancegenome.curation_api.model.entities; + +import java.util.List; + +import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; +import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.bridges.BooleanValueBridge; +import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.entities.base.AuditedObject; +import org.alliancegenome.curation_api.model.entities.ontology.SOTerm; +import org.alliancegenome.curation_api.view.View; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Projectable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; +import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.bridge.mapping.annotation.ValueBridgeRef; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; + +import com.fasterxml.jackson.annotation.JsonBackReference; +import com.fasterxml.jackson.annotation.JsonView; + +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.Index; +import jakarta.persistence.JoinTable; +import 
jakarta.persistence.ManyToMany; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +@Entity +@Data +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) +@ToString(callSuper = true) +@AGRCurationSchemaVersion(min = "2.7.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { AuditedObject.class }) +@Schema(name = "PredictedVariantConsequence", description = "POJO representing VEP predicted variant consequence results") +@Table(indexes = { + @Index(name = "predictedvariantconsequence_varianttranscript_index", columnList = "varianttranscript_id"), + @Index(name = "predictedvariantconsequence_vepimpact_index", columnList = "vepimpact_id"), + @Index(name = "predictedvariantconsequence_polyphenprediction_index", columnList = "polyphenprediction_id"), + @Index(name = "predictedvariantconsequence_siftprediction_index", columnList = "siftprediction_id"), + @Index(name = "predictedvariantconsequence_createdby_index", columnList = "createdby_id"), + @Index(name = "predictedvariantconsequence_updatedby_index", columnList = "updatedby_id"), + @Index(name = "predictedvariantconsequence_hgvsproteinnomenclature_index", columnList = "hgvsProteinNomenclature"), + @Index(name = "predictedvariantconsequence_hgvscodingnomenclature_index", columnList = "hgvsCodingNomenclature"), + @Index(name = "predictedvariantconsequence_variantgenomiclocation_index", columnList = "variantGenomicLocation_id") +}) +public class PredictedVariantConsequence extends AuditedObject { + + @ManyToOne + @JsonBackReference + private CuratedVariantGenomicLocationAssociation variantGenomicLocation; + + @IndexedEmbedded(includePaths = {"name", "name_keyword", "curie", "curie_keyword", "modEntityId", "modEntityId_keyword", "modInternalId", "modInternalId_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + 
private Transcript variantTranscript; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm vepImpact; + + @IndexedEmbedded(includePaths = {"curie", "name", "secondaryIdentifiers", "synonyms.name", "namespace", + "curie_keyword", "name_keyword", "secondaryIdentifiers_keyword", "synonyms.name_keyword", "namespace_keyword" }) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToMany + @JoinTable(indexes = { + @Index(name = "predictedvariantconsequence_ontologyterm_pvc_index", columnList = "predictedvariantconsequence_id"), + @Index(name = "predictedvariantconsequence_ontologyterm_vc_index", columnList = "vepconsequences_id") + }) + @JsonView({ View.FieldsAndLists.class, View.VariantView.class }) + private List vepConsequences; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm polyphenPrediction; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Float polyphenScore; + + @IndexedEmbedded(includePaths = {"name", "name_keyword"}) + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne + @JsonView({ View.FieldsOnly.class }) + private VocabularyTerm siftPrediction; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Float siftScore; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "aminoAcidReference_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String 
aminoAcidReference; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "aminoAcidVariant_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String aminoAcidVariant; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "codonReference_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String codonReference; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "codonVariant_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String codonVariant; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdnaStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdnaEnd; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdsStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedCdsEnd; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ View.FieldsOnly.class }) + private Integer calculatedProteinStart; + + @GenericField(projectable = Projectable.YES, sortable = Sortable.YES) + @JsonView({ 
View.FieldsOnly.class }) + private Integer calculatedProteinEnd; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "hgvsProteinNomenclature_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String hgvsProteinNomenclature; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "hgvsCodingNomenclature_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "TEXT") + private String hgvsCodingNomenclature; + + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer", valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) + @KeywordField(name = "geneLevelConsequence_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, valueBridge = @ValueBridgeRef(type = BooleanValueBridge.class)) + @JsonView({ View.FieldsOnly.class }) + @Column(columnDefinition = "boolean default false", nullable = false) + private Boolean geneLevelConsequence = false; +} diff --git a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java index 46254b04e..0617af6f8 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/Transcript.java @@ -11,9 +11,14 @@ import org.alliancegenome.curation_api.model.entities.ontology.SOTerm; import org.alliancegenome.curation_api.view.View; import org.eclipse.microprofile.openapi.annotations.media.Schema; +import 
org.hibernate.search.engine.backend.types.Aggregable; +import org.hibernate.search.engine.backend.types.Searchable; +import org.hibernate.search.engine.backend.types.Sortable; import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.KeywordField; import com.fasterxml.jackson.annotation.JsonView; @@ -33,12 +38,18 @@ @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) @ToString(exclude = { "transcriptGenomicLocationAssociations", "transcriptGeneAssociations", "transcriptCodingSequenceAssociations", "transcriptExonAssociations" }, callSuper = true) @Schema(name = "Transcript", description = "POJO that represents the Transcript") -@AGRCurationSchemaVersion(min = "2.4.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { GenomicEntity.class }) +@AGRCurationSchemaVersion(min = "2.8.1", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { GenomicEntity.class }) @Table(indexes = { + @Index(name = "transcript_transcriptId_index", columnList = "transcriptId"), @Index(name = "transcript_transcriptType_index", columnList = "transcriptType_id") }) public class Transcript extends GenomicEntity { + @FullTextField(analyzer = "autocompleteAnalyzer", searchAnalyzer = "autocompleteSearchAnalyzer") + @KeywordField(name = "transcriptId_keyword", aggregable = Aggregable.YES, sortable = Sortable.YES, searchable = Searchable.YES, normalizer = "sortNormalizer") + @JsonView({ View.FieldsOnly.class }) + private String transcriptId; + @JsonView({ View.FieldsOnly.class }) private String name; diff --git 
a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java index 82508a178..f62005b21 100644 --- a/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java +++ b/src/main/java/org/alliancegenome/curation_api/model/entities/associations/variantAssociations/CuratedVariantGenomicLocationAssociation.java @@ -1,11 +1,22 @@ package org.alliancegenome.curation_api.model.entities.associations.variantAssociations; +import java.util.List; + import org.alliancegenome.curation_api.constants.LinkMLSchemaConstants; import org.alliancegenome.curation_api.interfaces.AGRCurationSchemaVersion; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.view.View; +import org.alliancegenome.curation_api.view.View.VariantView; import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; + +import com.fasterxml.jackson.annotation.JsonManagedReference; +import com.fasterxml.jackson.annotation.JsonView; +import jakarta.persistence.CascadeType; import jakarta.persistence.Entity; import jakarta.persistence.Index; +import jakarta.persistence.OneToMany; import jakarta.persistence.Table; import lombok.Data; import lombok.EqualsAndHashCode; @@ -14,7 +25,7 @@ @Entity @Data @EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = true) -@ToString(callSuper = true) +@ToString(exclude = "predictedVariantConsequences", callSuper = true) @AGRCurationSchemaVersion(min = "2.4.0", max = LinkMLSchemaConstants.LATEST_RELEASE, dependencies = { VariantGenomicLocationAssociation.class }) @Schema(name = "CuratedVariantGenomicLocationAssociation", 
description = "POJO representing an association between a variant and a curated genomic location") @@ -35,5 +46,19 @@ ) public class CuratedVariantGenomicLocationAssociation extends VariantGenomicLocationAssociation { - + + @IndexedEmbedded( + includePaths = { + "variantTranscript.name", "variantTranscript.modEntityId", + "variantTranscript.modInternalId", "variantTranscript.curie", + "vepConsequences.name", "variantTranscript.name_keyword", // path must use the field name declared on PredictedVariantConsequence (vepConsequences) + "variantTranscript.modEntityId_keyword", "variantTranscript.modInternalId_keyword", + "variantTranscript.curie_keyword", "vepConsequences.name_keyword", + "variantTranscript.transcriptId", "variantTranscript.transcriptId_keyword" + } + ) + @OneToMany(mappedBy = "variantGenomicLocation", cascade = CascadeType.ALL, orphanRemoval = true) + @JsonManagedReference + @JsonView({ View.FieldsAndLists.class, VariantView.class }) + private List predictedVariantConsequences; } diff --git a/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java new file mode 100644 index 000000000..a25fe8eac --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/model/ingest/dto/fms/VepTxtDTO.java @@ -0,0 +1,29 @@ +package org.alliancegenome.curation_api.model.ingest.dto.fms; + +import java.util.List; + +import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +@Data +@EqualsAndHashCode(callSuper = true) +public class VepTxtDTO extends BaseDTO { + + private String uploadedVariation; + private String location; + private String allele; + private String gene; + private String feature; + private String featureType; + private String consequence; + private String cdnaPosition; + private String cdsPosition; + private String proteinPosition; + private String aminoAcids; + private String codons; + private String existingVariation; + private List extra; + +} diff --git
a/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java new file mode 100644 index 000000000..e1b241bb6 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/PredictedVariantConsequenceService.java @@ -0,0 +1,100 @@ +package org.alliancegenome.curation_api.services; + +import java.time.OffsetDateTime; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.alliancegenome.curation_api.constants.EntityFieldConstants; +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.enums.BackendBulkDataProvider; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.interfaces.crud.BaseUpsertServiceInterface; +import org.alliancegenome.curation_api.model.entities.Person; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.services.base.BaseEntityCrudService; +import org.alliancegenome.curation_api.services.validation.dto.fms.VepGeneFmsDTOValidator; +import org.alliancegenome.curation_api.services.validation.dto.fms.VepTranscriptFmsDTOValidator; +import org.apache.commons.lang.StringUtils; + +import io.quarkus.logging.Log; +import jakarta.annotation.PostConstruct; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; +import jakarta.transaction.Transactional; + +@RequestScoped +public class PredictedVariantConsequenceService extends BaseEntityCrudService implements BaseUpsertServiceInterface { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject VepTranscriptFmsDTOValidator vepTranscriptFmsDtoValidator; + @Inject VepGeneFmsDTOValidator 
vepGeneFmsDtoValidator; + @Inject PersonService personService; + + @Override + @PostConstruct + protected void init() { + setSQLDao(predictedVariantConsequenceDAO); + } + + public List getIdsByDataProvider(BackendBulkDataProvider dataProvider) { + Map params = new HashMap<>(); + params.put("variantTranscript." + EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + params.put("variantTranscript." + EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie); + } + List ids = predictedVariantConsequenceDAO.findIdsByParams(params); + ids.removeIf(Objects::isNull); + return ids; + } + + public List getGeneLevelIdsByDataProvider(BackendBulkDataProvider dataProvider) { + Map params = new HashMap<>(); + params.put("variantTranscript." + EntityFieldConstants.DATA_PROVIDER, dataProvider.sourceOrganization); + if (StringUtils.equals(dataProvider.sourceOrganization, "RGD")) { + params.put("variantTranscript." + EntityFieldConstants.TAXON, dataProvider.canonicalTaxonCurie); + } + params.put("geneLevelConsequence", true); + List ids = predictedVariantConsequenceDAO.findIdsByParams(params); + ids.removeIf(Objects::isNull); + return ids; + } + + @Override + @Transactional + public PredictedVariantConsequence upsert(VepTxtDTO dto, BackendBulkDataProvider dataProvider) + throws ValidationException { + return vepTranscriptFmsDtoValidator.validateTranscriptLevelConsequence(dto, dataProvider); + } + + @Transactional + public Long updateGeneLevelConsequence(VepTxtDTO dto) throws ValidationException { + return vepGeneFmsDtoValidator.validateGeneLevelConsequence(dto); + } + + @Transactional + public PredictedVariantConsequence resetGeneLevelConsequence(Long id, String requestSource) { + PredictedVariantConsequence pvc = predictedVariantConsequenceDAO.find(id); + + if (pvc == null) { + String errorMessage = "Could not find PredictedVariantConsequence with id: " + id; + Log.error(errorMessage); + 
return null; + } + + if (pvc.getGeneLevelConsequence()) { + pvc.setGeneLevelConsequence(false); // reset clears the flag; rows that are already false are returned unchanged below + if (authenticatedPerson.getUniqueId() != null) { + requestSource = authenticatedPerson.getUniqueId(); + } + Person updatedBy = personService.fetchByUniqueIdOrCreate(requestSource); + pvc.setUpdatedBy(updatedBy); + pvc.setDateUpdated(OffsetDateTime.now()); + return predictedVariantConsequenceDAO.persist(pvc); + } + + return pvc; + } +} diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java index 25174699f..e04ca58a8 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/annotations/GeneExpressionAnnotationUniqueIdHelper.java @@ -14,11 +14,15 @@ public String generateUniqueId(GeneExpressionFmsDTO geneExpressionFmsDTO, String uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getAssay()); uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getGeneId()); uniqueIdGeneratorHelper.add(referenceCurie); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageTermId()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageName()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); - uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + if (geneExpressionFmsDTO.getWhenExpressed() != null) { + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageTermId()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhenExpressed().getStageName()); + } + if
(geneExpressionFmsDTO.getWhereExpressed() != null) { + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getWhereExpressedStatement()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getAnatomicalStructureTermId()); + uniqueIdGeneratorHelper.add(geneExpressionFmsDTO.getWhereExpressed().getCellularComponentTermId()); + } return uniqueIdGeneratorHelper.getUniqueId(); } } diff --git a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java index 331712250..389ce0d88 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java +++ b/src/main/java/org/alliancegenome/curation_api/services/helpers/variants/HgvsIdentifierHelper.java @@ -1,5 +1,7 @@ package org.alliancegenome.curation_api.services.helpers.variants; +import java.util.Objects; + import org.alliancegenome.curation_api.model.ingest.dto.fms.VariantFmsDTO; import org.apache.commons.lang3.StringUtils; @@ -15,9 +17,16 @@ public static String getHgvsIdentifier(VariantFmsDTO dto) { if (dto.getEnd() != null) { end = Integer.toString(dto.getEnd()); } + + String refSeq = ""; + if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { + refSeq = dto.getGenomicReferenceSequence(); + } - String varSeq = StringUtils.isBlank(dto.getGenomicVariantSequence()) ? "" : dto.getGenomicVariantSequence(); - String refSeq = StringUtils.isBlank(dto.getGenomicReferenceSequence()) ? 
"" : dto.getGenomicReferenceSequence(); + String varSeq = ""; + if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { + varSeq = dto.getGenomicVariantSequence(); + } String chrAccession = ""; if (StringUtils.isNotBlank(dto.getSequenceOfReferenceAccessionNumber())) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java index 430cec249..289cbedf6 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/Gff3DtoValidator.java @@ -164,6 +164,8 @@ public void validateTranscriptEntry(Gff3DTO dto, Map attributes, if (attributes.containsKey("Name")) { transcript.setName(attributes.get("Name")); + } else { + transcript.setName(null); } ObjectResponse transcriptResponse = validateGenomicEntity(transcript, dto, attributes, dataProvider); @@ -171,6 +173,12 @@ public void validateTranscriptEntry(Gff3DTO dto, Map attributes, transcriptResponse.addErrorMessage("attributes - ID", ValidationConstants.REQUIRED_MESSAGE); } + if (attributes.containsKey("transcript_id")) { + transcript.setTranscriptId(attributes.get("transcript_id")); + } else { + transcript.setTranscriptId(null); + } + if (transcriptResponse.hasErrors()) { throw new ObjectValidationException(dto, transcriptResponse.errorMessagesString()); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java index bfd87c332..6351aa369 100644 --- 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/alleleAssociations/AlleleGeneAssociationDTOValidator.java @@ -57,7 +57,7 @@ public AlleleGeneAssociation validateAlleleGeneAssociationDTO(AlleleGeneAssociat } AlleleGeneAssociation association = null; - if (subjectIds != null && subjectIds.size() == 1 && objectIds != null || objectIds.size() == 1 && StringUtils.isNotBlank(dto.getRelationName())) { + if (subjectIds != null && subjectIds.size() == 1 && objectIds != null && objectIds.size() == 1 && StringUtils.isNotBlank(dto.getRelationName())) { HashMap params = new HashMap<>(); params.put("alleleAssociationSubject.id", subjectIds.get(0)); diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java index 62f269466..d5b8a7c31 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/associations/constructAssociations/ConstructGenomicEntityAssociationDTOValidator.java @@ -47,7 +47,10 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati Construct construct = null; if (StringUtils.isNotBlank(dto.getConstructIdentifier())) { - construct = constructService.getShallowEntity(constructService.getIdByModID(dto.getConstructIdentifier())); + Long constructId = constructService.getIdByModID(dto.getConstructIdentifier()); + if (constructId != null) { + construct = constructService.getShallowEntity(constructId); + } if 
(construct == null) { assocResponse.addErrorMessage("construct_identifier", ValidationConstants.INVALID_MESSAGE); } else { @@ -63,7 +66,10 @@ public ConstructGenomicEntityAssociation validateConstructGenomicEntityAssociati if (StringUtils.isBlank(dto.getGenomicEntityIdentifier())) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.REQUIRED_MESSAGE); } else { - genomicEntity = genomicEntityService.getShallowEntity(genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier())); + Long genomicEntityId = genomicEntityService.getIdByModID(dto.getGenomicEntityIdentifier()); + if (genomicEntityId != null) { + genomicEntity = genomicEntityService.getShallowEntity(genomicEntityId); + } if (genomicEntity == null) { assocResponse.addErrorMessage("genomic_entity_identifier", ValidationConstants.INVALID_MESSAGE + " (" + dto.getGenomicEntityIdentifier() + ")"); } diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java index c0c6060f3..bc0c8ac39 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/ParalogyFmsDTOValidator.java @@ -75,7 +75,7 @@ public GeneToGeneParalogy validateParalogyFmsDTO(ParalogyFmsDTO dto) throws Vali if (subjectGene == null) { paralogyResponse.addErrorMessage("gene1", ValidationConstants.INVALID_MESSAGE + " (" + subjectGeneIdentifier + ")"); } else { - if (!sameGenus(speciesTaxon, subjectGene.getTaxon())) { + if (speciesTaxon != null && !sameGenus(speciesTaxon, subjectGene.getTaxon())) { paralogyResponse.addErrorMessage("Species", ValidationConstants.INVALID_MESSAGE + " (" + dto.getSpecies() + ") for gene " + subjectGene.getCurie()); } } diff --git 
a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java index 5cbd0a331..5ae14da84 100644 --- a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VariantFmsDTOValidator.java @@ -110,7 +110,7 @@ public Long validateVariant(VariantFmsDTO dto, List idsAdded, BackendBulkD } String hgvs = HgvsIdentifierHelper.getHgvsIdentifier(dto); - String modInternalId = DigestUtils.md5Hex(hgvs); + String modInternalId = hgvs == null ? null : DigestUtils.md5Hex(hgvs); if (StringUtils.isNotBlank(hgvs) && !variantResponse.hasErrors()) { SearchResponse searchResponse = variantDAO.findByField("modInternalId", modInternalId); @@ -256,11 +256,17 @@ public void validateCuratedVariantGenomicLocationAssociation(VariantFmsDTO dto, association.setStart(dto.getStart()); association.setEnd(dto.getEnd()); association.setRelation(vocabularyTermService.getTermInVocabulary(VocabularyConstants.LOCATION_ASSOCIATION_RELATION_VOCABULARY, "located_on").getEntity()); - if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence())) { + + if (StringUtils.isNotBlank(dto.getGenomicReferenceSequence()) && !Objects.equals(dto.getGenomicReferenceSequence(), "N/A")) { association.setReferenceSequence(dto.getGenomicReferenceSequence()); + } else { + association.setReferenceSequence(null); } - if (StringUtils.isNotBlank(dto.getGenomicVariantSequence())) { + + if (StringUtils.isNotBlank(dto.getGenomicVariantSequence()) && !Objects.equals(dto.getGenomicVariantSequence(), "N/A")) { association.setVariantSequence(dto.getGenomicVariantSequence()); + } else { + association.setVariantSequence(null); } if (variantId == null) { diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java 
b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java new file mode 100644 index 000000000..e888215d9 --- /dev/null +++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepGeneFmsDTOValidator.java @@ -0,0 +1,82 @@ +package org.alliancegenome.curation_api.services.validation.dto.fms; + +import org.alliancegenome.curation_api.constants.ValidationConstants; +import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO; +import org.alliancegenome.curation_api.exceptions.ObjectValidationException; +import org.alliancegenome.curation_api.exceptions.ValidationException; +import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence; +import org.alliancegenome.curation_api.model.entities.Transcript; +import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation; +import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO; +import org.alliancegenome.curation_api.response.ObjectResponse; +import org.alliancegenome.curation_api.response.SearchResponse; +import org.alliancegenome.curation_api.services.TranscriptService; +import org.alliancegenome.curation_api.services.associations.variantAssociations.CuratedVariantGenomicLocationAssociationService; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; + +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Inject; + +@RequestScoped +public class VepGeneFmsDTOValidator { + + @Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO; + @Inject CuratedVariantGenomicLocationAssociationService cvglaService; + @Inject TranscriptService transcriptService; + + public Long validateGeneLevelConsequence(VepTxtDTO dto) throws ValidationException { + ObjectResponse response = new ObjectResponse<>(); + PredictedVariantConsequence predictedVariantConsequence = null; + + 
CuratedVariantGenomicLocationAssociation variantLocation = null; + if (StringUtils.isBlank(dto.getUploadedVariation())) { + response.addErrorMessage("uploadedVariant", ValidationConstants.REQUIRED_MESSAGE); + } else { + SearchResponse cvglaResponse = cvglaService.findByField("hgvs", dto.getUploadedVariation()); + if (cvglaResponse != null && cvglaResponse.getSingleResult() != null) { + variantLocation = cvglaResponse.getSingleResult(); + } else { + response.addErrorMessage("uploadedVariant", ValidationConstants.INVALID_MESSAGE + " (" + dto.getUploadedVariation() + ")"); + } + } + + Transcript transcript = null; + if (StringUtils.isBlank(dto.getFeature())) { + response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE); + } else { + SearchResponse searchResponse = transcriptService.findByField("transcriptId", dto.getFeature()); + if (searchResponse == null || searchResponse.getSingleResult() == null) { + response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")"); + } else if (searchResponse.getReturnedRecords() > 1) { + response.addErrorMessage("feature", ValidationConstants.AMBIGUOUS_MESSAGE + " (" + dto.getFeature() + ")"); + } else { + transcript = searchResponse.getSingleResult(); + } + } + + if (variantLocation != null && CollectionUtils.isNotEmpty(variantLocation.getPredictedVariantConsequences()) && transcript != null) { + for (PredictedVariantConsequence existingPvc : variantLocation.getPredictedVariantConsequences()) { + if (existingPvc.getVariantTranscript() != null && java.util.Objects.equals(transcript.getId(), existingPvc.getVariantTranscript().getId())) { // value comparison: == on boxed ids compares references; also skips consequences with no transcript + predictedVariantConsequence = existingPvc; + break; + } + } + } + + if (predictedVariantConsequence == null) { + response.addErrorMessage("uploadedVariant / feature", ValidationConstants.INVALID_MESSAGE + " (" + + dto.getUploadedVariation() + " / " + dto.getFeature() + ")"); + } else { + predictedVariantConsequence.setGeneLevelConsequence(true); + } + + if (response.hasErrors()) { + throw new
ObjectValidationException(dto, response.errorMessagesString());
+		}
+
+		predictedVariantConsequence = predictedVariantConsequenceDAO.persist(predictedVariantConsequence);
+
+		return predictedVariantConsequence.getId();
+	}
+}
diff --git a/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java
new file mode 100644
index 000000000..9a965af8d
--- /dev/null
+++ b/src/main/java/org/alliancegenome/curation_api/services/validation/dto/fms/VepTranscriptFmsDTOValidator.java
@@ -0,0 +1,355 @@
+package org.alliancegenome.curation_api.services.validation.dto.fms;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.alliancegenome.curation_api.constants.ValidationConstants;
+import org.alliancegenome.curation_api.constants.VocabularyConstants;
+import org.alliancegenome.curation_api.dao.PredictedVariantConsequenceDAO;
+import org.alliancegenome.curation_api.dao.TranscriptDAO;
+import org.alliancegenome.curation_api.dao.associations.variantAssociations.CuratedVariantGenomicLocationAssociationDAO;
+import org.alliancegenome.curation_api.enums.BackendBulkDataProvider;
+import org.alliancegenome.curation_api.exceptions.ObjectValidationException;
+import org.alliancegenome.curation_api.exceptions.ValidationException;
+import org.alliancegenome.curation_api.model.entities.PredictedVariantConsequence;
+import org.alliancegenome.curation_api.model.entities.Transcript;
+import org.alliancegenome.curation_api.model.entities.VocabularyTerm;
+import org.alliancegenome.curation_api.model.entities.associations.variantAssociations.CuratedVariantGenomicLocationAssociation;
+import org.alliancegenome.curation_api.model.entities.ontology.SOTerm;
+import org.alliancegenome.curation_api.model.ingest.dto.fms.VepTxtDTO;
+import org.alliancegenome.curation_api.response.ObjectResponse;
+import org.alliancegenome.curation_api.response.SearchResponse;
+import org.alliancegenome.curation_api.services.VocabularyTermService;
+import org.alliancegenome.curation_api.services.associations.variantAssociations.CuratedVariantGenomicLocationAssociationService;
+import org.alliancegenome.curation_api.services.ontology.SoTermService;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+
+import jakarta.enterprise.context.RequestScoped;
+import jakarta.inject.Inject;
+
+/**
+ * Validates one row of a VEP transcript-level consequence file (VepTxtDTO) and
+ * persists it as a PredictedVariantConsequence linked to its variant location.
+ */
+@RequestScoped
+public class VepTranscriptFmsDTOValidator {
+
+	@Inject PredictedVariantConsequenceDAO predictedVariantConsequenceDAO;
+	@Inject CuratedVariantGenomicLocationAssociationDAO cvglaDAO;
+	@Inject CuratedVariantGenomicLocationAssociationService cvglaService;
+	@Inject TranscriptDAO transcriptDAO;
+	@Inject VocabularyTermService vocabularyTermService;
+	@Inject SoTermService soTermService;
+
+	// "prediction(score)"; the score group only admits text that Float.parseFloat accepts
+	private static final Pattern PATHOGENICITY_PREDICTION_RESULT = Pattern.compile("^(\\w+)\\((\\d+\\.?\\d*|\\.\\d+)\\)$");
+	// A single coordinate or "start-end"; either side may be "?" when unknown
+	private static final Pattern POSITION_STRING = Pattern.compile("^(\\d+|\\?)(?:-(\\d+|\\?))?$");
+
+	/**
+	 * Validates a transcript-level VEP row and persists the resulting consequence,
+	 * updating the existing record when the variant location already has one for the transcript.
+	 *
+	 * @param dto parsed VEP row
+	 * @param dataProvider data provider of the load; not consulted by this method
+	 * @return the persisted consequence
+	 * @throws ValidationException if validation fails; collected field errors are reported together
+	 */
+	public PredictedVariantConsequence validateTranscriptLevelConsequence(VepTxtDTO dto, BackendBulkDataProvider dataProvider) throws ValidationException {
+		ObjectResponse<PredictedVariantConsequence> response = new ObjectResponse<>();
+		PredictedVariantConsequence predictedVariantConsequence = new PredictedVariantConsequence();
+
+		CuratedVariantGenomicLocationAssociation variantLocation = null;
+		if (StringUtils.isBlank(dto.getUploadedVariation())) {
+			response.addErrorMessage("uploadedVariant", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			SearchResponse<CuratedVariantGenomicLocationAssociation> cvglaResponse = cvglaService.findByField("hgvs", dto.getUploadedVariation());
+			if (cvglaResponse != null && cvglaResponse.getSingleResult() != null) {
+				variantLocation = cvglaResponse.getSingleResult();
+			} else {
+				response.addErrorMessage("uploadedVariant", ValidationConstants.INVALID_MESSAGE + " (" + dto.getUploadedVariation() + ")");
+			}
+		}
+
+		Transcript transcript = null;
+		if (StringUtils.isBlank(dto.getFeature())) {
+			response.addErrorMessage("feature", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			HashMap<String, Object> params = new HashMap<>();
+			params.put("transcriptId", dto.getFeature());
+			params.put("obsolete", false);
+
+			SearchResponse<Transcript> searchResponse = transcriptDAO.findByParams(params);
+			if (searchResponse == null || searchResponse.getSingleResult() == null) {
+				response.addErrorMessage("feature", ValidationConstants.INVALID_MESSAGE + " (" + dto.getFeature() + ")");
+			} else if (searchResponse.getReturnedRecords() > 1) {
+				response.addErrorMessage("feature", ValidationConstants.AMBIGUOUS_MESSAGE + " (" + dto.getFeature() + ")");
+			} else {
+				transcript = searchResponse.getSingleResult();
+			}
+		}
+
+		boolean isUpdate = false;
+		if (variantLocation != null && CollectionUtils.isNotEmpty(variantLocation.getPredictedVariantConsequences()) && transcript != null) {
+			for (PredictedVariantConsequence existingPvc : variantLocation.getPredictedVariantConsequences()) {
+				// Value comparison: with boxed ids, == would compare object identity
+				if (existingPvc.getVariantTranscript() != null && Objects.equals(transcript.getId(), existingPvc.getVariantTranscript().getId())) {
+					predictedVariantConsequence = existingPvc;
+					isUpdate = true;
+					break;
+				}
+			}
+		}
+
+		predictedVariantConsequence.setVariantGenomicLocation(variantLocation);
+		predictedVariantConsequence.setVariantTranscript(transcript);
+
+		Map<String, String> attributes = getExtraAttributes(dto);
+
+		VocabularyTerm vepImpact = null;
+		if (!attributes.containsKey("IMPACT")) {
+			response.addErrorMessage("extra - IMPACT", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			vepImpact = vocabularyTermService.getTermInVocabulary(VocabularyConstants.VEP_IMPACT_VOCABULARY, attributes.get("IMPACT")).getEntity();
+			if (vepImpact == null) {
+				response.addErrorMessage("extra - IMPACT", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("IMPACT") + ")");
+			}
+		}
+		predictedVariantConsequence.setVepImpact(vepImpact);
+
+		List<SOTerm> vepConsequences = null;
+		if (StringUtils.isBlank(dto.getConsequence())) {
+			response.addErrorMessage("consequence", ValidationConstants.REQUIRED_MESSAGE);
+		} else {
+			vepConsequences = new ArrayList<>();
+			for (String consequence : dto.getConsequence().split(",")) {
+				SearchResponse<SOTerm> soTermResponse = soTermService.findByField("name", consequence);
+				SOTerm vepConsequence = null;
+				if (soTermResponse != null && soTermResponse.getSingleResult() != null
+					&& vocabularyTermService.getTermInVocabulary(VocabularyConstants.VEP_CONSEQUENCE_VOCABULARY, consequence).getEntity() != null) {
+					vepConsequence = soTermResponse.getSingleResult();
+				}
+				if (vepConsequence == null) {
+					response.addErrorMessage("consequence", ValidationConstants.INVALID_MESSAGE + " (" + consequence + ")");
+					break;
+				} else {
+					vepConsequences.add(vepConsequence);
+				}
+			}
+		}
+		predictedVariantConsequence.setVepConsequences(vepConsequences);
+
+		String hgvsCodingNomenclature = null;
+		if (attributes.containsKey("HGVSc")) {
+			hgvsCodingNomenclature = attributes.get("HGVSc");
+		}
+		predictedVariantConsequence.setHgvsCodingNomenclature(hgvsCodingNomenclature);
+
+		String hgvsProteinNomenclature = null;
+		if (attributes.containsKey("HGVSp")) {
+			hgvsProteinNomenclature = attributes.get("HGVSp");
+		}
+		predictedVariantConsequence.setHgvsProteinNomenclature(hgvsProteinNomenclature);
+
+		String referenceCodon = null;
+		String variantCodon = null;
+		if (StringUtils.isNotBlank(dto.getCodons())) {
+			String[] refVarCodons = dto.getCodons().split("/");
+			// Null-safe: a blank consequence has only been recorded as an error at this point
+			if (refVarCodons.length == 1 && StringUtils.contains(dto.getConsequence(), "synonymous_variant")) {
+				referenceCodon = dto.getCodons();
+				variantCodon = dto.getCodons();
+			} else if (refVarCodons.length == 2) {
+				referenceCodon = refVarCodons[0];
+				variantCodon = refVarCodons[1];
+			} else {
+				response.addErrorMessage("codons", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCodons() + ")");
+			}
+		}
+		predictedVariantConsequence.setCodonReference(referenceCodon);
+		predictedVariantConsequence.setCodonVariant(variantCodon);
+
+		String referenceAminoAcids = null;
+		String variantAminoAcids = null;
+		if (StringUtils.isNotBlank(dto.getAminoAcids())) {
+			String[] refVarAminoAcids = dto.getAminoAcids().split("/");
+			if (refVarAminoAcids.length != 2) {
+				response.addErrorMessage("aminoAcids", ValidationConstants.INVALID_MESSAGE + " (" + dto.getAminoAcids() + ")");
+			} else {
+				referenceAminoAcids = refVarAminoAcids[0];
+				variantAminoAcids = refVarAminoAcids[1];
+			}
+		}
+		predictedVariantConsequence.setAminoAcidReference(referenceAminoAcids);
+		predictedVariantConsequence.setAminoAcidVariant(variantAminoAcids);
+
+		VocabularyTerm polyphenPrediction = null;
+		Float polyphenScore = null;
+		if (attributes.containsKey("PolyPhen")) {
+			Pair<VocabularyTerm, Float> polyphenResult = parsePathogenicityPredictionScore(attributes.get("PolyPhen"), VocabularyConstants.POLYPHEN_PREDICTION_VOCABULARY);
+			if (polyphenResult == null) {
+				response.addErrorMessage("extra - PolyPhen", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("PolyPhen") + ")");
+
+			} else {
+				polyphenPrediction = polyphenResult.getLeft();
+				polyphenScore = polyphenResult.getRight();
+			}
+		}
+		predictedVariantConsequence.setPolyphenPrediction(polyphenPrediction);
+		predictedVariantConsequence.setPolyphenScore(polyphenScore);
+
+		VocabularyTerm siftPrediction = null;
+		Float siftScore = null;
+		if (attributes.containsKey("SIFT")) {
+			Pair<VocabularyTerm, Float> siftResult = parsePathogenicityPredictionScore(attributes.get("SIFT"), VocabularyConstants.SIFT_PREDICTION_VOCABULARY);
+			if (siftResult == null) {
+				response.addErrorMessage("extra - SIFT", ValidationConstants.INVALID_MESSAGE + " (" + attributes.get("SIFT") + ")");
+
+			} else {
+				siftPrediction = siftResult.getLeft();
+				siftScore = siftResult.getRight();
+			}
+		}
+		predictedVariantConsequence.setSiftPrediction(siftPrediction);
+		predictedVariantConsequence.setSiftScore(siftScore);
+
+		Integer cdnaStart = null;
+		Integer cdnaEnd = null;
+		if (StringUtils.isNotBlank(dto.getCdnaPosition())) {
+			Pair<Integer, Integer> cdnaStartEnd = parseStartEnd(dto.getCdnaPosition());
+			if (cdnaStartEnd == null) {
+				response.addErrorMessage("cdnaPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCdnaPosition() + ")");
+			} else {
+				cdnaStart = cdnaStartEnd.getLeft();
+				cdnaEnd = cdnaStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedCdnaStart(cdnaStart);
+		predictedVariantConsequence.setCalculatedCdnaEnd(cdnaEnd);
+
+		Integer cdsStart = null;
+		Integer cdsEnd = null;
+		if (StringUtils.isNotBlank(dto.getCdsPosition())) {
+			Pair<Integer, Integer> cdsStartEnd = parseStartEnd(dto.getCdsPosition());
+			if (cdsStartEnd == null) {
+				response.addErrorMessage("cdsPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getCdsPosition() + ")");
+			} else {
+				cdsStart = cdsStartEnd.getLeft();
+				cdsEnd = cdsStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedCdsStart(cdsStart);
+		predictedVariantConsequence.setCalculatedCdsEnd(cdsEnd);
+
+		Integer proteinStart = null;
+		Integer proteinEnd = null;
+		if (StringUtils.isNotBlank(dto.getProteinPosition())) {
+			Pair<Integer, Integer> proteinStartEnd = parseStartEnd(dto.getProteinPosition());
+			if (proteinStartEnd == null) {
+				response.addErrorMessage("proteinPosition", ValidationConstants.INVALID_MESSAGE + " (" + dto.getProteinPosition() + ")");
+			} else {
+				proteinStart = proteinStartEnd.getLeft();
+				proteinEnd = proteinStartEnd.getRight();
+			}
+		}
+		predictedVariantConsequence.setCalculatedProteinStart(proteinStart);
+		predictedVariantConsequence.setCalculatedProteinEnd(proteinEnd);
+
+		if (response.hasErrors()) {
+			throw new ObjectValidationException(dto, response.errorMessagesString());
+		}
+
+		predictedVariantConsequence = predictedVariantConsequenceDAO.persist(predictedVariantConsequence);
+
+		if (!isUpdate) {
+			if (variantLocation.getPredictedVariantConsequences() == null) {
+				variantLocation.setPredictedVariantConsequences(new ArrayList<>());
+			}
+			variantLocation.getPredictedVariantConsequences().add(predictedVariantConsequence);
+		}
+		cvglaDAO.persist(variantLocation);
+
+		return predictedVariantConsequence;
+	}
+
+	/**
+	 * Indexes the "key=value" entries of the Extra column by key; entries without
+	 * a separator or with an empty value are skipped.
+	 */
+	private Map<String, String> getExtraAttributes(VepTxtDTO dto) {
+		Map<String, String> attributes = new HashMap<>();
+		if (CollectionUtils.isNotEmpty(dto.getExtra())) {
+			for (String keyValue : dto.getExtra()) {
+				// Split on the first '=' only, so a value that itself contains '=' stays intact
+				String[] parts = keyValue.split("=", 2);
+				if (parts.length == 2 && StringUtils.isNotEmpty(parts[1])) {
+					attributes.put(parts[0], parts[1]);
+				}
+			}
+		}
+
+		return attributes;
+	}
+
+	/**
+	 * Parses a "prediction(score)" string into its vocabulary term and numeric score.
+	 *
+	 * @return the (term, score) pair, or null when the string is blank, malformed,
+	 *         or names a term that is not found in the given vocabulary
+	 */
+	private Pair<VocabularyTerm, Float> parsePathogenicityPredictionScore(String result, String vocabularyName) {
+
+		if (StringUtils.isBlank(result)) {
+			return null;
+		}
+
+		Matcher matcher = PATHOGENICITY_PREDICTION_RESULT.matcher(result);
+		if (!matcher.find()) {
+			return null;
+		}
+
+		VocabularyTerm consequence = vocabularyTermService.getTermInVocabulary(vocabularyName, matcher.group(1)).getEntity();
+		if (consequence == null) {
+			return null;
+		}
+
+		ImmutablePair<VocabularyTerm, Float> parsedResult = new ImmutablePair<>(consequence, Float.parseFloat(matcher.group(2)));
+
+		return parsedResult;
+	}
+
+	/**
+	 * Parses "N" or "start-end" (either side may be "?") into a start/end pair.
+	 *
+	 * @return the pair, with null for an unknown ("?") side, or null when the string is malformed
+	 */
+	private Pair<Integer, Integer> parseStartEnd(String position) {
+		Matcher matcher = POSITION_STRING.matcher(position);
+		if (!matcher.find()) {
+			return null;
+		}
+
+		try {
+			Integer start = parsePositionBound(matcher.group(1));
+			// A lone coordinate is both start and end
+			Integer end = matcher.group(2) == null ? start : parsePositionBound(matcher.group(2));
+			return new ImmutablePair<>(start, end);
+		} catch (NumberFormatException e) {
+			// All digits, but outside the Integer range
+			return null;
+		}
+	}
+
+	/** Converts one coordinate token to an Integer; "?" (unknown) becomes null. */
+	private Integer parsePositionBound(String bound) {
+		return Objects.equals("?", bound) ? null : Integer.parseInt(bound);
+	}
+}
diff --git a/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql
b/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql new file mode 100644 index 000000000..0bda5e377 --- /dev/null +++ b/src/main/resources/db/migration/v0.38.0.1__predictedvariantconsequence.sql @@ -0,0 +1,185 @@ +ALTER TABLE transcript ADD COLUMN transcriptid VARCHAR(255); + +CREATE INDEX transcript_transcriptid_index ON transcript USING btree (transcriptid); + +CREATE TABLE predictedvariantconsequence ( + id bigint PRIMARY KEY, + datecreated timestamp(6) with time zone, + dateupdated timestamp(6) with time zone, + dbdatecreated timestamp(6) with time zone, + dbdateupdated timestamp(6) with time zone, + internal boolean DEFAULT false NOT NULL, + obsolete boolean DEFAULT false NOT NULL, + createdby_id bigint, + updatedby_id bigint, + variantgenomiclocation_id bigint, + varianttranscript_id bigint, + vepimpact_id bigint, + polyphenprediction_id bigint, + polyphenscore real, + siftprediction_id bigint, + siftscore real, + aminoacidreference text, + aminoacidvariant text, + codonreference text, + codonvariant text, + calculatedcdnastart integer, + calculatedcdnaend integer, + calculatedcdsstart integer, + calculatedcdsend integer, + calculatedproteinstart integer, + calculatedproteinend integer, + hgvsproteinnomenclature text, + hgvscodingnomenclature text, + genelevelconsequence boolean DEFAULT false NOT NULL +); + +CREATE SEQUENCE predictedvariantconsequence_seq + START WITH 1 + INCREMENT BY 50 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +CREATE INDEX predictedvariantconsequence_varianttranscript_index ON predictedvariantconsequence USING btree (varianttranscript_id); +CREATE INDEX predictedvariantconsequence_vepimpact_index ON predictedvariantconsequence USING btree (vepimpact_id); +CREATE INDEX predictedvariantconsequence_polyphenprediction_index ON predictedvariantconsequence USING btree (polyphenprediction_id); +CREATE INDEX predictedvariantconsequence_siftprediction_index ON predictedvariantconsequence USING btree (siftprediction_id); 
+CREATE INDEX predictedvariantconsequence_createdby_index ON predictedvariantconsequence USING btree (createdby_id);
+CREATE INDEX predictedvariantconsequence_updatedby_index ON predictedvariantconsequence USING btree (updatedby_id);
+CREATE INDEX predictedvariantconsequence_hgvsproteinnomenclature_index ON predictedvariantconsequence USING btree (hgvsProteinNomenclature);
+CREATE INDEX predictedvariantconsequence_hgvscodingnomenclature_index ON predictedvariantconsequence USING btree (hgvsCodingNomenclature);
+CREATE INDEX predictedvariantconsequence_variantgenomiclocation_index ON predictedvariantconsequence USING btree (variantGenomicLocation_id);
+
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_createdby_id_fk FOREIGN KEY (createdby_id) REFERENCES person(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_updatedby_id_fk FOREIGN KEY (updatedby_id) REFERENCES person(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_variantgenomiclocation_id_fk FOREIGN KEY (variantgenomiclocation_id) REFERENCES curatedvariantgenomiclocation(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_varianttranscript_id_fk FOREIGN KEY (varianttranscript_id) REFERENCES transcript(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_vepimpact_id_fk FOREIGN KEY (vepimpact_id) REFERENCES vocabularyterm(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_polyphenprediction_id_fk FOREIGN KEY (polyphenprediction_id) REFERENCES vocabularyterm(id);
+ALTER TABLE ONLY predictedvariantconsequence ADD CONSTRAINT predictedvariantconsequence_siftprediction_id_fk FOREIGN KEY (siftprediction_id) REFERENCES vocabularyterm(id); -- named after the column it constrains
+
+CREATE TABLE predictedvariantconsequence_ontologyterm (
+	predictedvariantconsequence_id bigint,
+	vepconsequences_id bigint
+);
+
+CREATE INDEX predictedvariantconsequence_ontologyterm_pvc_index ON predictedvariantconsequence_ontologyterm USING btree (predictedvariantconsequence_id); +CREATE INDEX predictedvariantconsequence_ontologyterm_vc_index ON predictedvariantconsequence_ontologyterm USING btree (vepconsequences_id); + +ALTER TABLE ONLY predictedvariantconsequence_ontologyterm ADD CONSTRAINT predictedvariantconsequence_ontologyterm_pvc_id_fk FOREIGN KEY (predictedvariantconsequence_id) REFERENCES predictedvariantconsequence (id); +ALTER TABLE ONLY predictedvariantconsequence_ontologyterm ADD CONSTRAINT predictedvariantconsequence_ontologyterm_vc_id_fk FOREIGN KEY (vepconsequences_id) REFERENCES ontologyterm (id); + +INSERT INTO bulkloadgroup (id, name) VALUES (nextval('bulkloadgroup_seq'), 'File Management System (FMS) VEP Transcript Loads'); +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'FB VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'MGI VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'RGD VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'WB VEP Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPTRANSCRIPT', 'ZFIN VEP 
Transcript Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Transcript Loads'; +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) + SELECT id, '0 0 20 ? * SUN-THU', false FROM bulkload WHERE backendbulkloadtype = 'VEPTRANSCRIPT'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'FB' FROM bulkload WHERE name = 'FB VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'MGI' FROM bulkload WHERE name = 'MGI VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'RGD' FROM bulkload WHERE name = 'RGD VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'WB' FROM bulkload WHERE name = 'WB VEP Transcript Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPTRANSCRIPT', 'ZFIN' FROM bulkload WHERE name = 'ZFIN VEP Transcript Load'; + +INSERT INTO bulkloadgroup (id, name) VALUES (nextval('bulkloadgroup_seq'), 'File Management System (FMS) VEP Gene Loads'); +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'FB VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'MGI VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'RGD VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'WB 
VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id) + SELECT nextval('bulkload_seq'), 'VEPGENE', 'ZFIN VEP Gene Load', 'STOPPED', id FROM bulkloadgroup WHERE name = 'File Management System (FMS) VEP Gene Loads'; +INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive) + SELECT id, '0 0 23 ? * SUN-THU', false FROM bulkload WHERE backendbulkloadtype = 'VEPGENE'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'FB' FROM bulkload WHERE name = 'FB VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'MGI' FROM bulkload WHERE name = 'MGI VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'RGD' FROM bulkload WHERE name = 'RGD VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'WB' FROM bulkload WHERE name = 'WB VEP Gene Load'; +INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype) + SELECT id, 'VEPGENE', 'ZFIN' FROM bulkload WHERE name = 'ZFIN VEP Gene Load'; + +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'SIFT Prediction', 'SIFT prediction of impact of missense variation', 'sift_prediction'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'PolyPhen-2 Prediction', 'Polyphen-2 prediction of impact of missense variation', 'polyphen_prediction'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'VEP Impact', 'Ensembl VEP predicted impact rating of variant', 'vep_impact'); +INSERT INTO vocabulary (id, name, vocabularydescription, vocabularylabel) + VALUES (nextval('vocabulary_seq'), 'VEP Consequence', 'Names of SOTerms used to report predicted consequence of 
variant by Ensembl VEP', 'vep_consequence'); + +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'deleterious', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'deleterious_low_confidence', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'tolerated', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'tolerated_low_confidence', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'no_prediction', id FROM vocabulary WHERE vocabularylabel = 'sift_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'benign', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'possibly_damaging', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'probably_damaging', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'unknown', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'no_prediction', id FROM vocabulary WHERE vocabularylabel = 'polyphen_prediction'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'HIGH', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, 
vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'MODERATE', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'LOW', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'MODIFIER', id FROM vocabulary WHERE vocabularylabel = 'vep_impact'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'transcript_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_acceptor_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_gained', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'frameshift_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_lost', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'start_lost', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'transcript_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'feature_elongation', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'feature_truncation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'inframe_insertion', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'inframe_deletion', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'missense_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'protein_altering_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_5th_base_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_donor_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'splice_polypyrimidine_tract_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'incomplete_terminal_codon_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'start_retained_variant', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'stop_retained_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'synonymous_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'coding_sequence_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'mature_miRNA_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), '5_prime_UTR_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), '3_prime_UTR_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'non_coding_transcript_exon_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'intron_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'NMD_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'non_coding_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'coding_transcript_variant', id FROM vocabulary WHERE vocabularylabel = 
'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'upstream_gene_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'downstream_gene_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TFBS_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TFBS_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'TF_binding_site_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_ablation', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_amplification', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'regulatory_region_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'intergenic_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; +INSERT INTO vocabularyterm (id, name, vocabulary_id) SELECT nextval('vocabularyterm_seq'), 'sequence_variant', id FROM vocabulary WHERE vocabularylabel = 'vep_consequence'; + diff --git a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java index 
13f5ae5a9..205203b3c 100644 --- a/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java +++ b/src/test/java/org/alliancegenome/curation_api/Gff3BulkUploadITCase.java @@ -78,6 +78,7 @@ public void gff3DataBulkUploadTranscriptEntity() throws Exception { statusCode(200). body("entity.modInternalId", is(transcriptId)). body("entity.name", is("Y74C9A.2a.1")). + body("entity.transcriptId", is("WB:Y74C9A.2a.1")). body("entity.taxon.curie", is("NCBITaxon:6239")). body("entity.dataProvider.sourceOrganization.abbreviation", is("WB")). body("entity.transcriptType.curie", is("SO:0000234")). @@ -192,6 +193,7 @@ public void gff3DataBulkUploadUpdateTranscriptEntity() throws Exception { statusCode(200). body("entity.modInternalId", is(transcriptId)). body("entity.name", is("Y74C9A.2a.1")). + body("entity.transcriptId", is("RefSeq:Y74C9A.2a.1")). body("entity.taxon.curie", is("NCBITaxon:6239")). body("entity.dataProvider.sourceOrganization.abbreviation", is("WB")). body("entity.transcriptType.curie", is("SO:0001035")). 
diff --git a/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java new file mode 100644 index 000000000..f971cfebf --- /dev/null +++ b/src/test/java/org/alliancegenome/curation_api/VepFmsITCase.java @@ -0,0 +1,199 @@ +package org.alliancegenome.curation_api; + +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.hasKey; + +import org.alliancegenome.curation_api.base.BaseITCase; +import org.alliancegenome.curation_api.resources.TestContainerResource; +import org.apache.commons.codec.digest.DigestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +import io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.RestAssured; +import io.restassured.config.HttpClientConfig; +import io.restassured.config.RestAssuredConfig; + +@QuarkusIntegrationTest +@QuarkusTestResource(TestContainerResource.Initializer.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@DisplayName("611 - VEP data bulk upload - FMS") +@Order(611) +public class VepFmsITCase extends BaseITCase { + + // These tests require: GeneBulkUploadITCase and VocabularyTermITCase + + @BeforeEach + public void init() { + RestAssured.config = RestAssuredConfig.config() + .httpClient(HttpClientConfig.httpClientConfig() + .setParam("http.socket.timeout", 100000) + .setParam("http.connection.timeout", 100000)); + } + + private final String vepFmsTestFilePath = "src/test/resources/bulk/fms/11_vep/"; + private final String vepTranscriptFmsBulkPostEndpoint = 
"/api/predictedvariantconsequence/bulk/WB/transcriptConsequenceFile"; + private final String vepGeneFmsBulkPostEndpoint = "/api/predictedvariantconsequence/bulk/WB/geneConsequenceFile"; + private final String variantHgvs = "NC_003279.8:g.1A>T"; + private final String variantId = DigestUtils.md5Hex(variantHgvs); + private final String variantGetEndpoint = "/api/variant/"; + + private void loadRequiredEntities() throws Exception { + createSoTerm("SO:0001574", "splice_acceptor_variant", false); + createSoTerm("SO:0001630", "splice_donor_5th_base_variant", false); + } + + @Test + @Order(1) + public void vepTranscriptFmsBulkUpload() throws Exception { + loadRequiredEntities(); + + checkSuccessfulBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). + body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODERATE")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_acceptor_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("probably_damaging")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.993F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("tolerated")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(800)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(600)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(246)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0]", not(hasKey("calculatedProteinEnd"))). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Thr10Ile")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29T>I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(false)); + } + + @Test + @Order(2) + public void vepGeneBulkUpload() throws Exception { + checkSuccessfulBulkLoad(vepGeneFmsBulkPostEndpoint, vepFmsTestFilePath + "AF_01_all_fields.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). 
+ body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODERATE")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_acceptor_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("probably_damaging")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.993F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("tolerated")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTc")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(800)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(600)). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(246)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0]", not(hasKey("calculatedProteinEnd"))). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Thr10Ile")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29T>I")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(true)); + } + + @Test + @Order(3) + public void vepTranscriptMissingRequiredFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_01_no_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_02_no_feature.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_03_no_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "MR_04_no_impact.json"); + } + + @Test + @Order(4) + public void vepTranscriptEmptyRequiredFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_01_empty_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_02_empty_feature.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_03_empty_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "ER_04_empty_impact.json"); + } + + @Test + @Order(5) + public void vepTranscriptInvalidFields() throws Exception { + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_01_invalid_uploaded_variation.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_02_invalid_feature.json"); + 
checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_03_invalid_consequence.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_04_invalid_cdna_position.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_05_invalid_cds_position.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_06_invalid_protein_position.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_07_invalid_amino_acids.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_08_invalid_codons.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_09_invalid_impact.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_10_invalid_polyphen.json"); + checkFailedBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_11_invalid_sift.json"); + checkFailedBulkLoad(vepGeneFmsBulkPostEndpoint, vepFmsTestFilePath + "IV_12_invalid_variant_transcript_pair.json"); + } + + @Test + @Order(6) + public void vepTranscriptUpdate() throws Exception { + checkSuccessfulBulkLoad(vepTranscriptFmsBulkPostEndpoint, vepFmsTestFilePath + "UD_01_update.json"); + + RestAssured.given(). + when(). + get(variantGetEndpoint + variantId). + then(). + statusCode(200). + body("entity.modInternalId", is(variantId)). + body("entity.curatedVariantGenomicLocations[0].hgvs", is("NC_003279.8:g.1A>T")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences", hasSize(1)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].variantTranscript.modInternalId", is("WB:Y74C9A.2a.1")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepImpact.name", is("MODIFIER")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences", hasSize(1)). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].vepConsequences[0].name", is("splice_donor_5th_base_variant")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenPrediction.name", is("possibly_damaging")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].polyphenScore", is(0.8F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftPrediction.name", is("deleterious_low_confidence")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].siftScore", is(0.767F)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidReference", is("M")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].aminoAcidVariant", is("N")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonReference", is("aCt")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].codonVariant", is("aTt")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaStart", is(2)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdnaEnd", is(900)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsStart", is(3)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedCdsEnd", is(500)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinStart", is(247)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].calculatedProteinEnd", is(250)). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsProteinNomenclature", is("WB:CE49439:p.Met10Neo")). 
+ body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].hgvsCodingNomenclature", is("WB:Y74C9A.2a.1:c.29M>N")). + body("entity.curatedVariantGenomicLocations[0].predictedVariantConsequences[0].geneLevelConsequence", is(true)); + } + +} diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json b/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json index a691479e6..8cf67b6a4 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_01_empty_seq_id.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json b/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json index 32a9ac0ec..495bfdb3c 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_02_empty_strand.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json index 3e262c924..de0dc15de 100644 --- a/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/ER_03_empty_transcript_parent.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json b/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json index ef30adafe..e4157e0f5 100644 --- 
a/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json +++ b/src/test/resources/bulk/fms/08_gff_data/GFF_01_transcript.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json b/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json index 920bde5f5..9d832b705 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_01_invalid_strand.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json b/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json index e6f751326..788af87dd 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_02_invalid_phase.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json index 8d524dd08..71503f4fd 100644 --- a/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/IV_03_invalid_transcript_parent.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:Invalid", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json b/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json index 
e9651e61e..554c08d40 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_01_no_seq_id.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json b/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json index 0997e4e33..6065ed59b 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_02_no_start.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json b/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json index 7284de1d8..fe1480334 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_03_no_end.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json b/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json index 3a6cdcbd6..d2162706f 100644 --- a/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_04_no_strand.json @@ -9,7 +9,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json b/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json index 123f5e739..1a76034db 100644 --- 
a/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json +++ b/src/test/resources/bulk/fms/08_gff_data/MR_05_no_transcript_parent.json @@ -9,7 +9,8 @@ "phase": 0, "attributes": [ "ID=Transcript:Y74C9A.2a.1", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=WB:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json b/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json index ed9d9e98e..227ff22ce 100644 --- a/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json +++ b/src/test/resources/bulk/fms/08_gff_data/UD_01_update_transcript.json @@ -10,7 +10,8 @@ "attributes": [ "ID=Transcript:Y74C9A.2a.1", "Parent=Gene:WBGene00022276", - "Name=Y74C9A.2a.1" + "Name=Y74C9A.2a.1", + "transcript_id=RefSeq:Y74C9A.2a.1" ] } ] diff --git a/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json b/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json new file mode 100644 index 000000000..f68f032b4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/AF_01_all_fields.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json new file mode 100644 index 000000000..a50d2f5bf --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_01_empty_uploaded_variation.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + 
"featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json b/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json new file mode 100644 index 000000000..623ee5311 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_02_empty_feature.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json b/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json new file mode 100644 index 000000000..56ff42450 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_03_empty_consequence.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json 
b/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json new file mode 100644 index 000000000..5a4ff658b --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/ER_04_empty_impact.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json new file mode 100644 index 000000000..7bb5766d4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_01_invalid_uploaded_variation.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "Invalid", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json b/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json new file mode 100644 index 000000000..010eb35b1 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_02_invalid_feature.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Invalid", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", 
+ "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json b/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json new file mode 100644 index 000000000..1984520a5 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_03_invalid_consequence.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "invalid", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json b/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json new file mode 100644 index 000000000..e8b70e196 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_04_invalid_cdna_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3 - 800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json b/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json new file mode 100644 index 
000000000..697fdc926 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_05_invalid_cds_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1 to 600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json b/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json new file mode 100644 index 000000000..fbd4db078 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_06_invalid_protein_position.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "unknown", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json new file mode 100644 index 000000000..bfa4a2564 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_07_invalid_amino_acids.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": 
"TI", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json b/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json new file mode 100644 index 000000000..c80c0bdd4 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_08_invalid_codons.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCcaTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json b/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json new file mode 100644 index 000000000..854d16908 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_09_invalid_impact.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=INVALID", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json b/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json new file mode 100644 index 000000000..76638b5ed --- /dev/null +++ 
b/src/test/resources/bulk/fms/11_vep/IV_10_invalid_polyphen.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=invalid(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json b/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json new file mode 100644 index 000000000..d61d901d9 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_11_invalid_sift.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=invalid(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json b/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json new file mode 100644 index 000000000..6728068eb --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/IV_12_invalid_variant_transcript_pair.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "VARIANTTEST:Variant0001", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + 
"IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json b/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json new file mode 100644 index 000000000..20b35b4f2 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_01_no_uploaded_variation.json @@ -0,0 +1,21 @@ +[ + { + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json b/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json new file mode 100644 index 000000000..6bc3d351f --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_02_no_feature.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json b/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json new file mode 100644 index 000000000..512fd10b1 --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_03_no_consequence.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + 
"feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "IMPACT=MODERATE", + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json b/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json new file mode 100644 index 000000000..89430906d --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/MR_04_no_impact.json @@ -0,0 +1,21 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:1-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_acceptor_variant", + "cdnaPosition": "3-800", + "cdsPosition": "1-600", + "proteinPosition": "246-?", + "aminoAcids": "T/I", + "codons": "aCc/aTc", + "extra": [ + "HGVSc=WB:Y74C9A.2a.1:c.29T>I", + "HGVSp=WB:CE49439:p.Thr10Ile", + "PolyPhen=probably_damaging(0.993)", + "SIFT=tolerated(0)" + ] + } +] + diff --git a/src/test/resources/bulk/fms/11_vep/UD_01_update.json b/src/test/resources/bulk/fms/11_vep/UD_01_update.json new file mode 100644 index 000000000..1fb48758c --- /dev/null +++ b/src/test/resources/bulk/fms/11_vep/UD_01_update.json @@ -0,0 +1,22 @@ +[ + { + "uploadedVariation": "NC_003279.8:g.1A>T", + "location": "I:2-1000", + "feature": "WB:Y74C9A.2a.1", + "featureType": "Transcript", + "consequence": "splice_donor_5th_base_variant", + "cdnaPosition": "2-900", + "cdsPosition": "3-500", + "proteinPosition": "247-250", + "aminoAcids": "M/N", + "codons": "aCt/aTt", + "extra": [ + "IMPACT=MODIFIER", + "HGVSc=WB:Y74C9A.2a.1:c.29M>N", + "HGVSp=WB:CE49439:p.Met10Neo", + "PolyPhen=possibly_damaging(0.8)", + "SIFT=deleterious_low_confidence(0.767)" + ] + } +] +