Skip to content

Commit

Permalink
feat: allow organisms without consensus sequences (#3537) (#3388)
Browse files Browse the repository at this point in the history
* feat(kubernetes): add `allowSubmissionOfConsensusSequences` to the config

* feat(backend): allow organisms without sequences

* feat(website): allow organisms without sequences

* test(preprocessing): make sure that it works without sequences

* docs: add `allowSubmissionOfConsensusSequences` to Helm chart config reference

* fixup! feat(website): allow organisms without sequences

* fixup! feat(website): allow organisms without sequences

* fixup! feat(kubernetes): add `allowSubmissionOfConsensusSequences` to the config

* minor text changes

* don't use a complicated looking template

* add test that sequences download isn't present anymore

* refactor(backend): rename stuff

* rename config value

* fix tests and format
  • Loading branch information
fengelniederhammer authored Jan 22, 2025
1 parent e535ff7 commit 3229be7
Show file tree
Hide file tree
Showing 45 changed files with 875 additions and 230 deletions.
3 changes: 3 additions & 0 deletions backend/src/main/kotlin/org/loculus/backend/config/Config.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ data class Schema(
val metadata: List<Metadata>,
val externalMetadata: List<ExternalMetadata> = emptyList(),
val earliestReleaseDate: EarliestReleaseDate = EarliestReleaseDate(false, emptyList()),
val submissionDataTypes: SubmissionDataTypes = SubmissionDataTypes(),
)

/**
 * Flags describing which kinds of data are part of a submission.
 *
 * @property consensusSequences whether consensus sequences are submitted; enabled (`true`) unless configured otherwise.
 */
data class SubmissionDataTypes(
    val consensusSequences: Boolean = true,
)

// The JSON property names need to be kept in sync with website config enum `metadataPossibleTypes` in `config.ts`
// They also need to be in sync with the SILO database config, as the Loculus config is a sort of superset of it
// See https://lapis.cov-spectrum.org/gisaid/v2/docs/maintainer-docs/references/database-configuration#metadata-types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ open class SubmissionController(
@HiddenParam authenticatedUser: AuthenticatedUser,
@Parameter(description = GROUP_ID_DESCRIPTION) @RequestParam groupId: Int,
@Parameter(description = METADATA_FILE_DESCRIPTION) @RequestParam metadataFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile,
@Parameter(description = SEQUENCE_FILE_DESCRIPTION) @RequestParam sequenceFile: MultipartFile?,
@Parameter(description = "Data Use terms under which data is released.") @RequestParam dataUseTermsType:
DataUseTermsType,
@Parameter(
Expand Down Expand Up @@ -118,7 +118,7 @@ open class SubmissionController(
) @RequestParam metadataFile: MultipartFile,
@Parameter(
description = SEQUENCE_FILE_DESCRIPTION,
) @RequestParam sequenceFile: MultipartFile,
) @RequestParam sequenceFile: MultipartFile?,
): List<SubmissionIdMapping> {
val params = SubmissionParams.RevisionSubmissionParams(
organism,
Expand Down Expand Up @@ -172,7 +172,9 @@ open class SubmissionController(
}

val lastDatabaseWriteETag = releasedDataModel.getLastDatabaseWriteETag()
if (ifNoneMatch == lastDatabaseWriteETag) return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
if (ifNoneMatch == lastDatabaseWriteETag) {
return ResponseEntity.status(HttpStatus.NOT_MODIFIED).build()
}

val headers = HttpHeaders()
headers.contentType = MediaType.parseMediaType(MediaType.APPLICATION_NDJSON_VALUE)
Expand Down
58 changes: 41 additions & 17 deletions backend/src/main/kotlin/org/loculus/backend/model/SubmitModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.loculus.backend.api.DataUseTerms
import org.loculus.backend.api.Organism
import org.loculus.backend.api.SubmissionIdMapping
import org.loculus.backend.auth.AuthenticatedUser
import org.loculus.backend.config.BackendConfig
import org.loculus.backend.controller.BadRequestException
import org.loculus.backend.controller.DuplicateKeyException
import org.loculus.backend.controller.UnprocessableEntityException
Expand Down Expand Up @@ -41,14 +42,14 @@ interface SubmissionParams {
val organism: Organism
val authenticatedUser: AuthenticatedUser
val metadataFile: MultipartFile
val sequenceFile: MultipartFile
val sequenceFile: MultipartFile?
val uploadType: UploadType

data class OriginalSubmissionParams(
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
val groupId: Int,
val dataUseTerms: DataUseTerms,
) : SubmissionParams {
Expand All @@ -59,7 +60,7 @@ interface SubmissionParams {
override val organism: Organism,
override val authenticatedUser: AuthenticatedUser,
override val metadataFile: MultipartFile,
override val sequenceFile: MultipartFile,
override val sequenceFile: MultipartFile?,
) : SubmissionParams {
override val uploadType: UploadType = UploadType.REVISION
}
Expand All @@ -76,6 +77,7 @@ class SubmitModel(
private val groupManagementPreconditionValidator: GroupManagementPreconditionValidator,
private val dataUseTermsPreconditionValidator: DataUseTermsPreconditionValidator,
private val dateProvider: DateProvider,
private val backendConfig: BackendConfig,
) {

companion object AcceptedFileTypes {
Expand Down Expand Up @@ -106,9 +108,11 @@ class SubmitModel(
batchSize,
)

log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
if (requiresConsensusSequenceFile(submissionParams.organism)) {
log.debug { "Validating submission with uploadId $uploadId" }
val (metadataSubmissionIds, sequencesSubmissionIds) = uploadDatabaseService.getUploadSubmissionIds(uploadId)
validateSubmissionIdSets(metadataSubmissionIds.toSet(), sequencesSubmissionIds.toSet())
}

if (submissionParams is SubmissionParams.RevisionSubmissionParams) {
log.info { "Associating uploaded sequence data with existing sequence entries with uploadId $uploadId" }
Expand Down Expand Up @@ -150,17 +154,32 @@ class SubmitModel(
metadataTempFileToDelete.delete()
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
submissionParams.sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
val sequenceFile = submissionParams.sequenceFile
if (sequenceFile == null) {
if (requiresConsensusSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Submissions for organism ${submissionParams.organism.name} require a sequence file.",
)
}
} else {
if (!requiresConsensusSequenceFile(submissionParams.organism)) {
throw BadRequestException(
"Sequence uploads are not allowed for organism ${submissionParams.organism.name}.",
)
}

val sequenceTempFileToDelete = MaybeFile()
try {
val sequenceStream = getStreamFromFile(
sequenceFile,
uploadId,
sequenceFileTypes,
sequenceTempFileToDelete,
)
uploadSequences(uploadId, sequenceStream, batchSize, submissionParams.organism)
} finally {
sequenceTempFileToDelete.delete()
}
}
}

Expand Down Expand Up @@ -324,4 +343,9 @@ class SubmitModel(
SequenceUploadAuxTable.select(SequenceUploadAuxTable.sequenceSubmissionIdColumn).count() > 0
return metadataInAuxTable || sequencesInAuxTable
}

/**
 * Reads the `consensusSequences` flag from the submission data types of the schema
 * configured for [organism].
 *
 * @return `true` when consensus sequences are enabled for the organism, i.e. a sequence file is required.
 */
private fun requiresConsensusSequenceFile(organism: Organism): Boolean {
    val submissionDataTypes = backendConfig.getInstanceConfig(organism).schema.submissionDataTypes
    return submissionDataTypes.consensusSequences
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,17 @@ class UploadDatabaseService(
jsonb_build_object(
'metadata', metadata_upload_aux_table.metadata,
'unalignedNucleotideSequences',
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
COALESCE(
jsonb_object_agg(
sequence_upload_aux_table.segment_name,
sequence_upload_aux_table.compressed_sequence_data::jsonb
) FILTER (WHERE sequence_upload_aux_table.segment_name IS NOT NULL),
'{}'::jsonb
)
)
FROM
metadata_upload_aux_table
JOIN
LEFT JOIN
sequence_upload_aux_table
ON metadata_upload_aux_table.upload_id = sequence_upload_aux_table.upload_id
AND metadata_upload_aux_table.submission_id = sequence_upload_aux_table.submission_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.testcontainers.shaded.org.awaitility.Awaitility.await

// Shared defaults for backend endpoint tests.
// NOTE(review): the organism names presumably have to match organisms defined in the test backend config - confirm.
const val DEFAULT_ORGANISM = "dummyOrganism"
const val OTHER_ORGANISM = "otherOrganism"
// Organism used by tests that cover submissions without consensus sequences.
const val ORGANISM_WITHOUT_CONSENSUS_SEQUENCES = "dummyOrganismWithoutConsensusSequences"
const val DEFAULT_PIPELINE_VERSION = 1L
const val DEFAULT_EXTERNAL_METADATA_UPDATER = "ena"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@ import org.hamcrest.CoreMatchers.hasItem
import org.hamcrest.CoreMatchers.`is`
import org.hamcrest.MatcherAssert.assertThat
import org.hamcrest.Matchers.allOf
import org.hamcrest.Matchers.anEmptyMap
import org.hamcrest.Matchers.containsInAnyOrder
import org.hamcrest.Matchers.empty
import org.hamcrest.Matchers.greaterThan
import org.hamcrest.Matchers.hasProperty
import org.hamcrest.Matchers.hasSize
import org.hamcrest.Matchers.matchesRegex
import org.junit.jupiter.api.Test
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.OriginalData
import org.loculus.backend.api.Status.IN_PROCESSING
import org.loculus.backend.api.Status.RECEIVED
import org.loculus.backend.api.UnprocessedData
import org.loculus.backend.config.BackendSpringProperty
import org.loculus.backend.controller.DEFAULT_ORGANISM
import org.loculus.backend.controller.DEFAULT_USER_NAME
import org.loculus.backend.controller.EndpointTest
import org.loculus.backend.controller.ORGANISM_WITHOUT_CONSENSUS_SEQUENCES
import org.loculus.backend.controller.OTHER_ORGANISM
import org.loculus.backend.controller.assertStatusIs
import org.loculus.backend.controller.expectForbiddenResponse
Expand All @@ -27,7 +31,6 @@ import org.loculus.backend.controller.expectUnauthorizedResponse
import org.loculus.backend.controller.getAccessionVersions
import org.loculus.backend.controller.jwtForDefaultUser
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles.NUMBER_OF_SEQUENCES
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.http.HttpHeaders.ETAG
import org.springframework.test.web.servlet.result.MockMvcResultMatchers.header
Expand Down Expand Up @@ -181,4 +184,37 @@ class ExtractUnprocessedDataEndpointTest(
`is`(empty()),
)
}

// Submits the default files for the organism without consensus sequences, extracts the
// unprocessed data for that organism and checks that each expected entry is returned and that
// the entry for the first accession carries the default metadata but an empty sequence map.
@Test
fun `GIVEN entries for organism without consensus sequences THEN only returns metadata`() {
val submissionResult = convenienceClient.submitDefaultFiles(organism = ORGANISM_WITHOUT_CONSENSUS_SEQUENCES)
val accessionVersions = submissionResult.submissionIdMappings

// Request as many entries as the default files contain.
val result = client.extractUnprocessedData(
numberOfSequenceEntries = DefaultFiles.NUMBER_OF_SEQUENCES,
organism = ORGANISM_WITHOUT_CONSENSUS_SEQUENCES,
)
val responseBody = result.expectNdjsonAndGetContent<UnprocessedData>()
assertThat(responseBody, hasSize(DefaultFiles.NUMBER_OF_SEQUENCES))
// At least one returned entry must match every property below (the one for the first accession).
assertThat(
responseBody,
hasItem(
allOf(
hasProperty<UnprocessedData>("accession", `is`(accessionVersions[0].accession)),
hasProperty("version", `is`(1L)),
hasProperty(
"data",
allOf(
hasProperty<OriginalData<GeneticSequence>>("metadata", `is`(defaultOriginalData.metadata)),
// The key assertion of this test: no sequences are attached to the entry.
hasProperty("unalignedNucleotideSequences", `is`(anEmptyMap<String, GeneticSequence>())),
),
),
hasProperty("submissionId", matchesRegex("custom[0-9]")),
hasProperty("submitter", `is`(DEFAULT_USER_NAME)),
hasProperty("groupId", `is`(submissionResult.groupId)),
hasProperty("submittedAt", greaterThan(1_700_000_000L)),
),
),
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.node.IntNode
import com.fasterxml.jackson.databind.node.NullNode
import com.fasterxml.jackson.databind.node.TextNode
import org.loculus.backend.api.GeneName
import org.loculus.backend.api.GeneticSequence
import org.loculus.backend.api.Insertion
import org.loculus.backend.api.PreprocessingAnnotation
import org.loculus.backend.api.PreprocessingAnnotationSource
Expand Down Expand Up @@ -99,6 +100,21 @@ val defaultProcessedDataMultiSegmented = ProcessedData(
),
)

/**
 * Processed-data fixture that carries metadata only: every sequence map and every insertion map is empty.
 */
val defaultProcessedDataWithoutSequences = ProcessedData<GeneticSequence>(
metadata = mapOf(
"date" to TextNode("2002-12-15"),
"host" to TextNode("google.com"),
"region" to TextNode("Europe"),
"country" to TextNode("Spain"),
// Explicit JSON null rather than an absent key.
"division" to NullNode.instance,
),
unalignedNucleotideSequences = emptyMap(),
alignedNucleotideSequences = emptyMap(),
nucleotideInsertions = emptyMap(),
alignedAminoAcidSequences = emptyMap(),
aminoAcidInsertions = emptyMap(),
)

private val defaultSuccessfulSubmittedData = SubmittedProcessedData(
accession = "If a test result shows this, processed data was not prepared correctly.",
version = 1,
Expand Down
Loading

0 comments on commit 3229be7

Please sign in to comment.