diff --git a/src/main/kotlin/org/genspectrum/ingest/Main.kt b/src/main/kotlin/org/genspectrum/ingest/Main.kt index 3ba2980..265186b 100644 --- a/src/main/kotlin/org/genspectrum/ingest/Main.kt +++ b/src/main/kotlin/org/genspectrum/ingest/Main.kt @@ -3,6 +3,7 @@ package org.genspectrum.ingest import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.core.subcommands import com.github.ajalt.clikt.parameters.arguments.argument +import org.genspectrum.ingest.file.AllPangoLineagesFile import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType @@ -33,18 +34,27 @@ class SC2GisaidIngestCommand : CliktCommand(name = "ingest-sc2-gisaid") { private val geoLocationRulesFile by argument("geo-location-rules") override fun run() { + val previousProcessedVersionDir = Path(workdirPath) + .resolve("00_archive") + .resolve(previousProcessedVersion) runSC2GisaidWorkflow( - Path(workdirPath), - url, user, password, - File( + workdir = Path(workdirPath), + url = url, + user = user, + password = password, + previousProcessed = File( "provision.$previousProcessedVersion", - Path(workdirPath).resolve("00_archive"), + previousProcessedVersionDir, false, FileType.NDJSON, Compression.ZSTD ), - Path(workdirPath).resolve("00_archive/provision.$previousProcessedVersion.hashes.ndjson.zst"), - Path(geoLocationRulesFile) + previousAllPangoLineagesFile = AllPangoLineagesFile( + previousProcessedVersion, + previousProcessedVersionDir, + ), + previousHashes = previousProcessedVersionDir.resolve("provision.$previousProcessedVersion.hashes.ndjson.zst"), + geoLocationRulesFile = Path(geoLocationRulesFile) ) } } diff --git a/src/main/kotlin/org/genspectrum/ingest/entry/mapPangoLineageToNull.kt b/src/main/kotlin/org/genspectrum/ingest/entry/mapPangoLineageToNull.kt new file mode 100644 index 0000000..0b8cfb0 --- /dev/null +++ b/src/main/kotlin/org/genspectrum/ingest/entry/mapPangoLineageToNull.kt @@ -0,0 +1,16 @@ +package org.genspectrum.ingest.entry + + +/** + * This function cleans "non-pango lineages" from pango lineages columns + */ +fun MutableEntry.mapPangoLineageToNull( + keys: Collection, + nullValues: Set = setOf("Unassigned", "unclassifiable") +) { + for (key in keys) { + if (nullValues.contains(this.metadata[key])) { + this.metadata[key] = null + } + } +} diff --git a/src/main/kotlin/org/genspectrum/ingest/file/File.kt b/src/main/kotlin/org/genspectrum/ingest/file/File.kt index a035bf2..23fede8 100644 --- a/src/main/kotlin/org/genspectrum/ingest/file/File.kt +++ b/src/main/kotlin/org/genspectrum/ingest/file/File.kt @@ -1,5 +1,7 @@ package org.genspectrum.ingest.file +import org.genspectrum.ingest.util.readFile +import org.genspectrum.ingest.util.writeFile import java.nio.file.Path data class File( @@ -38,3 +40,39 @@ enum class Compression { ZSTD, XZ } + +class AllPangoLineagesFile( + dataVersion: String? = null, + directory: Path +) { + val file = File( + name = when (dataVersion) { + null -> "allPangoLineages" + else -> "allPangoLineages.$dataVersion" + }, + directory = directory, + sorted = false, + type = FileType.TSV, + compression = Compression.NONE + ) + + val path + get() = file.path + + fun read(): Set { + return readFile(file.path) + .bufferedReader() + .use { it.readText() } + .lines() + .toSet() + } + + fun write(allPangoLineages: Collection) { + writeFile(file.path).bufferedWriter().use { + for (pangoLineage in allPangoLineages) { + it.write(pangoLineage) + it.newLine() + } + } + } +} diff --git a/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2GisaidData.kt b/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2GisaidData.kt index b26ab25..add0ca9 100644 --- a/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2GisaidData.kt +++ b/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2GisaidData.kt @@ -3,6 +3,7 @@ package org.genspectrum.ingest.proc import com.alibaba.fastjson2.to import org.genspectrum.ingest.AlignedGenome import org.genspectrum.ingest.entry.* +import org.genspectrum.ingest.file.AllPangoLineagesFile import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType @@ -21,7 +22,7 @@ fun joinSC2GisaidData( outputDirectory: Path, outputName: String, nextcladeDatasetVersion: String, -): File { +): Pair { val allFiles = listOf(provisionFile, nextcladeFile, alignedFile) + translationFiles.map { it.second } require(allFiles.all { it.sorted && it.type == FileType.NDJSON }) val outputFile = File(outputName, outputDirectory, true, FileType.NDJSON, Compression.ZSTD) @@ -29,6 +30,8 @@ fun joinSC2GisaidData( val translationNames = translationFiles.map { it.first } val inputStreams = allFiles.map { readFile(it.path) } + val allPangoLineages = HashSet() + val joiner = SortedNdjsonFilesOuterJoiner("id", "seqName", inputStreams) val writer = writeNdjson(writeFile(outputFile.path)) for ((_, values) in joiner) { @@ -66,17 +69,32 @@ fun joinSC2GisaidData( clean(provisionEntry) provisionEntry.metadata["nextcladeDatasetVersion"] = nextcladeDatasetVersion + for (pangoLineageField in pangoLineageNames) { + val pangoLineage = provisionEntry.metadata[pangoLineageField] + if (pangoLineage is String) { + allPangoLineages.add(pangoLineage) + } + } + writer.write(provisionEntry) } writer.close() - return outputFile + val allPangoLineagesFile = AllPangoLineagesFile(directory = outputDirectory) + allPangoLineagesFile.write(allPangoLineages) + + return outputFile to allPangoLineagesFile } +private const val pangoLineage = "pangoLineage" +private const val nextcladePangoLineage = "nextcladePangoLineage" + +private val pangoLineageNames = listOf(pangoLineage, nextcladePangoLineage) + private val oldToNewMetadataNames = listOf( "clade_nextstrain" to "nextstrainClade", "clade_who" to "whoClade", - "Nextclade_pango" to "nextcladePangoLineage", + "Nextclade_pango" to nextcladePangoLineage, "qc.overallScore" to "nextcladeQcOverallScore", "qc.missingData.score" to "nextcladeQcMissingDataScore", "qc.mixedSites.score" to "nextcladeQcMixedSites", @@ -111,8 +129,8 @@ private val selectedMetadata = setOf( "age", "sex", "samplingStrategy", - "pangoLineage", - "nextcladePangoLineage", + pangoLineage, + nextcladePangoLineage, "nextstrainClade", "whoClade", "gisaidClade", @@ -162,6 +180,7 @@ private fun clean(entry: MutableEntry) { parseIntegerFields.forEach { parseInteger(it, true) } parseFloatFields.forEach { parseFloat(it) } fillInMissingAlignedSequences(fillInMissingAlignedSequencesTemplate) + mapPangoLineageToNull(pangoLineageNames) metadata["genbankAccession"] = null metadata["sraAccession"] = null diff --git a/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2NextstrainOpenData.kt b/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2NextstrainOpenData.kt index bc3cc87..6ff3bc7 100644 --- a/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2NextstrainOpenData.kt +++ b/src/main/kotlin/org/genspectrum/ingest/proc/JoinSC2NextstrainOpenData.kt @@ -2,6 +2,7 @@ package org.genspectrum.ingest.proc import org.genspectrum.ingest.AlignedGenome import org.genspectrum.ingest.entry.* +import org.genspectrum.ingest.file.AllPangoLineagesFile import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType @@ -21,7 +22,7 @@ fun joinSC2NextstrainOpenData( outputDirectory: Path, outputName: String, outputCompression: Compression = Compression.ZSTD, -): File { +): Pair { val allInputFiles = listOf(sortedMetadataFile, sortedNextcladeFile, sortedSequencesFile, sortedAlignedFile) + sortedTranslationFiles.map { it.second } require(allInputFiles.all { it.sorted && it.type == FileType.NDJSON }) @@ -30,6 +31,8 @@ fun joinSC2NextstrainOpenData( val translationNames = sortedTranslationFiles.map { it.first } val inputStreams = allInputFiles.map { readFile(it.path) } + val allPangoLineages = HashSet() + val joiner = SortedNdjsonFilesOuterJoiner("strain", "seqName", inputStreams) val writer = writeNdjson(writeFile(outputFile.path)) for ((_, values) in joiner) { @@ -72,12 +75,27 @@ fun joinSC2NextstrainOpenData( if (joined.metadata["strain"] == null) { continue } + for (pangoLineageField in pangoLineageNames) { + val pangoLineage = joined.metadata[pangoLineageField] + if (pangoLineage is String) { + allPangoLineages.add(pangoLineage) + } + } writer.write(joined) } writer.close() - return outputFile + + val allPangoLineagesFile = AllPangoLineagesFile(directory = outputDirectory) + allPangoLineagesFile.write(allPangoLineages) + + return outputFile to allPangoLineagesFile } +private const val pangoLineage = "pangoLineage" +private const val nextcladePangoLineage = "nextcladePangoLineage" + +private val pangoLineageNames = listOf(pangoLineage, nextcladePangoLineage) + private val oldToNewMetadataNames = listOf( "gisaid_epi_isl" to "gisaidEpiIsl", "genbank_accession" to "genbankAccession", @@ -87,7 +105,7 @@ private val oldToNewMetadataNames = listOf( "country_exposure" to "countryExposure", "division_exposure" to "divisionExposure", "Nextstrain_clade" to "nextstrainClade", - "pango_lineage" to "pangoLineage", + "pango_lineage" to pangoLineage, "GISAID_clade" to "gisaidClade", "originating_lab" to "originatingLab", "submitting_lab" to "submittingLab", @@ -96,7 +114,7 @@ private val oldToNewMetadataNames = listOf( "sampling_strategy" to "samplingStrategy", "clade_nextstrain" to "nextstrainClade", "clade_who" to "whoClade", - "Nextclade_pango" to "nextcladePangoLineage", + "Nextclade_pango" to nextcladePangoLineage, "immune_escape" to "immuneEscape", "ace2_binding" to "ace2Binding", "QC_overall_score" to "nextcladeQcOverallScore", @@ -133,8 +151,8 @@ private val selectedMetadata = setOf( "age", "sex", "samplingStrategy", - "pangoLineage", - "nextcladePangoLineage", + pangoLineage, + nextcladePangoLineage, "nextstrainClade", "whoClade", "gisaidClade", @@ -184,6 +202,7 @@ private fun clean(entry: MutableEntry) { parseIntegerFields.forEach { parseInteger(it) } parseFloatFields.forEach { parseFloat(it) } fillInMissingAlignedSequences(fillInMissingAlignedSequencesTemplate) + mapPangoLineageToNull(pangoLineageNames) metadata["died"] = null metadata["fullyVaccinated"] = null diff --git a/src/main/kotlin/org/genspectrum/ingest/proc/TransformSC2GisaidBasics.kt b/src/main/kotlin/org/genspectrum/ingest/proc/TransformSC2GisaidBasics.kt index 9ff2aad..2047287 100644 --- a/src/main/kotlin/org/genspectrum/ingest/proc/TransformSC2GisaidBasics.kt +++ b/src/main/kotlin/org/genspectrum/ingest/proc/TransformSC2GisaidBasics.kt @@ -7,7 +7,11 @@ import org.genspectrum.ingest.entry.mapToNull import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType -import org.genspectrum.ingest.util.* +import org.genspectrum.ingest.util.GeoLocationMapper +import org.genspectrum.ingest.util.readFile +import org.genspectrum.ingest.util.readNdjson +import org.genspectrum.ingest.util.writeFile +import org.genspectrum.ingest.util.writeNdjson import java.nio.file.Path fun transformSC2GisaidBasics( @@ -80,7 +84,7 @@ fun transformSC2GisaidBasics( return TransformSC2GisaidBasicsResult(outputFile, hashOutputFile) } -data class TransformSC2GisaidBasicsResult ( +data class TransformSC2GisaidBasicsResult( val dataFile: File, val hashesFile: File ) diff --git a/src/main/kotlin/org/genspectrum/ingest/util/io.kt b/src/main/kotlin/org/genspectrum/ingest/util/io.kt index 2687342..e525228 100644 --- a/src/main/kotlin/org/genspectrum/ingest/util/io.kt +++ b/src/main/kotlin/org/genspectrum/ingest/util/io.kt @@ -90,10 +90,10 @@ fun writeNdjson( } return WriteNdjsonResponse( - fun (entry: T) { + write = fun (entry: T) { queue.put(entry) }, - fun () { + close = fun () { closed = true writingThread.join() } diff --git a/src/main/kotlin/org/genspectrum/ingest/workflow/SC2GisaidWorkflow.kt b/src/main/kotlin/org/genspectrum/ingest/workflow/SC2GisaidWorkflow.kt index 47e5be0..a22c0f0 100644 --- a/src/main/kotlin/org/genspectrum/ingest/workflow/SC2GisaidWorkflow.kt +++ b/src/main/kotlin/org/genspectrum/ingest/workflow/SC2GisaidWorkflow.kt @@ -5,6 +5,7 @@ import com.alibaba.fastjson2.JSONObject import com.alibaba.fastjson2.JSONWriter import com.alibaba.fastjson2.toJSONByteArray import org.genspectrum.ingest.AlignedGenome +import org.genspectrum.ingest.file.AllPangoLineagesFile import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType @@ -42,6 +43,7 @@ fun runSC2GisaidWorkflow( user: String, password: String, previousProcessed: File, + previousAllPangoLineagesFile: AllPangoLineagesFile, previousHashes: Path, geoLocationRulesFile: Path ) { @@ -87,7 +89,12 @@ fun runSC2GisaidWorkflow( val joinedPath = workdir.resolve("07_joined") Files.createDirectories(joinedPath) - val joinedFilePath = joinFiles(extractedSortedFile, nextcladeNdjsonFiles, joinedPath, nextcladeDatasetVersion) + val (joinedFilePath, newPangoLineagesFile) = joinFiles( + extractedSortedFile, + nextcladeNdjsonFiles, + joinedPath, + nextcladeDatasetVersion + ) println("${LocalDateTime.now()}: Finished joinFiles") @@ -99,16 +106,26 @@ fun runSC2GisaidWorkflow( val unchangedAndNewPath = workdir.resolve("09_unchanged_and_new") Files.createDirectories(unchangedAndNewPath) - val unchangedAndNewFilePath = mergeUnchangedAndNew(unchangedAndNewPath, unchangedFilePath, joinedFilePath) + val unchangedAndNewFilePath = mergeUnchangedAndNew( + outputDirectory = unchangedAndNewPath, + unchangedFilePath = unchangedFilePath, + joinedFilePath = joinedFilePath + ) + val allPangoLineagesFile = mergePangoLineageFiles( + outputDirectory = unchangedAndNewPath, + previousAllPangoLineagesFile = previousAllPangoLineagesFile, + newPangoLineagesFile = newPangoLineagesFile + ) println("${LocalDateTime.now()}: Finished mergeUnchangedAndNew") val finalDestinationPath = workdir.resolve("00_archive") Files.createDirectories(finalDestinationPath) val (finalHashesFile, finalProvisionFile) = moveFinalFiles( - hashesFile, - unchangedAndNewFilePath, - finalDestinationPath + hashesFile = hashesFile, + provisionFile = unchangedAndNewFilePath, + allPangoLineagesFile = allPangoLineagesFile, + directoryPath = finalDestinationPath ) println("Final output: ${finalHashesFile.path}, ${finalProvisionFile.path}") @@ -276,15 +293,15 @@ private fun joinFiles( nextcladeNdjsonFiles: NextcladeOutput, joinedPath: Path, nextcladeDatasetVersion: String, -): File { +): Pair { return joinSC2GisaidData( - extractedSortedFile, - nextcladeNdjsonFiles.nextclade, - nextcladeNdjsonFiles.aligned, - nextcladeNdjsonFiles.translations.toList(), - joinedPath, - "joined", - nextcladeDatasetVersion, + provisionFile = extractedSortedFile, + nextcladeFile = nextcladeNdjsonFiles.nextclade, + alignedFile = nextcladeNdjsonFiles.aligned, + translationFiles = nextcladeNdjsonFiles.translations.toList(), + outputDirectory = joinedPath, + outputName = "joined", + nextcladeDatasetVersion = nextcladeDatasetVersion, ) } @@ -298,24 +315,52 @@ fun mergeUnchangedAndNew(outputDirectory: Path, unchangedFilePath: File, joinedF return outputFile } -private fun moveFinalFiles(hashesFile: File, provisionFile: File, directoryPath: Path): Pair { +fun mergePangoLineageFiles( + outputDirectory: Path, + previousAllPangoLineagesFile: AllPangoLineagesFile, + newPangoLineagesFile: AllPangoLineagesFile +): AllPangoLineagesFile { + val previousAllPangoLineages = previousAllPangoLineagesFile.read() + val newPangoLineages = newPangoLineagesFile.read() + val allPangoLineages = previousAllPangoLineages + newPangoLineages + + val outputFile = AllPangoLineagesFile(directory = outputDirectory) + outputFile.write(allPangoLineages) + return outputFile +} + +private fun moveFinalFiles( + hashesFile: File, + provisionFile: File, + allPangoLineagesFile: AllPangoLineagesFile, + directoryPath: Path +): Pair { val zoneId = ZoneId.systemDefault() val newDataVersion = Instant.now().atZone(zoneId).toEpochSecond() + val dataVersionPath = directoryPath.resolve(newDataVersion.toString()) + Files.createDirectories(dataVersionPath) + val finalHashesFile = File( "provision.$newDataVersion.hashes", - directoryPath, + dataVersionPath, hashesFile.sorted, hashesFile.type, hashesFile.compression ) val finalProvisionFile = File( "provision.$newDataVersion", - directoryPath, + dataVersionPath, provisionFile.sorted, provisionFile.type, provisionFile.compression ) - renameFile(hashesFile.path, finalHashesFile.path) - renameFile(provisionFile.path, finalProvisionFile.path) + val finalPangoLineagesFile = AllPangoLineagesFile( + dataVersion = newDataVersion.toString(), + directory = dataVersionPath + ) + + renameFile(oldPath = hashesFile.path, newPath = finalHashesFile.path) + renameFile(oldPath = provisionFile.path, newPath = finalProvisionFile.path) + renameFile(oldPath = allPangoLineagesFile.path, newPath = finalPangoLineagesFile.path) return Pair(finalHashesFile, finalProvisionFile) } diff --git a/src/main/kotlin/org/genspectrum/ingest/workflow/SC2NextstrainOpenWorkflow.kt b/src/main/kotlin/org/genspectrum/ingest/workflow/SC2NextstrainOpenWorkflow.kt index 5d49b27..425f983 100644 --- a/src/main/kotlin/org/genspectrum/ingest/workflow/SC2NextstrainOpenWorkflow.kt +++ b/src/main/kotlin/org/genspectrum/ingest/workflow/SC2NextstrainOpenWorkflow.kt @@ -1,5 +1,6 @@ package org.genspectrum.ingest.workflow +import org.genspectrum.ingest.file.AllPangoLineagesFile import org.genspectrum.ingest.file.Compression import org.genspectrum.ingest.file.File import org.genspectrum.ingest.file.FileType @@ -20,7 +21,8 @@ import java.time.ZoneId fun runSC2NextstrainOpenWorkflow(workdir: Path) { val fromSourcePath = workdir.resolve("01_from_source") Files.createDirectories(fromSourcePath) - val sourceFiles = runParallel(OpenFiles.entries.map { { it to downloadFromNextstrain(it, fromSourcePath) } }, 3).toMap() + val sourceFiles = runParallel(OpenFiles.entries.map { { it to downloadFromNextstrain(it, fromSourcePath) } }, 3) + .toMap() println("${LocalDateTime.now()}: Finished downloading from Nextstrain") @@ -38,15 +40,16 @@ fun runSC2NextstrainOpenWorkflow(workdir: Path) { val joinedPath = workdir.resolve("04_joined_and_cleaned") Files.createDirectories(joinedPath) - val joinedFile = joinFiles(sortedFiles, joinedPath) + val (joinedFile, allPangoLineagesFile) = joinFiles(sortedFiles, joinedPath) println("${LocalDateTime.now()}: Finished joinFiles") val finalDestinationPath = workdir.resolve("00_archive") Files.createDirectories(finalDestinationPath) - val finalProvisionFile = moveFinalFile( - joinedFile, - finalDestinationPath + val finalProvisionFile = moveFinalFiles( + provisionFile = joinedFile, + allPangoLineagesFile = allPangoLineagesFile, + directoryPath = finalDestinationPath ) println("Final output: ${finalProvisionFile.path}") @@ -74,7 +77,7 @@ private enum class OpenFiles { private fun downloadFromNextstrain(file: OpenFiles, outputDirectory: Path): File { val tsvTemplate = File("-", outputDirectory, false, FileType.TSV, Compression.ZSTD) val fastaTemplate = File("-", outputDirectory, false, FileType.FASTA, Compression.ZSTD) - val outputFile = when(file) { + val outputFile = when (file) { OpenFiles.METADATA -> tsvTemplate.copy(name = "metadata") OpenFiles.NEXTCLADE -> tsvTemplate.copy(name = "nextclade") OpenFiles.SEQUENCES -> fastaTemplate.copy(name = "sequences") @@ -143,13 +146,13 @@ private fun sortNdjsonFiles( private fun joinFiles( sortedFiles: Map, joinedPath: Path -): File { +): Pair { return joinSC2NextstrainOpenData( - sortedFiles[OpenFiles.METADATA]!!, - sortedFiles[OpenFiles.NEXTCLADE]!!, - sortedFiles[OpenFiles.SEQUENCES]!!, - sortedFiles[OpenFiles.ALIGNED]!!, - listOf( + sortedMetadataFile = sortedFiles[OpenFiles.METADATA]!!, + sortedNextcladeFile = sortedFiles[OpenFiles.NEXTCLADE]!!, + sortedSequencesFile = sortedFiles[OpenFiles.SEQUENCES]!!, + sortedAlignedFile = sortedFiles[OpenFiles.ALIGNED]!!, + sortedTranslationFiles = listOf( "E" to sortedFiles[OpenFiles.TRANSLATION_E]!!, "M" to sortedFiles[OpenFiles.TRANSLATION_M]!!, "N" to sortedFiles[OpenFiles.TRANSLATION_N]!!, @@ -163,21 +166,37 @@ private fun joinFiles( "ORF9b" to sortedFiles[OpenFiles.TRANSLATION_ORF9b]!!, "S" to sortedFiles[OpenFiles.TRANSLATION_S]!! ), - joinedPath, - "processed" + outputDirectory = joinedPath, + outputName = "processed" ) } -private fun moveFinalFile(provisionFile: File, directoryPath: Path): File { +private fun moveFinalFiles(provisionFile: File, allPangoLineagesFile: AllPangoLineagesFile, directoryPath: Path): File { val zoneId = ZoneId.systemDefault() val newDataVersion = Instant.now().atZone(zoneId).toEpochSecond() + val dataVersionPath = directoryPath.resolve(newDataVersion.toString()) + Files.createDirectories(dataVersionPath) + val finalProvisionFile = File( "provision.$newDataVersion", - directoryPath, + dataVersionPath, provisionFile.sorted, provisionFile.type, provisionFile.compression ) - renameFile(provisionFile.path, finalProvisionFile.path) + renameFile( + oldPath = provisionFile.path, + newPath = finalProvisionFile.path + ) + + val finalPangoLineagesFile = AllPangoLineagesFile( + dataVersion = newDataVersion.toString(), + directory = dataVersionPath + ) + renameFile( + oldPath = allPangoLineagesFile.path, + newPath = finalPangoLineagesFile.path + ) + return finalProvisionFile }