Skip to content

Commit

Permalink
feat: Add lineage definition file to SILO import config (#3434)
Browse files Browse the repository at this point in the history
* Add TODOs

* Add dummy lineage definitions bits

* WIP

* Add env var

* Remove outdated docs

* .

* .

* progress

* write config file (only with the setting we need, I hope that's enough)

* move file defintions up in the script

* Add a bit of docs

* fix log msg

* Add dummy lineage defintion file

* fix lineage and add correct link

* remove myLineage but

* fix config writing

* fix deleting

* fix lineage definition file path

* fix lineage definition file path now?

* copy for no lineage defs

* Add docs

* fix typo

* generate lineage indexes for lineage system fields in silo

* fix

* test fix?

* print

* fix

* update lineage def

* Update lineage in test query to fix the test

* Add comment; remove print

* Update docs/src/content/docs/reference/helm-chart-config.mdx

Co-authored-by: Theo Sanderson <[email protected]>

* fix the quoted keys

---------

Co-authored-by: Theo Sanderson <[email protected]>
  • Loading branch information
fhennig and theosanderson authored Dec 17, 2024
1 parent 701a19b commit 0d9d597
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 6 deletions.
54 changes: 54 additions & 0 deletions docs/src/content/docs/reference/helm-chart-config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,51 @@ For production environments, these should always be set to false. Instead, exter
<td></td>
<td>An object where the keys are the organism IDs and values are an [Organism (type)](#organism-type)</td>
</tr>
<tr>
<td>`lineageSystemDefinitions`</td>
<td>Object</td>
<td></td>
<td>An object where the keys are the lineage system names and the values map each pipeline version to the URL of a lineage system definition file (see [Lineage system definitions](#lineage-system-definitions))</td>

</tr>
</tbody>
</table>

### Lineage system definitions

Here's an example of a `lineageSystemDefinitions` section:

```yaml
lineageSystemDefinitions:
pangoLineage: # Lineage name to use in metadata fields
1: https://example.org/lineage_definitions_v1.yaml # Definition per pipeline version
2: https://example.org/lineage_definitions_v2.yaml
myLineage:
1: ...
```
<table>
<thead>
<tr>
<th className='w-48'>Field</th>
<th>Type</th>
<th>Default</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>`lineageSystemDefinitions.<name>`</td>
<td>Object</td>
<td></td>
<td>A map from pipeline versions to file URLs.</td>
</tr>
<tr>
<td>`lineageSystemDefinitions.<name>.<pipelineVersion>`</td>
<td>`String`</td>
<td></td>
<td>The URL to the lineage definition file for that lineage system and that pipeline version.</td>
</tr>
</tbody>
</table>

Expand Down Expand Up @@ -818,6 +863,15 @@ Each organism object has the following fields:
[here](https://github.com/loculus-project/loculus/blob/main/preprocessing/nextclade/README.md#custom-preprocessing-functions).
</td>
</tr>
<tr>
<td>`lineageSystem`</td>
<td>String</td>
<td></td>
<td>
Use this on string fields that contain lineages, if you want to enable searches that can include sublineages.
The value needs to be a lineage system that is defined under the `lineageSystemDefinitions` key.
</td>
</tr>
</tbody>
</table>

Expand Down
48 changes: 47 additions & 1 deletion kubernetes/loculus/silo_import_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ set -e
# Default values
root_dir=""
last_etag=""
lineage_definition_file=/preprocessing/input/lineage_definitions.yaml
preprocessing_config_file=preprocessing_config.yaml
preprocessing_config_file_merged=preprocessing_config_merged.yaml

# Parse command-line arguments
usage() {
Expand Down Expand Up @@ -148,6 +151,48 @@ download_data() {
echo
}

# Generate the merged preprocessing config ($preprocessing_config_file_merged)
# that is passed to SILO, and fetch the lineage definition file it refers to.
#
# Inputs (globals / environment):
#   LINEAGE_DEFINITIONS               JSON object mapping pipeline version -> URL
#                                     of a lineage definition file. May be unset
#                                     or empty, in which case the base config is
#                                     copied unchanged.
#   new_input_data_path               zstd-compressed JSON records; each record's
#                                     `.metadata.pipelineVersion` is inspected.
#   preprocessing_config_file         base config that is copied.
#   preprocessing_config_file_merged  output config (recreated on every call).
#   lineage_definition_file           download target (recreated on every call).
#
# Exits the script with status 1 if the data contains more than one pipeline
# version, if no URL is configured for the version found, or if the download
# fails (including HTTP error responses).
prepare_preprocessing_config() {
    # Remove leftovers from a previous run so stale files are never reused.
    rm -f "$lineage_definition_file" "$preprocessing_config_file_merged"

    if [[ -z "${LINEAGE_DEFINITIONS:-}" ]]; then
        echo "No LINEAGE_DEFINITIONS given, nothing to configure;"
        cp "$preprocessing_config_file" "$preprocessing_config_file_merged"
        return
    fi

    # One line per distinct pipeline version present in the input data.
    pipelineVersion=$(zstd -d -c "$new_input_data_path" | jq -r '.metadata.pipelineVersion' | sort -u)

    if [[ -z "$pipelineVersion" ]]; then
        echo "No pipeline version found. Writing empty lineage definition file."
        touch "$lineage_definition_file"
    elif [[ $(echo "$pipelineVersion" | wc -l) -eq 1 ]]; then
        echo "Single pipeline version: $pipelineVersion"

        # Get the URL for the version from LINEAGE_DEFINITIONS
        lineage_url=$(echo "$LINEAGE_DEFINITIONS" | jq -r --arg version "$pipelineVersion" '.[$version]')
        if [[ -z "$lineage_url" || "$lineage_url" == "null" ]]; then
            echo "Error: No URL defined for pipeline version $pipelineVersion."
            exit 1
        fi

        # Download the file from the URL.
        # --fail: treat HTTP errors (404, 500, ...) as failures instead of
        #         silently saving the error page as the lineage definition file.
        # --location: follow redirects rather than saving the redirect response.
        # --show-error: keep the failure reason visible despite --silent.
        if ! curl --fail --silent --show-error --location -o "$lineage_definition_file" "$lineage_url"; then
            echo "Error: Failed to download file from $lineage_url."
            exit 1
        fi
    else
        echo "Multiple pipeline versions in data to import: $pipelineVersion"
        exit 1
    fi

    # the lineage definition filename needs to be set in the config
    # Once https://github.com/GenSpectrum/LAPIS-SILO/pull/633 is merged, it can be done as a commandline arg
    cp "$preprocessing_config_file" "$preprocessing_config_file_merged"
    # The leading newline keeps the appended key on its own line even when the
    # base config does not end with a newline.
    printf '\nlineageDefinitionsFilename: "%s"\n' "$lineage_definition_file" >> "$preprocessing_config_file_merged"
}

preprocessing() {
echo "Starting preprocessing"

Expand All @@ -158,7 +203,7 @@ preprocessing() {
cp "$new_input_data_path" "$silo_input_data_path"

set +e
time /app/siloApi --preprocessing
time /app/siloApi --preprocessing --preprocessingConfig=$preprocessing_config_file_merged
exit_code=$?
set -e

Expand Down Expand Up @@ -229,6 +274,7 @@ main() {
# cleanup at start in case we fail later
cleanup_output_data
download_data
prepare_preprocessing_config
preprocessing

echo "done"
Expand Down
19 changes: 19 additions & 0 deletions kubernetes/loculus/templates/_lineage-system-for-organism.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{{- define "loculus.lineageSystemForOrganism" -}}
{{- /*
Renders the name of the lineage system used by an organism.

Input (.): an organism object with a `schema` key. The schema is first passed
through "loculus.patchMetadataSchema"; the `lineageSystem` values of all
entries in the resulting `metadata` list are then collected.

Output: the single distinct lineage system name, or nothing if no metadata
entry sets `lineageSystem`. Rendering fails if more than one distinct lineage
system is found.
*/ -}}
{{- $organism := . -}}
{{- $schema := $organism.schema | include "loculus.patchMetadataSchema" | fromYaml }}
{{- $lineageSystems := list }}
{{- /* Collect the lineageSystem of every metadata entry that declares one. */ -}}
{{- range $entry := $schema.metadata }}
{{- if hasKey $entry "lineageSystem" }}
{{- $lineageSystems = append $lineageSystems $entry.lineageSystem }}
{{- end }}
{{- end }}

{{- /* Several fields may share one lineage system, so deduplicate before counting. */ -}}
{{- $uniqueLineageSystems := $lineageSystems | uniq }}
{{- if gt (len $uniqueLineageSystems) 1 }}
{{- fail (printf "Multiple lineage systems found: %v" $uniqueLineageSystems) }}
{{- else if eq (len $uniqueLineageSystems) 0 }}
{{- /*no op*/ -}}
{{- else }}
{{- index $uniqueLineageSystems 0 -}}
{{- end }}
{{- end }}
4 changes: 4 additions & 0 deletions kubernetes/loculus/templates/_siloDatabaseConfig.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
{{- if .enableSubstringSearch }}
lapisAllowsRegexSearch: true
{{- end }}
{{- if .lineageSystem }}
generateIndex: true
generateLineageIndex: true
{{- end }}
{{- end }}


Expand Down
9 changes: 7 additions & 2 deletions kubernetes/loculus/templates/lapis-silo-deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{{- $dockerTag := include "loculus.dockerTag" $.Values }}
{{- $dockerTag := include "loculus.dockerTag" .Values }}
{{- $keycloakTokenUrl := "http://loculus-keycloak-service:8083/realms/loculus/protocol/openid-connect/token" }}

{{- range $key, $_ := (.Values.organisms | default .Values.defaultOrganisms) }}
{{- range $key, $organism := (.Values.organisms | default .Values.defaultOrganisms) }}
{{- $lineageSystem := $organism | include "loculus.lineageSystemForOrganism" }}
---
apiVersion: apps/v1
kind: Deployment
Expand Down Expand Up @@ -84,6 +85,10 @@ spec:
{{- else }}
value: "http://loculus-backend-service:8079/{{ $key }}"
{{- end }}
{{- if $lineageSystem }}
- name: LINEAGE_DEFINITIONS
value: {{ index $.Values.lineageSystemDefinitions $lineageSystem | toJson | quote }}
{{- end }}
volumeMounts:
- name: lapis-silo-database-config-processed
mountPath: /preprocessing/input/reference_genomes.json
Expand Down
4 changes: 4 additions & 0 deletions kubernetes/loculus/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ logo:
url: "/favicon.svg"
width: 100
height: 100
lineageSystemDefinitions:
pangoLineage:
1: https://raw.githubusercontent.com/loculus-project/loculus/refs/heads/lineage-validation/preprocessing/dummy/lineage.yaml
defaultOrganismConfig: &defaultOrganismConfig
schema: &schema
loadSequencesAutomatically: true
Expand Down Expand Up @@ -1321,6 +1324,7 @@ defaultOrganisms:
autocomplete: true
required: true
type: string
lineageSystem: pangoLineage
website:
tableColumns:
- country
Expand Down
10 changes: 10 additions & 0 deletions preprocessing/dummy/lineage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
A.1:
aliases: []
parents: []
A.1.1:
aliases: []
parents:
- A.1
A.2:
aliases: []
parents: []
2 changes: 1 addition & 1 deletion preprocessing/dummy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def main():
etag, unprocessed = fetch_unprocessed_sequences(etag, sequences_to_fetch)
if len(unprocessed) == 0:
if watch_mode:
logging.debug(f"Processed {locally_processed} sequences. Sleeping for 10 seconds.")
logging.debug(f"Processed {locally_processed} sequences. Sleeping for 2 seconds.")
time.sleep(2)
locally_processed = 0
continue
Expand Down
2 changes: 1 addition & 1 deletion website/tests/pages/search/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ test.describe('The search page', () => {

const rowLocator = searchPage.page.locator('tr');
await expect(rowLocator.getByText('2002-12-15')).toBeVisible();
await expect(rowLocator.getByText('B.1.1.7')).toBeVisible();
await expect(rowLocator.getByText('A.1.1')).toBeVisible();

await accessionLink.click();
await expect(searchPage.page.getByText('Amino acid mutations')).toBeVisible({ timeout: 30000 });
Expand Down
2 changes: 1 addition & 1 deletion website/tests/util/preprocessingPipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async function submit(preprocessingOptions: PreprocessingOptions[]) {
region: 'Europe',
country: 'Switzerland',
division: 'Schaffhausen',
pangoLineage: 'B.1.1.7',
pangoLineage: 'A.1.1',
},
...sequenceData,
},
Expand Down

0 comments on commit 0d9d597

Please sign in to comment.