Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] WISE Sequences instance test #3330

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions kubernetes/loculus/silo_import_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,44 @@ download_data() {
echo
}

#######################################
# Build the SILO input file from the S3 objects referenced by the released data.
#
# jq reads the `.metadata.s3Link` value of every record in
# "$new_input_data_path" (input as served by
# https://backend-wise-seqs.loculus.org/test/get-released-data). Each link is
# downloaded with `aws s3 cp` and its content is appended, in input order, to
# "$silo_input_data_path".
#
# NOTE(review): jq is run directly on "$new_input_data_path"; if that file is
# zstd-compressed it has to be decompressed first - confirm against the caller.
#
# Globals:
#   new_input_data_path    (read)    file holding the released-data records
#   silo_input_data_path   (written) created if missing, then appended to
#   AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION (read)
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   0 when every referenced object was appended; 1 as soon as configuring the
#   AWS CLI, extracting the links, a download or an append fails.
#######################################
extract_short_read_files_from_s3() {
  local links_file download link
  local status=0

  # The deployment provides the key id as AWS_ACCESS_KEY, which is not one of
  # the variable names the AWS CLI reads by itself, so the credentials are
  # written to the CLI configuration explicitly.
  aws configure set aws_access_key_id "$AWS_ACCESS_KEY" || return 1
  aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY" || return 1
  aws configure set region "$AWS_DEFAULT_REGION" || return 1

  # Unpredictable scratch files instead of a fixed name in the working
  # directory; both are removed on every exit path below.
  links_file=$(mktemp) || return 1
  download=$(mktemp) || {
    rm -f -- "$links_file"
    return 1
  }

  # Extract S3 links from the metadata
  if ! jq -r '.metadata.s3Link' "$new_input_data_path" > "$links_file"; then
    echo "Failed to extract S3 links from $new_input_data_path" >&2
    rm -f -- "$links_file" "$download"
    return 1
  fi

  if ! touch "$silo_input_data_path"; then
    echo "Cannot create $silo_input_data_path" >&2
    rm -f -- "$links_file" "$download"
    return 1
  fi

  # Loop through each S3 link and append the content to the output file.
  # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r link || [[ -n "$link" ]]; do
    # jq -r prints the literal "null" for records without an s3Link.
    if [[ -z "$link" || "$link" == "null" ]]; then
      echo "Skipping record without s3Link" >&2
      continue
    fi

    # Download the ndjson file from S3; stdin is detached so the command can
    # never consume the list of links the loop is reading.
    if ! aws s3 cp "$link" "$download" < /dev/null; then
      echo "Failed to download $link" >&2
      status=1
      break
    fi

    # Append the content to the output file
    if ! cat -- "$download" >> "$silo_input_data_path"; then
      echo "Failed to append $link to $silo_input_data_path" >&2
      status=1
      break
    fi
  done < "$links_file"

  # Clean up the temporary files
  rm -f -- "$links_file" "$download"
  return "$status"
}

preprocessing() {
echo "Starting preprocessing"

rm -f "$silo_input_data_path"

# This is necessary because the silo preprocessing is configured to expect the input data
# at /preprocessing/input/data.ndjson.zst
cp "$new_input_data_path" "$silo_input_data_path"
# take data from $new_input_data_path, get all data from the S3 buckets (referenced in column s3Link)
# and put it into $silo_input_data_path
extract_short_read_files_from_s3

set +e
time /app/siloApi --preprocessing
Expand Down
65 changes: 43 additions & 22 deletions kubernetes/loculus/templates/_siloDatabaseConfig.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,48 @@

{{- define "loculus.siloDatabaseConfig" }}
schema:
{{- $segments := .nucleotideSequences | default (list "main")}}
{{- $is_segmented := gt (len $segments) 1 }}
instanceName: {{ .organismName }}
opennessLevel: OPEN
metadata:
{{- range (concat .commonMetadata .metadata) }}
{{- $currentItem := . }}
{{- if and $is_segmented .perSegment }}
{{- range $segment := $segments }}
{{- with $currentItem }}
{{- include "loculus.siloDatabaseShared" . | nindent 4 }}
name: {{ printf "%s_%s" .name $segment | quote}}
{{- end }}
{{- end }}
{{- else }}
{{- include "loculus.siloDatabaseShared" . | nindent 4 }}
name: {{ .name }}
{{- end }}
{{- end }}
primaryKey: accessionVersion
{{ if .silo}}
{{- .silo | toYaml | nindent 2 }}
{{ end }}
- name: sample_id
type: string
generateIndex: false
- name: batch_id
type: string
generateIndex: false
- name: sequencing_well_position
type: string
generateIndex: false
- name: location_code
type: string
generateIndex: false
- name: sampling_date
type: date
generateIndex: false
- name: sequencing_date
type: string
generateIndex: false
- name: flow_cell_serial_number
type: string
generateIndex: false
- name: read_length
type: int
generateIndex: false
- name: primer_protocol
type: string
generateIndex: false
- name: location_name
type: string
generateIndex: false
- name: primer_protocol_name
type: string
generateIndex: false
- name: nextclade_reference
type: string
generateIndex: false
- name: read_id
type: string
generateIndex: false
opennessLevel: OPEN
instanceName: wise-sarsCoV2
features: []
primaryKey: read_id
{{- end }}
9 changes: 9 additions & 0 deletions kubernetes/loculus/templates/lapis-silo-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ spec:
{{- else }}
value: "http://loculus-backend-service:8079/{{ $key }}"
{{- end }}
- name: AWS_DEFAULT_REGION
value: eu-central-1
- name: AWS_ACCESS_KEY
value: AKIA6AB5EFK3N6KDJJ52
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: wise-short-read-sequence-bucket
key: secret-access-key
volumeMounts:
- name: lapis-silo-database-config-processed
mountPath: /preprocessing/input/reference_genomes.json
Expand Down
67 changes: 37 additions & 30 deletions kubernetes/loculus/values_preview_server.yaml
Original file line number Diff line number Diff line change
@@ -1,35 +1,42 @@
name: 'WISE'
organisms:
test:
schema:
organismName: 'Test'
metadata:
- name: s3Link
type: string
displayName: S3 link
generateIndex: false
website:
tableColumns:
- s3Link
defaultOrder: ascending
defaultOrderBy: s3Link
preprocessing:
- version: 1
image: ghcr.io/loculus-project/preprocessing-nextclade
args:
- 'prepro'
configFile:
log_level: DEBUG
genes: []
batch_size: 100
referenceGenomes:
nucleotideSequences:
- name: 'main'
sequence: 'NNN' # We are not performing alignment here, so this sequence doesn't matter
genes: []
createTestAccounts: true
secrets:
smtp-password:
type: sealedsecret
clusterWide: "true"
encryptedData:
secretKey: AgCoDKVhu9eUF90E2CXJmF4CVFDHe8zHyQqQLNDaJ4dlCkm2QHvEEBL8WXABAMlfJqIv/W/rHmz2QfhpZRNYGr7r+Q4wk5nvi5bh+HAmAecglsI6UMqTj+hNRVDrgwZ1mevzJKdx2dDQ3kOk6hP8s/EmMsabmPEavCVse/3dE88VNop9HAPlbvuTk2hbFZhavDOmarVxv2vSuPSGOjEch2YwuHai+3LK2F4gVRk5ziDzkc7WhjGlczAW/IKqPo/Y7ecKeopJN+nZp4lMzNHzaiYFX8eegzRnXW0aB1Cop91mLx2PKUGiM4jLmUb/m2tWaLvUPmmrNeblI6yxHoHbu1McPJlQAepUekeoeypxuTxgCDkdTG3CPhpPQjg3XUpdrtjmjuxdljHEhIbmP/8sX+v9I5Px88msQkcUHEkXibjsWmSNChRCN40e61B4VScvEB84SemrEAFyApMbbru2H0G2kKwPMPBqN2f71rDUdqiryh7yMCwZANxBCt6XNxwo7ndss0NNBcE+qLfthHX0mFr9Sn8p09tC8Qo8pg9lKhseT2o/1VEdvXQONi2zy/lo/KNijhrn+Kbhc2Ea3bcFosAsSGxVgbZ1penGmjrYW7XVgRxeQ171oc/Xiy7SBc8wSqfBOamnYzIQkpHfpBnE5KoDzFdkUsQPwA+32FfsZt9PgXiGyVf9dSfO+yQJXFO+dhpWQMk4UfG0wEUB2TyzpI+/HhVhRAz6Bpwdj5YJc+IPTw==
orcid:
type: sealedsecret
clusterWide: "true"
encryptedData:
orcidSecret: AgBwVSGo2Y9TPdkGd78BikI/Nm+4SI3TS5AHQu+h4s0qb5Ydc1m7MJLd5ZwoXhkvWWfm2kt8K0Tj5eoAsFSVktA5EIZMWZOR7DugU5Z5GLgGJogpgrzN+dR7AEsyxwfXGzF2RISqsBDjRBzzfiF0ZhpyjUOCab2fn9iH3HmJ9qPK6xpzOcS47jjfg+5ofv8BxdGPAKiOpLUgFAZO1xTKZXTnbdVvyRUORJVZJffwiZ3RG3EO+eUKgHIf5YeG0b52OzDPfCzgNDGV/ryY+jPix6c+T1/g2qnUpLzECYdaqc5R6ZWtNt42VELxU3kpmwNyswraec+3IxkufXZTyTpSxikpHDKK9Zcz9woh/+R4BUKjjO1zpHWFPV5jY+hzY+rxfHTP8VX0txyMsBVEafDRL0R3YYz8rUBzXC4C6IFJLK0QmJKD7GHQM2Ue6SjGkr49k1ZPSGCGAJ16IZCe3l3lVhzncobpEBPx4qYJhD8oo+ZIt7YIZ/Egmkza+DFAAWmtSCEztL85Y3pNN5g3SVn6urSKZbNUeI6rC4JqwS6Uk2370VrvEEufml9fXOdE5ATO5C+zHj3AWd0zancDsk4nQegbE5fjLblGfQ1g/ID2OHCFaw7FMBItrE+R5VkTwN5nuszAnWXc191T7PS3mWrYBDrjbqHOr/P1X6BZWT0Q7bqR8y5d2JUX5qNaHI4s4WN43Z/m0oPRMZC0WHn/ebtPPukoKRWnMWqN8MTwnB9oHV/84egrHUY=
ingest-ncbi:
type: sealedsecret
clusterWide: "true"
encryptedData:
api-key: AgAwbNNXOtyIXqZzr0SswXwp7Q48HgaafxMXvGByPpJClR+L+jFFJHgHrVnDbI+Gt/YGlETOeYhlS7qHD/erANZForlM5LegtG+0u9UunZPXtfoRmYAeCqie7adIuE6gfH7Ubf+DsGxI0DNCwaM0xNcNDohBgIwNZpigLrpHWlA/Yq95k/2mG/qFT7AUltSpPDoQPKfor0jeuVSGIiQQ9lsaUchJK1YDfLHWhKy+/ymNE+1rEerfp4wx+kMGMV4zDPbYtV9OTxne+qyddV6vBbWB2+8otHIneGs3zE6YVOAVGMvwh2w+mSJverPbzX4B705ovDsrKARbLtrVUwnvlDeqtbG3TUlI9OdcoN+PT+Lk3NyGca8jOOCQp3aU3RTi8Z9kyUa69v6QiiU3i1q8sn6F4DgRuwFvXOSPrrxvvq25RB+XXctP0y/BrPM5wiLI4l9PiHO6eEKq/ZO/df/ab493Dd8DNDm1eu+qsPb7Ju+BzorkX7CoNVTdnKZ2gPmK+k1GTLRm/til+E4MwTMo5xIjuf5lWPVgbgVeUvN5tFcz/YniS3MX3RHY1OzWG/XnsNM8b0hDxI/Rw0omqM5S9kkN+NjPYjxRbfhtuLazddDiu/RAU6zc8L4IVNXzEnmKZv3bIAH7wIfmli4XjRmtr9isrshUelOUA1OsPrhIpey+NheFShPxNVC2IZoCC9OlQ3vKzUuXL7TI+sw3/8vwVKlm3ok4OO0+AyEccnlIAx5Kygd5fkJ3txzpMdo0jvk7c8Q=
slack-notifications:
type: sealedsecret
clusterWide: "true"
encryptedData:
slack-hook: AgCLEhTwqKL278AbNwpqdRqeg6naNrQJWx3q8Zp+ecXjMaaLLBi1C3uQlt0WKioy+pUAhfe7MowXKLM55hLyh/InZ9o3yLi9T/5cVRXcEXCvODWmbhr94XhcYI3KnVngZLcNl9Gr4LR+bz8A0sl/rCijNYrqYeDCLI6XUmB8mlKnHPqrF6CXC8Y5xyDbNYJONx6DAugq+gQcZYJ101vUOtu2LTD8awCsdF5FOzdcZ344Vxn/xwDlbbvUEKEQp5A5aMfx95zpa+rV/sQYHeCb7Dy1oWqpOrZP/rPJ4K9VGRx5QA9o1Qi0Pl3alRUqiPUR6pbMxbX8u0kCN6drFKxXDAMd+SadsppDGbNQNeQP5cphNJwYxL/0MIgXxJTrQpcynJK1FULX9W+1GtXg+tX4hRCtZL5hnCxPw12QcNOL2N8SJLGEe8gK8QtALpu/DH/trVJ3rMDRkZhhWCvtb9Zt9EuvUhxs07sE9DZ7rEAqzx51v4vr9CzmxkHEiAhrC3Se3CxnSspBP1/X9SvZ+GXn+ZuXzN+KivWCnim0RwhRD75Y7ZP8ct/iu3ilb6b7Pl+KOgOkA3In7c4yVAZwXMTuF6aP2/8inPx5Kk6p8ks7c5XeSIDFOH7C6EJuD7E69Fz6ijaF5bJN8NWBVxkE88xq4un5e7dcuqjIqaQ8kLDX1g8aXiklr6qD29q9H/m+gtd6lxcMb53bMl0EI+GHYTBZn3w+T1PxlY1uoBfNzt1efjXJD7AWlTDxze+5PIYgiFVAOdfv3ey5HJMMw1w5MTLMW44hkpt4MCaHvREBTXq5sxBJe10=
slack-token: AgDFAP+F7ze+TY+yK71JPOSkKwIpnBXh7WWweyZZwYm1/CwmKS1iQ4O1p54sTrHrMC737Si27MfTVvrEZRj9aAwq0fQJZt5yldpfMaTccZJHj3rQ5kczyuCMYVcFzmyywAr4DXsUCscjrOLgwTiiK/d8jSFkKyXupC7bB2EcZZUFGpFAj7PiSeJuEIKQptkSjeaasCQXkdkuoKczM38GCu502pIxaJ9kIXVrereyKUpsU/uFDgj0IcKqfiT0M2FGs8Ujl3CXpMxcOLSuxVyCnje27GHpsYrd/uEKX3yl1rB1rV6Z+gMzlO9DDPW/XJl6TY5snOxdaCv7uNzAGwgb/rlaZ5fnrNqsOoucJvh35yxMcKDsx/hY72H7PRnzNpLeqZ/2zAub+fQP/o1edjxaYHaSltS0lwzCivIPOHv66/dDOD9v0LncWkCWGXXOp8Fsz9OrF+NcAZjIY/hDzwy+JRDA7Wtn7jlkA07WFpobkyyKfN+bNT1664wS2IMDRYA6+MbkA99v+ScVsEVlxJqn+PiaDtexQAfQcyN2NPbQe+9xMIQavvrcnwxdMwAP8DBME3vhdrD8yDRJ9GN+ygtZ3dB4FC4iW20ETyzlAqJ/H9M2/ed1O3VyraFDCV3PmSBdq7Rinj1Zg6D+IEp54HtwWiu5s7iNeKW37cSSloRUaojWQ1BFPB6msfP/O5yqREdGrWVhmChWvSDMw2LxmnZbCw3mVdMr9B1XeK76GHa4kOhVOcEqzl0X61NILYDecgLP6HVJZhB+NpHJfOY=
slack-channel-id: AgDDxu0CGC/AFqOHMeHjV3KUGPoY0QAmGoqtiuxPPaP+GWOuz4xZbBP6Fymh0XbHFHMB9PtWowrMbvFOkfTncRiKqyK6HIU7GU2GtCla2WTZQNRAW82gcJnxtbtm4KymN3LyTj27qBHkQHNZ90qyBGdsJUYwmBW8XB6wNoz10KDyQSvYrYwQEe0onCgnislxslATPB6CQFWHMghKYoaHHAECpf6sN1kS9rvNq/1e7gG8s++lgF7qSZgjQP77Q6kMoiMS5krX03pPKZXsc69mI8GLIvhalsUfg2BO7swa3FgCbjecp32lW6KuRCfzeMmj2NWpWTLcJSPYPJN0sOkhRFOWUlrylztG82l7dGl5JofJWQVopF+qLTAR6LxHaujFHQ+Y2x4/5tBmurwOT6xXknQjXqYs2qbG1OriivJrjRwhRoZWE2vR5YlE+Zz8S9/vYw0JnKibnB1YvbdBBnpllyXYjTJa4818W05DvJ70qLeILMYcEkY4/jv5xqNdGuwp34gZLcW6+qztHfQVXRf0uXM1B7BPNH0aNMBNN7D0m1vWkTNgKC+V2PiEH9nTVhwSF+MlG/rmR2+v84kWhMP3qdX8/28GnBnvS9rryzuc3e2mHBIiIGHwW+SQjlmdq0jDTtuvFtU8I7ncB5PUe+sYZ0zFrn57blraBG5ntqtZfb+aS3modE+ElmCgBzi8gSQoVxXzmOIfMZRRwAaH0w==
ena-submission:
type: sealedsecret
clusterWide: "true"
encryptedData:
username: AgB/I94iUkgkAX3Tqs3znnIvVqeLuhcznJlMEa1iTMiwfitx1Dw4O8NBehTnnVD1XQ+jfj2x2fGeINpjz61GbCMEFz8rhQ9gGRr4eFiWLWSsThUX+cTrRxPdGDxCZkFhr959agKMd2ks91eZe+99RoSq/39WthVvpFaITNr6PLIVH1EBycCkC0i6mIS0PIU9rZwigfw1Q/qnOgPOi9usv8KU31zwBY/f68FzplrCMNQ8rrO9mmfQs1q8Ac3e6Vj6GVaWeFSc1vc/j+n0EbQJKlqZnX6VrESY45u1lHbqQpKyUiejQ+T36jJIA2573YMJFi7GXfxpvWuLxNmgCOilVqdrNcpINs0ajokn2AHPNdTzZiDOtrvt2uBUinsUGrtO5olBz6wfWwzx0jMtdH2wjm6dwYlMONwOh9REyISk40XAL0VgpTQlZRFCs+NdhLRyLW5gKtOatmuAg4yx6d69PHSBwjsWjHSzf7vrcr/MBzpQSzEGEsqRB0NMXaTGj79VGQWG7a2ToQuh0TIUgnvE6Ffb9uj0bcEXTrLZgtJsbwLuKfCJQy6JXLmpWYHngAAQRVuDhW1pr9xhSgN9rDUrk5LGvDvFOxvo+9BmUqYApEGyO3jM3cOMiDLqh3x34nqwiZ+1p8W603gPMyUIR22WevoCIiYmiwFXfnsBmvVdw880xVtTc8XNiQo5gzo0ycW1RCkoYs0u8LtWNCF6Dg==
password: AgA8KuXDI0SykCoyS1veoKaMXjA/Vv3LHGBFaRyks2KJvRJjMm8QapeMzxCxR0ln2poqnC0BZ97tqvj5ZmE3GsMjdw0vYLl15uQXcu8AdnmKhud+7c0ks6IHVo14jBPtR7/wga/l/3Ot54P7xiwDLdrQkRDsX2X7qw5q6QXM81poDcMlAE6QQArCusnm+64sPzM2FU0QGZ1awZeXvjB20dR0/x1HqiVrrxLAms+XN3fARrviXVQz6jlAGMs/L6z70BngcnNj3d4eYxh5ZIG4l+L4EyhuwW67jn9l/Zy0m0bhQZMImwmuGIsAyQGwnrGRz4UG/W0TauW3hFF7GF59qYGNrjv97iwk/TZ32dKID9wUZBaiVGSmqzXlFLmdNzEH6dQ2is/juT+75ej/nMqgMkdT9tn0yPol1jZCx/xkdfsHafKOkTxPk/WF9IlY/c+9BiajvgQwCTZILYxI5hJft96xd+RMiwQtwyccBwXI5YccVq2cDZLZ9fTkreot3YRDX4R9PgK+sBhHYb3tYmnxJ4FU72Yyr1ezcP30ArXfhwcn2UBQ4RfgUEFYJajSEgB0emuc3pr1F7jKMopwgX06zZOlrhAWOSSn4s2Z8JKG2o1DELJslBhT6/i+DAKxFWe2RiIf0IZqFOlPmU7ztfmDWBrK+RZE5fbUViPb3YQDgyITYVKFcuBeUl3vHi0DEaZ4Eal1R7lfQdY/8qDnXE3PZ2U=
disableIngest: true
disableEnaSubmission: true
reduceResourceRequest: true
previewDocs: true
previewDocs: false
robotsNoindexHeader: true
disableEnaSubmission: false
additionalHeadHTML: '<script defer data-domain="loculus.org" src="https://plausible.io/js/script.js"></script>'
wise-short-read-sequence-bucket:
type: sealedsecret
clusterWide: "true"
data:
secret-access-key: 'AgB9YyY/cXTfmi44zSfySs2mRjEeZfaye4lZlNL//mMuc7kaQZcEaBQ2N0C4UBQBN3zz61T+3YrR64PSMcRC97GaEGj/fMrxc8WUW9AMzLZEHIXZRmrM+BChPCA/MGoN/ekUpBuWZnTlh48fGxQg4GlCHFrnq3fpztoHiSrmED6Q7FuWOliuWnRqObmyh7xs+6AwGcs0NRhH5yQVAjwZlL9/m8LN4Cjr1mA7yedYuSYd5Ztdy5LMHOukWH9tD+NKdH8X/BfIP6axQCUA4wUiiGWI+mXfBC1dXDaueblu1zTejloLJ3CpS9BGuzS2uxC5ac9xVifA6hljyWD8oPRQ7Rzi7Uv3gdMTUFXZBCLMLK9YQntqhqnvkroWdg4kn7J4VOKbLiHz6JiLCbiELPhpbEuvZFGQl4psANSg1ODOuaibcdMiwfJc1vnSCEzQ27ura/ubZ2v6QcEz5c1jDasG26e+n5xSHtzn1aKHPaRdBTQJa5F5TPYbFiIZbWY9+1mabNxRSVCeAusmeXCyDDI7z4NqxrLBQW5NDGZx1vvDYrPiAwcjjbNX+y7P7apPsvi23n4MNWbd5WCZ6ETyG7pirNsGmkS8kNvjlUlXdkcCF5tAvGwvfPtndxeizul4sxg98eKXSOzeiMbgmpAry42OPSgF7HbHMvurWCltIkdrutb54TIstei1qjV5MBmEXVGSM+m4Zzj1u6YoiC/DV6grnuaCE8ZCf2nWHFtiy26oBo7+7NLsHE7uROfR'
Loading