From 1e70d5041eed4084f46966a2fbe36c9ba56ce30a Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Fri, 6 Dec 2024 17:38:14 +0100 Subject: [PATCH] perf(silo-prepro): speed up by using more efficient jq command (#3396) It turned out that `jq -c '.'` was rate-limiting and causing significant extra processing time. The new command has been tested for correctness and performance, and it still validates the input. See https://loculus.slack.com/archives/C05G172HL6L/p1733500150097089?thread_ts=1733478069.696049&cid=C05G172HL6L --- kubernetes/loculus/silo_import_job.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/loculus/silo_import_job.sh b/kubernetes/loculus/silo_import_job.sh index c4c30396f7..9cf3b43613 100755 --- a/kubernetes/loculus/silo_import_job.sh +++ b/kubernetes/loculus/silo_import_job.sh @@ -109,7 +109,7 @@ download_data() { echo "Response should contain a total of : $expected_record_count records" # jq validates each individual json object, to catch truncated lines - true_record_count=$(zstd -d -c "$new_input_data_path" | jq -c . | wc -l | tr -d '[:space:]') + true_record_count=$(zstd -d -c "$new_input_data_path" | jq -n 'reduce inputs as $item (0; . + 1)' | tr -d '[:space:]') echo "Response contained a total of : $true_record_count records" if [ "$true_record_count" -ne "$expected_record_count" ]; then