Skip to content

remove max chunk size constant #2075

remove max chunk size constant

remove max chunk size constant #2075

# Workflow header: runs on every push; individual jobs decide whether any
# benchmarking work is actually needed.
name: short-read-mngs viral benchmarks
on: push
# Environment shared by every job and step in this workflow.
env:
  LC_ALL: 'C.UTF-8'
  LANG: 'C.UTF-8'
  DEBIAN_FRONTEND: 'noninteractive'
jobs:
# Cancels still-running earlier runs of this workflow so that only the most
# recent push keeps consuming runners.
cancel-previous:
  runs-on: ubuntu-22.04
  steps:
    # NOTE(review): the version pin of this action was unreadable in the
    # reviewed copy of the file; 0.12.1 is assumed here. Confirm against the
    # repository history before merging.
    - uses: styfle/cancel-workflow-action@0.12.1
      with:
        access_token: ${{ github.token }}
# Reports, via the `has_diff` job output ('true' / 'false'), whether the
# pushed commit touched any directory whose path contains "short-read-mngs".
has-diff:
  runs-on: ubuntu-22.04
  outputs:
    has_diff: ${{ steps.check_for_diff.outputs.has_diff }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Both HEAD and its parent are needed for `git diff HEAD^` below.
        fetch-depth: 2
    - id: check_for_diff
      # only run on wdl dirs with diff
      run: |
        # List the directories of all changed files and look for the workflow
        # name; grep's exit status alone drives the branch.
        if git diff --name-status HEAD^ | cut -f2- | xargs dirname | sort | uniq \
            | grep -q short-read-mngs; then
          has_diff=true
        else
          has_diff=false
        fi
        # Step outputs are published through the $GITHUB_OUTPUT file; the
        # `::set-output` workflow command is deprecated.
        echo "has_diff=${has_diff}" >> "$GITHUB_OUTPUT"
# Builds the workflow image, runs each benchmark sample through the pipeline
# and uploads one harvested-statistics JSON artifact per (sample, settings).
run-samples:
  runs-on: ubuntu-22.04
  needs: has-diff
  # Skip the (expensive) benchmark unless has-diff reported relevant changes.
  if: ${{ needs['has-diff'].outputs.has_diff == 'true' }}
  strategy:
    matrix:
      sample: [idseq_bench_3, idseq_bench_5]
      settings: [default]
  steps:
    - uses: actions/checkout@v4
    - name: docker build
      run: |
        IMAGE_NAME=czid-short-read-mngs-public
        IMAGE_URI="ghcr.io/${GITHUB_REPOSITORY}/${IMAGE_NAME}"
        # Use the published image as a layer cache only if it can be pulled.
        CACHE_FROM=""; docker pull "$IMAGE_URI" && CACHE_FROM="--cache-from $IMAGE_URI"
        # If the cache-assisted build fails, retry once without the cache.
        ./scripts/docker-build.sh workflows/short-read-mngs --tag czid-short-read-mngs $CACHE_FROM \
          || scripts/docker-build.sh workflows/short-read-mngs --tag czid-short-read-mngs
    - name: run sample
      run: |
        pip install -r requirements-dev.txt
        export PATH=$PATH:$HOME/.local/bin
        export MINIWDL__DOWNLOAD_CACHE__GET=true
        export MINIWDL__DOWNLOAD_CACHE__PUT=true
        export MINIWDL__DOWNLOAD_CACHE__DIR="$(mktemp -d --tmpdir miniwdl_download_cache_XXXXXX)"
        # configure miniwdl to auto-delete task working directories, to reduce chance of worker
        # running out of space
        export MINIWDL__FILE_IO__OUTPUT_HARDLINKS=true
        export MINIWDL__FILE_IO__DELETE_WORK=success
        # explicitly block EC2 IMDS endpoint to work around awscli issue:
        # https://github.com/aws/aws-cli/issues/5234#issuecomment-635459464
        # https://github.com/aws/aws-cli/issues/5262
        sudo ip route add blackhole 169.254.169.254
        workflows/short-read-mngs/auto_benchmark/run_local.py --dir testrun/ --docker-image-id czid-short-read-mngs \
          --settings ${{ matrix.settings }} ${{ matrix.sample }}
    - name: harvest output statistics
      run: |
        export PATH=$PATH:$HOME/.local/bin
        taxadb download -o taxadb --type taxa
        taxadb create -i taxadb --dbname taxadb.sqlite || true # exits nonzero w/o accessions
        workflows/short-read-mngs/auto_benchmark/harvest.py ${{ matrix.sample }}=testrun/${{ matrix.sample }} \
          --taxadb taxadb.sqlite > ${{ matrix.sample }}.${{ matrix.settings }}_viral.json
    # Artifact actions must share a major version between producer and
    # consumer: v4 downloads can only read v4 uploads.
    - uses: actions/upload-artifact@v4
      with:
        name: ${{ matrix.sample }}.${{ matrix.settings }}_viral.json
        path: ${{ matrix.sample }}.${{ matrix.settings }}_viral.json
# Collects the per-sample JSON artifacts, executes the benchmark notebook in a
# Jupyter container, publishes the rendered reports and flags deviations.
notebook:
  runs-on: ubuntu-22.04
  needs: run-samples
  strategy:
    matrix:
      settings: [default]
  steps:
    - uses: actions/checkout@v4
    - name: fetch idseq_bench_3
      uses: actions/download-artifact@v4
      with:
        name: idseq_bench_3.${{ matrix.settings }}_viral.json
    - name: fetch idseq_bench_5
      uses: actions/download-artifact@v4
      with:
        name: idseq_bench_5.${{ matrix.settings }}_viral.json
    - name: run notebook
      run: |
        TAG=$(git describe --long --tags --always --dirty)
        cp *_viral.json workflows/short-read-mngs/auto_benchmark # notebook wd
        # The directory is bind-mounted into the container below; open up
        # permissions so the container user can write into it.
        chmod -R 777 workflows/short-read-mngs/auto_benchmark
        # execute notebook
        docker run -v "$(pwd)/workflows:/home/jovyan/work" --workdir /home/jovyan/work \
          --env "RUN_NAME=${{ matrix.settings }}_viral_$TAG" \
          --env "HARVEST_DATA=$(ls -1 *_viral.json | tr '\n' ':')" \
          --env REF_LIB=/home/jovyan/work/short-read-mngs/auto_benchmark/ref_libs/${{ matrix.settings }}_viral \
          --env CHOWN_HOME=yes \
          jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to notebook --execute --inplace \
          short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
        cp workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb short-read-mngs-benchmarks.${{ matrix.settings }}_viral.ipynb
        # make HTML with code
        docker run -v "$(pwd)/workflows:/home/jovyan/work" --workdir /home/jovyan/work \
          jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to html \
          short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
        mv workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.html short-read-mngs-benchmarks.${{ matrix.settings }}_viral.with_code.html
        # make HTML without code
        docker run -v "$(pwd)/workflows:/home/jovyan/work" --workdir /home/jovyan/work \
          jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to html --no-input \
          short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
        mv workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.html short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
        # Stage the reference library next to the reports for upload.
        mkdir ref_libs
        cp -r workflows/short-read-mngs/auto_benchmark/ref_libs/${{ matrix.settings }}_viral ref_libs/
    - name: test harvest2.py
      run: |
        workflows/short-read-mngs/auto_benchmark/harvest2.py *_viral.json --version "$(git describe --long --tags --always --dirty)" --index-version 2021-01-22 --wall 999
    - uses: actions/upload-artifact@v4
      with:
        name: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
        path: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
    - uses: actions/upload-artifact@v4
      with:
        name: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.ipynb
        path: |
          short-read-mngs-benchmarks.${{ matrix.settings }}_viral.*
          *.${{ matrix.settings }}_viral.json
          ref_libs/
    - name: annotate deviations
      run: |
        # The presence of this marker file selects the warning annotation;
        # its absence means all metrics passed.
        if [[ -f workflows/short-read-mngs/auto_benchmark/.short-read-mngs-benchmarks-deviation ]]; then
          echo "::warning ::auto_benchmark detected >1% deviation from reference values; review the notebook and update reference library if needed."
        else
          echo "auto_benchmark OK (all metrics within 1% of reference values)"
        fi