PIP-1639-docker-to-input (#74)
ottojolanki authored Nov 15, 2021
1 parent e3f42fd commit 087baba
Showing 33 changed files with 304 additions and 36 deletions.
86 changes: 63 additions & 23 deletions .circleci/config.yml
@@ -17,7 +17,7 @@ python_defaults: &python_defaults

machine_defaults: &machine_defaults
machine:
image: ubuntu-1604:202007-01
image: ubuntu-2004:202107-02
working_directory: ~/long-read-rna-pipeline

commands:
@@ -27,15 +27,16 @@ commands:
- run:
command: |
echo "export TAG=encodedcc/${CIRCLE_PROJECT_REPONAME}:${CIRCLE_BRANCH}_${CIRCLE_WORKFLOW_ID}" >> ${BASH_ENV}
echo "export CROMWELL=./cromwell-49.jar" >> ${BASH_ENV}
echo "export WOMTOOL=./womtool-49.jar" >> ${BASH_ENV}
echo "export CROMWELL=./cromwell-70.jar" >> ${BASH_ENV}
echo "export WOMTOOL=./womtool-70.jar" >> ${BASH_ENV}
source ${BASH_ENV}
install_python_requirements:
description: "Install pytest workflow requirements"
steps:
- run: pyenv install 3.7.8
- run: pyenv global 3.7.8
- run: pip install --upgrade pip
- run: pip install --upgrade pip==21.3.1
- run: pip install -r requirements.txt

download_file:
@@ -52,9 +53,9 @@ commands:
- set_env_variables
- install_python_requirements
- download_file:
file_url: "https://github.com/broadinstitute/cromwell/releases/download/49/cromwell-49.jar"
file_url: "https://github.com/broadinstitute/cromwell/releases/download/70/cromwell-70.jar"
- download_file:
file_url: "https://github.com/broadinstitute/cromwell/releases/download/49/womtool-49.jar"
file_url: "https://github.com/broadinstitute/cromwell/releases/download/70/womtool-70.jar"

run_test_tag:
description: "Run tagged test"
@@ -68,6 +69,39 @@
command: pytest -v --tag <<parameters.tag>> --wt <<parameters.wt>> --symlink --kwd
no_output_timeout: 60m

add_docker_image_tag_to_inputs:
description: "Add docker tag"
parameters:
path_to_inputs:
type: string
steps:
- run:
command: |
for json in <<parameters.path_to_inputs>>
do
prefix=$(basename $json .json)
cat $json | jq ".+{\"${prefix}.docker\": \"${TAG}\"}" > tmp.json
cp tmp.json $json
rm tmp.json
done
add_runtime_environment_to_input:
description: "Add runtime environment"
parameters:
path_to_input:
type: string
prefix:
type: string
steps:
- run:
command: |
json=<<parameters.path_to_input>>
prefix=<<parameters.prefix>>
cat $json | jq ".+{\"${prefix}.runtime_environment\": {\"docker\": \"${TAG}\", \"singularity\": \"\"}}" > tmp.json
cp tmp.json $json
rm tmp.json
# Jobs
jobs:
build:
@@ -111,6 +145,8 @@ jobs:
steps:
- checkout
- prepare_pytest_environment
- add_docker_image_tag_to_inputs:
path_to_inputs: "test/unit/json/*.json"
- run_test_tag:
tag: "unit"
wt: "4"
@@ -120,6 +156,15 @@
steps:
- checkout
- prepare_pytest_environment
- add_runtime_environment_to_input:
path_to_input: "test/integration/json/test_concatenate_files.json"
prefix: "test_concatenate_files.concatenate_files"
- add_runtime_environment_to_input:
path_to_input: "test/integration/json/test_crop_reference_fasta_headers.json"
prefix: "test_crop_reference_fasta_headers.crop_reference_fasta_headers"
- add_runtime_environment_to_input:
path_to_input: "test/integration/json/test_make_gtf_from_spikein_fasta.json"
prefix: "test_make_gtf_from_spikein_fasta.make_gtf_from_spikein_fasta"
- run_test_tag:
tag: "integration"
wt: "4"
@@ -129,6 +174,8 @@
steps:
- checkout
- prepare_pytest_environment
- run: test/add_docker_image_tag_to_input.sh test/functional/json/test_pipeline_with_spikeins.json long_read_rna_pipeline
- run: test/add_docker_image_tag_to_input.sh test/functional/json/test_pipeline_with_two_spikeins.json long_read_rna_pipeline
- run_test_tag:
tag: "functional"
wt: "4"
@@ -138,6 +185,7 @@
steps:
- checkout
- prepare_pytest_environment
- run: test/add_docker_image_tag_to_input.sh test/test_workflow/json/test_workflow_input.json long_read_rna_pipeline
- run_test_tag:
tag: "workflow_one_replicate"
wt: "4"
@@ -147,6 +195,7 @@
steps:
- checkout
- prepare_pytest_environment
- run: test/add_docker_image_tag_to_input.sh test/test_workflow/json/test_workflow_2reps_input.json long_read_rna_pipeline
- run_test_tag:
tag: "workflow_two_replicates"
wt: "4"
@@ -158,6 +207,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_minimap2_input.json test_minimap2
test/caper_run.sh test/test_task/test_minimap2.wdl test/test_task/test_minimap2_input.json
python3 src/compare_md5.py --keys_to_inspect test_minimap2.skipNfirstlines.output_file test_minimap2.minimap2.mapping_qc \
--metadata_json metadata.json \
@@ -174,6 +224,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_transcriptclean_input.json test_transcriptclean
test/caper_run.sh test/test_task/test_transcriptclean.wdl test/test_task/test_transcriptclean_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_transcriptclean.transcriptclean.transcript_log \
@@ -187,30 +238,14 @@
python3 -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data['match_overall']))" < test_transcriptclean_input.result.json
no_output_timeout: 30m

test_filter_transcriptclean:
<<: *machine_defaults
steps:
- checkout
- prepare_pytest_environment
- run:
command: |
test/caper_run.sh test/test_task/test_filter_transcriptclean.wdl test/test_task/test_filter_transcriptclean_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_filter_transcriptclean.skipNfirstlines.output_file \
--metadata_json metadata.json \
--reference_json test/test_task/test_filter_transcriptclean_reference_md5.json \
--outfile test_filter_transcriptclean_input.result.json
cat test_filter_transcriptclean_input.result.json
python3 -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data['match_overall']))" < test_filter_transcriptclean_input.result.json
no_output_timeout: 30m

test_init_talon_database:
<<: *machine_defaults
steps:
- checkout
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_init_talon_db_input.json test_init_talon_db
test/caper_run.sh test/test_task/test_init_talon_db.wdl test/test_task/test_init_talon_db_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_init_talon_db.init_talon_db.database test_init_talon_db.init_talon_db.talon_inputs \
@@ -228,6 +263,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_init_talon_db_idxprefix_input.json test_init_talon_db
test/caper_run.sh test/test_task/test_init_talon_db.wdl test/test_task/test_init_talon_db_idxprefix_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_init_talon_db.init_talon_db.database test_init_talon_db.init_talon_db.talon_inputs \
@@ -245,6 +281,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_talon_input.json test_talon
test/caper_run.sh test/test_task/test_talon.wdl test/test_task/test_talon_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_talon.talon.talon_log \
@@ -262,6 +299,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_create_abundance_from_talon_db_input.json test_create_abundance_from_talon_db
test/caper_run.sh test/test_task/test_create_abundance_from_talon_db.wdl test/test_task/test_create_abundance_from_talon_db_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_create_abundance_from_talon_db.create_abundance_from_talon_db.talon_abundance \
@@ -280,6 +318,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_create_gtf_from_talon_db_input.json test_create_gtf_from_talon_db
test/caper_run.sh test/test_task/test_create_gtf_from_talon_db.wdl test/test_task/test_create_gtf_from_talon_db_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_create_gtf_from_talon_db.create_gtf_from_talon_db.gtf \
@@ -297,6 +336,7 @@
- prepare_pytest_environment
- run:
command: |
test/add_docker_image_tag_to_input.sh test/test_task/test_calculate_spearman_input.json test_calculate_spearman
test/caper_run.sh test/test_task/test_calculate_spearman.wdl test/test_task/test_calculate_spearman_input.json
python3 src/compare_md5.py \
--keys_to_inspect test_calculate_spearman.calculate_spearman.spearman \
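
For orientation, here is a minimal sketch of what the new `add_docker_image_tag_to_inputs` and `add_runtime_environment_to_input` commands do to a test input JSON. The file name `test_example.json` and the tag value are placeholders, not taken from the repository:

```sh
# Placeholder tag; in CI this is exported by set_env_variables as $TAG.
TAG="encodedcc/long-read-rna-pipeline:mybranch_workflow-id"

# add_docker_image_tag_to_inputs: merge a "<prefix>.docker" key into the input JSON,
# where <prefix> is the file name without the .json extension.
prefix=$(basename test_example.json .json)   # -> "test_example"
jq ".+{\"${prefix}.docker\": \"${TAG}\"}" test_example.json > tmp.json && mv tmp.json test_example.json

# add_runtime_environment_to_input: merge a "<prefix>.runtime_environment" object instead,
# with the docker image filled in and singularity left empty.
jq ".+{\"${prefix}.runtime_environment\": {\"docker\": \"${TAG}\", \"singularity\": \"\"}}" \
  test_example.json > tmp.json && mv tmp.json test_example.json

# If test_example.json started out as {}, it now contains:
# {
#   "test_example.docker": "encodedcc/long-read-rna-pipeline:mybranch_workflow-id",
#   "test_example.runtime_environment": {
#     "docker": "encodedcc/long-read-rna-pipeline:mybranch_workflow-id",
#     "singularity": ""
#   }
# }
```

The task-level jobs instead call `test/add_docker_image_tag_to_input.sh <input.json> <prefix>`; that script is not shown in this diff, but judging from its arguments it presumably performs the same kind of jq merge with an explicitly supplied prefix.
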
5 changes: 2 additions & 3 deletions Dockerfile
@@ -60,14 +60,13 @@ RUN echo "r <- getOption('repos'); r['CRAN'] <- 'https://cloud.r-project.org'; o
Rscript -e "install.packages('reshape2')"

# Install TC dependencies
RUN python3.7 -m pip install --upgrade pip
RUN python3.7 -m pip install --upgrade pip==21.3.1
RUN python3.7 -m pip install cython
RUN python3.7 -m pip install pybedtools==0.8.0 pyfasta==0.5.2 numpy pandas

# splice junction finding accessory script from TC still runs in python2 and requires pyfasta, which in turn requires numpy

RUN python -m pip install --upgrade pip
RUN python -m pip install pyfasta==0.5.2 numpy
RUN python -m pip install pyfasta==0.5.2 numpy==1.16.6

# Install qc-utils to python 3.7

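As a quick sanity check of the pinned versions, one could run something like the following against the built image (the image tag is a placeholder for whatever `$TAG` the CI build produced):

```sh
# Placeholder image tag; substitute the tag produced by the CI build.
# Expect pip 21.3.1 for python3.7 and numpy 1.16.6 for the python2 interpreter.
docker run --rm encodedcc/long-read-rna-pipeline:placeholder_tag sh -c \
  "python3.7 -m pip --version && python -c 'import numpy; print(numpy.__version__)'"
```
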
1 change: 1 addition & 0 deletions docs/reference.md
@@ -84,6 +84,7 @@ The following elaborates on the meaning of each line in the input file.
* `long_read_rna_pipeline.annotation_name` Annotation name in the initial TALON database. This is an internal metadata variable that you typically do not need to touch.
* `long_read_rna_pipeline.talon_prefixes` This is a list of strings that, if provided, will be used as prefixes for the transcript names in the gtf generated by `create_gtf_from_talon_db`. If this is not defined, "TALON" will be the default prefix. Note that if this list is defined, its length must be equal to the number of replicates.
* `long_read_rna_pipeline.canonical_only` If this option is set to true, TranscriptClean will only output transcripts that are either canonical or that contain annotated noncanonical junctions to the clean SAM and Fasta files at the end of the run. Set this parameter to false to output all transcripts.
* `long_read_rna_pipeline.docker` and `long_read_rna_pipeline.singularity` These should not need to be touched, unless you are hosting your own images, in which case you probably know what to do.
In addition, if you have used spikeins in your experiment, you can define them as an array of gzipped fasta files in the workflow-level variable `long_read_rna_pipeline.spikeins`.
The rest of the variables are for adjusting the computational resources of the pipeline tasks. See [notes about resources](reference.md#note-about-resources) below for more details.

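For illustration, a hypothetical way to override the image-related input described above; the image name is a placeholder, and normally these keys are left untouched:

```sh
# Hypothetical excerpt of a pipeline input JSON overriding the docker image.
cat << 'EOF' > image_override_excerpt.json
{
  "long_read_rna_pipeline.docker": "myregistry/long-read-rna-pipeline:custom"
}
EOF
# A corresponding "long_read_rna_pipeline.singularity" key exists for Singularity runs;
# the new CI command above simply sets it to an empty string.
```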