From 1d0147b62bdc94ecc6cbd87eb1fa3d8d0aff886d Mon Sep 17 00:00:00 2001 From: Karan Jaisingh Date: Thu, 15 Aug 2024 11:35:20 -0400 Subject: [PATCH] Initial commit --- .github/workflows/testwdls.yaml | 5 +---- README.md | 17 +++------------ inputs/values/google_cloud.json | 3 --- scripts/cromwell/launch_wdl.sh | 11 ---------- scripts/inputs/build_default_inputs.sh | 21 ++++++------------ scripts/inputs/build_inputs.py | 5 +---- website/docs/advanced/build_inputs.md | 30 ++++++-------------------- website/docs/gs/quick_start.md | 3 +-- 8 files changed, 19 insertions(+), 76 deletions(-) delete mode 100644 inputs/values/google_cloud.json diff --git a/.github/workflows/testwdls.yaml b/.github/workflows/testwdls.yaml index 3f1fdf353..49d0d7f6d 100644 --- a/.github/workflows/testwdls.yaml +++ b/.github/workflows/testwdls.yaml @@ -54,10 +54,7 @@ jobs: # Setup for running womtool pip install jinja2==3.1.2 wget -O womtool.jar https://github.com/broadinstitute/cromwell/releases/download/84/womtool-84.jar - echo \ - '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' \ - > inputs/values/google_cloud.my_project.json - scripts/inputs/build_default_inputs.sh -d . -c google_cloud.my_project + scripts/inputs/build_default_inputs.sh -d . - name: Test with Miniwdl run: | diff --git a/README.md b/README.md index 4ccce1c17..d39c804af 100644 --- a/README.md +++ b/README.md @@ -125,14 +125,6 @@ Example workflow inputs can be found in `/inputs`. Build using `scripts/inputs/b generates input jsons in `/inputs/build`. Except the MELT docker image, all required resources are available in public Google buckets. -Some workflows require a Google Cloud Project ID to be defined in a cloud environment parameter group. Workspace builds -require a Terra billing project ID as well. An example is provided at `/inputs/values/google_cloud.json` but should -not be used, as modifying this file will cause tracked changes in the repository. 
Instead, create a copy in the same -directory with the format `google_cloud.my_project.json` and modify as necessary. - -Note that these inputs are required only when certain data are located in requester pays buckets. If this does not -apply, users may use placeholder values for the cloud configuration and simply delete the inputs manually. - #### MELT **Important**: The example input files contain MELT inputs that are NOT public (see [Requirements](#requirements)). These include: @@ -150,8 +142,7 @@ We recommend running the pipeline on a dedicated [Cromwell](https://github.com/b > cp $GATK_SV_ROOT/wdl/*.wdl . > zip dep.zip *.wdl > cd .. -> echo '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' > inputs/values/google_cloud.my_project.json -> bash scripts/inputs/build_default_inputs.sh -d $GATK_SV_ROOT -c google_cloud.my_project +> bash scripts/inputs/build_default_inputs.sh -d $GATK_SV_ROOT > cp $GATK_SV_ROOT/inputs/build/ref_panel_1kg/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json GATKSVPipelineBatch.my_run.json > cromshell submit wdl/GATKSVPipelineBatch.wdl GATKSVPipelineBatch.my_run.json cromwell_config.json wdl/dep.zip ``` @@ -231,14 +222,12 @@ Here is an example of how to generate workflow input jsons from `GATKSVPipelineB --final-workflow-outputs-dir gs://my-outputs-bucket \ metadata.json \ > inputs/values/my_ref_panel.json -> # Define your google project id (for Cromwell inputs) and Terra billing project (for workspace inputs) -> echo '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' > inputs/values/google_cloud.my_project.json -> # Build test files for batched workflows (google cloud project id required) +> # Build test files for batched workflows > python scripts/inputs/build_inputs.py \ inputs/values \ inputs/templates/test \ inputs/build/my_ref_panel/test \ - -a '{ "test_batch" : "ref_panel_1kg", "cloud_env": "google_cloud.my_project" }' + 
-a '{ "test_batch" : "ref_panel_1kg" }' > # Build test files for the single-sample workflow > python scripts/inputs/build_inputs.py \ inputs/values \ diff --git a/inputs/values/google_cloud.json b/inputs/values/google_cloud.json deleted file mode 100644 index 64603ae3e..000000000 --- a/inputs/values/google_cloud.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "terra_billing_project_id": "your-terra-billing-project-id-see-readme" -} diff --git a/scripts/cromwell/launch_wdl.sh b/scripts/cromwell/launch_wdl.sh index 0d67ab859..23aa8db4b 100755 --- a/scripts/cromwell/launch_wdl.sh +++ b/scripts/cromwell/launch_wdl.sh @@ -15,15 +15,6 @@ done WDL_FILENAME=$(basename "$WDL") WDL_NAME=${WDL_FILENAME%.*} -CLOUD_ENV="$GATK_SV_ROOT/inputs/values/google_cloud.my_project.json" -echo "CLOUD_ENV=$CLOUD_ENV" -cat << EOF > "$CLOUD_ENV" -{ - "google_project_id": "broad-dsde-methods", - "terra_billing_project_id": "broad-dsde-methods" -} -EOF - RUN_DIR="$GATK_SV_ROOT/runs/$WDL_NAME" DEPS_ZIP="$RUN_DIR/deps.zip" @@ -34,10 +25,8 @@ zip "$DEPS_ZIP" *.wdl &> /dev/null cd "$GATK_SV_ROOT" "$GATK_SV_ROOT/scripts/inputs/build_default_inputs.sh" \ -d "$GATK_SV_ROOT" \ - -c google_cloud.my_project \ > /dev/null -rm -f $CLOUD_ENV echo "Available input jsons:" printf "%d\t%s\n" 0 "none (skip cromwell submit)" diff --git a/scripts/inputs/build_default_inputs.sh b/scripts/inputs/build_default_inputs.sh index 6397de18a..b184f692a 100755 --- a/scripts/inputs/build_default_inputs.sh +++ b/scripts/inputs/build_default_inputs.sh @@ -2,9 +2,8 @@ function usage() { printf "Usage: \n \ - %s -d -c \n \ - \t path to gatk-sv base directory \n \ - \t name of cloud environment json (e.g. 
'google_cloud.my' for inputs/values/google_cloud.my.json)" "$1" + %s -d \n \ + \t path to gatk-sv base directory" "$1" } if [[ "$#" == 0 ]]; then @@ -14,10 +13,9 @@ fi ################################################# # Parsing arguments ################################################# -while getopts "d:c:" option; do +while getopts "d:" option; do case "$option" in d) BASE_DIR="$OPTARG" ;; - c) CLOUD_ENV="$OPTARG" ;; *) usage "$0" && exit 1 ;; esac done @@ -28,12 +26,6 @@ if [ -z "$BASE_DIR" ] ; then exit 1 fi -if [ -z "$CLOUD_ENV" ] ; then - echo "xy" - usage "$0" - exit 1 -fi - if [[ ! -d "$BASE_DIR" ]]; then echo "Invalid directory: $BASE_DIR" exit 1 @@ -45,17 +37,16 @@ bash scripts/inputs/clean_default_inputs.sh -d ${BASE_DIR} echo "########## Building ref_panel_1kg test ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/test ${BASE_DIR}/inputs/build/ref_panel_1kg/test \ - -a '{ "test_batch" : "ref_panel_1kg", "cloud_env" : "'$CLOUD_ENV'" }' + -a '{ "test_batch" : "ref_panel_1kg" }' echo "########## Building ref_panel_1kg cohort Terra workspace ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/terra_workspaces/cohort_mode ${BASE_DIR}/inputs/build/ref_panel_1kg/terra \ - -a '{ "test_batch" : "ref_panel_1kg", "cloud_env" : "'$CLOUD_ENV'" }' + -a '{ "test_batch" : "ref_panel_1kg" }' echo "########## Building hgdp test ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/test ${BASE_DIR}/inputs/build/hgdp/test \ - -a '{ "test_batch" : "hgdp", "cloud_env" : "'$CLOUD_ENV'" }' + -a '{ "test_batch" : "hgdp" }' -# Note CLOUD_ENV is not currently required for the single-sample workflow echo "########## Building NA19240 single-sample test ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/test/GATKSVPipelineSingleSample ${BASE_DIR}/inputs/build/NA19240/test \ -a '{ "single_sample" : 
"test_single_sample_NA19240", "ref_panel" : "ref_panel_1kg" }' diff --git a/scripts/inputs/build_inputs.py b/scripts/inputs/build_inputs.py index e097f4d46..57ea26505 100755 --- a/scripts/inputs/build_inputs.py +++ b/scripts/inputs/build_inputs.py @@ -117,15 +117,12 @@ def main(): raw_input_bundles['test_batch_empty']['name'] = 'test_batch' raw_input_bundles['single_sample_none'] = {} raw_input_bundles['single_sample_none']['name'] = 'single_sample' - raw_input_bundles['cloud_env_none'] = {} - raw_input_bundles['cloud_env_none']['name'] = 'cloud_env' default_aliases = {'dockers': 'dockers', 'ref_panel': 'ref_panel_empty', 'reference_resources': 'resources_hg38', 'test_batch': 'test_batch_empty', - 'single_sample': 'single_sample_none', - 'cloud_env': 'cloud_env_none'} + 'single_sample': 'single_sample_none'} # prepare the input_dict using default, document default, and user-specified aliases input_dict = {} diff --git a/website/docs/advanced/build_inputs.md b/website/docs/advanced/build_inputs.md index b986919c0..2beb1993a 100644 --- a/website/docs/advanced/build_inputs.md +++ b/website/docs/advanced/build_inputs.md @@ -21,20 +21,10 @@ You may run the following commands to get these example inputs. git clone https://github.com/broadinstitute/gatk-sv && cd gatk-sv ``` -2. Create a JSON file containing the Terra billing project (for use on Terra) - or the Google project ID (for use on Cromwell) that you will use to run - the workflows with the test input. You may create this file by running - the following command and replacing `"my-google-project-id"` and - `"my-terra-billing-project"` with your project and billing IDs. +2. Create test inputs. ```shell - echo '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' > inputs/values/google_cloud.my_project.json - ``` - -3. Create test inputs. - - ```shell - bash scripts/inputs/build_default_inputs.sh -d . 
-c google_cloud.my_project + bash scripts/inputs/build_default_inputs.sh -d . ``` Running this command generates test inputs in `gatk-sv/inputs/build` with the following structure. @@ -62,7 +52,7 @@ python scripts/inputs/build_inputs.py \ inputs/values \ inputs/templates/test/GATKSVPipelineSingleSample \ inputs/build/NA19240/test \ - -a '{ "test_batch" : "ref_panel_1kg", "cloud_env": "google_cloud.my_project" }' + -a '{ "test_batch" : "ref_panel_1kg" }' ``` @@ -98,24 +88,18 @@ Here is an example of how to generate workflow input jsons from `GATKSVPipelineB metadata.json \ > inputs/values/my_ref_panel.json ``` - -3. Define your google project id (for Cromwell inputs) and Terra billing project (for workspace inputs). - - ```shell - echo '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' > inputs/values/google_cloud.my_project.json - ``` -4. Build test files for batched workflows (google cloud project id required). +3. Build test files for batched workflows. ```shell python scripts/inputs/build_inputs.py \ inputs/values \ inputs/templates/test \ inputs/build/my_ref_panel/test \ - -a '{ "test_batch" : "ref_panel_1kg", "cloud_env": "google_cloud.my_project" }' + -a '{ "test_batch" : "ref_panel_1kg" }' ``` -5. Build test files for the single-sample workflow +4. Build test files for the single-sample workflow ```shell python scripts/inputs/build_inputs.py \ @@ -125,7 +109,7 @@ Here is an example of how to generate workflow input jsons from `GATKSVPipelineB -a '{ "single_sample" : "test_single_sample_NA19240", "ref_panel" : "my_ref_panel" }' ``` -6. Build files for a Terra workspace. +5. Build files for a Terra workspace. 
```shell python scripts/inputs/build_inputs.py \ diff --git a/website/docs/gs/quick_start.md b/website/docs/gs/quick_start.md index 282b8e72e..b225f7837 100644 --- a/website/docs/gs/quick_start.md +++ b/website/docs/gs/quick_start.md @@ -44,8 +44,7 @@ The input values are provided only as an example and are not publicly accessible > cp $GATK_SV_ROOT/wdl/*.wdl . > zip dep.zip *.wdl > cd .. -> echo '{ "google_project_id": "my-google-project-id", "terra_billing_project_id": "my-terra-billing-project" }' > inputs/values/google_cloud.my_project.json -> bash scripts/inputs/build_default_inputs.sh -d $GATK_SV_ROOT -c google_cloud.my_project +> bash scripts/inputs/build_default_inputs.sh -d $GATK_SV_ROOT > cp $GATK_SV_ROOT/inputs/build/ref_panel_1kg/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json GATKSVPipelineBatch.my_run.json > cromshell submit wdl/GATKSVPipelineBatch.wdl GATKSVPipelineBatch.my_run.json cromwell_config.json wdl/dep.zip ```