Skip to content

Commit

Permalink
Merge pull request #148 from ENCODE-DCC/dev
Browse files Browse the repository at this point in the history
v2.0.0
  • Loading branch information
leepc12 authored Oct 26, 2021
2 parents e9ea0ba + a1a05a6 commit 83df376
Show file tree
Hide file tree
Showing 20 changed files with 1,554 additions and 1,052 deletions.
17 changes: 13 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ update_apt: &update_apt
sudo apt-get update
install_python3: &install_python3_java
name: Install python3, pip3, java
install_python3: &install_python3
name: Install python3, pip3
command: |
sudo apt-get install software-properties-common git wget curl default-jre -y
sudo apt-get install software-properties-common git wget curl -y
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt-get update && sudo apt-get install python3.6 -y
sudo wget --no-check-certificate https://bootstrap.pypa.io/get-pip.py
Expand Down Expand Up @@ -61,17 +61,26 @@ install_aws_lib: &install_aws_lib
sudo pip3 install boto3 awscli
install_java: &install_java
name: Install openjdk-11
command: |
sudo add-apt-repository ppa:openjdk-r/ppa -y
sudo apt-get update && sudo apt-get install openjdk-11-jdk -y
# automatically set 11 as default java
sudo update-java-alternatives -a
jobs:
pytest:
<<: *machine_defaults
steps:
- checkout
- run: *update_apt
- run: *install_python3_java
- run: *install_python3
- run: *install_singularity
- run: *install_py3_packages
- run: *install_gcs_lib
- run: *install_aws_lib
- run: *install_java
- run:
no_output_timeout: 60m
command: |
Expand Down
384 changes: 380 additions & 4 deletions DETAILS.md

Large diffs are not rendered by default.

564 changes: 59 additions & 505 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion caper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .caper_runner import CaperRunner

__all__ = ['CaperClient', 'CaperClientSubmit', 'CaperRunner']
__version__ = '1.6.3'
__version__ = '2.0.0'
63 changes: 37 additions & 26 deletions caper/caper_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
LOCAL_HASH_STRAT_FILE,
LOCAL_HASH_STRAT_PATH,
LOCAL_HASH_STRAT_PATH_MTIME,
CromwellBackendAWS,
CromwellBackendAws,
CromwellBackendBase,
CromwellBackendCommon,
CromwellBackendDatabase,
CromwellBackendGCP,
CromwellBackendGcp,
CromwellBackendLocal,
CromwellBackendSlurm,
)
from .cromwell_rest_api import CromwellRestAPI
from .resource_analysis import ResourceAnalysis
from .server_heartbeat import ServerHeartbeat
from .singularity import Singularity

DEFAULT_CAPER_CONF = '~/.caper/default.conf'
DEFAULT_LIST_FORMAT = 'id,status,name,str_label,user,parent,submission'
Expand Down Expand Up @@ -115,7 +115,7 @@ def get_parser_and_defaults(conf_file=None):
'localized input JSON files due to deepcopying (recursive localization). '
'Cromwell\'s MySQL/PostgreSQL DB password can be exposed on backend.conf '
'on this directory. Therefore, DO NOT USE /tmp HERE. This directory is '
'also used for storing cached files for local/slurm/sge/pbs backends.',
'also used for storing cached files for local/slurm/sge/pbs/lsf backends.',
)
group_loc.add_argument(
'--gcp-loc-dir',
Expand Down Expand Up @@ -278,7 +278,7 @@ def get_parser_and_defaults(conf_file=None):
'does not allow hard-linking. e.g. beeGFS. '
'This flag does not work with backends based on a Docker container. '
'i.e. gcp and aws. Also, '
'it does not work with local backends (local/slurm/sge/pbs) '
'it does not work with local backends (local/slurm/sge/pbs/lsf) '
'with --docker. However, it works fine with --singularity.',
)
group_cromwell.add_argument(
Expand All @@ -290,7 +290,7 @@ def get_parser_and_defaults(conf_file=None):
LOCAL_HASH_STRAT_PATH_MTIME,
],
help='File hashing strategy for call caching. '
'For local backends (local/slurm/sge/pbs) only. '
'For local backends (local/slurm/sge/pbs/lsf) only. '
'file: use md5sum hash (slow), path: use path only, '
'path+modtime (default): use path + mtime.',
)
Expand All @@ -305,6 +305,14 @@ def get_parser_and_defaults(conf_file=None):
'For gcp, define --gcp-out-dir. '
'For aws, define --aws-out-dir.',
)
group_local.add_argument(
'--slurm-resource-param',
help='SLURM resource parameters to be passed to sbatch. '
'You can customize this to fit your cluster\'s configuration. '
'You can use WDL syntax in ${} notation with Cromwell\'s built-in resource '
'variables. See documentation for details. ',
default=CromwellBackendSlurm.DEFAULT_SLURM_RESOURCE_PARAM,
)

group_gc_all = parent_backend.add_argument_group(
title='GCP backend arguments for server/runner/client'
Expand All @@ -325,7 +333,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_gc.add_argument(
'--gcp-region',
default=CromwellBackendGCP.DEFAULT_REGION,
default=CromwellBackendGcp.DEFAULT_REGION,
help='GCP region for Google Cloud Life Sciences API. '
'This is used only when --use-google-cloud-life-sciences is defined.',
)
Expand All @@ -338,7 +346,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_gc.add_argument(
'--gcp-call-caching-dup-strat',
default=CromwellBackendGCP.DEFAULT_CALL_CACHING_DUP_STRAT,
default=CromwellBackendGcp.DEFAULT_CALL_CACHING_DUP_STRAT,
choices=[CALL_CACHING_DUP_STRAT_REFERENCE, CALL_CACHING_DUP_STRAT_COPY],
help='Duplication strategy for call-cached outputs for GCP backend: '
'copy: make a copy, reference: refer to old output in metadata.json.',
Expand All @@ -360,7 +368,7 @@ def get_parser_and_defaults(conf_file=None):
)
group_aws.add_argument(
'--aws-call-caching-dup-strat',
default=CromwellBackendAWS.DEFAULT_CALL_CACHING_DUP_STRAT,
default=CromwellBackendAws.DEFAULT_CALL_CACHING_DUP_STRAT,
choices=[CALL_CACHING_DUP_STRAT_REFERENCE, CALL_CACHING_DUP_STRAT_COPY],
help='Duplication strategy for call-cached outputs for AWS backend: '
'copy: make a copy, reference: refer to old output in metadata.json.',
Expand Down Expand Up @@ -397,14 +405,6 @@ def get_parser_and_defaults(conf_file=None):
action='store_true',
help='Put a hold on a workflow when submitted to a Cromwell server.',
)
parent_submit.add_argument(
'--singularity-cachedir',
default=Singularity.DEFAULT_SINGULARITY_CACHEDIR,
help='Singularity cache directory. Equivalent to exporting an '
'environment variable SINGULARITY_CACHEDIR. '
'Define it to prevent repeatedly building a singularity image '
'for every pipeline task',
)
parent_submit.add_argument(
'--use-gsutil-for-s3',
action='store_true',
Expand Down Expand Up @@ -477,7 +477,7 @@ def get_parser_and_defaults(conf_file=None):
description='Cloud-based backends (gc and aws) will only use Docker '
'so that "--docker URI_FOR_DOCKER_IMG" must be specified '
'in the command line argument or as a comment "#CAPER '
'docker URI_FOR_DOCKER_IMG" or value for "workflow.meta.caper_docker"'
'docker URI_FOR_DOCKER_IMG" or value for "workflow.meta.default_docker" '
'in a WDL file',
)
group_dep.add_argument(
Expand All @@ -486,9 +486,9 @@ def get_parser_and_defaults(conf_file=None):
const='',
default=None,
help='URI for Docker image (e.g. ubuntu:latest). '
'This can also be used as a flag to use Docker image address '
'This can also be used as a flag to use Docker image URI '
'defined in your WDL file as a comment ("#CAPER docker") or '
'as "workflow.meta.caper_docker" in WDL.',
'as "workflow.meta.default_docker" in WDL.',
)
group_dep_local = parent_submit.add_argument_group(
title='dependency resolver for local backend',
Expand All @@ -505,21 +505,26 @@ def get_parser_and_defaults(conf_file=None):
help='URI or path for Singularity image '
'(e.g. ~/.singularity/ubuntu-latest.simg, '
'docker://ubuntu:latest, shub://vsoch/hello-world). '
'This can also be used as a flag to use Docker image address '
'This can also be used as a flag to use Singularity image URI '
'defined in your WDL file as a comment ("#CAPER singularity") or '
'as "workflow.meta.caper_singularity" in WDL.',
'as "workflow.meta.default_singularity" in WDL.',
)
group_dep_local.add_argument(
'--no-build-singularity',
action='store_true',
help='Do not build singularity image before running a workflow. ',
'--conda',
nargs='?',
const='',
default=None,
help='Default Conda environment\'s name. '
'If defined, each task in WDL will be called with conda run -n ENV_NAME. '
'This can also be used as a flag to use Conda environment '
'defined in your WDL file under "workflow.meta.default_conda".',
)

group_slurm = parent_submit.add_argument_group('SLURM arguments')
group_slurm.add_argument('--slurm-partition', help='SLURM partition')
group_slurm.add_argument('--slurm-account', help='SLURM account')
group_slurm.add_argument(
'--slurm-extra-param', help='SLURM extra parameters. Must be double-quoted'
'--slurm-extra-param', help='SLURM extra parameters to be passed to sbatch. '
)

group_sge = parent_submit.add_argument_group('SGE arguments')
Expand All @@ -537,6 +542,12 @@ def get_parser_and_defaults(conf_file=None):
'--pbs-extra-param', help='PBS extra parameters. Must be double-quoted'
)

group_lsf = parent_submit.add_argument_group('LSF arguments')
group_lsf.add_argument('--lsf-queue', help='LSF queue')
group_lsf.add_argument(
'--lsf-extra-param', help='LSF extra parameters. Must be double-quoted'
)

# server
parent_server = argparse.ArgumentParser(add_help=False)
parent_server.add_argument(
Expand Down
58 changes: 44 additions & 14 deletions caper/caper_backend_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
BACKEND_AWS,
BACKEND_GCP,
BACKEND_SGE,
CromwellBackendAWS,
CromwellBackendAws,
CromwellBackendBase,
CromwellBackendCommon,
CromwellBackendDatabase,
CromwellBackendGCP,
CromwellBackendGcp,
CromwellBackendLocal,
CromwellBackendPBS,
CromwellBackendSGE,
CromwellBackendSLURM,
CromwellBackendLsf,
CromwellBackendPbs,
CromwellBackendSge,
CromwellBackendSlurm,
)
from .dict_tool import merge_dict
from .hocon_string import HOCONString
Expand Down Expand Up @@ -53,23 +54,29 @@ def __init__(
file_db=None,
gcp_prj=None,
gcp_out_dir=None,
gcp_call_caching_dup_strat=CromwellBackendGCP.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_call_caching_dup_strat=CromwellBackendGcp.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_service_account_key_json=None,
use_google_cloud_life_sciences=False,
gcp_region=CromwellBackendGCP.DEFAULT_REGION,
gcp_region=CromwellBackendGcp.DEFAULT_REGION,
aws_batch_arn=None,
aws_region=None,
aws_out_dir=None,
aws_call_caching_dup_strat=CromwellBackendAWS.DEFAULT_CALL_CACHING_DUP_STRAT,
aws_call_caching_dup_strat=CromwellBackendAws.DEFAULT_CALL_CACHING_DUP_STRAT,
gcp_zones=None,
slurm_partition=None,
slurm_account=None,
slurm_extra_param=None,
slurm_resource_param=CromwellBackendSlurm.DEFAULT_SLURM_RESOURCE_PARAM,
sge_pe=None,
sge_queue=None,
sge_extra_param=None,
sge_resource_param=CromwellBackendSge.DEFAULT_SGE_RESOURCE_PARAM,
pbs_queue=None,
pbs_extra_param=None,
pbs_resource_param=CromwellBackendPbs.DEFAULT_PBS_RESOURCE_PARAM,
lsf_queue=None,
lsf_extra_param=None,
lsf_resource_param=CromwellBackendLsf.DEFAULT_LSF_RESOURCE_PARAM,
):
"""Initializes the backend conf's stanzas.
Expand All @@ -93,7 +100,7 @@ def __init__(
max_concurrent_tasks:
Limit for concurrent number of tasks for each workflow.
soft_glob_output:
Local backends only (Local, sge, pbs, slurm).
Local backends only (Local, sge, pbs, slurm, lsf).
Glob with ln -s instead of hard-linking (ln alone).
Useful for file-system like beeGFS, which does not allow hard-linking.
local_hash_strat:
Expand Down Expand Up @@ -166,11 +173,21 @@ def __init__(
slurm_partition:
slurm_account:
slurm_extra_param:
slurm_resource_param:
For slurm backend only.
Resource parameters to be passed to sbatch.
You can use WDL syntax and Cromwell's built-in variables in ${} notation.
e.g. cpu, time, memory_mb
sge_pe:
sge_queue:
sge_extra_param:
sge_resource_param:
pbs_queue:
pbs_extra_param:
pbs_resource_param:
lsf_queue:
lsf_extra_param:
lsf_resource_param:
"""
self._template = {}

Expand Down Expand Up @@ -216,20 +233,21 @@ def __init__(

merge_dict(
self._template,
CromwellBackendSLURM(
CromwellBackendSlurm(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
local_hash_strat=local_hash_strat,
slurm_partition=slurm_partition,
slurm_account=slurm_account,
slurm_extra_param=slurm_extra_param,
slurm_resource_param=slurm_resource_param,
),
)

merge_dict(
self._template,
CromwellBackendSGE(
CromwellBackendSge(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
Expand All @@ -242,7 +260,7 @@ def __init__(

merge_dict(
self._template,
CromwellBackendPBS(
CromwellBackendPbs(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
Expand All @@ -252,6 +270,18 @@ def __init__(
),
)

merge_dict(
self._template,
CromwellBackendLsf(
local_out_dir=local_out_dir,
max_concurrent_tasks=max_concurrent_tasks,
soft_glob_output=soft_glob_output,
local_hash_strat=local_hash_strat,
lsf_queue=lsf_queue,
lsf_extra_param=lsf_extra_param,
),
)

# cloud backends
if gcp_prj and gcp_out_dir:
if gcp_service_account_key_json:
Expand All @@ -267,7 +297,7 @@ def __init__(

merge_dict(
self._template,
CromwellBackendGCP(
CromwellBackendGcp(
max_concurrent_tasks=max_concurrent_tasks,
gcp_prj=gcp_prj,
gcp_out_dir=gcp_out_dir,
Expand All @@ -282,7 +312,7 @@ def __init__(
if aws_batch_arn and aws_region and aws_out_dir:
merge_dict(
self._template,
CromwellBackendAWS(
CromwellBackendAws(
max_concurrent_tasks=max_concurrent_tasks,
aws_batch_arn=aws_batch_arn,
aws_region=aws_region,
Expand Down
Loading

0 comments on commit 83df376

Please sign in to comment.