diff --git a/README.md b/README.md index 79fec6e..3093707 100644 --- a/README.md +++ b/README.md @@ -14,32 +14,61 @@ scripts and a configuration file `config.yaml` that defines default values for snakemake command line arguments. The default `config.yaml` file is - restart-times: 3 - jobscript: "slurm-jobscript.sh" - cluster: "slurm-submit.py" - cluster-status: "slurm-status.py" - max-jobs-per-second: 1 - max-status-checks-per-second: 10 - local-cores: 1 - latency-wait: 60 + restart-times: 3 + jobscript: "slurm-jobscript.sh" + cluster: "slurm-submit.py" + cluster-status: "slurm-status.py" + max-jobs-per-second: 1 + max-status-checks-per-second: 10 + local-cores: 1 + latency-wait: 60 Given an installed profile `profile_name`, when snakemake is run with `--profile profile_name`, the configuration above would imply the following snakemake call: - snakemake --jobscript slurm-jobscript.sh --cluster slurm-submit.py --cluster-status slurm-status.py --restart-times 3 --max-jobs-per-second 1 --max-status-checks-per-second 10 --local-cores 1 --latency-wait 60 + snakemake --jobscript slurm-jobscript.sh --cluster slurm-submit.py --cluster-status slurm-status.py --restart-times 3 --max-jobs-per-second 1 --max-status-checks-per-second 10 --local-cores 1 --latency-wait 60 plus any additional options to snakemake that the user has applied. +Note that the use of option `--cluster-config` is discouraged, but the +profile still provides support for backwards compatibility. 
The +default configuration file therefore contains a commented section with +examples of resource configuration (see also [snakemake best +practices](https://snakemake.readthedocs.io/en/stable/snakefiles/best_practices.html?highlight=set-resources#best-practices)): + + # Example resource configuration + # default-resources: + # - runtime=100 + # - mem_mb=6000 + # - disk_mb=1000000 + # # set-threads: map rule names to threads + # set-threads: + # - single_core_rule=1 + # - multi_core_rule=10 + # # set-resources: map rule names to resources in general + # set-resources: + # - high_memory_rule:mem_mb=12000 + # - long_running_rule:runtime=1200 + See the [snakemake documentation on profiles](https://snakemake.readthedocs.io/en/stable/executing/cli.html?highlight=profile#profiles) for more information. +## Alternatives + +For a more lightweight alternative, see the excellent repo +[smk-simple-slurm](https://github.com/jdblischak/smk-simple-slurm) by +@jdblischak. In particular, it can handle larger numbers of jobs than this profile (see +[issue +#79](https://github.com/Snakemake-Profiles/slurm/issues/79)). + + ## Quickstart To create a slurm profile from the cookiecutter, simply run - cookiecutter https://github.com/Snakemake-Profiles/slurm.git + cookiecutter https://github.com/Snakemake-Profiles/slurm.git in a directory. You will be prompted to set some values for your profile (here assumed to be called `profile_name`), after which the @@ -47,7 +76,7 @@ profile scripts and configuration file will be installed in the current working directory as `./profile_name`. Then you can run Snakemake with - snakemake --profile profile_name ... + snakemake --profile profile_name ... Note that the `--profile` argument can be either a relative or absolute path. In addition, snakemake will search for a corresponding @@ -55,22 +84,24 @@ folder `profile_name` in `/etc/xdg/snakemake` and `$HOME/.config/snakemake`, where globally accessible profiles can be placed. 
+## Examples ### Example 1: project setup to use specific slurm account One typical use case is to setup a profile to use a specific slurm account: - $ cd ~ && mkdir -p my_project && cd my_project - $ cookiecutter https://github.com/Snakemake-Profiles/slurm.git - profile_name [slurm]: slurm.my_account - sbatch_defaults []: account=my_account no-requeue exclusive - cluster_config []: - Select advanced_argument_conversion: - 1 - no - 2 - yes - Choose from 1, 2 [1]: - cluster_name []: + $ cd ~ && mkdir -p my_project && cd my_project + $ cookiecutter https://github.com/Snakemake-Profiles/slurm.git + profile_name [slurm]: slurm.my_account + sbatch_defaults []: account=my_account no-requeue exclusive + Select advanced_argument_conversion: + 1 - no + 2 - yes + Choose from 1, 2 [1]: + cluster_name []: + cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...] + cluster_config []: The command `snakemake --profile slurm.convert_args ...` will submit @@ -84,15 +115,17 @@ Assuming our HPC defines a [multi-cluster environment](https://slurm.schedmd.com/multi_cluster.html), we can create a profile that uses a specified cluster: - $ cookiecutter slurm - profile_name [slurm]: slurm.dusk - sbatch_defaults []: account=my_account - cluster_config []: - Select advanced_argument_conversion: - 1 - no - 2 - yes - Choose from 1, 2 [1]: - cluster_name []: dusk + $ cookiecutter slurm + profile_name [slurm]: slurm.dusk + sbatch_defaults []: account=my_account + Select advanced_argument_conversion: + 1 - no + 2 - yes + Choose from 1, 2 [1]: + cluster_name []: dusk + cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...] 
+ cluster_config []: + (Note that once a cookiecutter has been installed, we can reuse it without using the github URL). @@ -107,15 +140,16 @@ addition, the `slurm-status.py` script will check for jobs in the As a final example, assume we want to use advanced argument conversion: - $ cookiecutter slurm - profile_name [slurm]: slurm.convert_args - sbatch_defaults []: account=my_account - cluster_config []: - Select advanced_argument_conversion: - 1 - no - 2 - yes - Choose from 1, 2 [1]: 2 - cluster_name []: + $ cookiecutter slurm + profile_name [slurm]: slurm.convert_args + sbatch_defaults []: account=my_account + Select advanced_argument_conversion: + 1 - no + 2 - yes + Choose from 1, 2 [1]: 2 + cluster_name []: + cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...] + cluster_config []: The command `snakemake --profile slurm.convert_args ...` will now submit jobs with `sbatch --parsable --account=my_account`. The @@ -130,14 +164,7 @@ See the section below * `profile_name` : A name to address the profile via the `--profile` Snakemake option. * `sbatch_defaults` : List of default arguments to sbatch, e.g.: - `qos=short time=60`. This is a convenience argument to avoid - `cluster_config` for a few aruments. -* `cluster_config` : Path to a YAML or JSON configuration file - analogues to the Snakemake [`--cluster-config` - option](https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html#cluster-configuration-deprecated). - Path may relative to the profile directory or absolute including - environment variables (e.g. - `$PROJECT_ROOT/config/slurm_defaults.yaml`). + `qos=short time=60`. * `advanced_argument_conversion` : If True, try to adjust/constrain mem, time, nodes and ntasks (i.e. cpus) to parsed or default partition after converting resources. 
This may fail due to @@ -147,26 +174,41 @@ See the section below cluster name, leave empty to use the default. This will add the `--cluster` string to the sbatch defaults, and adjust `slurm-status.py` to check status on the relevant cluster. +* `cluster_config` (NB: discouraged): Path to a YAML or JSON configuration file + analogous to the Snakemake [`--cluster-config` + option](https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html#cluster-configuration-deprecated). + Path may be relative to the profile directory or absolute including + environment variables (e.g. + `$PROJECT_ROOT/config/slurm_defaults.yaml`). ### Default snakemake arguments Default arguments to `snakemake` may be adjusted in the `/config.yaml` file. +### Parsing arguments to SLURM (sbatch) and resource configuration + +NB: As previously pointed out, the use of cluster-config is +discouraged. Rule-specific resource configuration is better handled by +snakemake's CLI arguments (see [snakemake best +practices](https://snakemake.readthedocs.io/en/stable/snakefiles/best_practices.html?highlight=set-resources#best-practices)) +which can be put in the profile configuration file. -### Parsing arguments to SLURM (sbatch) -Arguments are overridden in the following order and must be named according to -[sbatch long option names](https://slurm.schedmd.com/sbatch.html): +Arguments are set and overridden in the following order and must be +named according to [sbatch long option +names](https://slurm.schedmd.com/sbatch.html): 1) `sbatch_defaults` cookiecutter option 2) Profile `cluster_config` file `__default__` entries 3) Snakefile threads and resources (time, mem) 4) Profile `cluster_config` file entries 5) `--cluster-config` parsed to Snakemake (deprecated since Snakemake 5.10) -6) Any other argument conversion (experimental, currently time, ntasks and mem) if `advanced_argument_conversion` is True. 
+6) Snakemake CLI resource configuration in profile configuration file +7) Any other argument conversion (experimental, currently time, ntasks and mem) if `advanced_argument_conversion` is True. -### Resources -Resources specified in Snakefiles must all be in the correct +### Rule specific resource configuration +In addition to Snakemake CLI resource configuration, resources can be +specified in Snakefile rules and must all be in the correct unit/format as expected by `sbatch`. The implemented resource names are given (and may be adjusted) in the `slurm_utils.RESOURCE_MAPPING` global. This is intended for system agnostic resources such as time @@ -189,19 +231,19 @@ available cluster resources. For instance, if the memory is set larger than the maximum memory of any node, `sbatch` will exit with the message - sbatch: error: CPU count per node can not be satisfied - sbatch: error: Batch job submission failed: Requested node configuration is not available + sbatch: error: CPU count per node can not be satisfied + sbatch: error: Batch job submission failed: Requested node configuration is not available By choosing the advanced argument conversion upon creating a profile, an attempt will be made to adjust memory, cpu and time settings if these do not comply with the cluster configuration. As an example, consider a rule with the following resources and threads: - rule bwa_mem: - resources: - mem_mb = lambda wildcards, attempt: attempt * 8000, - runtime = lambda wildcards, attempt: attempt * 1200 - threads: 1 + rule bwa_mem: + resources: + mem_mb = lambda wildcards, attempt: attempt * 8000, + runtime = lambda wildcards, attempt: attempt * 1200 + threads: 1 Assume further that the available cores provide 6400MB memory per core. If the job reaches a peak memory (8000MB), it will likely be @@ -243,7 +285,7 @@ The `__default__` entry will apply to all jobs. Tests can be run on a HPC running SLURM or locally in a docker stack. 
To execute tests, run - pytest -v -s tests + pytest -v -s tests from the source code root directory. Test options can be configured via the pytest configuration file `tests/pytest.ini`. @@ -279,7 +321,7 @@ swarm](https://docs.docker.com/engine/swarm/) (`docker swarm init`). The docker stack can also be deployed manually from the top-level directory as follows: - DOCKER_COMPOSE=tests/docker-compose.yaml ./tests/deploystack.sh + DOCKER_COMPOSE=tests/docker-compose.yaml ./tests/deploystack.sh See the deployment script `tests/deploystack.sh` for details. @@ -305,17 +347,17 @@ As an example, `tests/test_slurm_advanced.py` defines a fixture `profile` that uses the `cookie_factory` fixture factory to create an slurm profile that uses advanced argument conversion: - @pytest.fixture - def profile(cookie_factory, data): - cookie_factory(advanced="yes") + @pytest.fixture + def profile(cookie_factory, data): + cookie_factory(advanced="yes") The test `tests/test_slurm_advanced.py::test_adjust_runtime` depends on this fixture and `smk_runner`: - def test_adjust_runtime(smk_runner, profile): - smk_runner.make_target( - "timeout.txt", options=f"--cluster-config {smk_runner.cluster_config}" - ) + def test_adjust_runtime(smk_runner, profile): + smk_runner.make_target( + "timeout.txt", options=f"--cluster-config {smk_runner.cluster_config}" + ) The `make_target` method makes a snakemake target with additional diff --git a/cookiecutter.json b/cookiecutter.json index e6de0af..569adab 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -1,7 +1,8 @@ { "profile_name": "slurm", "sbatch_defaults": "", - "cluster_config": "", "advanced_argument_conversion": ["no", "yes"], - "cluster_name": "" + "cluster_name": "", + "cluster_config_help": "The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). 
Enter to continue...", + "cluster_config": "" } diff --git a/tests/conftest.py b/tests/conftest.py index 17935b5..02f61a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,7 +120,7 @@ def _cookie_factory( advanced="no", cluster_name=None, cluster_config=None, - yamlconfig=_yamlconfig_default + yamlconfig=_yamlconfig_default, ): cookie_template = pjoin(os.path.abspath(pytest.dname), os.pardir) output_factory = tmpdir_factory.mktemp @@ -128,7 +128,7 @@ def _cookie_factory( c._new_output_dir = lambda: str(datadir) extra_context = { "sbatch_defaults": sbatch_defaults, - "advanced_argument_conversion": advanced + "advanced_argument_conversion": advanced, } if cluster_name is not None: extra_context["cluster_name"] = cluster_name @@ -137,7 +137,7 @@ def _cookie_factory( c.bake(extra_context=extra_context) config = datadir.join("slurm").join("config.yaml") config_d = dict( - [tuple(line.split(":")) for line in config.read().split("\n") if line != ""] + [tuple(line.split(":")) for line in config.read().split("\n") if re.search("^[a-z]", line)] ) config_d.update(**yamlconfig) config.write("\n".join(f"{k}: {v}" for k, v in config_d.items())) diff --git a/{{cookiecutter.profile_name}}/config.yaml b/{{cookiecutter.profile_name}}/config.yaml index e1b007a..05795da 100644 --- a/{{cookiecutter.profile_name}}/config.yaml +++ b/{{cookiecutter.profile_name}}/config.yaml @@ -6,3 +6,17 @@ max-jobs-per-second: 1 max-status-checks-per-second: 10 local-cores: 1 latency-wait: 60 + +# Example resource configuration +# default-resources: +# - runtime=100 +# - mem_mb=6000 +# - disk_mb=1000000 +# # set-threads: map rule names to threads +# set-threads: +# - single_core_rule=1 +# - multi_core_rule=10 +# # set-resources: map rule names to resources in general +# set-resources: +# - high_memory_rule:mem_mb=12000 +# - long_running_rule:runtime=1200