
Merge pull request #84 from Snakemake-Profiles/update-docs
Update documentation on cluster-config and resources.
percyfal authored Feb 17, 2022
2 parents 8838d43 + b737805 commit e725a99
Showing 4 changed files with 131 additions and 74 deletions.
180 changes: 111 additions & 69 deletions README.md
@@ -14,63 +14,94 @@ scripts
and a configuration file `config.yaml` that defines default values for
snakemake command line arguments. The default `config.yaml` file is

restart-times: 3
jobscript: "slurm-jobscript.sh"
cluster: "slurm-submit.py"
cluster-status: "slurm-status.py"
max-jobs-per-second: 1
max-status-checks-per-second: 10
local-cores: 1
latency-wait: 60

Given an installed profile `profile_name`, when snakemake is run with
`--profile profile_name`, the configuration above would imply the
following snakemake call:

snakemake --jobscript slurm-jobscript.sh --cluster slurm-submit.py --cluster-status slurm-status.py --restart-times 3 --max-jobs-per-second 1 --max-status-checks-per-second 10 --local-cores 1 --latency-wait 60

plus any additional options to snakemake that the user has applied.

Note that the use of the option `--cluster-config` is discouraged, but the
profile still supports it for backwards compatibility. The
default configuration file therefore contains a commented section with
examples of resource configuration (see also [snakemake best
practices](https://snakemake.readthedocs.io/en/stable/snakefiles/best_practices.html?highlight=set-resources#best-practices)):

# Example resource configuration
# default-resources:
# - runtime=100
# - mem_mb=6000
# - disk_mb=1000000
# # set-threads: map rule names to threads
# set-threads:
# - single_core_rule=1
# - multi_core_rule=10
# # set-resources: map rule names to resources in general
# set-resources:
# - high_memory_rule:mem_mb=12000
# - long_running_rule:runtime=1200

See the [snakemake documentation on
profiles](https://snakemake.readthedocs.io/en/stable/executing/cli.html?highlight=profile#profiles)
for more information.

## Alternatives

For a more lightweight alternative, see the excellent repo
[smk-simple-slurm](https://github.com/jdblischak/smk-simple-slurm) by
@jdblischak. In particular, it can handle larger numbers of jobs than this profile (see
[issue
#79](https://github.com/Snakemake-Profiles/slurm/issues/79)).


## Quickstart

To create a slurm profile from the cookiecutter, simply run

cookiecutter https://github.com/Snakemake-Profiles/slurm.git

in a directory. You will be prompted to set some values for your
profile (here assumed to be called `profile_name`), after which the
profile scripts and configuration file will be installed in the
current working directory as `./profile_name`. Then you can run
Snakemake with

snakemake --profile profile_name ...

Note that the `--profile` argument can be either a relative or
absolute path. In addition, snakemake will search for a corresponding
folder `profile_name` in `/etc/xdg/snakemake` and
`$HOME/.config/snakemake`, where globally accessible profiles can be
placed.

## Examples

### Example 1: project setup to use specific slurm account

One typical use case is to set up a profile to use a specific slurm
account:

$ cd ~ && mkdir -p my_project && cd my_project
$ cookiecutter https://github.com/Snakemake-Profiles/slurm.git
profile_name [slurm]: slurm.my_account
sbatch_defaults []: account=my_account no-requeue exclusive
Select advanced_argument_conversion:
1 - no
2 - yes
Choose from 1, 2 [1]:
cluster_name []:
cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...]
cluster_config []:


The command `snakemake --profile slurm.my_account ...` will submit
@@ -84,15 +115,17 @@ Assuming our HPC defines a [multi-cluster
environment](https://slurm.schedmd.com/multi_cluster.html), we can
create a profile that uses a specified cluster:

$ cookiecutter slurm
profile_name [slurm]: slurm.dusk
sbatch_defaults []: account=my_account
Select advanced_argument_conversion:
1 - no
2 - yes
Choose from 1, 2 [1]:
cluster_name []: dusk
cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...]
cluster_config []:


(Note that once the cookiecutter template has been installed, we can
reuse it without specifying the GitHub URL.)
@@ -107,15 +140,16 @@ addition, the `slurm-status.py` script will check for jobs in the
As a final example, assume we want to use advanced argument
conversion:

$ cookiecutter slurm
profile_name [slurm]: slurm.convert_args
sbatch_defaults []: account=my_account
Select advanced_argument_conversion:
1 - no
2 - yes
Choose from 1, 2 [1]: 2
cluster_name []:
cluster_config_help: [The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...]
cluster_config []:

The command `snakemake --profile slurm.convert_args ...` will now
submit jobs with `sbatch --parsable --account=my_account`. The
@@ -130,14 +164,7 @@ See the section below
* `profile_name` : A name to address the profile via the `--profile`
Snakemake option.
* `sbatch_defaults` : List of default arguments to sbatch, e.g.:
`qos=short time=60`.
* `advanced_argument_conversion` : If True, try to adjust/constrain
mem, time, nodes and ntasks (i.e. cpus) to the parsed or default
partition after converting resources. This may fail due to
@@ -147,26 +174,41 @@ See the section below
cluster name, leave empty to use the default. This will add the
`--cluster` string to the sbatch defaults, and adjust
`slurm-status.py` to check status on the relevant cluster.
* `cluster_config` (NB: discouraged): Path to a YAML or JSON configuration file
analogous to the Snakemake [`--cluster-config`
option](https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html#cluster-configuration-deprecated).
The path may be relative to the profile directory or absolute, and may
include environment variables (e.g.
`$PROJECT_ROOT/config/slurm_defaults.yaml`); see the sketch below.
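
The following is a minimal, hypothetical example of such a (discouraged)
cluster config file; the rule entry and values are made up, and the keys
must be named after sbatch long options:

    __default__:
      account: my_account
      time: "01:00:00"
    bwa_mem:
      time: "12:00:00"
      mem: 16G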


### Default snakemake arguments
Default arguments to `snakemake` may be adjusted in the `<profile
path>/config.yaml` file.
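
For instance, any of the defaults above can be overridden, and other
snakemake command line options added, directly in this file (the
values below are purely illustrative):

    # submit more aggressively and cap the number of concurrent jobs
    max-jobs-per-second: 10
    jobs: 100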

### Parsing arguments to SLURM (sbatch) and resource configuration

NB: As previously pointed out, the use of cluster-config is
discouraged. Rule-specific resource configuration is better handled by
snakemake's CLI arguments (see [snakemake best
practices](https://snakemake.readthedocs.io/en/stable/snakefiles/best_practices.html?highlight=set-resources#best-practices)),
which can be put in the profile configuration file.
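
As a sketch, such options map directly onto entries in `<profile
path>/config.yaml`; the rule names and values below are hypothetical:

    # equivalent to the command line options
    #   --default-resources mem_mb=4000 runtime=120
    #   --set-threads bwa_mem=8 --set-resources bwa_mem:mem_mb=16000
    default-resources:
      - mem_mb=4000
      - runtime=120
    set-threads:
      - bwa_mem=8
    set-resources:
      - bwa_mem:mem_mb=16000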

Arguments are set and overridden in the following order and must be
named according to [sbatch long option
names](https://slurm.schedmd.com/sbatch.html):

1) `sbatch_defaults` cookiecutter option
2) Profile `cluster_config` file `__default__` entries
3) Snakefile threads and resources (time, mem)
4) Profile `cluster_config` file <rulename> entries
5) `--cluster-config` passed to Snakemake (deprecated since Snakemake 5.10)
6) Snakemake CLI resource configuration in the profile configuration file
7) Any other argument conversion (experimental; currently time, ntasks and mem) if `advanced_argument_conversion` is True.
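
As an illustration of this ordering, consider a hypothetical rule
`myrule` with made-up values: `time=60` from `sbatch_defaults` is
overridden by a Snakefile resource `runtime=120`, which in turn is
overridden by a `set-resources` entry in the profile configuration
file (conversion to `sbatch` arguments is handled by the submit
script):

    # 1) sbatch_defaults: time=60          -> submitted with --time=60
    # 3) Snakefile: runtime=120            -> overrides to --time=120
    # 6) profile config.yaml entry below   -> overrides to --time=240
    set-resources:
      - myrule:runtime=240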

### Rule-specific resource configuration
In addition to Snakemake CLI resource configuration, resources can be
specified in Snakefile rules and must all be in the correct
unit/format as expected by `sbatch`. The implemented resource names
are given (and may be adjusted) in the `slurm_utils.RESOURCE_MAPPING`
global. This is intended for system-agnostic resources such as time
@@ -189,19 +231,19 @@ available cluster resources. For instance, if the memory is set larger
than the maximum memory of any node, `sbatch` will exit with the
message

sbatch: error: CPU count per node can not be satisfied
sbatch: error: Batch job submission failed: Requested node configuration is not available

If advanced argument conversion is chosen when creating a profile, an
attempt will be made to adjust memory, cpu and time settings that do
not comply with the cluster configuration. As an example, consider a
rule with the following resources and threads:

rule bwa_mem:
    resources:
        mem_mb = lambda wildcards, attempt: attempt * 8000,
        runtime = lambda wildcards, attempt: attempt * 1200
    threads: 1

Assume further that the available cores provide 6400MB memory per
core. If the job reaches its peak memory usage (8000MB), it will likely be
@@ -243,7 +285,7 @@ The `__default__` entry will apply to all jobs.
Tests can be run on an HPC running SLURM or locally in a
docker stack. To execute tests, run

pytest -v -s tests

from the source code root directory. Test options can be configured
via the pytest configuration file `tests/pytest.ini`.
@@ -279,7 +321,7 @@ swarm](https://docs.docker.com/engine/swarm/) (`docker swarm init`).
The docker stack can also be deployed manually from the top-level
directory as follows:

DOCKER_COMPOSE=tests/docker-compose.yaml ./tests/deploystack.sh

See the deployment script `tests/deploystack.sh` for details.

@@ -305,17 +347,17 @@ As an example, `tests/test_slurm_advanced.py` defines a fixture
`profile` that uses the `cookie_factory` fixture factory to create an
slurm profile that uses advanced argument conversion:

@pytest.fixture
def profile(cookie_factory, data):
    cookie_factory(advanced="yes")

The test `tests/test_slurm_advanced.py::test_adjust_runtime` depends
on this fixture and `smk_runner`:

def test_adjust_runtime(smk_runner, profile):
    smk_runner.make_target(
        "timeout.txt", options=f"--cluster-config {smk_runner.cluster_config}"
    )


The `make_target` method makes a snakemake target with additional
5 changes: 3 additions & 2 deletions cookiecutter.json
@@ -1,7 +1,8 @@
{
"profile_name": "slurm",
"sbatch_defaults": "",
"cluster_config": "",
"advanced_argument_conversion": ["no", "yes"],
"cluster_name": ""
"cluster_name": "",
"cluster_config_help": "The use of cluster-config is discouraged. Rather, set snakemake CLI options in the profile configuration file (see snakemake documentation on best practices). Enter to continue...",
"cluster_config": ""
}
6 changes: 3 additions & 3 deletions tests/conftest.py
@@ -120,15 +120,15 @@ def _cookie_factory(
advanced="no",
cluster_name=None,
cluster_config=None,
yamlconfig=_yamlconfig_default,
):
cookie_template = pjoin(os.path.abspath(pytest.dname), os.pardir)
output_factory = tmpdir_factory.mktemp
c = Cookies(cookie_template, output_factory, _cookiecutter_config_file)
c._new_output_dir = lambda: str(datadir)
extra_context = {
"sbatch_defaults": sbatch_defaults,
"advanced_argument_conversion": advanced
"advanced_argument_conversion": advanced,
}
if cluster_name is not None:
extra_context["cluster_name"] = cluster_name
@@ -137,7 +137,7 @@ c.bake(extra_context=extra_context)
c.bake(extra_context=extra_context)
config = datadir.join("slurm").join("config.yaml")
config_d = dict(
[tuple(line.split(":")) for line in config.read().split("\n") if re.search("^[a-z]", line)]
)
config_d.update(**yamlconfig)
config.write("\n".join(f"{k}: {v}" for k, v in config_d.items()))
14 changes: 14 additions & 0 deletions {{cookiecutter.profile_name}}/config.yaml
@@ -6,3 +6,17 @@ max-jobs-per-second: 1
max-status-checks-per-second: 10
local-cores: 1
latency-wait: 60

# Example resource configuration
# default-resources:
# - runtime=100
# - mem_mb=6000
# - disk_mb=1000000
# # set-threads: map rule names to threads
# set-threads:
# - single_core_rule=1
# - multi_core_rule=10
# # set-resources: map rule names to resources in general
# set-resources:
# - high_memory_rule:mem_mb=12000
# - long_running_rule:runtime=1200

