Skip to content

Commit

Permalink
Scheduler: Allow a memory specification of zero for the SLURM plugin (#…
Browse files Browse the repository at this point in the history
…6605)

In the SLURM memory specification, a value of zero is treated as a special
case indicating that no memory limit should be used.
This is documented here: https://slurm.schedmd.com/sbatch.html under the
'--mem' section.

However, the SLURM plugin currently requires a strictly positive integer,
which unexpectedly rules out this documented special value. This commit
relaxes the check so that a value of 0 is also accepted.
  • Loading branch information
ConradJohnston authored Nov 5, 2024
1 parent 7057238 commit 0fa9582
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/aiida/schedulers/plugins/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,14 @@ def _get_submit_script_header(self, job_tmpl):
lines.append(f'#SBATCH --time={days:d}-{hours:02d}:{minutes:02d}:{seconds:02d}')

# It is the memory per node, not per cpu!
if job_tmpl.max_memory_kb:
if job_tmpl.max_memory_kb is not None:
try:
physical_memory_kb = int(job_tmpl.max_memory_kb)
if physical_memory_kb <= 0:
if physical_memory_kb < 0: # 0 is allowed and means no limit (https://slurm.schedmd.com/sbatch.html)
raise ValueError
except ValueError:
raise ValueError(
f'max_memory_kb must be a positive integer (in kB)! It is instead `{job_tmpl.max_memory_kb}`'
f'max_memory_kb must be a non-negative integer (in kB)! It is instead `{job_tmpl.max_memory_kb}`'
)
# --mem: Specify the real memory required per node in MegaBytes.
# --mem and --mem-per-cpu are mutually exclusive.
Expand Down
52 changes: 52 additions & 0 deletions tests/schedulers/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,58 @@ def test_submit_script_with_num_cores_per_machine_and_mpiproc2(self):
num_machines=1, num_mpiprocs_per_machine=1, num_cores_per_machine=24, num_cores_per_mpiproc=23
)

def test_submit_script_with_mem(self):
    """Check that a memory specification ends up in the submit script as ``--mem``.

    Two values are exercised: a regular positive amount, and the special value
    zero, which the plugin accepts (a comment in the plugin notes that SLURM
    treats it as "no limit", see https://slurm.schedmd.com/sbatch.html).
    """
    from aiida.common.datastructures import CodeRunMode
    from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

    slurm = SlurmScheduler()

    # Describe the single code that the job is going to run.
    code_info = JobTemplateCodeInfo()
    code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
    code_info.stdin_name = 'aiida.in'

    template = JobTemplate()
    template.uuid = str(uuid.uuid4())
    template.max_wallclock_seconds = 24 * 3600
    template.codes_info = [code_info]
    template.codes_run_mode = CodeRunMode.SERIAL
    template.job_resource = slurm.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)

    # Each case pairs a memory request in kB with the header line expected in the script.
    cases = (
        (316 * 1024, '#SBATCH --mem=316'),  # regular (positive) value
        (0, '#SBATCH --mem=0'),  # special zero value
    )
    for memory_kb, expected_line in cases:
        template.max_memory_kb = memory_kb
        script = slurm.get_submit_script(template)
        assert expected_line in script

def test_submit_script_with_negative_mem_value(self):
    """Check that a negative ``max_memory_kb`` is rejected when building the submit script.

    The scheduler plugin is expected to raise a ``ValueError`` whose message
    reports the offending value; zero is the smallest value it accepts.
    """
    import re

    from aiida.schedulers.datastructures import JobTemplate

    scheduler = SlurmScheduler()
    job_tmpl = JobTemplate()

    # Keep the setup outside of ``pytest.raises`` so that only the call under
    # test can satisfy the expectation; an error raised during setup must make
    # the test fail instead of being mistaken for the expected one.
    job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
    job_tmpl.max_memory_kb = -9

    with pytest.raises(
        ValueError, match=re.escape('max_memory_kb must be a non-negative integer (in kB)! It is instead `-9`')
    ):
        scheduler.get_submit_script(job_tmpl)

def test_submit_script_rerunnable(self):
"""Test the creation of a submission script with the `rerunnable` option."""
from aiida.common.datastructures import CodeRunMode
Expand Down

0 comments on commit 0fa9582

Please sign in to comment.