From d136cb0478823cbf59c47e65f4d8d98ceedb87fd Mon Sep 17 00:00:00 2001
From: Gerwin Klein
Date: Fri, 1 Mar 2024 02:03:02 +0100
Subject: [PATCH] builds.py: let mq lock be reclaimed after 30min (#337)

A typical job takes about 5-10 min. If the lock has been held for 30 min
in a single job, we assume something has gone wrong and the lock should
be released so other jobs (or humans needing the machine) can proceed.

This step is only necessary when other timeouts have failed. It is to
guard against the case where the lock release after timeout has failed,
and the post-step lock release also has failed. This can happen when the
machine queue server is temporarily unreachable on the network, and then
comes back with the lock still in place. In this case, there is nothing
the scripts here can do to release that lock.

Signed-off-by: Gerwin Klein
---
 seL4-platforms/builds.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/seL4-platforms/builds.py b/seL4-platforms/builds.py
index 97e8164e..d7dab602 100644
--- a/seL4-platforms/builds.py
+++ b/seL4-platforms/builds.py
@@ -451,8 +451,8 @@ def mq_run(success_str: str,
 
 
 def mq_lock(machine: str) -> List[str]:
-    """Get lock for a machine."""
-    return ['time', 'mq.sh', 'sem', '-wait', machine, '-k', job_key()]
+    """Get lock for a machine. Allow lock to be reclaimed after 30min."""
+    return ['time', 'mq.sh', 'sem', '-wait', machine, '-k', job_key(), '-T', '1800']
 
 
 def mq_release(machine: str) -> List[str]: