diff --git a/supervisor/options.py b/supervisor/options.py index 20c1b07aa..75f61562b 100644 --- a/supervisor/options.py +++ b/supervisor/options.py @@ -942,6 +942,7 @@ def get(section, opt, *args, **kwargs): serverurl = get(section, 'serverurl', None) if serverurl and serverurl.strip().upper() == 'AUTO': serverurl = None + disable_force_shutdown = get(section, 'disable_force_shutdown', False) # find uid from "user" option user = get(section, 'user', None) @@ -1067,7 +1068,8 @@ def get(section, opt, *args, **kwargs): exitcodes=exitcodes, redirect_stderr=redirect_stderr, environment=environment, - serverurl=serverurl) + serverurl=serverurl, + disable_force_shutdown=disable_force_shutdown) programs.append(pconfig) @@ -1887,8 +1889,8 @@ class ProcessConfig(Config): 'stderr_logfile_backups', 'stderr_logfile_maxbytes', 'stderr_events_enabled', 'stderr_syslog', 'stopsignal', 'stopwaitsecs', 'stopasgroup', 'killasgroup', - 'exitcodes', 'redirect_stderr' ] - optional_param_names = [ 'environment', 'serverurl' ] + 'exitcodes', 'redirect_stderr'] + optional_param_names = [ 'environment', 'serverurl', 'disable_force_shutdown'] def __init__(self, options, **params): self.options = options diff --git a/supervisor/process.py b/supervisor/process.py index b394be812..7d0059275 100644 --- a/supervisor/process.py +++ b/supervisor/process.py @@ -5,6 +5,7 @@ import shlex import time import traceback +import psutil from supervisor.compat import maxint from supervisor.compat import as_bytes @@ -376,11 +377,11 @@ def _check_and_adjust_for_system_clock_rollback(self, test_time): if self.delay > 0 and test_time < (self.delay - self.backoff): self.delay = test_time + self.backoff - def stop(self): + def stop(self, supervisor=None): """ Administrative stop """ self.administrative_stop = True self.laststopreport = 0 - return self.kill(self.config.stopsignal) + return self.kill(self.config.stopsignal, supervisor) def stop_report(self): """ Log a 'waiting for x to stop' message with throttling. """ @@ -401,12 +402,18 @@ def give_up(self): self._assertInState(ProcessStates.BACKOFF) self.change_state(ProcessStates.FATAL) - def kill(self, sig): + def kill(self, sig, supervisor=None): """Send a signal to the subprocess with the intention to kill it (to make it exit). This may or may not actually kill it. Return None if the signal was sent, or an error message string if an error occurred or if the subprocess is not running. + Parameters: + sig: signal to kill the process + supervsior: supervisor instance to keep track of signaled + processes. This is needed to check if a process properly + terminated. If no supervisor instance is given, this will + not be checked. """ now = time.time() options = self.config.options @@ -464,7 +471,25 @@ def kill(self, sig): try: try: + # make sure all child processes are properly stopped as well + parent = psutil.Process(abs(pid)) + children = parent.children(recursive=True) + for child in reversed(children): + try: + # kill all child processes with same signal as parent + options.kill(child.pid, sig) + # add to list but only if not disabled + if not self.config.disable_force_shutdown and sig is not signal.SIGKILL: + supervisor.terminated_processes.append((child.pid, time.time())) + except PermissionError: + msg = ("No permission to signal child process with name '%s' and pid %i of parent process '%s' with pid %i. " + % (child.name(), child.pid, parent.name(), parent.pid) + + "Will continue with sending %s to parent process" % (sig.name)) + options.logger.warn(msg) options.kill(pid, sig) + if not self.config.disable_force_shutdown and sig is not signal.SIGKILL and supervisor is not None: + supervisor.terminated_processes.append((pid, time.time())) + except OSError as exc: if exc.errno == errno.ESRCH: msg = ("unable to signal %s (pid %s), it probably just exited " @@ -653,7 +678,7 @@ def __repr__(self): def get_state(self): return self.state - def transition(self): + def transition(self, supervisor=None): now = time.time() state = self.state @@ -715,7 +740,7 @@ def transition(self): self.config.options.logger.warn( 'killing \'%s\' (%s) with SIGKILL' % (processname, self.pid)) - self.kill(signal.SIGKILL) + self.kill(signal.SIGKILL, supervisor) class FastCGISubprocess(Subprocess): """Extends Subprocess class to handle FastCGI subprocesses""" @@ -810,7 +835,7 @@ def reopenlogs(self): for process in self.processes.values(): process.reopenlogs() - def stop_all(self): + def stop_all(self, supervisor=None): processes = list(self.processes.values()) processes.sort() processes.reverse() # stop in desc priority order @@ -819,10 +844,10 @@ def stop_all(self): state = proc.get_state() if state == ProcessStates.RUNNING: # RUNNING -> STOPPING - proc.stop() + proc.stop(supervisor) elif state == ProcessStates.STARTING: # STARTING -> STOPPING - proc.stop() + proc.stop(supervisor) elif state == ProcessStates.BACKOFF: # BACKOFF -> FATAL proc.give_up() diff --git a/supervisor/rpcinterface.py b/supervisor/rpcinterface.py index 7b32ae8c9..12882df93 100644 --- a/supervisor/rpcinterface.py +++ b/supervisor/rpcinterface.py @@ -411,7 +411,10 @@ def stopProcess(self, name, wait=True): if process.get_state() not in RUNNING_STATES: raise RPCError(Faults.NOT_RUNNING, name) - msg = process.stop() + try: + msg = process.stop(self.supervisord) + except: + msg = process.stop() if msg is not None: raise RPCError(Faults.FAILED, msg) diff --git a/supervisor/supervisord.py b/supervisor/supervisord.py index 138732dd4..cb4c7ec05 100755 --- a/supervisor/supervisord.py +++ b/supervisor/supervisord.py @@ -34,6 +34,7 @@ import os import time import signal +import psutil from supervisor.medusa import asyncore_25 as asyncore @@ -55,6 +56,7 @@ def __init__(self, options): self.options = options self.process_groups = {} self.ticks = {} + self.terminated_processes = [] def main(self): if not self.options.first: @@ -156,7 +158,7 @@ def shutdown_report(self): def ordered_stop_groups_phase_1(self): if self.stop_groups: # stop the last group (the one with the "highest" priority) - self.stop_groups[-1].stop_all() + self.stop_groups[-1].stop_all(supervisor=self) def ordered_stop_groups_phase_2(self): # after phase 1 we've transitioned and reaped, let's see if we @@ -196,9 +198,12 @@ def runforever(self): self.ordered_stop_groups_phase_1() if not self.shutdown_report(): - # if there are no unstopped processes (we're done - # killing everything), it's OK to shutdown or reload - raise asyncore.ExitNow + # make sure all processes are properly terminated + self.kill_process_after_reaching_timeout() + if not self.terminated_processes: + # if there are no unstopped processes (we're done + # killing everything), it's OK to shutdown or reload + raise asyncore.ExitNow for fd, dispatcher in combined_map.items(): if dispatcher.readable(): @@ -241,6 +246,7 @@ def runforever(self): for group in pgroups: group.transition() + self.kill_process_after_reaching_timeout() self.reap() self.handle_signal() self.tick() @@ -316,6 +322,23 @@ def handle_signal(self): def get_state(self): return self.options.mood + def kill_process_after_reaching_timeout(self, timeout=60): + """ + Iterate over list of previously terminated processes and check if they + are still alive. Send SIGKILL to all processes which are not not + properly shutdown before the timeout is reached. + """ + if self.terminated_processes: + current_time = time.time() + for pid, terminate_time in self.terminated_processes: + if psutil.pid_exists(pid): + if current_time - terminate_time > timeout: + # Send SIGKILL if process does not terminate succesfully + os.kill(pid, signal.SIGKILL) + self.terminated_processes.remove((pid, terminate_time)) + else: + self.terminated_processes.remove((pid, terminate_time)) + def timeslice(period, when): return int(when - (when % period)) diff --git a/supervisor/tests/base.py b/supervisor/tests/base.py index 7553a2cce..061a63111 100644 --- a/supervisor/tests/base.py +++ b/supervisor/tests/base.py @@ -434,7 +434,7 @@ def removelogs(self): def get_state(self): return self.state - def stop(self): + def stop(self, supervisor=None): self.stop_called = True self.killing = False from supervisor.process import ProcessStates @@ -443,7 +443,7 @@ def stop(self): def stop_report(self): self.stop_report_called = True - def kill(self, signal): + def kill(self, signal, supervisor=None): self.killed_with = signal def signal(self, signal): @@ -517,7 +517,7 @@ def __init__(self, options, name, command, directory=None, umask=None, stderr_syslog=False, redirect_stderr=False, stopsignal=None, stopwaitsecs=10, stopasgroup=False, killasgroup=False, - exitcodes=(0,), environment=None, serverurl=None): + exitcodes=(0,), environment=None, serverurl=None, disable_force_shutdown=False): self.options = options self.name = name self.command = command @@ -553,6 +553,7 @@ def __init__(self, options, name, command, directory=None, umask=None, self.umask = umask self.autochildlogs_created = False self.serverurl = serverurl + self.disable_force_shutdown = disable_force_shutdown def get_path(self): return ["/bin", "/usr/bin", "/usr/local/bin"] @@ -1049,7 +1050,7 @@ def transition(self): def before_remove(self): self.before_remove_called = True - def stop_all(self): + def stop_all(self, supervisor=None): self.all_stopped = True def get_unstopped_processes(self): diff --git a/tox.ini b/tox.ini index 28953354b..7c1da8daf 100644 --- a/tox.ini +++ b/tox.ini @@ -8,6 +8,7 @@ deps = pytest pexpect == 4.7.0 # see https://github.com/Supervisor/supervisor/issues/1327 mock >= 0.5.0 + psutil == 5.9.0 passenv = END_TO_END commands = pytest {posargs}