Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scrape ps output to try and kill any grandchild processes when stopping a process #1502

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions supervisor/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,7 @@ def get(section, opt, *args, **kwargs):
serverurl = get(section, 'serverurl', None)
if serverurl and serverurl.strip().upper() == 'AUTO':
serverurl = None
disable_force_shutdown = get(section, 'disable_force_shutdown', False)

# find uid from "user" option
user = get(section, 'user', None)
Expand Down Expand Up @@ -1067,7 +1068,8 @@ def get(section, opt, *args, **kwargs):
exitcodes=exitcodes,
redirect_stderr=redirect_stderr,
environment=environment,
serverurl=serverurl)
serverurl=serverurl,
disable_force_shutdown=disable_force_shutdown)

programs.append(pconfig)

Expand Down Expand Up @@ -1887,8 +1889,8 @@ class ProcessConfig(Config):
'stderr_logfile_backups', 'stderr_logfile_maxbytes',
'stderr_events_enabled', 'stderr_syslog',
'stopsignal', 'stopwaitsecs', 'stopasgroup', 'killasgroup',
'exitcodes', 'redirect_stderr' ]
optional_param_names = [ 'environment', 'serverurl' ]
'exitcodes', 'redirect_stderr']
optional_param_names = [ 'environment', 'serverurl', 'disable_force_shutdown']

def __init__(self, options, **params):
self.options = options
Expand Down
41 changes: 33 additions & 8 deletions supervisor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shlex
import time
import traceback
import psutil

from supervisor.compat import maxint
from supervisor.compat import as_bytes
Expand Down Expand Up @@ -376,11 +377,11 @@ def _check_and_adjust_for_system_clock_rollback(self, test_time):
if self.delay > 0 and test_time < (self.delay - self.backoff):
self.delay = test_time + self.backoff

def stop(self):
def stop(self, supervisor=None):
""" Administrative stop """
self.administrative_stop = True
self.laststopreport = 0
return self.kill(self.config.stopsignal)
return self.kill(self.config.stopsignal, supervisor)

def stop_report(self):
""" Log a 'waiting for x to stop' message with throttling. """
Expand All @@ -401,12 +402,18 @@ def give_up(self):
self._assertInState(ProcessStates.BACKOFF)
self.change_state(ProcessStates.FATAL)

def kill(self, sig):
def kill(self, sig, supervisor=None):
"""Send a signal to the subprocess with the intention to kill
it (to make it exit). This may or may not actually kill it.
Return None if the signal was sent, or an error message string
if an error occurred or if the subprocess is not running.
Parameters:
sig: signal to kill the process
supervsior: supervisor instance to keep track of signaled
processes. This is needed to check if a process properly
terminated. If no supervisor instance is given, this will
not be checked.
"""
now = time.time()
options = self.config.options
Expand Down Expand Up @@ -464,7 +471,25 @@ def kill(self, sig):

try:
try:
# make sure all child processes are properly stopped as well
parent = psutil.Process(abs(pid))
children = parent.children(recursive=True)
for child in reversed(children):
try:
# kill all child processes with same signal as parent
options.kill(child.pid, sig)
# add to list but only if not disabled
if not self.config.disable_force_shutdown and sig is not signal.SIGKILL:
supervisor.terminated_processes.append((child.pid, time.time()))
except PermissionError:
msg = ("No permission to signal child process with name '%s' and pid %i of parent process '%s' with pid %i. "
% (child.name(), child.pid, parent.name(), parent.pid)
+ "Will continue with sending %s to parent process" % (sig.name))
options.logger.warn(msg)
options.kill(pid, sig)
if not self.config.disable_force_shutdown and sig is not signal.SIGKILL and supervisor is not None:
supervisor.terminated_processes.append((pid, time.time()))

except OSError as exc:
if exc.errno == errno.ESRCH:
msg = ("unable to signal %s (pid %s), it probably just exited "
Expand Down Expand Up @@ -653,7 +678,7 @@ def __repr__(self):
def get_state(self):
return self.state

def transition(self):
def transition(self, supervisor=None):
now = time.time()
state = self.state

Expand Down Expand Up @@ -715,7 +740,7 @@ def transition(self):
self.config.options.logger.warn(
'killing \'%s\' (%s) with SIGKILL' % (processname,
self.pid))
self.kill(signal.SIGKILL)
self.kill(signal.SIGKILL, supervisor)

class FastCGISubprocess(Subprocess):
"""Extends Subprocess class to handle FastCGI subprocesses"""
Expand Down Expand Up @@ -810,7 +835,7 @@ def reopenlogs(self):
for process in self.processes.values():
process.reopenlogs()

def stop_all(self):
def stop_all(self, supervisor=None):
processes = list(self.processes.values())
processes.sort()
processes.reverse() # stop in desc priority order
Expand All @@ -819,10 +844,10 @@ def stop_all(self):
state = proc.get_state()
if state == ProcessStates.RUNNING:
# RUNNING -> STOPPING
proc.stop()
proc.stop(supervisor)
elif state == ProcessStates.STARTING:
# STARTING -> STOPPING
proc.stop()
proc.stop(supervisor)
elif state == ProcessStates.BACKOFF:
# BACKOFF -> FATAL
proc.give_up()
Expand Down
5 changes: 4 additions & 1 deletion supervisor/rpcinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,10 @@ def stopProcess(self, name, wait=True):
if process.get_state() not in RUNNING_STATES:
raise RPCError(Faults.NOT_RUNNING, name)

msg = process.stop()
try:
msg = process.stop(self.supervisord)
except:
msg = process.stop()
if msg is not None:
raise RPCError(Faults.FAILED, msg)

Expand Down
31 changes: 27 additions & 4 deletions supervisor/supervisord.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import os
import time
import signal
import psutil

from supervisor.medusa import asyncore_25 as asyncore

Expand All @@ -55,6 +56,7 @@ def __init__(self, options):
self.options = options
self.process_groups = {}
self.ticks = {}
self.terminated_processes = []

def main(self):
if not self.options.first:
Expand Down Expand Up @@ -156,7 +158,7 @@ def shutdown_report(self):
def ordered_stop_groups_phase_1(self):
if self.stop_groups:
# stop the last group (the one with the "highest" priority)
self.stop_groups[-1].stop_all()
self.stop_groups[-1].stop_all(supervisor=self)

def ordered_stop_groups_phase_2(self):
# after phase 1 we've transitioned and reaped, let's see if we
Expand Down Expand Up @@ -196,9 +198,12 @@ def runforever(self):
self.ordered_stop_groups_phase_1()

if not self.shutdown_report():
# if there are no unstopped processes (we're done
# killing everything), it's OK to shutdown or reload
raise asyncore.ExitNow
# make sure all processes are properly terminated
self.kill_process_after_reaching_timeout()
if not self.terminated_processes:
# if there are no unstopped processes (we're done
# killing everything), it's OK to shutdown or reload
raise asyncore.ExitNow

for fd, dispatcher in combined_map.items():
if dispatcher.readable():
Expand Down Expand Up @@ -241,6 +246,7 @@ def runforever(self):
for group in pgroups:
group.transition()

self.kill_process_after_reaching_timeout()
self.reap()
self.handle_signal()
self.tick()
Expand Down Expand Up @@ -316,6 +322,23 @@ def handle_signal(self):
def get_state(self):
return self.options.mood

def kill_process_after_reaching_timeout(self, timeout=60):
"""
Iterate over list of previously terminated processes and check if they
are still alive. Send SIGKILL to all processes which are not not
properly shutdown before the timeout is reached.
"""
if self.terminated_processes:
current_time = time.time()
for pid, terminate_time in self.terminated_processes:
if psutil.pid_exists(pid):
if current_time - terminate_time > timeout:
# Send SIGKILL if process does not terminate succesfully
os.kill(pid, signal.SIGKILL)
self.terminated_processes.remove((pid, terminate_time))
else:
self.terminated_processes.remove((pid, terminate_time))

def timeslice(period, when):
return int(when - (when % period))

Expand Down
9 changes: 5 additions & 4 deletions supervisor/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def removelogs(self):
def get_state(self):
return self.state

def stop(self):
def stop(self, supervisor=None):
self.stop_called = True
self.killing = False
from supervisor.process import ProcessStates
Expand All @@ -443,7 +443,7 @@ def stop(self):
def stop_report(self):
self.stop_report_called = True

def kill(self, signal):
def kill(self, signal, supervisor=None):
self.killed_with = signal

def signal(self, signal):
Expand Down Expand Up @@ -517,7 +517,7 @@ def __init__(self, options, name, command, directory=None, umask=None,
stderr_syslog=False,
redirect_stderr=False,
stopsignal=None, stopwaitsecs=10, stopasgroup=False, killasgroup=False,
exitcodes=(0,), environment=None, serverurl=None):
exitcodes=(0,), environment=None, serverurl=None, disable_force_shutdown=False):
self.options = options
self.name = name
self.command = command
Expand Down Expand Up @@ -553,6 +553,7 @@ def __init__(self, options, name, command, directory=None, umask=None,
self.umask = umask
self.autochildlogs_created = False
self.serverurl = serverurl
self.disable_force_shutdown = disable_force_shutdown

def get_path(self):
return ["/bin", "/usr/bin", "/usr/local/bin"]
Expand Down Expand Up @@ -1049,7 +1050,7 @@ def transition(self):
def before_remove(self):
self.before_remove_called = True

def stop_all(self):
def stop_all(self, supervisor=None):
self.all_stopped = True

def get_unstopped_processes(self):
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ deps =
pytest
pexpect == 4.7.0 # see https://github.com/Supervisor/supervisor/issues/1327
mock >= 0.5.0
psutil == 5.9.0
passenv = END_TO_END
commands =
pytest {posargs}
Expand Down