Skip to content

Commit

Permalink
WIP: Forward parent death to descendant processes
Browse files Browse the repository at this point in the history
If the uds_fd connection to the parent BEAM is broken or closed, react
by killing all children and any descendants in the same process group.

Changing from _exit to exit is significant. This patch installs an
atexit handler, which wouldn't be executed when using the _exit
variant.  It's unclear why the "immediate" variant was being used
until now, it would make sense if there were a danger of eg.  memory
corruption which would cause segfaults on freeing, but many of these
were only being used on wire protocol errors.

To start a child process which can outlive BEAM termination, give it a
new process group for example by using `setsid`:

    erl -noshell -eval 'os:cmd("setsid sleep 60")'

FIXME: Killing child processes may be a breaking change for any code
which assumed that the child process will outlive the VM.  Is this
fixing a bug, or causing one?  If the default should remain unchanged
we can add an optional flag such as `kill_group` to open_port making
this behavior switchable per child.

TODO: Needs docs and tests.
  • Loading branch information
adamwight committed Feb 19, 2025
1 parent 7bf33ef commit 9f87bc1
Showing 1 changed file with 24 additions and 5 deletions.
29 changes: 24 additions & 5 deletions erts/emulator/sys/unix/erl_child_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ static ssize_t write_all(int fd, const char *buff, size_t size) {
return pos;
}

static void kill_all_children(void);
static void add_os_pid_to_port_id_mapping(Eterm, pid_t);
static Eterm get_port_id(pid_t);
static int forker_hash_init(void);
Expand Down Expand Up @@ -300,7 +301,7 @@ start_new_child(int pipes[])
if (wd && chdir(wd) < 0) {
int err = errno;
fprintf(stderr,"spawn: Could not cd to %s\r\n", wd);
_exit(err);
exit(err);
}

DEBUG_PRINT("Do that forking business: '%s'",cmd);
Expand Down Expand Up @@ -364,12 +365,12 @@ start_new_child(int pipes[])
}

DEBUG_PRINT("exec error: %d",errno);
_exit(errno);
exit(errno);

child_error:
fprintf(stderr,"erl_child_setup: failed with error %d on line %d\r\n",
errno, errln);
_exit(errno);
exit(errno);
}


Expand Down Expand Up @@ -537,6 +538,10 @@ main(int argc, char *argv[])

DEBUG_PRINT("Starting forker %d", max_files);

if (atexit(kill_all_children) != 0) {
ABORT("Failed to install atexit handler");
}

while (1) {
fd_set read_fds;
int res;
Expand Down Expand Up @@ -564,15 +569,15 @@ main(int argc, char *argv[])
tcsetattr(0,TCSANOW,&initial_tty_mode);
}
DEBUG_PRINT("erl_child_setup failed to read from uds: %d, %d", res, errno);
_exit(0);
exit(0);
}

if (res == 0) {
DEBUG_PRINT("uds was closed!");
if (isatty(0) && isatty(1)) {
tcsetattr(0,TCSANOW,&initial_tty_mode);
}
_exit(0);
exit(0);
}
/* Since we use unix domain sockets and send the entire data in
one go we *should* get the entire payload at once. */
Expand Down Expand Up @@ -642,6 +647,20 @@ typedef struct exit_status {

static Hash *forker_hash;

/*
* Kill child processes so they don't become orphaned.
* TODO: Decide whether there should be an option to leave some children unkilled. */

static void fun_kill_foreach(ErtsSysExitStatus *es, void *unused) {
if (kill(-es->os_pid, SIGKILL) != 0) {
DEBUG_PRINT("error killing process group %d: %d", es->os_pid, errno);
}
}

static void kill_all_children(void) {
hash_foreach(forker_hash, (HFOREACH_FUN)fun_kill_foreach, NULL);
}

static void add_os_pid_to_port_id_mapping(Eterm port_id, pid_t os_pid)
{
ErtsSysExitStatus es;
Expand Down

0 comments on commit 9f87bc1

Please sign in to comment.