Skip to content

Commit

Permalink
Fix kernelkill nrKilled accounting
Browse files Browse the repository at this point in the history
Summary:
Recently we added kernelkill support to kill using cgroup2 cgroup.kill control file. Turns out it often marks kill as failed because it does not account nrKilled correctly. This makes post action delay no longer work, and thus we may kill sibling cgroups that's not misbehaving.

The root cause of such behavior is that writing to cgroup.kill does not immediately reduce the number in pids.current. The previous kill logics walks through cgroup.procs in all nested cgroups and send sigkill one by one. That could fail in may cases e.g. process gone by the time you send signal. Therefore it needs to keep track of nrKilled and have a loop with multiple trials. However, with cgroup.kill, we can simply assume writing to it should get all processes kill eventually. If not, oomd will handle it after post action delay.

This diff:
- Make freezing cgroup optional. If freeze fails, we should still kill
- No longer unfreeze cgroup at the end. The cgroup is supposed to be gone after killed
- Remove the while loop because writing to cgroup.kill should ensure all processes are eventually killed
- Only read pids.current once and assume that will be the nrKilled if writing to cgroup.kill succeeds, and don't kill if the file cannot be read or reads zero
- Log if nrKilled is zero

Differential Revision: D64848249

fbshipit-source-id: af2a70d80aeba5ce7aaaac376128255ad7248bcb
  • Loading branch information
lnyng authored and facebook-github-bot committed Oct 24, 2024
1 parent a621170 commit e9e97ae
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 44 deletions.
62 changes: 20 additions & 42 deletions src/oomd/plugins/BaseKillPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,22 +445,11 @@ int BaseKillPlugin::getAndTryToKillPids(const CgroupContext& target) {
}

int BaseKillPlugin::kernelKillCgroup(const CgroupContext& target) {
if (!Fs::writeKillAt(target.fd())) {
OLOG << "Failed to write to cgroup.kill for "
<< target.cgroup().absolutePath();
return 1;
}
return 0;
return Fs::writeKillAt(target.fd()) ? 0 : 1;
}

int BaseKillPlugin::freezeCgroup(const CgroupContext& target, int freeze) {
if (!Fs::writeFreezeAt(target.fd(), freeze)) {
OLOG << "Failed to write to cgroup.freeze for "
<< target.cgroup().absolutePath();
return 1;
}

return 0;
int BaseKillPlugin::freezeCgroup(const CgroupContext& target) {
return Fs::writeFreezeAt(target.fd(), 1) ? 0 : 1;
}

int BaseKillPlugin::dumpMemoryStat(const CgroupContext& target) {
Expand Down Expand Up @@ -509,36 +498,22 @@ int BaseKillPlugin::tryToKillCgroup(
reportKillInitiationToXattr(cgroupPath);

if (kernelKill_) {
if (freezeCgroup(target, 1)) {
return 0;
if (freezeCgroup(target)) {
OLOG << "Failed to freeze cgroup " << target.cgroup().absolutePath()
<< ", proceed to kill without freezing";
}

while (tries--) {
auto procsBeforeKill = Fs::readPidsCurrentAt(target.fd());

if (kernelKillCgroup(target)) {
// Killing failed, so we report the number of processes
// killed up to this point.
return nrKilled;
}

auto procsAfterKill = Fs::readPidsCurrentAt(target.fd());

if (!procsBeforeKill || !procsAfterKill) {
OLOG << "Failed to read pids from " << target.cgroup().absolutePath();
return nrKilled;
}

if (procsAfterKill.value() == 0 ||
procsBeforeKill.value() == procsAfterKill.value()) {
break;
}

nrKilled += procsBeforeKill.value() - procsAfterKill.value();
}

if (freezeCgroup(target, 0)) {
return nrKilled;
auto procsBeforeKill = Fs::readPidsCurrentAt(target.fd());
if (!procsBeforeKill) {
OLOG << "Failed to read pids from " << target.cgroup().absolutePath()
<< ", skip killing cgroup";
} else if (*procsBeforeKill == 0) {
OLOG << "No pids in " << target.cgroup().absolutePath()
<< ", skip killing cgroup";
} else if (kernelKillCgroup(target)) {
OLOG << "Failed to kill cgroup " << target.cgroup().absolutePath();
} else {
nrKilled = procsBeforeKill.value();
}
} else {
while (tries--) {
Expand Down Expand Up @@ -696,6 +671,9 @@ bool BaseKillPlugin::tryToLogAndKillCgroup(
Oomd::incrementStat(CoreStats::kKillsKey, 1);
}
OOMD_KMSG_LOG(oss.str(), "oomd kill");
} else {
OLOG << "Failed to kill any process from "
<< candidate.cgroupCtx.get().cgroup().absolutePath();
}

dumpKillInfo(
Expand Down
2 changes: 1 addition & 1 deletion src/oomd/plugins/BaseKillPlugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ class BaseKillPlugin : public Engine::BasePlugin {
virtual bool pastPrekillHookTimeout(const OomdContext& ctx) const;
virtual int dumpMemoryStat(const CgroupContext& target);

virtual int freezeCgroup(const CgroupContext& target, int freeze);
virtual int freezeCgroup(const CgroupContext& target);
virtual int kernelKillCgroup(const CgroupContext& target);

private:
Expand Down
2 changes: 1 addition & 1 deletion src/oomd/plugins/CorePluginsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class BaseKillPluginMock : public BaseKillPlugin {
/*
* Since we are just simulating killing, we don't need to freeze the cgroup
*/
int freezeCgroup(const CgroupContext& target, int freeze) override {
int freezeCgroup(const CgroupContext& target) override {
return 0;
}

Expand Down

0 comments on commit e9e97ae

Please sign in to comment.