Skip to content

Commit

Permalink
Using kernel cgroup.kill and cgroup.freeze in kill plugins
Browse files Browse the repository at this point in the history
Summary:
Commit [2 / 3]

Replaces manual SIGKILLing of cgroups with the kernel killer (cgroup.kill) and cgroup freezer (cgroup.kill) to prevent proc growth once a cgroup has been selected to be sacrificed.

Reviewed By: lnyng

Differential Revision: D62256368

fbshipit-source-id: cc96edf6d38fa03dad136082d21d658e6404d4b5
  • Loading branch information
Joshua Hahn authored and facebook-github-bot committed Sep 12, 2024
1 parent 914a864 commit a621170
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 17 deletions.
88 changes: 74 additions & 14 deletions src/oomd/plugins/BaseKillPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ int BaseKillPlugin::init(
argParser_.addArgument("dry", dry_);
argParser_.addArgument("always_continue", alwaysContinue_);
argParser_.addArgument("debug", debug_);
argParser_.addArgument("kernelkill", kernelKill_);

if (!argParser_.parse(args)) {
return 1;
Expand Down Expand Up @@ -443,6 +444,25 @@ int BaseKillPlugin::getAndTryToKillPids(const CgroupContext& target) {
return nrKilled;
}

int BaseKillPlugin::kernelKillCgroup(const CgroupContext& target) {
if (!Fs::writeKillAt(target.fd())) {
OLOG << "Failed to write to cgroup.kill for "
<< target.cgroup().absolutePath();
return 1;
}
return 0;
}

int BaseKillPlugin::freezeCgroup(const CgroupContext& target, int freeze) {
if (!Fs::writeFreezeAt(target.fd(), freeze)) {
OLOG << "Failed to write to cgroup.freeze for "
<< target.cgroup().absolutePath();
return 1;
}

return 0;
}

int BaseKillPlugin::dumpMemoryStat(const CgroupContext& target) {
auto stats =
Fs::readFileByLine(Fs::Fd::openat(target.fd(), Fs::kMemStatFile));
Expand Down Expand Up @@ -474,7 +494,12 @@ int BaseKillPlugin::tryToKillCgroup(
return true;
}

OLOG << "Trying to kill " << cgroupPath;
if (kernelKill_) {
OLOG << "Trying to kill " << cgroupPath
<< " with cgroup.freeze and cgroup.kill";
} else {
OLOG << "Trying to kill " << cgroupPath;
}

if (dumpMemoryStat(target)) {
OLOG << "Failed to open " << cgroupPath << "/memory.stat";
Expand All @@ -483,26 +508,61 @@ int BaseKillPlugin::tryToKillCgroup(
reportKillUuidToXattr(cgroupPath, killUuid);
reportKillInitiationToXattr(cgroupPath);

while (tries--) {
// Descendent cgroups created during killing will be missed because
// getAndTryToKillPids reads cgroup children from OomdContext's cache
if (kernelKill_) {
if (freezeCgroup(target, 1)) {
return 0;
}

nrKilled += getAndTryToKillPids(target);
while (tries--) {
auto procsBeforeKill = Fs::readPidsCurrentAt(target.fd());

if (nrKilled == lastNrKilled) {
break;
if (kernelKillCgroup(target)) {
// Killing failed, so we report the number of processes
// killed up to this point.
return nrKilled;
}

auto procsAfterKill = Fs::readPidsCurrentAt(target.fd());

if (!procsBeforeKill || !procsAfterKill) {
OLOG << "Failed to read pids from " << target.cgroup().absolutePath();
return nrKilled;
}

if (procsAfterKill.value() == 0 ||
procsBeforeKill.value() == procsAfterKill.value()) {
break;
}

nrKilled += procsBeforeKill.value() - procsAfterKill.value();
}

// Give it a breather before killing again
//
// Don't sleep after the first round of kills b/c the majority of the
// time the sleep isn't necessary. The system responds fast enough.
if (lastNrKilled) {
std::this_thread::sleep_for(1s);
if (freezeCgroup(target, 0)) {
return nrKilled;
}
} else {
while (tries--) {
// Descendent cgroups created during killing will be missed because
// getAndTryToKillPids reads cgroup children from OomdContext's cache

lastNrKilled = nrKilled;
nrKilled += getAndTryToKillPids(target);

if (nrKilled == lastNrKilled) {
break;
}

// Give it a breather before killing again
//
// Don't sleep after the first round of kills b/c the majority of the
// time the sleep isn't necessary. The system responds fast enough.
if (lastNrKilled) {
std::this_thread::sleep_for(1s);
}

lastNrKilled = nrKilled;
}
}

reportKillCompletionToXattr(cgroupPath, nrKilled);
return nrKilled;
}
Expand Down
8 changes: 6 additions & 2 deletions src/oomd/plugins/BaseKillPlugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ class BaseKillPlugin : public Engine::BasePlugin {
}
}

int getAndTryToKillPids(const CgroupContext& target);

protected:
/*
* Required implementation point for kill plugins
Expand Down Expand Up @@ -192,9 +194,10 @@ class BaseKillPlugin : public Engine::BasePlugin {
virtual bool pastPrekillHookTimeout(const OomdContext& ctx) const;
virtual int dumpMemoryStat(const CgroupContext& target);

private:
virtual int getAndTryToKillPids(const CgroupContext& target);
virtual int freezeCgroup(const CgroupContext& target, int freeze);
virtual int kernelKillCgroup(const CgroupContext& target);

private:
enum class KillResult {
SUCCESS,
FAILED,
Expand Down Expand Up @@ -249,6 +252,7 @@ class BaseKillPlugin : public Engine::BasePlugin {
bool dry_{false};
bool alwaysContinue_{false};
bool debug_{false};
bool kernelKill_{false};

struct ActivePrekillHook {
public:
Expand Down
10 changes: 9 additions & 1 deletion src/oomd/plugins/CorePluginsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include <fcntl.h>
#include <gmock/gmock.h>
#include <gtest/gtest.h>

Expand Down Expand Up @@ -48,11 +49,18 @@ std::unique_ptr<Engine::BasePlugin> createPlugin(const std::string& name) {
namespace Oomd {
class BaseKillPluginMock : public BaseKillPlugin {
public:
/*
* Since we are just simulating killing, we don't need to freeze the cgroup
*/
int freezeCgroup(const CgroupContext& target, int freeze) override {
return 0;
}

/*
* We don't actually need to dump memory.stat since we won't
* actually be killing any live processes
*/
int dumpMemoryStat(const CgroupContext&) override {
int dumpMemoryStat(const CgroupContext& target) override {
return 0;
}

Expand Down
26 changes: 26 additions & 0 deletions src/oomd/util/Fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,14 @@ SystemMaybe<int64_t> Fs::readSwapMaxAt(const DirFd& dirfd) {
return ret;
}

SystemMaybe<int64_t> Fs::readPidsCurrentAt(const DirFd& dirfd) {
auto line = Fs::readFileByLine(Fs::Fd::openat(dirfd, Fs::kPidsCurr));
if (!line) {
return SYSTEM_ERROR(line.error());
}
return std::stoll((*line)[0]);
}

SystemMaybe<std::unordered_map<std::string, int64_t>> Fs::getVmstat(
const std::string& path) {
auto lines = readFileByLine(path);
Expand Down Expand Up @@ -774,6 +782,24 @@ SystemMaybe<Unit> Fs::writeMemReclaimAt(
return noSystemError();
}

SystemMaybe<Unit> Fs::writeFreezeAt(const DirFd& dirfd, int freeze) {
auto val_str = std::to_string(freeze);
auto ret =
writeControlFileAt(Fs::Fd::openat(dirfd, kCgroupFreeze, false), val_str);
if (!ret) {
return SYSTEM_ERROR(ret.error());
}
return noSystemError();
}

SystemMaybe<Unit> Fs::writeKillAt(const DirFd& dirfd) {
auto ret = writeControlFileAt(Fs::Fd::openat(dirfd, kCgroupKill, false), "1");
if (!ret) {
return SYSTEM_ERROR(ret.error());
}
return noSystemError();
}

SystemMaybe<int64_t> Fs::getNrDyingDescendantsAt(const DirFd& dirfd) {
auto lines = readFileByLine(Fs::Fd::openat(dirfd, kCgroupStatFile));
if (!lines) {
Expand Down
6 changes: 6 additions & 0 deletions src/oomd/util/Fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Fs {
static constexpr auto kSubtreeControlFile = "cgroup.subtree_control";
static constexpr auto kProcsFile = "cgroup.procs";
static constexpr auto kEventsFile = "cgroup.events";
static constexpr auto kCgroupFreeze = "cgroup.freeze";
static constexpr auto kMemCurrentFile = "memory.current";
static constexpr auto kMemPressureFile = "memory.pressure";
static constexpr auto kMemLowFile = "memory.low";
Expand All @@ -58,6 +59,8 @@ class Fs {
static constexpr auto kOomdUserPreferXAttr = "user.oomd_prefer";
static constexpr auto kOomdSystemAvoidXAttr = "trusted.oomd_avoid";
static constexpr auto kOomdUserAvoidXAttr = "user.oomd_avoid";
static constexpr auto kPidsCurr = "pids.current";
static constexpr auto kCgroupKill = "cgroup.kill";

struct DirEnts {
std::vector<std::string> dirs;
Expand Down Expand Up @@ -225,6 +228,7 @@ class Fs {
static SystemMaybe<ResourcePressure> readIopressureAt(
const DirFd& dirfd,
PressureType type = PressureType::FULL);
static SystemMaybe<int64_t> readPidsCurrentAt(const DirFd& dirfd);

static SystemMaybe<Unit> writeMemhighAt(const DirFd& dirfd, int64_t value);
static SystemMaybe<Unit> writeMemhightmpAt(
Expand All @@ -235,6 +239,8 @@ class Fs {
const DirFd& dirfd,
int64_t value,
std::optional<int64_t> swappiness);
static SystemMaybe<Unit> writeFreezeAt(const DirFd& dirfd, int value);
static SystemMaybe<Unit> writeKillAt(const DirFd& dirfd);

static SystemMaybe<int64_t> getNrDyingDescendantsAt(const DirFd& dirfd);
static SystemMaybe<KillPreference> readKillPreferenceAt(const DirFd& path);
Expand Down

0 comments on commit a621170

Please sign in to comment.