From 1651104f66d5f3bba95fe8de5e125e31286dae52 Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Thu, 24 Oct 2024 13:26:07 -0700 Subject: [PATCH] Testing to prevent double killing Summary: A recent patch fixes a bug that would unnecessarily kill innocent (well-behaving) victims in a noisy cgroup. This diff adds tests to ensure that double killing does not happen when the kernelkill flag is on for BaseKillPlugin. The problem before was that when using cgroup.kill, kernelKill would return PluginRet::CONTINUE as an indication that the kill failed, even though the kernel had already initiated a kill (and likely, the cgroup was already dead) since pids.current isn't immediately updated. Since we expect cgroup.kill to always work eventually, kernelKillCgroup should always return a nonzero number --> tryToKillCgroup should always return STOP (if there are nonempty cgroups). We test this by issuing a kernelKill to a fake non-empty cgroup, and never empty out pids.current. We should still expect PluginRet::STOP, stopping the action chain and preventing double kills. Reviewed By: lnyng Differential Revision: D64911473 fbshipit-source-id: e0e3c119e538643a0ee0984c1e0a03bd1887934d --- src/oomd/plugins/CorePluginsTest.cpp | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/oomd/plugins/CorePluginsTest.cpp b/src/oomd/plugins/CorePluginsTest.cpp index 46504f6f..b2b4f54a 100644 --- a/src/oomd/plugins/CorePluginsTest.cpp +++ b/src/oomd/plugins/CorePluginsTest.cpp @@ -701,6 +701,39 @@ TEST_F(StandardKillRecursionTest, PrerunsRecursively) { (std::unordered_set{"A/Y", "A/X", "B", "Z"})); } +class KernelKillPlugin : public AlphabeticStandardKillPlugin { + public: + using BaseKillPlugin::tryToKillCgroup; + using BaseKillPlugin::tryToKillPids; +}; + +class DoubleKillTest : public CorePluginsTest {}; + +TEST_F(DoubleKillTest, KillsTwice) { + F::materialize(F::makeDir( + tempdir_, + {F::makeDir( + "A", + {F::makeFile("pids.current", "1\n"), + F::makeFile("cgroup.kill", "0")})})); + + // Should do nothing, since it will write to cgroup.kill with no side effect. + // We simulate a delayed kernel kill by leaving cgroup.procs unchanged. + // However, we should still expect the kill to return true, since we expect + // cgroup.kill to always succeed. As long as it returns STOP, there will be + // no double kill. + auto plugin = std::make_shared(); + ASSERT_NE(plugin, nullptr); + const PluginConstructionContext compile_context(tempdir_); + Engine::PluginArgs args; + args["cgroup"] = "*"; + args["recursive"] = "true"; + args["kernelkill"] = "true"; + + ASSERT_EQ(plugin->init(std::move(args), compile_context), 0); + EXPECT_EQ(plugin->run(ctx_), Engine::PluginRet::STOP); +} + class PressureRisingBeyondTest : public CorePluginsTest {}; TEST_F(PressureRisingBeyondTest, DetectsHighMemPressure) {