Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rule refactor #45

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Pod-Reaper is configurable through environment variables. The pod-reaper specifi
- `EXCLUDE_LABEL_VALUES` comma-separated list of metadata label values (of key-value pair) that pod-reaper should exclude
- `REQUIRE_LABEL_KEY` pod metadata label (of key-value pair) that pod-reaper should require
- `REQUIRE_LABEL_VALUES` comma-separated list of metadata label values (of key-value pair) that pod-reaper should require
- `LOG_LEVEL` sets the minimum severity of messages to log; messages at this level and above are logged

Additionally, at least one rule must be enabled, or the pod-reaper will error and exit. See the Rules section below for configuring and enabling rules.

Expand Down Expand Up @@ -106,14 +107,16 @@ Messages this level and above will be logged. Available logging levels: Debug, I
### Example Log

```json
{"level":"info","msg":"loaded rule: chaos chance .3","time":"2017-10-18T17:09:25Z"}
{"level":"info","msg":"loaded rule: maximum run duration 2m","time":"2017-10-18T17:09:25Z"}
{"level":"info","msg":"executing reap cycle","time":"2017-10-18T17:09:55Z"}
{"level":"info","msg":"reaping pod","pod":"hello-cloud-deployment-3026746346-bj65k","reasons":["was flagged for chaos","has been running for 3m6.257891269s"],"time":"2017-10-18T17:09:55Z"}
{"level":"info","msg":"reaping pod","pod":"example-pod-deployment-125971999cgsws","reasons":["was flagged for chaos","has been running for 2m55.269615797s"],"time":"2017-10-18T17:09:55Z"}
{"level":"info","msg":"executing reap cycle","time":"2017-10-18T17:10:25Z"}
{"level":"info","msg":"reaping pod","pod":"hello-cloud-deployment-3026746346-grw12","reasons":["was flagged for chaos","has been running for 3m36.054164005s"],"time":"2017-10-18T17:10:25Z"}
{"level":"info","msg":"pod reaper is exiting","time":"2017-10-18T17:10:46Z"}
{"level":"debug","msg":"starting reap cycle","time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"coredns-6955765f44-c5vh4","reasons":["random number is greater than or equal chaos chance 0.500000 (0.834684)"],"time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"coredns-6955765f44-twd8g","reasons":["random number is greater than or equal chaos chance 0.500000 (0.674312)"],"time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"etcd-kind-control-plane","reasons":["random number is greater than or equal chaos chance 0.500000 (0.785060)"],"time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"kindnet-5d95w","reasons":["random number is greater than or equal chaos chance 0.500000 (0.625311)"],"time":"2020-02-28T02:03:36Z"}
{"level":"info","msg":"reaping pod","pod":"kube-apiserver-kind-control-plane","reasons":["random number is less than chaos chance 0.500000 (0.106679)"],"time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"kube-controller-manager-kind-control-plane","reasons":["random number is greater than or equal chaos chance 0.500000 (0.500754)"],"time":"2020-02-28T02:03:36Z"}
{"level":"info","msg":"reaping pod","pod":"kube-proxy-skqbs","reasons":["random number is less than chaos chance 0.500000 (0.490510)"],"time":"2020-02-28T02:03:36Z"}
{"level":"info","msg":"reaping pod","pod":"kube-scheduler-kind-control-plane","reasons":["random number is less than chaos chance 0.500000 (0.170653)"],"time":"2020-02-28T02:03:36Z"}
{"level":"debug","msg":"sparing pod","pod":"local-path-provisioner-7745554f7f-x9l46","reasons":["random number is greater than or equal chaos chance 0.500000 (0.874489)"],"time":"2020-02-28T02:03:36Z"}
```

## Implemented Rules
Expand Down
9 changes: 0 additions & 9 deletions reaper/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ import (

"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"

"github.com/target/pod-reaper/rules"
)

// environment variable names
Expand All @@ -29,7 +27,6 @@ type options struct {
runDuration time.Duration
labelExclusion *labels.Requirement
labelRequirement *labels.Requirement
rules rules.Rules
}

func namespace() string {
Expand Down Expand Up @@ -128,11 +125,5 @@ func loadOptions() (options options, err error) {
if err != nil {
return options, err
}

// rules
options.rules, err = rules.LoadRules()
if err != nil {
return options, err
}
return options, err
}
16 changes: 0 additions & 16 deletions reaper/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,20 +159,4 @@ func TestOptionsLoad(t *testing.T) {
_, err := loadOptions()
assert.Error(t, err)
})
t.Run("no rules", func(t *testing.T) {
os.Clearenv()
_, err := loadOptions()
assert.Error(t, err)
})
t.Run("valid", func(t *testing.T) {
os.Clearenv()
// ensure at least one rule loads
os.Setenv("CHAOS_CHANCE", "1.0")
options, err := loadOptions()
assert.NoError(t, err)
assert.Equal(t, "@every 1m", options.schedule)
assert.Equal(t, 0*time.Second, options.runDuration)
assert.Nil(t, options.labelExclusion)
assert.Nil(t, options.labelRequirement)
})
}
23 changes: 16 additions & 7 deletions reaper/reaper.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/pkg/api/v1"
k8v1 "k8s.io/client-go/pkg/api/v1"
"k8s.io/client-go/rest"

"github.com/target/pod-reaper/rules"
)

type reaper struct {
Expand Down Expand Up @@ -43,10 +45,10 @@ func newReaper() reaper {
}
}

func (reaper reaper) getPods() *v1.PodList {
func (reaper reaper) getPods() *k8v1.PodList {
coreClient := reaper.clientSet.CoreV1()
pods := coreClient.Pods(reaper.options.namespace)
listOptions := v1.ListOptions{}
listOptions := k8v1.ListOptions{}
if reaper.options.labelExclusion != nil || reaper.options.labelRequirement != nil {
selector := labels.NewSelector()
if reaper.options.labelExclusion != nil {
Expand All @@ -65,12 +67,12 @@ func (reaper reaper) getPods() *v1.PodList {
return podList
}

func (reaper reaper) reapPod(pod v1.Pod, reasons []string) {
func (reaper reaper) reapPod(pod k8v1.Pod, reasons []string) {
logrus.WithFields(logrus.Fields{
"pod": pod.Name,
"reasons": reasons,
}).Info("reaping pod")
deleteOptions := &v1.DeleteOptions{
deleteOptions := &k8v1.DeleteOptions{
GracePeriodSeconds: reaper.options.gracePeriod,
}
err := reaper.clientSet.CoreV1().Pods(pod.Namespace).Delete(pod.Name, deleteOptions)
Expand All @@ -86,11 +88,18 @@ func (reaper reaper) scytheCycle() {
logrus.Debug("starting reap cycle")
pods := reaper.getPods()
for _, pod := range pods.Items {
shouldReap, reasons := reaper.options.rules.ShouldReap(pod)
shouldReap, reapReasons, spareReasons := rules.ShouldReap(pod)
if shouldReap {
reaper.reapPod(pod, reasons)
reaper.reapPod(pod, reapReasons)
} else if len(spareReasons) > 0 {
// if there are explicit reasons to spare the pod, log them
logrus.WithFields(logrus.Fields{
"pod": pod.Name,
"reasons": spareReasons,
}).Debug("sparing pod")
}
}
logrus.Debug("reap cycle completed")
}

func (reaper reaper) harvest() {
Expand Down
25 changes: 9 additions & 16 deletions rules/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,27 @@ import (
"strconv"
"time"

"k8s.io/client-go/pkg/api/v1"
k8v1 "k8s.io/client-go/pkg/api/v1"
)

const envChaosChance = "CHAOS_CHANCE"

var _ Rule = (*chaos)(nil)

type chaos struct {
chance float64
}

// init seeds the package-level rand generator with the current time in
// nanoseconds when the package is loaded.
// NOTE(review): rand is presumably math/rand; the import line is outside this view.
func init() {
rand.Seed(time.Now().UnixNano())
}

func (rule *chaos) load() (bool, string, error) {
func chaos(pod k8v1.Pod) (result, string) {
value, active := os.LookupEnv(envChaosChance)
if !active {
return false, "", nil
return ignore, "not configured"
}
chance, err := strconv.ParseFloat(value, 64)
if err != nil {
return false, "", fmt.Errorf("invalid chaos chance %s", err)
panic(fmt.Errorf("failed to parse %s=%s %v", envChaosChance, value, err))
}
rule.chance = chance
return true, fmt.Sprintf("chaos chance %s", value), nil
}

func (rule *chaos) ShouldReap(pod v1.Pod) (bool, string) {
return rand.Float64() < rule.chance, "was flagged for chaos"
random := rand.Float64()
if random < chance {
return reap, fmt.Sprintf("random number is less than chaos chance %f (%f)", chance, random)
}
return spare, fmt.Sprintf("random number is greater than or equal chaos chance %f (%f)", chance, random)
}
82 changes: 39 additions & 43 deletions rules/chaos_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,51 +5,47 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/client-go/pkg/api/v1"
k8v1 "k8s.io/client-go/pkg/api/v1"
)

func TestChaosLoad(t *testing.T) {
t.Run("load", func(t *testing.T) {
os.Clearenv()
os.Setenv(envChaosChance, "0.5")
loaded, message, err := (&chaos{}).load()
assert.NoError(t, err)
assert.Equal(t, "chaos chance 0.5", message)
assert.True(t, loaded)
})
t.Run("no load", func(t *testing.T) {
os.Clearenv()
loaded, message, err := (&chaos{}).load()
assert.NoError(t, err)
assert.Equal(t, "", message)
assert.False(t, loaded)
})
t.Run("invalid chance", func(t *testing.T) {
os.Clearenv()
os.Setenv(envChaosChance, "not-a-number")
loaded, message, err := (&chaos{}).load()
assert.Error(t, err)
assert.Equal(t, "", message)
assert.False(t, loaded)
})
// TestChaosIgnore checks that, with the chaos chance environment variable
// unset, the chaos rule returns ignore together with the "not configured"
// message.
func TestChaosIgnore(t *testing.T) {
	// make sure no earlier test left the variable set
	os.Unsetenv(envChaosChance)

	outcome, reason := chaos(k8v1.Pod{})

	assert.Equal(t, ignore, outcome)
	assert.Equal(t, "not configured", reason)
}

func TestChaosShouldReap(t *testing.T) {
t.Run("reap", func(t *testing.T) {
os.Clearenv()
os.Setenv(envChaosChance, "1.0") // always
chaos := chaos{}
chaos.load()
shouldReap, message := chaos.ShouldReap(v1.Pod{})
assert.True(t, shouldReap)
assert.Equal(t, "was flagged for chaos", message)
})
t.Run("no reap", func(t *testing.T) {
os.Clearenv()
os.Setenv(envChaosChance, "0.0") // never
chaos := chaos{}
chaos.load()
shouldReap, _ := chaos.ShouldReap(v1.Pod{})
assert.False(t, shouldReap)
})
// TestChaosInvalid checks that a non-numeric chaos chance makes the chaos rule
// panic, and that the panic value matches "failed to parse".
func TestChaosInvalid(t *testing.T) {
	os.Setenv(envChaosChance, "not-a-number")

	// verifyPanic is itself the deferred function, so its direct call to
	// recover captures the panic raised by chaos below.
	verifyPanic := func() {
		recovered := recover()
		assert.NotNil(t, recovered)
		assert.Regexp(t, "^failed to parse.*$", recovered)
	}
	defer verifyPanic()

	chaos(k8v1.Pod{})
}

// TestChaos runs the chaos rule at the two boundary chances: "1.0" is expected
// to yield reap and "0.0" to yield spare. The returned message is matched
// against a per-case regular expression.
func TestChaos(t *testing.T) {
	// scenario pairs a chaos chance setting with the expected rule outcome.
	type scenario struct {
		env          string
		reapResult   result
		messageRegex string
	}

	scenarios := []scenario{
		{env: "1.0", reapResult: reap, messageRegex: "^random number is less than chaos chance 1.0.*$"},
		{env: "0.0", reapResult: spare, messageRegex: "^random number is greater than or equal chaos chance 0.0.*$"},
	}

	for i := range scenarios {
		sc := scenarios[i]
		// the rule reads its chance from the environment on every call
		os.Setenv(envChaosChance, sc.env)
		outcome, reason := chaos(k8v1.Pod{})
		assert.Equal(t, sc.reapResult, outcome)
		assert.Regexp(t, sc.messageRegex, reason)
	}
}
24 changes: 7 additions & 17 deletions rules/container_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,26 @@ import (
"os"
"strings"

"k8s.io/client-go/pkg/api/v1"
k8v1 "k8s.io/client-go/pkg/api/v1"
)

const envContainerStatus = "CONTAINER_STATUSES"

var _ Rule = (*containerStatus)(nil)

type containerStatus struct {
reapStatuses []string
}

func (rule *containerStatus) load() (bool, string, error) {
func containerStatus(pod k8v1.Pod) (result, string) {
value, active := os.LookupEnv(envContainerStatus)
if !active {
return false, "", nil
return ignore, notConfigured
}
rule.reapStatuses = strings.Split(value, ",")
return true, fmt.Sprintf("container status in [%s]", value), nil
}

func (rule *containerStatus) ShouldReap(pod v1.Pod) (bool, string) {
for _, reapStatus := range rule.reapStatuses {
reapStatuses := strings.Split(value, ",")
for _, reapStatus := range reapStatuses {
for _, containerStatus := range pod.Status.ContainerStatuses {
state := containerStatus.State
// check both waiting and terminated conditions
if (state.Waiting != nil && state.Waiting.Reason == reapStatus) ||
(state.Terminated != nil && state.Terminated.Reason == reapStatus) {
return true, fmt.Sprintf("has container status %s", reapStatus)
return reap, fmt.Sprintf("has container with status '%s' in {%s}", reapStatus, value)
}
}
}
return false, ""
return spare, fmt.Sprintf("has no container with status in {%s}", value)
}
Loading