Skip to content

Commit

Permalink
Merge pull request #1839 from cncf/node-drain-random-failures
Browse files Browse the repository at this point in the history
[bug/1838] Increase wait_count in LitmusManager.wait_for_test to allow for slow verdicts
  • Loading branch information
agentpoyo authored Oct 24, 2023
2 parents 0b64940 + 82853d4 commit a785acc
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 6 deletions.
39 changes: 39 additions & 0 deletions sample-cnfs/sample-coredns-cnf3/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Set up Sample CoreDNS CNF
./sample-cnfs/sample-coredns-cnf/readme.md
# Prerequisites
### Install helm
```
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
```
### Optional: Use a helm version manager
https://github.com/yuya-takeyama/helmenv
Check out helmenv into any path (here is ${HOME}/.helmenv)
```
${HOME}/.helmenv)
$ git clone https://github.com/yuya-takeyama/helmenv.git ~/.helmenv
```
Add ~/.helmenv/bin to your $PATH any way you like
```
$ echo 'export PATH="$HOME/.helmenv/bin:$PATH"' >> ~/.bash_profile
```
```
helmenv versions
helmenv install <version 3.1?>
```

### core-dns installation
```
helm install coredns stable/coredns
```
### Pull down the helm chart code, untar it, and put it in the cnfs/coredns directory
```
helm pull stable/coredns
```
### Example cnf-testsuite config file for sample-core-dns-cnf
In ./cnfs/sample-core-dns-cnf/cnf-testsuite.yml
```
---
container_names: [coredns-coredns]
```
9 changes: 9 additions & 0 deletions sample-cnfs/sample-coredns-cnf3/cnf-testsuite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
release_name: coredns3
service_name: coredns-coredns
helm_repository:
  name: stable
  repo_url: https://cncf.gitlab.io/stable
helm_chart: stable/coredns
helm_install_namespace: cnfspace3
allowlist_helm_chart_container_names: [falco, node-cache, nginx, coredns, calico-node, kube-proxy, nginx-proxy]
39 changes: 39 additions & 0 deletions sample-cnfs/sample-coredns-cnf4/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Set up Sample CoreDNS CNF
./sample-cnfs/sample-coredns-cnf/readme.md
# Prerequisites
### Install helm
```
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
```
### Optional: Use a helm version manager
https://github.com/yuya-takeyama/helmenv
Check out helmenv into any path (here is ${HOME}/.helmenv)
```
${HOME}/.helmenv)
$ git clone https://github.com/yuya-takeyama/helmenv.git ~/.helmenv
```
Add ~/.helmenv/bin to your $PATH any way you like
```
$ echo 'export PATH="$HOME/.helmenv/bin:$PATH"' >> ~/.bash_profile
```
```
helmenv versions
helmenv install <version 3.1?>
```

### core-dns installation
```
helm install coredns stable/coredns
```
### Pull down the helm chart code, untar it, and put it in the cnfs/coredns directory
```
helm pull stable/coredns
```
### Example cnf-testsuite config file for sample-core-dns-cnf
In ./cnfs/sample-core-dns-cnf/cnf-testsuite.yml
```
---
container_names: [coredns-coredns]
```
9 changes: 9 additions & 0 deletions sample-cnfs/sample-coredns-cnf4/cnf-testsuite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
release_name: coredns4
service_name: coredns-coredns
helm_repository:
  name: stable
  repo_url: https://cncf.gitlab.io/stable
helm_chart: stable/coredns
helm_install_namespace: cnfspace4
allowlist_helm_chart_container_names: [falco, node-cache, nginx, coredns, calico-node, kube-proxy, nginx-proxy]
15 changes: 10 additions & 5 deletions src/tasks/litmus_setup.cr
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ module LitmusManager
Log.for("wait_for_test").info { "Checking experiment status #{experimentStatus_cmd}" } if check_verbose(args)

## Wait for completion of chaosengine which indicates the completion of chaos
until (status_code == 0 && experimentStatus == "Completed") || wait_count >= retry
until (status_code == 0 && experimentStatus == "Completed") || wait_count >= 1800
sleep delay
experimentStatus_cmd = "kubectl get chaosengine.litmuschaos.io #{test_name} -n #{namespace} -o jsonpath='{.status.experiments[0].status}'"
Log.for("wait_for_test").info { "Checking experiment status #{experimentStatus_cmd}" } if check_verbose(args)
Expand Down Expand Up @@ -135,17 +135,22 @@ module LitmusManager
## check_chaos_verdict will check the verdict of chaosexperiment
## Reads `.status.experimentStatus.verdict` from the chaosresult named `chaos_result_name`
## in `namespace` via kubectl. Returns true only when that verdict is exactly "Pass";
## for any other value it logs the failure and returns false.
## `chaos_experiment_name` is used only in the failure log message.
## NOTE(review): this listing looks like a rendered diff, so some statements appear in both
## their pre-change form (info logs gated by check_verbose(args)) and their post-change form
## (debug logs under "LitmusManager.check_chaos_verdict") — confirm against the real file.
def self.check_chaos_verdict(chaos_result_name, chaos_experiment_name, args, namespace : String = "default") : Bool
## jsonpath narrows the kubectl output to the verdict field alone.
verdict_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o jsonpath='{.status.experimentStatus.verdict}'"
Log.for("check_chaos_verdict").info { "Checking experiment verdict #{verdict_cmd}" } if check_verbose(args)
Log.for("LitmusManager.check_chaos_verdict").debug { "Checking experiment verdict with command: #{verdict_cmd}" }
## Runs the command through the shell; stdout is captured in verdict_response, stderr in stderr.
## The captured stderr and the exit status are only logged here, never used to decide the result.
status_code = Process.run("#{verdict_cmd}", shell: true, output: verdict_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status
Log.for("check_chaos_verdict").info { "status_code: #{status_code}" } if check_verbose(args)
Log.for("check_chaos_verdict").info { "verdict: #{verdict_response.to_s}" } if check_verbose(args)
Log.for("LitmusManager.check_chaos_verdict").debug { "status_code: #{status_code}; verdict: #{verdict_response.to_s}" }
verdict = verdict_response.to_s

## NOTE(review): emoji_test_failed is assigned but not referenced anywhere in this method.
emoji_test_failed= "🗡️💀♻️"
## Exact string comparison: an empty response (e.g. kubectl failed) is treated as a failed verdict.
if verdict == "Pass"
return true
else
Log.info {"#{chaos_experiment_name} chaos test failed: #{chaos_result_name}, verdict: #{verdict}"}
## The full chaosresult JSON is fetched inside the log block and emitted as the debug message.
## Presumably the extra kubectl call only runs when debug logging is enabled for this source — confirm.
## status_code is reassigned here but not read afterwards.
Log.for("LitmusManager.check_chaos_verdict#details").debug do
verdict_details_cmd = "kubectl get chaosresults.litmuschaos.io #{chaos_result_name} -n #{namespace} -o json"
status_code = Process.run("#{verdict_details_cmd}", shell: true, output: verdict_details_response = IO::Memory.new, error: stderr = IO::Memory.new).exit_status
"#{verdict_details_response.to_s}"
end

Log.for("LitmusManager.check_chaos_verdict").info {"#{chaos_experiment_name} chaos test failed: #{chaos_result_name}, verdict: #{verdict}"}
return false
end
end
Expand Down
4 changes: 3 additions & 1 deletion src/tasks/workload/state.cr
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ task "node_drain", ["install_litmus"] do |t, args|

chaos_experiment_name = "node-drain"
total_chaos_duration = "90"
test_name = "#{resource["name"]}-#{Random.rand(99)}"
test_name = "#{resource["name"]}-#{Random::Secure.hex(4)}"
chaos_result_name = "#{test_name}-#{chaos_experiment_name}"

template = ChaosTemplates::NodeDrain.new(
Expand All @@ -342,6 +342,8 @@ task "node_drain", ["install_litmus"] do |t, args|
total_chaos_duration,
app_nodeName
).to_s
Log.for("node_drain").info { "Chaos test name: #{test_name}; Experiment name: #{chaos_experiment_name}; Label #{deployment_label}=#{deployment_label_value}; namespace: #{app_namespace}" }

File.write("#{destination_cnf_dir}/#{chaos_experiment_name}-chaosengine.yml", template)
KubectlClient::Apply.file("#{destination_cnf_dir}/#{chaos_experiment_name}-chaosengine.yml")
LitmusManager.wait_for_test(test_name,chaos_experiment_name,total_chaos_duration,args, namespace: app_namespace)
Expand Down

0 comments on commit a785acc

Please sign in to comment.