Skip to content

Commit

Permalink
Platform tests adapted to centralized result logging
Browse files Browse the repository at this point in the history
Signed-off-by: Martin Matyas <[email protected]>
  • Loading branch information
martin-mat committed Mar 20, 2024
1 parent 0b34594 commit d37a0a5
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 185 deletions.
20 changes: 20 additions & 0 deletions embedded_files/points.yml
Original file line number Diff line number Diff line change
Expand Up @@ -234,16 +234,35 @@

- name: k8s_conformance
tags: [platform, dynamic]
- name: clusterapi_enabled
emoji: ""
tags: [platform, dynamic]
- name: worker_reboot_recovery
tags: ["platform", "platform:resilience", dynamic]
- name: oci_compliant
emoji: "📶☠"
tags: ["platform", "platform:hardware_and_scheduling", dynamic]
- name: control_plane_hardening
emoji: "🔓🔑"
tags: ["platform", "platform:security", "dynamic"]
- name: cluster_admin
emoji: "🔓🔑"
tags: ["platform", "platform:security", "dynamic"]
- name: exposed_dashboard
emoji: "🔓🔑"
tags: ["platform", "platform:security", "dynamic"]
- name: kube_state_metrics
emoji: "📶☠"
tags: [platform, "platform:observability", dynamic]
- name: node_exporter
emoji: "📶☠"
tags: [platform, "platform:observability", dynamic]
- name: prometheus_adapter
emoji: "📶☠"
tags: [platform, "platform:observability", dynamic]
- name: metrics_server
emoji: "📶☠"
tags: [platform, "platform:observability", dynamic]

- name: service_account_mapping
emoji: "🔓🔑"
Expand Down Expand Up @@ -337,6 +356,7 @@
tags: [configuration, dynamic, workload, cert, normal]

- name: helm_tiller
emoji: "🔓🔑"
tags: ["platform", "platform:security", "dynamic"]

- name: external_ips
Expand Down
16 changes: 5 additions & 11 deletions src/tasks/platform/hardware_and_scheduling.cr
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@ namespace "platform" do
end

desc "Does the Platform use a runtime that is oci compliant"
task "oci_compliant" do |_, args|
task_response = CNFManager::Task.task_runner(args, check_cnf_installed: false) do |args|
task_start_time = Time.utc
testsuite_task = "oci_compliant"
Log.for(testsuite_task).info { "Starting test" }

task "oci_compliant" do |t, args|
task_response = CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args|
resp = KubectlClient::Get.container_runtimes
all_oci_runtimes = true
resp.each do |x|
Expand All @@ -27,12 +23,10 @@ namespace "platform" do
end
end
LOGGING.info "all_oci_runtimes: #{all_oci_runtimes}"
if all_oci_runtimes
emoji_chaos_oci_compliant="📶☠️"
upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes #{emoji_chaos_oci_compliant}", task_start_time)
if all_oci_runtimes
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes")
else
emoji_chaos_oci_compliant="📶☠️"
upsert_failed_task(testsuite_task, "✖️ FAILED: Platform has at least one node that uses a non OCI compliant runtime #{emoji_chaos_oci_compliant}", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Platform has at least one node that uses a non OCI compliant runtime")
end
end
end
Expand Down
150 changes: 63 additions & 87 deletions src/tasks/platform/observability.cr
Original file line number Diff line number Diff line change
Expand Up @@ -15,107 +15,83 @@ namespace "platform" do
end

desc "Does the Platform have Kube State Metrics installed"
task "kube_state_metrics", ["install_cluster_tools"] do |_, args|
task_start_time = Time.utc
testsuite_task = "kube_state_metrics"
emoji_kube_state_metrics="📶☠️"
Log.for(testsuite_task).info { "Starting test" }

unless check_poc(args)
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Kube State Metrics not in poc mode #{emoji_kube_state_metrics}", task_start_time)
next
end
if args.named["offline"]?
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Kube State Metrics in offline mode #{emoji_kube_state_metrics}", task_start_time)
next
end
Log.info { "Running POC: kube_state_metrics" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::STATE_METRICS_PROCESS)
Log.info { "Found Pod: #{found}" }
task "kube_state_metrics", ["install_cluster_tools"] do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args, config|
unless check_poc(args)
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Kube State Metrics not in poc mode")
end
if args.named["offline"]?
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Kube State Metrics in offline mode")
end
Log.info { "Running POC: kube_state_metrics" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::STATE_METRICS_PROCESS)
Log.info { "Found Pod: #{found}" }

if found
upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the release for kube state metrics #{emoji_kube_state_metrics}", task_start_time)
else
upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have kube state metrics installed #{emoji_kube_state_metrics}", task_start_time)
if found
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Your platform is using the release for kube state metrics")
else
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Your platform does not have kube state metrics installed")
end
end
end

desc "Does the Platform have a Node Exporter installed"
task "node_exporter", ["install_cluster_tools"] do |_, args|
task_start_time = Time.utc
testsuite_task = "node_exporter"
emoji_node_exporter="📶☠️"
Log.for(testsuite_task).info { "Starting test" }

unless check_poc(args)
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: node exporter not in poc mode #{emoji_node_exporter}", task_start_time)
next
end

if args.named["offline"]?
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: node exporter in offline mode #{emoji_node_exporter}", task_start_time)
next
end
Log.info { "Running POC: node_exporter" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::NODE_EXPORTER)
Log.info { "Found Process: #{found}" }
if found
upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the node exporter #{emoji_node_exporter}", task_start_time)
else
upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have the node exporter installed #{emoji_node_exporter}", task_start_time)
task "node_exporter", ["install_cluster_tools"] do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args, config|
unless check_poc(args)
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "node exporter not in poc mode")
end
if args.named["offline"]?
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "node exporter in offline mode")
end
Log.info { "Running POC: node_exporter" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::NODE_EXPORTER)
Log.info { "Found Process: #{found}" }
if found
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Your platform is using the node exporter")
else
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Your platform does not have the node exporter installed")
end
end
end


desc "Does the Platform have the prometheus adapter installed"
task "prometheus_adapter", ["install_cluster_tools"] do |_, args|
task_start_time = Time.utc
testsuite_task = "prometheus_adapter"
emoji_prometheus_adapter="📶☠️"
Log.for(testsuite_task).info { "Starting test" }

unless check_poc(args)
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: prometheus adapter not in poc mode #{emoji_prometheus_adapter}", task_start_time)
next
end
if args.named["offline"]?
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: prometheus adapter in offline mode #{emoji_prometheus_adapter}", task_start_time)
next
end
Log.info { "Running POC: prometheus_adapter" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::PROMETHEUS_ADAPTER)
Log.info { "Found Process: #{found}" }
task "prometheus_adapter", ["install_cluster_tools"] do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args, config|
unless check_poc(args)
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "prometheus adapter not in poc mode")
end
if args.named["offline"]?
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "prometheus adapter in offline mode")
end
Log.info { "Running POC: prometheus_adapter" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::PROMETHEUS_ADAPTER)
Log.info { "Found Process: #{found}" }

if found
upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the prometheus adapter #{emoji_prometheus_adapter}", task_start_time)
else
upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have the prometheus adapter installed #{emoji_prometheus_adapter}", task_start_time)
if found
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Your platform is using the prometheus adapter")
else
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Your platform does not have the prometheus adapter installed")
end
end
end

desc "Does the Platform have the K8s Metrics Server installed"
task "metrics_server", ["install_cluster_tools"] do |_, args|
task_start_time = Time.utc
testsuite_task = "metrics_server"
emoji_metrics_server="📶☠️"
Log.for(testsuite_task).info { "Starting test" }

unless check_poc(args)
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Metrics server not in poc mode #{emoji_metrics_server}", task_start_time)
next
end
if args.named["offline"]?
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Metrics server in offline mode #{emoji_metrics_server}", task_start_time)
next
end
Log.info { "Running POC: metrics_server" }
task_response = CNFManager::Task.task_runner(args, check_cnf_installed: false) do |args|

task "metrics_server", ["install_cluster_tools"] do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do |args, config|
unless check_poc(args)
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Metrics server not in poc mode")
end
if args.named["offline"]?
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Metrics server in offline mode")
end
Log.info { "Running POC: metrics_server" }
found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::METRICS_SERVER)
if found
upsert_passed_task(testsuite_task, "✔️ PASSED: Your platform is using the metrics server #{emoji_metrics_server}", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Your platform is using the metrics server")
else
upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have the metrics server installed #{emoji_metrics_server}", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Your platform does not have the metrics server installed")
end
end
end
Expand Down Expand Up @@ -155,16 +131,16 @@ def named_sha_list(resp_json)
Log.debug { "sha list parsed json: #{parsed_json}" }
#if tags then this is a quay repository, otherwise assume docker hub repository
if parsed_json["tags"]?
parsed_json["tags"].not_nil!.as_a.reduce([] of Hash(String, String)) do |acc, i|
acc << {"name" => i["name"].not_nil!.as_s, "manifest_digest" => i["manifest_digest"].not_nil!.as_s}
end
parsed_json["tags"].not_nil!.as_a.reduce([] of Hash(String, String)) do |acc, i|
acc << {"name" => i["name"].not_nil!.as_s, "manifest_digest" => i["manifest_digest"].not_nil!.as_s}
end
else
parsed_json["results"].not_nil!.as_a.reduce([] of Hash(String, String)) do |acc, i|
# always use amd64
amd64image = i["images"].as_a.find{|x| x["architecture"].as_s == "amd64"}
Log.debug { "amd64image: #{amd64image}" }
if amd64image && amd64image["digest"]?
acc << {"name" => i["name"].not_nil!.as_s, "manifest_digest" => amd64image["digest"].not_nil!.as_s}
acc << {"name" => i["name"].not_nil!.as_s, "manifest_digest" => amd64image["digest"].not_nil!.as_s}
else
Log.error { "amd64 image not found in #{i["images"]}" }
acc
Expand Down
37 changes: 12 additions & 25 deletions src/tasks/platform/platform.cr
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@ task "platform", ["helm_local_install", "k8s_conformance", "platform:observabili
end

desc "Does the platform pass the K8s conformance tests?"
task "k8s_conformance" do |_, args|
task_start_time = Time.utc
testsuite_task = "k8s_conformance"
Log.for(testsuite_task).info { "Starting test" }

begin
task "k8s_conformance" do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do
current_dir = FileUtils.pwd
Log.for(testsuite_task).debug { "current dir: #{current_dir}" }
Log.for(t.name).debug { "current dir: #{current_dir}" }
sonobuoy = "#{tools_path}/sonobuoy/sonobuoy"

# Clean up old results
Expand All @@ -37,7 +33,7 @@ task "k8s_conformance" do |_, args|
output: delete_stdout = IO::Memory.new,
error: delete_stderr = IO::Memory.new
)
Log.for(testsuite_task).debug { "sonobuoy delete output: #{delete_stdout}" }
Log.for(t.name).debug { "sonobuoy delete output: #{delete_stdout}" }

# Run the tests
testrun_stdout = IO::Memory.new
Expand Down Expand Up @@ -72,11 +68,10 @@ task "k8s_conformance" do |_, args|
# Grab the failed line from the results

failed_count = ((results.match(/Failed: (.*)/)).try &.[1])
if failed_count.to_s.to_i > 0
upsert_failed_task(testsuite_task, "✖️ FAILED: K8s conformance test has #{failed_count} failure(s)!", task_start_time)

if failed_count.to_s.to_i > 0
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "K8s conformance test has #{failed_count} failure(s)!")
else
upsert_passed_task(testsuite_task, "✔️ PASSED: K8s conformance test has no failures", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "K8s conformance test has no failures")
end
rescue ex
Log.error { ex.message }
Expand All @@ -89,16 +84,10 @@ task "k8s_conformance" do |_, args|
end

desc "Is Cluster Api available and managing a cluster?"
task "clusterapi_enabled" do |_, args|
CNFManager::Task.task_runner(args, check_cnf_installed: false) do
task_start_time = Time.utc
testsuite_task = "clusterapi_enabled"
emoji_control=""
Log.for(testsuite_task).info { "Starting test" }

task "clusterapi_enabled" do |t, args|
CNFManager::Task.task_runner(args, task: t, check_cnf_installed: false) do
unless check_poc(args)
upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Cluster API not in poc mode #{emoji_control}", task_start_time)
next
next CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Skipped, "Cluster API not in poc mode")
end

Log.for("verbose").info { "clusterapi_enabled" } if check_verbose(args)
Expand Down Expand Up @@ -139,11 +128,9 @@ task "clusterapi_enabled" do |_, args|
Log.info { "clusterapi_control_planes_json: #{clusterapi_control_planes_json}" }

if clusterapi_namespaces_json["items"]? && clusterapi_namespaces_json["items"].as_a.size > 0 && clusterapi_control_planes_json["items"]? && clusterapi_control_planes_json["items"].as_a.size > 0
resp = upsert_passed_task(testsuite_task, "✔️ PASSED: Cluster API is enabled #{emoji_control}", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Passed, "Cluster API is enabled")
else
resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Cluster API NOT enabled #{emoji_control}", task_start_time)
CNFManager::TestcaseResult.new(CNFManager::ResultStatus::Failed, "Cluster API NOT enabled")
end

resp
end
end
Loading

0 comments on commit d37a0a5

Please sign in to comment.