diff --git a/src/tasks/platform/hardware_and_scheduling.cr b/src/tasks/platform/hardware_and_scheduling.cr index 6ef5cf5e0..ce0ac5261 100644 --- a/src/tasks/platform/hardware_and_scheduling.cr +++ b/src/tasks/platform/hardware_and_scheduling.cr @@ -15,6 +15,10 @@ namespace "platform" do desc "Does the Platform use a runtime that is oci compliant" task "oci_compliant" do |_, args| task_response = CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args| + task_start_time = Time.utc + testsuite_task = "oci_compliant" + Log.for(testsuite_task).info { "Starting test" } + resp = KubectlClient::Get.container_runtimes all_oci_runtimes = true resp.each do |x| @@ -25,10 +29,10 @@ namespace "platform" do LOGGING.info "all_oci_runtimes: #{all_oci_runtimes}" if all_oci_runtimes emoji_chaos_oci_compliant="📶☠️" - upsert_passed_task("oci_compliant","✔️ PASSED: Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes #{emoji_chaos_oci_compliant}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes #{emoji_chaos_oci_compliant}", task_start_time) else emoji_chaos_oci_compliant="📶☠️" - upsert_failed_task("oci_compliant", "✖️ FAILED: Platform has at least one node that uses a non OCI compliant runtime #{emoji_chaos_oci_compliant}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Platform has at least one node that uses a non OCI compliant runtime #{emoji_chaos_oci_compliant}", task_start_time) end end end diff --git a/src/tasks/platform/observability.cr b/src/tasks/platform/observability.cr index 08b8cbe7c..84de34954 100644 --- a/src/tasks/platform/observability.cr +++ b/src/tasks/platform/observability.cr @@ -16,6 +16,10 @@ namespace "platform" do desc "Does the Platform have Kube State Metrics installed" task "kube_state_metrics", 
["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "kube_state_metrics" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping kube_state_metrics: not in poc mode" } puts "SKIPPED: Kube State Metrics".colorize(:yellow) @@ -32,15 +36,19 @@ namespace "platform" do if found emoji_kube_state_metrics="📶☠️" - upsert_passed_task("kube_state_metrics","✔️ PASSED: Your platform is using the release for kube state metrics #{emoji_kube_state_metrics}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the release for kube state metrics #{emoji_kube_state_metrics}", task_start_time) else emoji_kube_state_metrics="📶☠️" - upsert_failed_task("kube_state_metrics", "✖️ FAILED: Your platform does not have kube state metrics installed #{emoji_kube_state_metrics}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have kube state metrics installed #{emoji_kube_state_metrics}", task_start_time) end end desc "Does the Platform have a Node Exporter installed" task "node_exporter", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "node_exporter" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping node_exporter: not in poc mode" } puts "SKIPPED: Node Exporter".colorize(:yellow) @@ -57,16 +65,20 @@ namespace "platform" do Log.info { "Found Process: #{found}" } if found emoji_node_exporter="📶☠️" - upsert_passed_task("node_exporter","✔️ PASSED: Your platform is using the node exporter #{emoji_node_exporter}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the node exporter #{emoji_node_exporter}", task_start_time) else emoji_node_exporter="📶☠️" - upsert_failed_task("node_exporter", "✖️ FAILED: Your platform does not have the node exporter installed #{emoji_node_exporter}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Your 
platform does not have the node exporter installed #{emoji_node_exporter}", task_start_time) end end desc "Does the Platform have the prometheus adapter installed" task "prometheus_adapter", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "prometheus_adapter" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping prometheus_adapter: not in poc mode" } puts "SKIPPED: Prometheus Adapter".colorize(:yellow) @@ -83,15 +95,19 @@ namespace "platform" do if found emoji_prometheus_adapter="📶☠️" - upsert_passed_task("prometheus_adapter","✔️ PASSED: Your platform is using the prometheus adapter #{emoji_prometheus_adapter}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: Your platform is using the prometheus adapter #{emoji_prometheus_adapter}", task_start_time) else emoji_prometheus_adapter="📶☠️" - upsert_failed_task("prometheus_adapter", "✖️ FAILED: Your platform does not have the prometheus adapter installed #{emoji_prometheus_adapter}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have the prometheus adapter installed #{emoji_prometheus_adapter}", task_start_time) end end desc "Does the Platform have the K8s Metrics Server installed" task "metrics_server", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "metrics_server" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping metrics_server: not in poc mode" } puts "SKIPPED: Metrics Server".colorize(:yellow) @@ -108,10 +124,10 @@ namespace "platform" do found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::METRICS_SERVER) if found emoji_metrics_server="📶☠️" - upsert_passed_task("metrics_server","✔️ PASSED: Your platform is using the metrics server #{emoji_metrics_server}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Your platform is using the metrics server 
#{emoji_metrics_server}", task_start_time) else emoji_metrics_server="📶☠️" - upsert_failed_task("metrics_server", "✖️ FAILED: Your platform does not have the metrics server installed #{emoji_metrics_server}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Your platform does not have the metrics server installed #{emoji_metrics_server}", task_start_time) end end end diff --git a/src/tasks/platform/platform.cr b/src/tasks/platform/platform.cr index 53217ef27..96b2d797c 100644 --- a/src/tasks/platform/platform.cr +++ b/src/tasks/platform/platform.cr @@ -20,10 +20,13 @@ end desc "Does the platform pass the K8s conformance tests?" task "k8s_conformance" do |_, args| - VERBOSE_LOGGING.info "k8s_conformance" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "k8s_conformance" + Log.for(testsuite_task).info { "Starting test" } + begin current_dir = FileUtils.pwd - VERBOSE_LOGGING.debug current_dir if check_verbose(args) + Log.for(testsuite_task).debug { "current dir: #{current_dir}" } sonobuoy = "#{tools_path}/sonobuoy/sonobuoy" # Clean up old results @@ -34,7 +37,7 @@ task "k8s_conformance" do |_, args| output: delete_stdout = IO::Memory.new, error: delete_stderr = IO::Memory.new ) - Log.for("verbose").info { delete_stdout } if check_verbose(args) + Log.for(testsuite_task).debug { "sonobuoy delete output: #{delete_stdout}" } # Run the tests testrun_stdout = IO::Memory.new @@ -70,10 +73,10 @@ task "k8s_conformance" do |_, args| failed_count = ((results.match(/Failed: (.*)/)).try &.[1]) if failed_count.to_s.to_i > 0 - upsert_failed_task("k8s_conformance", "✖️ FAILED: K8s conformance test has #{failed_count} failure(s)!", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: K8s conformance test has #{failed_count} failure(s)!", task_start_time) else - upsert_passed_task("k8s_conformance", "✔️ PASSED: K8s conformance test has no failures", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: K8s conformance test has no failures", 
task_start_time) end rescue ex Log.error { ex.message } @@ -88,6 +91,10 @@ end desc "Is Cluster Api available and managing a cluster?" task "clusterapi_enabled" do |_, args| CNFManager::Task.task_runner(args, check_cnf_installed=false) do + task_start_time = Time.utc + testsuite_task = "clusterapi_enabled" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping clusterapi_enabled: not in poc mode" } puts "SKIPPED: ClusterAPI Enabled".colorize(:yellow) @@ -134,9 +141,9 @@ task "clusterapi_enabled" do |_, args| emoji_control="✨" if clusterapi_namespaces_json["items"]? && clusterapi_namespaces_json["items"].as_a.size > 0 && clusterapi_control_planes_json["items"]? && clusterapi_control_planes_json["items"].as_a.size > 0 - resp = upsert_passed_task("clusterapi_enabled", "✔️ Cluster API is enabled #{emoji_control}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ Cluster API is enabled #{emoji_control}", task_start_time) else - resp = upsert_failed_task("clusterapi_enabled", "✖️ Cluster API NOT enabled #{emoji_control}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ Cluster API NOT enabled #{emoji_control}", task_start_time) end resp diff --git a/src/tasks/platform/resilience.cr b/src/tasks/platform/resilience.cr index f5ed5c594..138268401 100644 --- a/src/tasks/platform/resilience.cr +++ b/src/tasks/platform/resilience.cr @@ -14,6 +14,10 @@ namespace "platform" do desc "Does the Platform recover the node and reschedule pods when a worker node fails" task "worker_reboot_recovery" do |_, args| + task_start_time = Time.utc + testsuite_task = "worker_reboot_recovery" + Log.for(testsuite_task).info { "Starting test" } + unless check_destructive(args) Log.info { "skipping node_failure: not in destructive mode" } puts "SKIPPED: Node Failure".colorize(:yellow) @@ -43,7 +47,7 @@ namespace "platform" do pod_ready = KubectlClient::Get.pod_status("reboot", "--field-selector spec.nodeName=#{worker_node}").split(",")[2] 
pod_ready_timeout = pod_ready_timeout - 1 if pod_ready_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "✖️ FAILED: Failed to install reboot daemon", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Failed to install reboot daemon", task_start_time) exit 1 end sleep 1 @@ -67,7 +71,7 @@ namespace "platform" do Log.info { "Node Ready Status: #{node_ready}" } node_failure_timeout = node_failure_timeout - 1 if node_failure_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "✖️ FAILED: Node failed to go offline", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Node failed to go offline", task_start_time) exit 1 end sleep 1 @@ -85,14 +89,14 @@ namespace "platform" do Log.info { "Node Ready Status: #{node_ready}" } node_online_timeout = node_online_timeout - 1 if node_online_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "✖️ FAILED: Node failed to come back online", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Node failed to come back online", task_start_time) exit 1 end sleep 1 end emoji_worker_reboot_recovery="" - resp = upsert_passed_task("worker_reboot_recovery","✔️ PASSED: Node came back online #{emoji_worker_reboot_recovery}", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: Node came back online #{emoji_worker_reboot_recovery}", task_start_time) ensure diff --git a/src/tasks/platform/security.cr b/src/tasks/platform/security.cr index 76cb1a7b5..c4fdb8a50 100644 --- a/src/tasks/platform/security.cr +++ b/src/tasks/platform/security.cr @@ -13,16 +13,19 @@ namespace "platform" do desc "Is the platform control plane hardened" task "control_plane_hardening", ["kubescape_scan"] do |_, args| task_response = CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args| - VERBOSE_LOGGING.info "control_plane_hardening" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "control_plane_hardening" + Log.for(testsuite_task).info { "Starting test" } + results_json = 
Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Control plane hardening") test_report = Kubescape.parse_test_report(test_json) emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("control_plane_hardening", "✔️ PASSED: Control plane hardened #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Control plane hardened #{emoji_security}", task_start_time) else - resp = upsert_failed_task("control_plane_hardening", "✖️ FAILED: Control plane not hardened #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Control plane not hardened #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -34,16 +37,19 @@ namespace "platform" do task "cluster_admin", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| - VERBOSE_LOGGING.info "cluster_admin" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "cluster_admin" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Cluster-admin binding") test_report = Kubescape.parse_test_report(test_json) emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("cluster_admin", "✔️ PASSED: No users with cluster admin role found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No users with cluster admin role found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("cluster_admin", "✖️ FAILED: Users with cluster admin role found #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Users with cluster admin role found #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| 
stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -56,16 +62,19 @@ namespace "platform" do next if args.named["offline"]? CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| - Log.for("verbose").info { "exposed_dashboard" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "exposed_dashboard" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Exposed dashboard") test_report = Kubescape.parse_test_report(test_json) emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("exposed_dashboard", "✔️ PASSED: No exposed dashboard found in the cluster #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No exposed dashboard found in the cluster #{emoji_security}", task_start_time) else - resp = upsert_failed_task("exposed_dashboard", "✖️ FAILED: Found exposed dashboard in the cluster #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found exposed dashboard in the cluster #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -76,7 +85,10 @@ namespace "platform" do desc "Check if the CNF is running containers with name tiller in their image name?" 
task "helm_tiller" do |_, args| emoji_security="🔓🔑" - Log.for("verbose").info { "platform:helm_tiller" } + task_start_time = Time.utc + testsuite_task = "helm_tiller" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| @@ -84,9 +96,9 @@ namespace "platform" do failures = Kyverno::PolicyAudit.run(policy_path, EXCLUDE_NAMESPACES) if failures.size == 0 - resp = upsert_passed_task("helm_tiller", "✔️ PASSED: No Helm Tiller containers are running #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: No Helm Tiller containers are running #{emoji_security}", task_start_time) else - resp = upsert_failed_task("helm_tiller", "✖️ FAILED: Containers with the Helm Tiller image are running #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Containers with the Helm Tiller image are running #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) diff --git a/src/tasks/utils/points.cr b/src/tasks/utils/points.cr index 5eec74f4b..8ad37c9d8 100644 --- a/src/tasks/utils/points.cr +++ b/src/tasks/utils/points.cr @@ -389,6 +389,8 @@ module CNFManager end_time = Time.utc task_runtime = (end_time - start_time).milliseconds + Log.for("#{task}").info { "task_runtime=#{task_runtime}; start_time=#{start_time}; end_time:#{end_time}" } + # The task result info has to be appeneded to an array of YAML::Any # So encode it into YAML and parse it back again to assign it. 
# diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr index 9304e7ed5..145c869d0 100644 --- a/src/tasks/workload/compatibility.cr +++ b/src/tasks/workload/compatibility.cr @@ -24,10 +24,13 @@ rolling_version_change_test_names.each do |tn| desc "Test if the CNF containers are loosely coupled by performing a #{pretty_test_name}" task "#{tn}" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - LOGGING.debug "cnf_config: #{config}" - VERBOSE_LOGGING.info "#{tn}" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = tn + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } container_names = config.cnf_config[:container_names] - LOGGING.debug "container_names: #{container_names}" + Log.for(testsuite_task).debug { "container_names: #{container_names}" } update_applied = true unless container_names puts "Please add a container names set of entries into your cnf-testsuite.yml".colorize(:red) @@ -43,8 +46,8 @@ rolling_version_change_test_names.each do |tn| namespace = resource["namespace"] || config.cnf_config[:helm_install_namespace] test_passed = true valid_cnf_testsuite_yml = true - LOGGING.debug "#{tn} container: #{container}" - LOGGING.debug "container_names: #{container_names}" + Log.for(testsuite_task).debug { "container: #{container}" } + Log.for(testsuite_task).debug { "container_names: #{container_names}" } #todo use skopeo to get the next and previous versions of the cnf image dynamically config_container = container_names.find{|x| x["name"]==container.as_h["name"]} if container_names LOGGING.debug "config_container: #{config_container}" @@ -83,9 +86,9 @@ rolling_version_change_test_names.each do |tn| end VERBOSE_LOGGING.debug "#{tn}: task_response=#{task_response}" if check_verbose(args) if task_response - resp = upsert_passed_task("#{tn}","✔️ PASSED: CNF for #{pretty_test_name_capitalized} Passed", Time.utc) + resp = 
upsert_passed_task(testsuite_task, "✔️ PASSED: CNF for #{pretty_test_name_capitalized} Passed", task_start_time) else - resp = upsert_failed_task("#{tn}", "✖️ FAILED: CNF for #{pretty_test_name_capitalized} Failed", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: CNF for #{pretty_test_name_capitalized} Failed", task_start_time) end resp # TODO should we roll the image back to original version in an ensure? @@ -97,11 +100,14 @@ end desc "Test if the CNF can perform a rollback" task "rollback" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "rollback" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "rollback" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } container_names = config.cnf_config[:container_names] - LOGGING.debug "container_names: #{container_names}" + Log.for(testsuite_task).debug { "container_names: #{container_names}" } update_applied = true rollout_status = true @@ -122,11 +128,9 @@ task "rollback" do |_, args| image_name = full_image_name_tag[0] image_tag = full_image_name_tag[2] - VERBOSE_LOGGING.debug "resource: #{resource_kind}/#{resource_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "container_name: #{container_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "image_name: #{image_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "image_tag: #{image_tag}" if check_verbose(args) - LOGGING.debug "rollback: setting new version" + Log.for(testsuite_task).debug { + "Rollback: setting new version; resource=#{resource_kind}/#{resource_name}; container_name=#{container_name}; image_name=#{image_name}; image_tag: #{image_tag}" + } #do_update = `kubectl set image deployment/coredns-coredns coredns=coredns/coredns:latest --record` version_change_applied = true @@ -144,7 +148,9 @@ task "rollback" do |_, args| version_change_applied=false end - 
VERBOSE_LOGGING.debug "rollback: update #{resource_kind}/#{resource_name}, container: #{container_name}, image: #{image_name}, tag: #{rollback_from_tag}" if check_verbose(args) + Log.for(testsuite_task).debug { + "rollback: update #{resource_kind}/#{resource_name}, container: #{container_name}, image: #{image_name}, tag: #{rollback_from_tag}" + } # set a temporary image/tag, so that we can rollback to the current (original) tag later version_change_applied = KubectlClient::Set.image( resource_kind, @@ -156,34 +162,38 @@ task "rollback" do |_, args| ) end - LOGGING.info "rollback version change successful? #{version_change_applied}" + Log.for(testsuite_task).info { "rollback version change successful? #{version_change_applied}" } - VERBOSE_LOGGING.debug "rollback: checking status new version" if check_verbose(args) + Log.for(testsuite_task).debug { "rollback: checking status new version" } rollout_status = KubectlClient::Rollout.status(resource_kind, resource_name, namespace: namespace, timeout: "180s") if rollout_status == false stdout_failure("Rollback failed on resource: #{resource_kind}/#{resource_name} and container: #{container_name}") end # https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#rolling-back-to-a-previous-revision - VERBOSE_LOGGING.debug "rollback: rolling back to old version" if check_verbose(args) + Log.for(testsuite_task).debug { "rollback: rolling back to old version" } rollback_status = KubectlClient::Rollout.undo(resource_kind, resource_name, namespace: namespace) end if task_response && version_change_applied && rollout_status && rollback_status - upsert_passed_task("rollback","✔️ PASSED: CNF Rollback Passed", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: CNF Rollback Passed", task_start_time) else - upsert_failed_task("rollback", "✖️ FAILED: CNF Rollback Failed", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: CNF Rollback Failed", task_start_time) end end end desc "Test increasing/decreasing 
capacity" task "increase_decrease_capacity" do |t, args| - VERBOSE_LOGGING.info "increase_decrease_capacity" if check_verbose(args) + CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "increase_capacity" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "increase_decrease_capacity" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).info { "increase_capacity" } increase_test_base_replicas = "1" increase_test_target_replicas = "3" @@ -219,9 +229,9 @@ task "increase_decrease_capacity" do |t, args| if increase_task_response.none?(false) && decrease_task_response.none?(false) pass_msg = "✔️ 🏆 PASSED: Replicas increased to #{increase_test_target_replicas} and decreased to #{decrease_test_target_replicas} #{emoji_capacity}" - upsert_passed_task("increase_decrease_capacity", pass_msg, Time.utc) + upsert_passed_task(testsuite_task, pass_msg, task_start_time) else - upsert_failed_task("increase_decrease_capacity", "✖️ FAILURE: Capacity change failed #{emoji_capacity}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILURE: Capacity change failed #{emoji_capacity}", task_start_time) # If increased capacity failed if increase_task_response.any?(false) @@ -271,7 +281,7 @@ end # if task_response.none?(false) # upsert_passed_task("increase_capacity", "✔️ PASSED: Replicas increased to #{target_replicas} #{emoji_increase_capacity}") # else -# upsert_failed_task("increase_capacity", increase_decrease_capacity_failure_msg(target_replicas, emoji_increase_capacity)) +# upsert_failed_task(testsuite_task, increase_decrease_capacity_failure_msg(target_replicas, emoji_increase_capacity)) # end # end # end @@ -300,7 +310,7 @@ end # if task_response.none?(false) # ret = upsert_passed_task("decrease_capacity", "✔️ PASSED: Replicas decreased to #{target_replicas} #{emoji_decrease_capacity}") # else -# ret = upsert_failed_task("decrease_capacity", increase_decrease_capacity_failure_msg(target_replicas, 
emoji_decrease_capacity)) +# ret = upsert_failed_task(testsuite_task, increase_decrease_capacity_failure_msg(target_replicas, emoji_decrease_capacity)) # end # puts "1 ret: #{ret}" # ret @@ -416,10 +426,15 @@ end desc "Will the CNF install using helm with helm_deploy?" task "helm_deploy" do |_, args| - Log.for("helm_deploy").info { "Starting test" } - Log.info { "helm_deploy args: #{args.inspect}" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "helm_deploy" + Log.for(testsuite_task).info { "Running #{testsuite_task}" } + Log.for(testsuite_task).info { "helm_deploy args: #{args.inspect}" } if check_verbose(args) + if check_cnf_config(args) || CNFManager.destination_cnfs_exist? CNFManager::Task.task_runner(args) do |args, config| + Log.for(testsuite_task).info { "Starting test" } + emoji_helm_deploy="⎈🚀" helm_chart = config.cnf_config[:helm_chart] helm_directory = config.cnf_config[:helm_directory] @@ -430,20 +445,23 @@ task "helm_deploy" do |_, args| helm_used = configmap["data"].as_h["helm_used"].as_s if helm_used == "true" - upsert_passed_task("helm_deploy", "✔️ PASSED: Helm deploy successful #{emoji_helm_deploy}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Helm deploy successful #{emoji_helm_deploy}", task_start_time) else - upsert_failed_task("helm_deploy", "✖️ FAILED: Helm deploy failed #{emoji_helm_deploy}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Helm deploy failed #{emoji_helm_deploy}", task_start_time) end end else - upsert_failed_task("helm_deploy", "✖️ FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: No cnf_testsuite.yml found! 
Did you run the setup task?", task_start_time) end end task "helm_chart_published", ["helm_local_install"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "helm_chart_published" + Log.for(testsuite_task).info { "Starting test" } + if check_verbose(args) - Log.for("verbose").info { "helm_chart_published" } Log.for("verbose").debug { "helm_chart_published args.raw: #{args.raw}" } Log.for("verbose").debug { "helm_chart_published args.named: #{args.named}" } end @@ -460,7 +478,7 @@ task "helm_chart_published", ["helm_local_install"] do |_, args| if CNFManager.helm_repo_add(args: args) unless helm_chart.empty? helm_search_cmd = "#{helm} search repo #{helm_chart}" - Log.info { "helm search command: #{helm_search_cmd}" } + Log.for(testsuite_task).info { "helm search command: #{helm_search_cmd}" } Process.run( helm_search_cmd, shell: true, @@ -470,23 +488,26 @@ task "helm_chart_published", ["helm_local_install"] do |_, args| helm_search = helm_search_stdout.to_s Log.for("verbose").debug { "#{helm_search}" } if check_verbose(args) unless helm_search =~ /No results found/ - upsert_passed_task("helm_chart_published", "✔️ PASSED: Published Helm Chart Found #{emoji_published_helm_chart}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Published Helm Chart Found #{emoji_published_helm_chart}", task_start_time) else - upsert_failed_task("helm_chart_published", "✖️ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end else - upsert_failed_task("helm_chart_published", "✖️ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end else - upsert_failed_task("helm_chart_published", "✖️ FAILED: 
Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end end end task "helm_chart_valid", ["helm_local_install"] do |_, args| CNFManager::Task.task_runner(args) do |args| + task_start_time = Time.utc + testsuite_task = "helm_chart_valid" + Log.for(testsuite_task).info { "Starting test" } + if check_verbose(args) - Log.for("verbose").info { "helm_chart_valid" } Log.for("verbose").debug { "helm_chart_valid args.raw: #{args.raw}" } Log.for("verbose").debug { "helm_chart_valid args.named: #{args.named}" } end @@ -509,7 +530,7 @@ task "helm_chart_valid", ["helm_local_install"] do |_, args| Log.for("verbose").debug { "working_chart_directory: #{working_chart_directory}" } if check_verbose(args) current_dir = FileUtils.pwd - Log.for("verbose").debug { current_dir } if check_verbose(args) + Log.for(testsuite_task).debug { "current dir: #{current_dir}" } helm = Helm::BinarySingleton.helm emoji_helm_lint="⎈📝☑️" @@ -523,12 +544,12 @@ task "helm_chart_valid", ["helm_local_install"] do |_, args| error: helm_link_stderr = IO::Memory.new ) helm_lint = helm_lint_stdout.to_s - Log.for("verbose").debug { "helm_lint: #{helm_lint}" } if check_verbose(args) + Log.for(testsuite_task).debug { "helm_lint: #{helm_lint}" } if check_verbose(args) if helm_lint_status.success? 
- upsert_passed_task("helm_chart_valid", "✔️ PASSED: Helm Chart #{working_chart_directory} Lint Passed #{emoji_helm_lint}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Helm Chart #{working_chart_directory} Lint Passed #{emoji_helm_lint}", task_start_time) else - upsert_failed_task("helm_chart_valid", "✖️ FAILED: Helm Chart #{working_chart_directory} Lint Failed #{emoji_helm_lint}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Helm Chart #{working_chart_directory} Lint Failed #{emoji_helm_lint}", task_start_time) end end end @@ -625,7 +646,10 @@ end desc "CNFs should work with any Certified Kubernetes product and any CNI-compatible network that meet their functionality requirements." task "cni_compatible" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "cni_compatible" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "cni_compatible" + Log.for(testsuite_task).info { "Starting test" } + emoji_security="🔓🔑" docker_version = DockerClient.version_info() @@ -650,9 +674,9 @@ task "cni_compatible" do |_, args| puts "CNF failed to install on Cilium CNI cluster".colorize(:red) unless cilium_cnf_passed if calico_cnf_passed && cilium_cnf_passed - upsert_passed_task("cni_compatible", "✔️ PASSED: CNF compatible with both Calico and Cilium #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: CNF compatible with both Calico and Cilium #{emoji_security}", task_start_time) else - upsert_failed_task("cni_compatible", "✖️ FAILED: CNF not compatible with either Calico or Cillium #{emoji_security}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: CNF not compatible with either Calico or Cillium #{emoji_security}", task_start_time) end ensure kind_manager = KindManager.new @@ -661,7 +685,7 @@ task "cni_compatible" do |_, args| ENV["KUBECONFIG"]="#{kubeconfig_orig}" end else - upsert_skipped_task("cni_compatible", "✖️ SKIPPED: Docker not installed 
#{emoji_security}", Time.utc) + upsert_skipped_task(testsuite_task, "✖️ SKIPPED: Docker not installed #{emoji_security}", task_start_time) end end end diff --git a/src/tasks/workload/configuration.cr b/src/tasks/workload/configuration.cr index 1aa471c0a..95d99cb7b 100644 --- a/src/tasks/workload/configuration.cr +++ b/src/tasks/workload/configuration.cr @@ -34,7 +34,10 @@ end desc "Check if the CNF is running containers with labels configured?" task "require_labels" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "require-labels" } + task_start_time = Time.utc + testsuite_task = "require_labels" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "🏷️✔️" emoji_failed = "🏷️❌" @@ -45,9 +48,9 @@ task "require_labels" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("require_labels", "✔️ PASSED: Pods have the app.kubernetes.io/name label #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: Pods have the app.kubernetes.io/name label #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("require_labels", "✖️ FAILED: Pods should have the app.kubernetes.io/name label. #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Pods should have the app.kubernetes.io/name label. #{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) @@ -60,7 +63,10 @@ end desc "Check if the CNF installs resources in the default namespace" task "default_namespace" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "default_namespace" } + task_start_time = Time.utc + testsuite_task = "default_namespace" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "🏷️✔️" emoji_failed = "🏷️❌" @@ -71,9 +77,9 @@ task "default_namespace" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("default_namespace", "✔️ PASSED: default namespace is not being used #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: default namespace is not being used #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("default_namespace", "✖️ FAILED: Resources are created in the default namespace #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Resources are created in the default namespace #{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) @@ -86,7 +92,10 @@ end desc "Check if the CNF uses container images with the latest tag" task "latest_tag" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "latest_tag" } + task_start_time = Time.utc + testsuite_task = "latest_tag" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "🏷️✔️" @@ -98,9 +107,9 @@ task "latest_tag" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("latest_tag", "✔️ 🏆 PASSED: Container images are not using the latest tag #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Container images are not using the latest tag #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("latest_tag", "✖️ 🏆 FAILED: Container images are using the latest tag #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Container images are using the latest tag #{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) @@ -113,8 +122,10 @@ end desc "Does a search for IP addresses or subnets come back as negative?" 
task "ip_addresses" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "ip_addresses" if check_verbose(args) - LOGGING.info("ip_addresses args #{args.inspect}") + task_start_time = Time.utc + testsuite_task = "ip_addresses" + Log.for(testsuite_task).info { "Starting test" } + cdir = FileUtils.pwd() response = String::Builder.new helm_directory = config.cnf_config[:helm_directory] @@ -123,7 +134,7 @@ task "ip_addresses" do |_, args| # Switch to the helm chart directory Dir.cd(helm_chart_path) # Look for all ip addresses that are not comments - LOGGING.info "current directory: #{ FileUtils.pwd()}" + Log.for(testsuite_task).info { "current directory: #{ FileUtils.pwd()}" } # should catch comments (# // or /*) and ignore 0.0.0.0 # note: grep wants * escaped twice Process.run("grep -r -P '^(?!.+0\.0\.0\.0)(?![[:space:]]*0\.0\.0\.0)(?!#)(?![[:space:]]*#)(?!\/\/)(?![[:space:]]*\/\/)(?!\/\\*)(?![[:space:]]*\/\\*)(.+([0-9]{1,3}[\.]){3}[0-9]{1,3})' --exclude=*.txt", shell: true) do |proc| @@ -143,16 +154,16 @@ task "ip_addresses" do |_, args| matching_line = line_parts.join(":").strip() stdout_failure(" * In file #{file_name}: #{matching_line}") end - resp = upsert_failed_task("ip_addresses","✖️ FAILED: IP addresses found", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ FAILED: IP addresses found", task_start_time) else - resp = upsert_passed_task("ip_addresses", "✔️ PASSED: No IP addresses found", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: No IP addresses found", task_start_time) end resp else # TODO If no helm chart directory, exit with 0 points # ADD SKIPPED tag for points.yml to allow for 0 points Dir.cd(cdir) - resp = upsert_passed_task("ip_addresses", "✔️ PASSED: No IP addresses found", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: No IP addresses found", task_start_time) end end end @@ -173,8 +184,11 @@ task "versioned_tag", ["install_opa"] do |_, args| # end # 
CNFManager::Task.task_runner(args) do |args,config| - VERBOSE_LOGGING.info "versioned_tag" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "versioned_tag" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } fail_msgs = [] of String task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = true @@ -204,9 +218,9 @@ task "versioned_tag", ["install_opa"] do |_, args| emoji_non_versioned_tag="🏷️❌" if task_response - upsert_passed_task("versioned_tag", "✔️ PASSED: Container images use versioned tags #{emoji_versioned_tag}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Container images use versioned tags #{emoji_versioned_tag}", task_start_time) else - upsert_failed_task("versioned_tag", "✖️ FAILED: Container images do not use versioned tags #{emoji_non_versioned_tag}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Container images do not use versioned tags #{emoji_non_versioned_tag}", task_start_time) fail_msgs.each do |msg| stdout_failure(msg) end @@ -218,20 +232,23 @@ desc "Does the CNF use NodePort" task "nodeport_not_used" do |_, args| # TODO rename task_runner to multi_cnf_task_runner CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "nodeport_not_used" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "nodeport_not_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + release_name = config.cnf_config[:release_name] service_name = config.cnf_config[:service_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config, check_containers:false, check_service: true) do |resource, container, initialized| - LOGGING.info "nodeport_not_used 
resource: #{resource}" + Log.for(testsuite_task).info { "nodeport_not_used resource: #{resource}" } if resource["kind"].downcase == "service" - LOGGING.info "resource kind: #{resource}" + Log.for(testsuite_task).info { "resource kind: #{resource}" } service = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - LOGGING.debug "service: #{service}" + Log.for(testsuite_task).debug { "service: #{service}" } service_type = service.dig?("spec", "type") - LOGGING.info "service_type: #{service_type}" - VERBOSE_LOGGING.debug service_type if check_verbose(args) + Log.for(testsuite_task).info { "service_type: #{service_type}" } if service_type == "NodePort" #TODO make a service selector and display the related resources # that are tied to this service @@ -242,9 +259,9 @@ task "nodeport_not_used" do |_, args| end end if task_response - upsert_passed_task("nodeport_not_used", "✔️ PASSED: NodePort is not used", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: NodePort is not used", task_start_time) else - upsert_failed_task("nodeport_not_used", "✖️ FAILED: NodePort is being used", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: NodePort is being used", task_start_time) end end end @@ -252,32 +269,35 @@ end desc "Does the CNF use HostPort" task "hostport_not_used" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "hostport_not_used" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "hostport_not_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } release_name = config.cnf_config[:release_name] service_name = config.cnf_config[:service_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config, check_containers:false, check_service: true) do |resource, container, initialized| - LOGGING.info 
"hostport_not_used resource: #{resource}" + Log.for(testsuite_task).info { "hostport_not_used resource: #{resource}" } test_passed=true - LOGGING.info "resource kind: #{resource}" + Log.for(testsuite_task).info { "resource kind: #{resource}" } k8s_resource = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - LOGGING.debug "resource: #{k8s_resource}" + Log.for(testsuite_task).debug { "resource: #{k8s_resource}" } # per examaple https://github.com/cncf/cnf-testsuite/issues/164#issuecomment-904890977 containers = k8s_resource.dig?("spec", "template", "spec", "containers") - LOGGING.debug "containers: #{containers}" + Log.for(testsuite_task).debug { "containers: #{containers}" } containers && containers.as_a.each do |single_container| ports = single_container.dig?("ports") ports && ports.as_a.each do |single_port| - LOGGING.debug "single_port: #{single_port}" + Log.for(testsuite_task).debug { "single_port: #{single_port}" } hostport = single_port.dig?("hostPort") - LOGGING.debug "DAS hostPort: #{hostport}" + Log.for(testsuite_task).debug { "DAS hostPort: #{hostport}" } if hostport stdout_failure("Resource #{resource[:kind]}/#{resource[:name]} in #{resource[:namespace]} namespace is using a HostPort") @@ -289,9 +309,9 @@ task "hostport_not_used" do |_, args| test_passed end if task_response - upsert_passed_task("hostport_not_used", "✔️ 🏆 PASSED: HostPort is not used", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: HostPort is not used", task_start_time) else - upsert_failed_task("hostport_not_used", "✖️ 🏆 FAILED: HostPort is being used", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: HostPort is being used", task_start_time) end end end @@ -299,7 +319,10 @@ end desc "Does the CNF have hardcoded IPs in the K8s resource configuration" task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| task_response = CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "Task Name: 
hardcoded_ip_addresses_in_k8s_runtime_configuration" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "hardcoded_ip_addresses_in_k8s_runtime_configuration" + Log.for(testsuite_task).info { "Starting test" } + helm_chart = config.cnf_config[:helm_chart] helm_directory = config.cnf_config[:helm_directory] release_name = config.cnf_config[:release_name] @@ -312,7 +335,7 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| unless helm_chart.empty? if args.named["offline"]? info = AirGap.tar_info_by_config_src(helm_chart) - LOGGING.info "hardcoded_ip_addresses_in_k8s_runtime_configuration airgapped mode info: #{info}" + Log.for(testsuite_task).info { "airgapped mode info: #{info}" } helm_chart = info[:tar_name] end helm_install = Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{helm_chart} --dry-run --debug > #{destination_cnf_dir}/helm_chart.yml") @@ -330,12 +353,12 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| VERBOSE_LOGGING.info "IPs: #{ip_search}" if check_verbose(args) if ip_search.empty? 
- upsert_passed_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "✔️ 🏆 PASSED: No hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: No hard-coded IP addresses found in the runtime K8s configuration", task_start_time) else - upsert_failed_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "✖️ 🏆 FAILED: Hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Hard-coded IP addresses found in the runtime K8s configuration", task_start_time) end rescue - upsert_skipped_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "⏭️ 🏆 SKIPPED: unknown exception", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ 🏆 SKIPPED: unknown exception", task_start_time) ensure KubectlClient::Delete.command("namespace hardcoded-ip-test --force --grace-period 0") end @@ -344,29 +367,33 @@ end desc "Does the CNF use K8s Secrets?" task "secrets_used" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.debug { "cnf_config: #{config}" } - Log.for("verbose").info { "secrets_used" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "secrets_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + # Parse the cnf-testsuite.yml resp = "" emoji_probe="🧫" task_response = CNFManager.workload_resource_test(args, config, check_containers=false) do |resource, containers, volumes, initialized| - Log.info { "resource: #{resource}" } - Log.info { "volumes: #{volumes}" } + Log.for(testsuite_task).info { "resource: #{resource}" } + Log.for(testsuite_task).info { "volumes: #{volumes}" } volume_test_passed = false container_secret_mounted = false # Check to see any volume secrets are actually used volumes.as_a.each do |secret_volume| if secret_volume["secret"]?
- LOGGING.info "secret_volume: #{secret_volume["name"]}" + Log.for(testsuite_task).info { "secret_volume: #{secret_volume["name"]}" } container_secret_mounted = false containers.as_a.each do |container| if container["volumeMounts"]? vmount = container["volumeMounts"].as_a - LOGGING.info "vmount: #{vmount}" - LOGGING.debug "container[env]: #{container["env"]}" + Log.for(testsuite_task).info { "vmount: #{vmount}" } + Log.for(testsuite_task).debug { "container[env]: #{container["env"]}" } if (vmount.find { |x| x["name"] == secret_volume["name"]? }) - LOGGING.debug secret_volume["name"] + Log.for(testsuite_task).debug { secret_volume["name"] } container_secret_mounted = true volume_test_passed = true end @@ -391,26 +418,26 @@ task "secrets_used" do |_, args| s_name = s["metadata"]["name"] s_type = s["type"] s_namespace = s.dig("metadata", "namespace") - Log.for("verbose").info {"secret name: #{s_name}, type: #{s_type}, namespace: #{s_namespace}"} if check_verbose(args) + Log.for(testsuite_task).info {"secret name: #{s_name}, type: #{s_type}, namespace: #{s_namespace}"} if check_verbose(args) end secret_keyref_found_and_not_ignored = false containers.as_a.each do |container| c_name = container["name"] - Log.for("verbose").info { "container: #{c_name} envs #{container["env"]?}" } if check_verbose(args) + Log.for(testsuite_task).info { "container: #{c_name} envs #{container["env"]?}" } if check_verbose(args) if container["env"]? 
Log.for("container_info").info { container["env"] } container["env"].as_a.find do |env| - Log.for("verbose").debug { "checking container: #{c_name}" } if check_verbose(args) + Log.for(testsuite_task).debug { "checking container: #{c_name}" } if check_verbose(args) secret_keyref_found_and_not_ignored = secrets["items"].as_a.find do |s| s_name = s["metadata"]["name"] if IGNORED_SECRET_TYPES.includes?(s["type"]) Log.for("verbose").info { "container: #{c_name} ignored secret: #{s_name}" } if check_verbose(args) next end - Log.for("checking_secret").info { s_name } + Log.for(testsuite_task).info { "Checking secret: #{s_name}" } found = (s_name == env.dig?("valueFrom", "secretKeyRef", "name")) if found - Log.for("secret_reference_found").info { "container: #{c_name} found secret reference: #{s_name}" } + Log.for(testsuite_task).info { "secret_reference_found. container: #{c_name} found secret reference: #{s_name}" } end found end @@ -432,9 +459,9 @@ task "secrets_used" do |_, args| test_passed end if task_response - resp = upsert_passed_task("secrets_used","✔️ ✨PASSED: Secrets defined and used #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Secrets defined and used #{emoji_probe}", task_start_time) else - resp = upsert_skipped_task("secrets_used","⏭ ✨#{secrets_used_skipped_msg(emoji_probe)}", Time.utc) + resp = upsert_skipped_task(testsuite_task, "⏭ ✨#{secrets_used_skipped_msg(emoji_probe)}", task_start_time) end resp end @@ -545,8 +572,11 @@ task "immutable_configmap" do |_, args| emoji_probe="⚖️" task_response = CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "immutable_configmap" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "immutable_configmap" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -558,14 +588,14 @@ 
task "immutable_configmap" do |_, args| test_config_map_filename = "#{destination_cnf_dir}/config_maps/test_config_map.yml"; template = ImmutableConfigMapTemplate.new("doesnt_matter").to_s - Log.debug { "test immutable_configmap template: #{template}" } + Log.for(testsuite_task).debug { "test immutable_configmap template: #{template}" } File.write(test_config_map_filename, template) KubectlClient::Apply.file(test_config_map_filename) # now we change then apply again template = ImmutableConfigMapTemplate.new("doesnt_matter_again").to_s - Log.debug { "test immutable_configmap change template: #{template}" } + Log.for(testsuite_task).debug { "test immutable_configmap change template: #{template}" } File.write(test_config_map_filename, template) immutable_configmap_supported = true @@ -579,14 +609,14 @@ task "immutable_configmap" do |_, args| KubectlClient::Delete.file(test_config_map_filename) if apply_result[:status].success? - Log.info { "kubectl apply on immutable configmap succeeded for: #{test_config_map_filename}" } + Log.for(testsuite_task).info { "kubectl apply on immutable configmap succeeded for: #{test_config_map_filename}" } k8s_ver = KubectlClient.server_version if version_less_than(k8s_ver, "1.19.0") resp = " ⏭️ SKIPPED: immmutable configmaps are not supported in this k8s cluster.".colorize(:yellow) - upsert_skipped_task("immutable_configmap", resp, Time.utc) + upsert_skipped_task(testsuite_task, resp, task_start_time) else resp = "✖️ FAILED: immmutable configmaps are not enabled in this k8s cluster.".colorize(:red) - upsert_failed_task("immutable_configmap", resp, Time.utc) + upsert_failed_task(testsuite_task, resp, task_start_time) end else @@ -594,8 +624,8 @@ task "immutable_configmap" do |_, args| envs_with_mutable_configmap = [] of MutableConfigMapsInEnvResult cnf_manager_workload_resource_task_response = CNFManager.workload_resource_test(args, config, check_containers=false, check_service=true) do |resource, containers, volumes, initialized| - 
Log.info { "resource: #{resource}" } - Log.info { "volumes: #{volumes}" } + Log.for(testsuite_task).info { "resource: #{resource}" } + Log.for(testsuite_task).info { "volumes: #{volumes}" } # If the install type is manifest, the namesapce would be in the manifest. # Else rely on config for helm-based install @@ -620,10 +650,10 @@ task "immutable_configmap" do |_, args| if cnf_manager_workload_resource_task_response resp = "✔️ ✨PASSED: All volume or container mounted configmaps immutable #{emoji_probe}".colorize(:green) - upsert_passed_task("immutable_configmap", resp, Time.utc) + upsert_passed_task(testsuite_task, resp, task_start_time) elsif immutable_configmap_supported resp = "✖️ ✨FAILED: Found mutable configmap(s) #{emoji_probe}".colorize(:red) - upsert_failed_task("immutable_configmap", resp, Time.utc) + upsert_failed_task(testsuite_task, resp, task_start_time) # Print out any mutable configmaps mounted as volumes volumes_test_results.each do |result| @@ -649,7 +679,9 @@ end desc "Check if CNF uses Kubernetes alpha APIs" task "alpha_k8s_apis" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "alpha_k8s_apis" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "alpha_k8s_apis" + Log.for(testsuite_task).info { "Starting test" } unless check_poc(args) Log.info { "Skipping alpha_k8s_apis: not in poc mode" } @@ -663,7 +695,7 @@ task "alpha_k8s_apis" do |_, args| # No offline support for this task for now if args.named["offline"]? && args.named["offline"]? != "false" - upsert_skipped_task("alpha_k8s_apis","⏭️ SKIPPED: alpha_k8s_apis chaos test skipped #{emoji}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: alpha_k8s_apis chaos test skipped #{emoji}", task_start_time) next end @@ -687,7 +719,7 @@ task "alpha_k8s_apis" do |_, args| # CNF setup failed on kind cluster. Inform in test output. 
unless cnf_setup_complete puts "CNF failed to install on apisnoop cluster".colorize(:red) - upsert_failed_task("alpha_k8s_apis", "✖️ FAILED: Could not check CNF for usage of Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Could not check CNF for usage of Kubernetes alpha APIs #{emoji}", task_start_time) next end @@ -704,9 +736,9 @@ task "alpha_k8s_apis" do |_, args| api_count = result[:output].split("\n")[2].to_i if api_count == 0 - upsert_passed_task("alpha_k8s_apis", "✔️ PASSED: CNF does not use Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: CNF does not use Kubernetes alpha APIs #{emoji}", task_start_time) else - upsert_failed_task("alpha_k8s_apis", "✖️ FAILED: CNF uses Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: CNF uses Kubernetes alpha APIs #{emoji}", task_start_time) end ensure if cluster_name != nil @@ -729,8 +761,11 @@ end desc "Does the CNF install an Operator with OLM?" task "operator_installed" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "operator_installed" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "operator_installed" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } subscription_names = CNFManager.cnf_resources(args, config) do |resource| kind = resource.dig("kind").as_s @@ -739,7 +774,7 @@ task "operator_installed" do |_, args| end end.compact - Log.info { "Subscription Names: #{subscription_names}" } + Log.for(testsuite_task).info { "Subscription Names: #{subscription_names}" } #TODO Warn if csv is not found for a subscription. 
@@ -757,7 +792,7 @@ task "operator_installed" do |_, args| end end.compact - Log.info { "CSV Names: #{csv_names}" } + Log.for(testsuite_task).info { "CSV Names: #{csv_names}" } succeeded = csv_names.map do |csv| @@ -767,12 +802,12 @@ task "operator_installed" do |_, args| csv_succeeded end - Log.info { "Succeeded CSV Names: #{succeeded}" } + Log.for(testsuite_task).info { "Succeeded CSV Names: #{succeeded}" } test_passed = false if succeeded.size > 0 && succeeded.all?(true) - Log.info { "Succeeded All True?" } + Log.for(testsuite_task).info { "Succeeded All True?" } test_passed = true end @@ -783,9 +818,9 @@ task "operator_installed" do |_, args| emoji_big="🦖" if test_passed - upsert_passed_task("operator_installed", "✔️ PASSED: Operator is installed: #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Operator is installed: #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_na_task("operator_installed", "✖️ NA: No Operators Found #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_na_task(testsuite_task, "✖️ NA: No Operators Found #{emoji_big} #{emoji_image_size}", task_start_time) end end end diff --git a/src/tasks/workload/microservice.cr b/src/tasks/workload/microservice.cr index 8014d0993..54d5ac833 100644 --- a/src/tasks/workload/microservice.cr +++ b/src/tasks/workload/microservice.cr @@ -24,13 +24,17 @@ REASONABLE_STARTUP_BUFFER = 10.0 desc "To check if the CNF has multiple microservices that share a database" task "shared_database", ["install_cluster_tools"] do |_, args| - Log.info { "Running shared_database test" } + CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "shared_database" + Log.for(testsuite_task).info { "Starting test" } + # todo loop through local resources and see if db match found db_match = Netstat::Mariadb.match if db_match[:found] == false - upsert_na_task("shared_database", "⏭️ N/A: [shared_database] No MariaDB containers 
were found", Time.utc) + upsert_na_task(testsuite_task, "⏭️ N/A: [shared_database] No MariaDB containers were found", task_start_time) next end @@ -98,9 +102,9 @@ task "shared_database", ["install_cluster_tools"] do |_, args| failed_emoji = "(ভ_ভ) ރ 💾" passed_emoji = "🖥️ 💾" if integrated_database_found - upsert_failed_task("shared_database", "✖️ FAILED: Found a shared database #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Found a shared database #{failed_emoji}", task_start_time) else - upsert_passed_task("shared_database", "✔️ PASSED: No shared database found #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No shared database found #{passed_emoji}", task_start_time) end end end @@ -109,8 +113,11 @@ desc "Does the CNF have a reasonable startup time (< 30 seconds)?" task "reasonable_startup_time" do |_, args| Log.info { "Running reasonable_startup_time test" } CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "reasonable_startup_time" } if check_verbose(args) - Log.debug { "cnf_config: #{config.cnf_config}" } + task_start_time = Time.utc + testsuite_task = "reasonable_startup_time" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config.cnf_config}" } yml_file_path = config.cnf_config[:yml_file_path] helm_chart = config.cnf_config[:helm_chart] @@ -181,9 +188,9 @@ task "reasonable_startup_time" do |_, args| Log.info { "startup_time: #{startup_time.to_i}" } if startup_time.to_i <= startup_time_limit - upsert_passed_task("reasonable_startup_time", "✔️ PASSED: CNF had a reasonable startup time #{emoji_fast}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: CNF had a reasonable startup time #{emoji_fast}", task_start_time) else - upsert_failed_task("reasonable_startup_time", "✖️ FAILED: CNF had a startup time of #{startup_time} seconds #{emoji_slow}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: CNF had a 
startup time of #{startup_time} seconds #{emoji_slow}", task_start_time) end end @@ -199,16 +206,20 @@ end desc "Does the CNF have a reasonable container image size (< 5GB)?" task "reasonable_image_size" do |_, args| CNFManager::Task.task_runner(args) do |args,config| + task_start_time = Time.utc + testsuite_task = "reasonable_image_size" + Log.for(testsuite_task).info { "Starting test" } + docker_insecure_registries = [] of String if config.cnf_config[:docker_insecure_registries]? && !config.cnf_config[:docker_insecure_registries].nil? docker_insecure_registries = config.cnf_config[:docker_insecure_registries].not_nil! end unless Dockerd.install(docker_insecure_registries) - upsert_skipped_task("reasonable_image_size", "⏭️ SKIPPED: Skipping reasonable_image_size: Dockerd tool failed to install", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Skipping reasonable_image_size: Dockerd tool failed to install", task_start_time) next end - Log.for("verbose").info { "reasonable_image_size" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| yml_file_path = config.cnf_config[:yml_file_path] @@ -284,7 +295,7 @@ task "reasonable_image_size" do |_, args| test_passed=false end rescue ex - Log.error { "invalid compressed_size: #{fqdn_image} = '#{compressed_size.to_s}', #{ex.message}".colorize(:red) } + Log.for(testsuite_task).error { "invalid compressed_size: #{fqdn_image} = '#{compressed_size.to_s}', #{ex.message}".colorize(:red) } test_passed = false end else @@ -298,9 +309,9 @@ task "reasonable_image_size" do |_, args| emoji_big="🦖" if task_response - upsert_passed_task("reasonable_image_size", "✔️ PASSED: Image size is good #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Image size is good #{emoji_small} #{emoji_image_size}", 
task_start_time) else - upsert_failed_task("reasonable_image_size", "✖️ FAILED: Image size too large #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Image size too large #{emoji_big} #{emoji_image_size}", task_start_time) end end end @@ -317,8 +328,11 @@ end desc "Do the containers in a pod have only one process type?" task "single_process_type" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "single_process_type" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "single_process_type" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } fail_msgs = [] of String all_node_proc_statuses = [] of NamedTuple(node_name: String, proc_statuses: Array(String)) @@ -333,15 +347,15 @@ task "single_process_type" do |_, args| containers = KubectlClient::Get.resource_containers(kind, resource[:name], resource[:namespace]) pods.map do |pod| pod_name = pod.dig("metadata", "name") - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } status = pod["status"] if status["containerStatuses"]? 
container_statuses = status["containerStatuses"].as_a - Log.info { "container_statuses: #{container_statuses}" } - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "container_statuses: #{container_statuses}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } nodes = KubectlClient::Get.nodes_by_pod(pod) - Log.info { "nodes_by_resource done" } + Log.for(testsuite_task).info { "nodes_by_resource done" } node = nodes.first container_statuses.map do |container_status| container_name = container_status.dig("name") @@ -349,15 +363,15 @@ task "single_process_type" do |_, args| container_id = container_status.dig("containerID").as_s ready = container_status.dig("ready").as_bool next unless ready - Log.info { "containerStatuses container_id #{container_id}" } + Log.for(testsuite_task).info { "containerStatuses container_id #{container_id}" } pid = ClusterTools.node_pid_by_container_id(container_id, node) - Log.info { "node pid (should never be pid 1): #{pid}" } + Log.for(testsuite_task).info { "node pid (should never be pid 1): #{pid}" } next unless pid node_name = node.dig("metadata", "name").as_s - Log.info { "node name : #{node_name}" } + Log.for(testsuite_task).info { "node name : #{node_name}" } # filtered_proc_statuses = all_node_proc_statuses.find {|x| x[:node_name] == node_name} # proc_statuses = filtered_proc_statuses ? 
filtered_proc_statuses[:proc_statuses] : nil # Log.debug { "node statuses : #{proc_statuses}" } @@ -375,12 +389,12 @@ task "single_process_type" do |_, args| proc_statuses) statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip ppid = status["PPid"].strip - Log.info { "status name: #{status_name}" } - Log.info { "previous status name: #{previous_process_type}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "previous status name: #{previous_process_type}" } # Fail if more than one process type #todo make work if processes out of order if status_name != previous_process_type && @@ -390,7 +404,7 @@ task "single_process_type" do |_, args| status_name, statuses) unless verified - Log.info { "multiple proc types detected verified: #{verified}" } + Log.for(testsuite_task).info { "multiple proc types detected verified: #{verified}" } fail_msg = "resource: #{resource}, pod #{pod_name} and container: #{container_name} has more than one process type (#{statuses.map{|x|x["cmdline"]?}.compact.uniq.join(", ")})" unless fail_msgs.find{|x| x== fail_msg} puts fail_msg.colorize(:red) @@ -412,9 +426,9 @@ task "single_process_type" do |_, args| emoji_big="🦖" if task_response - upsert_passed_task("single_process_type", "✔️ 🏆 PASSED: Only one process type used #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Only one process type used #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("single_process_type", "✖️ 🏆 FAILED: More than one process type used #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: More than one process type used #{emoji_big} #{emoji_image_size}", task_start_time) end end 
end @@ -422,15 +436,18 @@ end desc "Are the SIGTERM signals handled?" task "zombie_handled" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "zombie_handled" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "zombie_handled" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } task_response = CNFManager.workload_resource_test(args, config, check_containers:false ) do |resource, container, initialized| ClusterTools.all_containers_by_resource?(resource, resource[:namespace]) do | container_id, container_pid_on_node, node, container_proctree_statuses, container_status| resp = ClusterTools.exec_by_node("runc --root /run/containerd/runc/k8s.io/ state #{container_id}", node) - Log.info { "resp[:output] #{resp[:output]}" } + Log.for(testsuite_task).info { "resp[:output] #{resp[:output]}" } bundle_path = JSON.parse(resp[:output].to_s) - Log.info { "bundle path: #{bundle_path["bundle"]} "} + Log.for(testsuite_task).info { "bundle path: #{bundle_path["bundle"]} "} ClusterTools.exec_by_node("nerdctl --namespace=k8s.io cp /zombie #{container_id}:/zombie", node) ClusterTools.exec_by_node("nerdctl --namespace=k8s.io cp /sleep #{container_id}:/sleep", node) # ClusterTools.exec_by_node("ctools --bundle_path --container_id ") @@ -444,15 +461,15 @@ task "zombie_handled" do |_, args| ClusterTools.all_containers_by_resource?(resource, resource[:namespace]) do | container_id, container_pid_on_node, node, container_proctree_statuses, container_status| zombies = container_proctree_statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip current_pid = status["Pid"].strip state = status["State"].strip - 
Log.info { "pid: #{current_pid}" } - Log.info { "status name: #{status_name}" } - Log.info { "state: #{state}" } - Log.info { "(state =~ /zombie/): #{(state =~ /zombie/)}" } + Log.for(testsuite_task).info { "pid: #{current_pid}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "state: #{state}" } + Log.for(testsuite_task).info { "(state =~ /zombie/): #{(state =~ /zombie/)}" } if (state =~ /zombie/) != nil puts "Process #{status_name} has a state of #{state}".colorize(:red) true @@ -460,7 +477,7 @@ task "zombie_handled" do |_, args| nil end end - Log.info { "zombies.all?(nil): #{zombies.all?(nil)}" } + Log.for(testsuite_task).info { "zombies.all?(nil): #{zombies.all?(nil)}" } zombies.all?(nil) end end @@ -470,9 +487,9 @@ task "zombie_handled" do |_, args| emoji_big="🦖" if task_response - upsert_passed_task("zombie_handled", "✔️ 🏆 PASSED: Zombie handled #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Zombie handled #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("zombie_handled", "✖️ 🏆 FAILED: Zombie not handled #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Zombie not handled #{emoji_big} #{emoji_image_size}", task_start_time) end end @@ -483,8 +500,11 @@ end desc "Are the SIGTERM signals handled?" task "sig_term_handled" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "sig_term_handled" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "sig_term_handled" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } # test_status can be "skipped" or "failed". # Only collecting containers that failed or were skipped. @@ -525,10 +545,10 @@ task "sig_term_handled" do |_, args| status = pod["status"] if status["containerStatuses"]? 
container_statuses = status["containerStatuses"].as_a - Log.info { "container_statuses: #{container_statuses}" } - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "container_statuses: #{container_statuses}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } nodes = KubectlClient::Get.nodes_by_pod(pod) - Log.info { "nodes_by_resource done" } + Log.for(testsuite_task).info { "nodes_by_resource done" } node = nodes.first # there should only be one node returned for one pod sig_result = container_statuses.map do |container_status| container_name = container_status.dig("name") @@ -537,7 +557,7 @@ task "sig_term_handled" do |_, args| # Check if the container status is ready. # If this container is not ready, move on to next. container_name = container_status.dig("name").as_s - Log.info { "before ready containerStatuses pod:#{pod_name} container:#{container_name}" } + Log.for(testsuite_task).info { "before ready containerStatuses pod:#{pod_name} container:#{container_name}" } ready = container_status.dig("ready").as_bool if !ready Log.info { "container status: #{container_status} "} @@ -554,7 +574,7 @@ task "sig_term_handled" do |_, args| end container_id = container_status.dig("containerID").as_s - Log.info { "containerStatuses container_id #{container_id}" } + Log.for(testsuite_task).info { "containerStatuses container_id #{container_id}" } #get container id's pid on the node (different from inside the container) pid = "#{ClusterTools.node_pid_by_container_id(container_id, node)}" @@ -572,7 +592,7 @@ task "sig_term_handled" do |_, args| end # next if pid.empty? - Log.info { "node pid (should never be pid 1): #{pid}" } + Log.for(testsuite_task).info { "node pid (should never be pid 1): #{pid}" } # need to do the next line. how to kill the current cnf? 
# this was one of the reason why we did stuff like this durring the cnf install and saved it as a configmap @@ -585,9 +605,9 @@ task "sig_term_handled" do |_, args| #todo 2.1 loop through all child processes that are not threads (only include proceses where tgid = pid) #todo 2.1.1 ignore the parent pid (we are on the host so it wont be pid 1) node_name = node.dig("metadata", "name").as_s - Log.info { "node name : #{node_name}" } + Log.for(testsuite_task).info { "node name : #{node_name}" } pids = KernelIntrospection::K8s::Node.pids(node) - Log.info { "proctree_by_pid pids: #{pids}" } + Log.for(testsuite_task).info { "proctree_by_pid pids: #{pids}" } proc_statuses = KernelIntrospection::K8s::Node.all_statuses_by_pids(pids, node) statuses = KernelIntrospection::K8s::Node.proctree_by_pid(pid, node, proc_statuses) @@ -605,16 +625,16 @@ task "sig_term_handled" do |_, args| end end non_thread_statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip ppid = status["PPid"].strip current_pid = status["Pid"].strip tgid = status["Tgid"].strip # check if 'g' is uppercase - Log.info { "Pid: #{current_pid}" } - Log.info { "Tgid: #{tgid}" } - Log.info { "status name: #{status_name}" } - Log.info { "previous status name: #{previous_process_type}" } + Log.for(testsuite_task).info { "Pid: #{current_pid}" } + Log.for(testsuite_task).info { "Tgid: #{tgid}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "previous status name: #{previous_process_type}" } # do not count the top pid if there are children if non_thread_statuses.size > 1 && pid == current_pid next @@ -637,7 +657,7 @@ task "sig_term_handled" do |_, args| #todo 2.2 wait for 30 seconds end ClusterTools.exec_by_node("bash -c 'sleep 10 && kill #{pid} && sleep 
5 && kill -9 #{pid}'", node) - Log.info { "pid_log_names: #{pid_log_names}" } + Log.for(testsuite_task).info { "pid_log_names: #{pid_log_names}" } #todo 2.3 parse the logs #todo get the log sleep 5 @@ -656,7 +676,7 @@ task "sig_term_handled" do |_, args| false end end - Log.info { "SigTerm Found: #{sig_term_found}" } + Log.for(testsuite_task).info { "SigTerm Found: #{sig_term_found}" } # per all containers container_sig_term_check = sig_term_found.all?(true) if container_sig_term_check == false @@ -690,9 +710,9 @@ task "sig_term_handled" do |_, args| emoji_big="🦖" if task_response - upsert_passed_task("sig_term_handled", "✔️ 🏆 PASSED: Sig Term handled #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Sig Term handled #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("sig_term_handled", "✖️ 🏆 FAILED: Sig Term not handled #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Sig Term not handled #{emoji_big} #{emoji_image_size}", task_start_time) failed_containers.map do |failure_info| resource_output = "Pod: #{failure_info["pod"]}, Container: #{failure_info["container"]}, Result: #{failure_info["test_status"]}" if failure_info["test_status"] == "skipped" @@ -708,7 +728,9 @@ end desc "Are any of the containers exposed as a service?" 
task "service_discovery" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "service_discovery" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "service_discovery" + Log.for(testsuite_task).info { "Starting test" } # Get all resources for the CNF resource_ymls = CNFManager.cnf_workload_resources(args, config) { |resource| resource } @@ -755,18 +777,19 @@ task "service_discovery" do |_, args| emoji_big="🦖" if test_passed - upsert_passed_task("service_discovery", "✔️ ✨PASSED: Some containers exposed as a service #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Some containers exposed as a service #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("service_discovery", "✖️ ✨FAILED: No containers exposed as a service #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: No containers exposed as a service #{emoji_big} #{emoji_image_size}", task_start_time) end end end desc "To check if the CNF uses a specialized init system" task "specialized_init_system", ["install_cluster_tools"] do |_, args| - test_name = "specialized_init_system" CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running #{test_name} test" } + task_start_time = Time.utc + testsuite_task = "specialized_init_system" + Log.for(testsuite_task).info { "Starting test" } failed_cnf_resources = [] of InitSystems::InitSystemInfo CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -774,10 +797,10 @@ task "specialized_init_system", ["install_cluster_tools"] do |_, args| case kind when "deployment","statefulset","pod","replicaset", "daemonset" namespace = resource[:namespace] - Log.for(test_name).info { "Checking resource #{resource[:kind]}/#{resource[:name]} in #{namespace}" } + Log.for(testsuite_task).info { "Checking resource #{resource[:kind]}/#{resource[:name]} in 
#{namespace}" } resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) pods = KubectlClient::Get.pods_by_resource(resource_yaml, namespace) - Log.for(test_name).info { "Pod count for resource #{resource[:kind]}/#{resource[:name]} in #{namespace}: #{pods.size}" } + Log.for(testsuite_task).info { "Pod count for resource #{resource[:kind]}/#{resource[:name]} in #{namespace}: #{pods.size}" } pods.each do |pod| results = InitSystems.scan(pod) failed_cnf_resources = failed_cnf_resources + results @@ -789,12 +812,12 @@ task "specialized_init_system", ["install_cluster_tools"] do |_, args| passed_emoji = "🖥️ 🚀" if failed_cnf_resources.size > 0 - upsert_failed_task(test_name, "✖️ FAILED: Containers do not use specialized init systems #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Containers do not use specialized init systems #{failed_emoji}", task_start_time) failed_cnf_resources.each do |init_info| stdout_failure "#{init_info.kind}/#{init_info.name} has container '#{init_info.container}' with #{init_info.init_cmd} as init process" end else - upsert_passed_task(test_name, "✔️ PASSED: Containers use specialized init systems #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Containers use specialized init systems #{passed_emoji}", task_start_time) end end diff --git a/src/tasks/workload/observability.cr b/src/tasks/workload/observability.cr index dd58baecd..c0ad7bafe 100644 --- a/src/tasks/workload/observability.cr +++ b/src/tasks/workload/observability.cr @@ -19,7 +19,9 @@ end desc "Check if the CNF outputs logs to stdout or stderr" task "log_output" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "log_output" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "log_output" + Log.for(testsuite_task).info { "Starting test" } task_response = CNFManager.workload_resource_test(args, config) do |resource, 
container, initialized| test_passed = false @@ -38,9 +40,9 @@ task "log_output" do |_, args| emoji_observability="📶☠️" if task_response - upsert_passed_task("log_output", "✔️ 🏆 PASSED: Resources output logs to stdout and stderr #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Resources output logs to stdout and stderr #{emoji_observability}", task_start_time) else - upsert_failed_task("log_output", "✖️ 🏆 FAILED: Resources do not output logs to stdout and stderr #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Resources do not output logs to stdout and stderr #{emoji_observability}", task_start_time) end end end @@ -50,6 +52,9 @@ task "prometheus_traffic" do |_, args| Log.info { "Running: prometheus_traffic" } next if args.named["offline"]? task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "prometheus_traffic" + Log.for(testsuite_task).info { "Starting test" } release_name = config.cnf_config[:release_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -157,12 +162,12 @@ task "prometheus_traffic" do |_, args| # -- match ip address to cnf ip addresses # todo check if scrape_url is not an ip, assume it is a service, then do task (2) if prom_cnf_match - upsert_passed_task("prometheus_traffic","✔️ ✨PASSED: Your cnf is sending prometheus traffic #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ ✨PASSED: Your cnf is sending prometheus traffic #{emoji_observability}", task_start_time) else - upsert_failed_task("prometheus_traffic", "✖️ ✨FAILED: Your cnf is not sending prometheus traffic #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Your cnf is not sending prometheus traffic #{emoji_observability}", task_start_time) end else - upsert_skipped_task("prometheus_traffic", "⏭️ ✨SKIPPED: Prometheus server not found #{emoji_observability}", Time.utc) + 
upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Prometheus server not found #{emoji_observability}", task_start_time) end end end @@ -172,6 +177,10 @@ task "open_metrics", ["prometheus_traffic"] do |_, args| Log.info { "Running: open_metrics" } next if args.named["offline"]? task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "open_metrics" + Log.for(testsuite_task).info { "Starting test" } + release_name = config.cnf_config[:release_name] configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-open-metrics") emoji_observability="📶☠️" @@ -179,14 +188,14 @@ task "open_metrics", ["prometheus_traffic"] do |_, args| open_metrics_validated = configmap["data"].as_h["open_metrics_validated"].as_s if open_metrics_validated == "true" - upsert_passed_task("open_metrics","✔️ ✨PASSED: Your cnf's metrics traffic is OpenMetrics compatible #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ ✨PASSED: Your cnf's metrics traffic is OpenMetrics compatible #{emoji_observability}", task_start_time) else open_metrics_response = configmap["data"].as_h["open_metrics_response"].as_s puts "OpenMetrics Failed: #{open_metrics_response}".colorize(:red) - upsert_failed_task("open_metrics", "✖️ ✨FAILED: Your cnf's metrics traffic is not OpenMetrics compatible #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Your cnf's metrics traffic is not OpenMetrics compatible #{emoji_observability}", task_start_time) end else - upsert_skipped_task("open_metrics", "⏭️ ✨SKIPPED: Prometheus traffic not configured #{emoji_observability}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Prometheus traffic not configured #{emoji_observability}", task_start_time) end end end @@ -197,6 +206,10 @@ task "routed_logs", ["install_cluster_tools"] do |_, args| next if args.named["offline"]? 
emoji_observability="📶☠️" task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "routed_logs" + Log.for(testsuite_task).info { "Starting test" } + fluentd_match = FluentD.match() fluentbit_match = FluentBit.match() fluentbitBitnami_match = FluentDBitnami.match() @@ -227,25 +240,30 @@ task "routed_logs", ["install_cluster_tools"] do |_, args| end Log.info { "all_resourced_logged: #{all_resourced_logged}" } if all_resourced_logged - upsert_passed_task("routed_logs","✔️ ✨PASSED: Your cnf's logs are being captured #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ ✨PASSED: Your cnf's logs are being captured #{emoji_observability}", task_start_time) else - upsert_failed_task("routed_logs", "✖️ ✨FAILED: Your cnf's logs are not being captured #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Your cnf's logs are not being captured #{emoji_observability}", task_start_time) end else - upsert_skipped_task("routed_logs", "⏭️ ✨SKIPPED: Fluentd or FluentBit not configured #{emoji_observability}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Fluentd or FluentBit not configured #{emoji_observability}", task_start_time) end end end desc "Does the CNF install use tracing?" task "tracing" do |_, args| - Log.for("verbose").info { "tracing" } if check_verbose(args) - Log.info { "tracing args: #{args.inspect}" } + testsuite_task = "tracing" + Log.for(testsuite_task).info { "Running test" } + Log.for(testsuite_task).info { "tracing args: #{args.inspect}" } + next if args.named["offline"]? - emoji_tracing_deploy="⎈🚀" + emoji_tracing_deploy="⎈🚀" if check_cnf_config(args) || CNFManager.destination_cnfs_exist? 
CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + Log.for(testsuite_task).info { "Starting test for CNF" } + match = JaegerManager.match() Log.info { "jaeger match: #{match}" } if match[:found] @@ -258,16 +276,16 @@ task "tracing" do |_, args| tracing_used = configmap["data"].as_h["tracing_used"].as_s if tracing_used == "true" - upsert_passed_task("tracing", "✔️ ✨PASSED: Tracing used #{emoji_tracing_deploy}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Tracing used #{emoji_tracing_deploy}", task_start_time) else - upsert_failed_task("tracing", "✖️ ✨FAILED: Tracing not used #{emoji_tracing_deploy}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Tracing not used #{emoji_tracing_deploy}", task_start_time) end else - upsert_skipped_task("tracing", "⏭️ ✨SKIPPED: Jaeger not configured #{emoji_tracing_deploy}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Jaeger not configured #{emoji_tracing_deploy}", task_start_time) end end else - upsert_failed_task("tracing", "✖️ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) + upsert_failed_task(testsuite_task, "✖️ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) end end diff --git a/src/tasks/workload/ran.cr b/src/tasks/workload/ran.cr index 419295cc2..1bfae5ca5 100644 --- a/src/tasks/workload/ran.cr +++ b/src/tasks/workload/ran.cr @@ -8,7 +8,10 @@ require "../utils/utils.cr" desc "Test if a 5G core supports SUCI Concealment" task "suci_enabled" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running suci_enabled test" } + task_start_time = Time.utc + testsuite_task = "suci_enabled" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } suci_found : Bool | Nil core = config.cnf_config[:core_label]? 
@@ -52,9 +55,9 @@ task "suci_enabled" do |_, args| if suci_found - resp = upsert_passed_task("suci_enabled","✔️ PASSED: Core uses SUCI 5g authentication", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: Core uses SUCI 5g authentication", task_start_time) else - resp = upsert_failed_task("suci_enabled", "✖️ FAILED: Core does not use SUCI 5g authentication", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Core does not use SUCI 5g authentication", task_start_time) end resp ensure @@ -68,7 +71,10 @@ end desc "Test if RAN uses the ORAN e2 interface" task "oran_e2_connection" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running oran_e2_connection test" } + task_start_time = Time.utc + testsuite_task = "oran_e2_connection" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } release_name = config.cnf_config[:release_name] if ORANMonitor.isCNFaRIC?(config.cnf_config) @@ -77,13 +83,13 @@ task "oran_e2_connection" do |_, args| if e2_found == "true" - resp = upsert_passed_task("oran_e2_connection","✔️ PASSED: RAN connects to a RIC using the e2 standard interface", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: RAN connects to a RIC using the e2 standard interface", task_start_time) else - resp = upsert_failed_task("e2_established", "✖️ FAILED: RAN does not connect to a RIC using the e2 standard interface", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: RAN does not connect to a RIC using the e2 standard interface", task_start_time) end resp else - upsert_na_task("oran_e2_connection", "⏭️ N/A: [oran_e2_connection] No ric designated in cnf_testsuite.yml", Time.utc) + upsert_na_task(testsuite_task, "⏭️ N/A: [oran_e2_connection] No ric designated in cnf_testsuite.yml", task_start_time) next end end diff --git a/src/tasks/workload/reliability.cr b/src/tasks/workload/reliability.cr index 171e86c92..659205038 100644 --- 
a/src/tasks/workload/reliability.cr +++ b/src/tasks/workload/reliability.cr @@ -30,29 +30,32 @@ end desc "Is there a liveness entry in the helm chart?" task "liveness" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("liveness").info { "Starting test" } - Log.for("liveness").debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "liveness" + Log.for(testsuite_task).info { "Starting test" } + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + resp = "" emoji_probe="⎈🧫" task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = true resource_ref = "#{resource[:kind]}/#{resource[:name]}" begin - Log.for("liveness").debug { container.as_h["name"].as_s } if check_verbose(args) + Log.for(testsuite_task).debug { container.as_h["name"].as_s } if check_verbose(args) container.as_h["livenessProbe"].as_h rescue ex - Log.for("liveness").error { ex.message } if check_verbose(args) + Log.for(testsuite_task).error { ex.message } if check_verbose(args) test_passed = false stdout_failure("No livenessProbe found for container #{container.as_h["name"].as_s} part of #{resource_ref} in #{resource[:namespace]} namespace") end - Log.for("liveness").info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } + Log.for(testsuite_task).info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } test_passed end - Log.for("liveness").info { "Workload resource task response: #{task_response}" } + Log.for(testsuite_task).info { "Workload resource task response: #{task_response}" } if task_response - resp = upsert_passed_task("liveness","✔️ 🏆 PASSED: Helm liveness probe found #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ 🏆 PASSED: Helm liveness probe found #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("liveness","✖️ 🏆 FAILED: No livenessProbe found #{emoji_probe}", Time.utc) + resp = 
upsert_failed_task(testsuite_task,"✖️ 🏆 FAILED: No livenessProbe found #{emoji_probe}", task_start_time) end resp end @@ -61,6 +64,9 @@ end desc "Is there a readiness entry in the helm chart?" task "readiness" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "readiness" + Log.for(testsuite_task).info { "Starting test" } + Log.for(testsuite_task).debug { "cnf_config: #{config}" } - Log.for("readiness").info { "Starting test" } - Log.for("readiness").debug { "cnf_config: #{config}" } resp = "" @@ -69,21 +76,21 @@ test_passed = true resource_ref = "#{resource[:kind]}/#{resource[:name]}" begin - Log.for("readiness").debug { container.as_h["name"].as_s } if check_verbose(args) + Log.for(testsuite_task).debug { container.as_h["name"].as_s } if check_verbose(args) container.as_h["readinessProbe"].as_h rescue ex - Log.for("readiness").error { ex.message } if check_verbose(args) + Log.for(testsuite_task).error { ex.message } if check_verbose(args) test_passed = false stdout_failure("No readinessProbe found for container #{container.as_h["name"].as_s} part of #{resource_ref} in #{resource[:namespace]} namespace") end - Log.for("readiness").info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } + Log.for(testsuite_task).info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } test_passed end - Log.for("readiness").info { "Workload resource task response: #{task_response}" } + Log.for(testsuite_task).info { "Workload resource task response: #{task_response}" } if task_response - resp = upsert_passed_task("readiness","✔️ 🏆 PASSED: Helm readiness probe found #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ 🏆 PASSED: Helm readiness probe found #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("readiness","✖️ 🏆 FAILED: No readinessProbe found #{emoji_probe}", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ 🏆 FAILED: No readinessProbe found #{emoji_probe}", 
task_start_time) end resp end @@ -93,8 +100,10 @@ end desc "Does the CNF crash when network latency occurs" task "pod_network_latency", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_latency" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_network_latency" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -121,10 +130,10 @@ task "pod_network_latency", ["install_litmus"] do |_, args| # https://raw.githubusercontent.com/litmuschaos/chaos-charts/v2.14.x/charts/generic/pod-network-latency/rbac.yaml rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-latency/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -134,11 +143,11 @@ task "pod_network_latency", ["install_litmus"] do |_, args| chaos_experiment_name = "pod-network-latency" total_chaos_duration = "60" - test_name = "#{resource["name"]}-#{Random.rand(99)}" + test_name = "#{resource["name"]}-#{Random::Secure.hex(4)}" chaos_result_name = "#{test_name}-#{chaos_experiment_name}" spec_labels = KubectlClient::Get.resource_spec_labels(resource["kind"], resource["name"], resource["namespace"]).as_h - 
Log.for("#{test_name}:spec_labels").info { "Spec labels for chaos template. Key: #{spec_labels.first_key}; Value: #{spec_labels.first_value}" } + Log.for("#{testsuite_task}:spec_labels").info { "Spec labels for chaos template. Key: #{spec_labels.first_key}; Value: #{spec_labels.first_value}" } template = ChaosTemplates::PodNetworkLatency.new( test_name, "#{chaos_experiment_name}", @@ -154,9 +163,9 @@ task "pod_network_latency", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_latency","✔️ ✨PASSED: pod_network_latency chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: pod_network_latency chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("pod_network_latency","✖️ ✨FAILED: pod_network_latency chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ ✨FAILED: pod_network_latency chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -164,9 +173,11 @@ end desc "Does the CNF crash when network corruption occurs" task "pod_network_corruption", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_corruption" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "pod_network_corruption" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -191,10 +202,10 @@ task "pod_network_corruption", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-network-corruption/rbac.yaml" rbac_url = 
"https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-corruption/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -223,9 +234,9 @@ task "pod_network_corruption", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_corruption","✔️ ✨PASSED: pod_network_corruption chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: pod_network_corruption chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("pod_network_corruption","✖️ ✨FAILED: pod_network_corruption chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ ✨FAILED: pod_network_corruption chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -233,9 +244,11 @@ end desc "Does the CNF crash when network duplication occurs" task "pod_network_duplication", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_duplication" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "pod_network_duplication" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response 
= CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -260,10 +273,10 @@ task "pod_network_duplication", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-network-duplication/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-duplication/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -292,9 +305,9 @@ task "pod_network_duplication", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_duplication","✔️ ✨PASSED: pod_network_duplication chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: pod_network_duplication chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("pod_network_duplication","✖️ ✨FAILED: pod_network_duplication chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ ✨FAILED: pod_network_duplication chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -302,9 +315,11 @@ end desc "Does the CNF crash when disk fill occurs" task "disk_fill", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "disk_fill" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = 
Time.utc + testsuite_task = "disk_fill" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| app_namespace = resource[:namespace] || config.cnf_config[:helm_install_namespace] @@ -312,7 +327,7 @@ task "disk_fill", ["install_litmus"] do |_, args| if spec_labels.as_h? && spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -327,10 +342,10 @@ task "disk_fill", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/disk-fill/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/disk-fill/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -361,9 +376,9 @@ task "disk_fill", ["install_litmus"] do |_, args| test_passed end if task_response - resp = upsert_passed_task("disk_fill","✔️ PASSED: disk_fill chaos test passed 
🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: disk_fill chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("disk_fill","✖️ FAILED: disk_fill chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ FAILED: disk_fill chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -371,8 +386,10 @@ end desc "Does the CNF crash when pod-delete occurs" task "pod_delete", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_delete" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_delete" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -381,7 +398,7 @@ task "pod_delete", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -393,11 +410,11 @@ task "pod_delete", ["install_litmus"] do |_, args| else # experiment_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-delete/experiment.yaml" experiment_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-delete/experiment.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-delete/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-delete/rbac.yaml" - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -431,9 +448,9 @@ task "pod_delete", ["install_litmus"] do |_, args| test_passed=LitmusManager.check_chaos_verdict(chaos_result_name,chaos_experiment_name,args, namespace: app_namespace) end if task_response - resp = upsert_passed_task("pod_delete","✔️ PASSED: pod_delete chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: pod_delete chaos test passed 🗡️💀♻️", task_start_time) else - resp = 
upsert_failed_task("pod_delete","✖️ FAILED: pod_delete chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ FAILED: pod_delete chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -441,8 +458,10 @@ end desc "Does the CNF crash when pod-memory-hog occurs" task "pod_memory_hog", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_memory_hog" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_memory_hog" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -451,7 +470,7 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| if spec_labels.as_h? && spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -466,10 +485,10 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-memory-hog/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-memory-hog/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, 
"#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -502,9 +521,9 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| test_passed end if task_response - resp = upsert_passed_task("pod_memory_hog","✔️ PASSED: pod_memory_hog chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ PASSED: pod_memory_hog chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("pod_memory_hog","✖️ FAILED: pod_memory_hog chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ FAILED: pod_memory_hog chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -512,9 +531,11 @@ end desc "Does the CNF crash when pod-io-stress occurs" task "pod_io_stress", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_io_stress" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "pod_io_stress" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| app_namespace = resource[:namespace] || config.cnf_config[:helm_install_namespace] @@ -522,7 +543,7 @@ task "pod_io_stress", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["name"]} in #{resource["namespace"]}") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["name"]} in #{resource["namespace"]}") test_passed = false end if test_passed @@ -537,10 +558,10 @@ task "pod_io_stress", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-io-stress/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-io-stress/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -572,9 +593,9 @@ task "pod_io_stress", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task(test_name,"✔️ ✨PASSED: #{test_name} chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: #{testsuite_task} chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task(test_name,"✖️ ✨FAILED: #{test_name} chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ ✨FAILED: #{testsuite_task} chaos test failed 🗡️💀♻️", task_start_time) end end ensure @@ -587,8 +608,10 @@ end desc "Does the CNF crash when pod-dns-error occurs" task "pod_dns_error", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do 
|args, config| - test_name = "pod_dns_error" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_dns_error" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] runtimes = KubectlClient::Get.container_runtimes @@ -600,7 +623,7 @@ task "pod_dns_error", ["install_litmus"] do |_, args| if spec_labels.as_h? && spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -615,10 +638,10 @@ task "pod_dns_error", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-dns-error/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-dns-error/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -649,12 +672,12 @@ task "pod_dns_error", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_dns_error","✔️ ✨PASSED: pod_dns_error chaos test 
passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: pod_dns_error chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("pod_dns_error","✖️ ✨FAILED: pod_dns_error chaos test failed 🗡️💀♻️", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ ✨FAILED: pod_dns_error chaos test failed 🗡️💀♻️", task_start_time) end else - resp = upsert_skipped_task("pod_dns_error","⏭️ ✨SKIPPED: pod_dns_error docker runtime not found 🗡️💀♻️", Time.utc) + resp = upsert_skipped_task(testsuite_task,"⏭️ ✨SKIPPED: pod_dns_error docker runtime not found 🗡️💀♻️", task_start_time) end end end diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index bd3a7dcc3..a58be246e 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -38,7 +38,9 @@ end desc "Check if pods in the CNF use sysctls with restricted values" task "sysctls" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "sysctls" } + task_start_time = Time.utc + testsuite_task = "sysctls" + Log.for(testsuite_task).info { "Starting test" } Kyverno.install emoji_security = "🔓🔑" @@ -48,9 +50,9 @@ task "sysctls" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("sysctls", "✔️ PASSED: No restricted values found for sysctls #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: No restricted values found for sysctls #{emoji_security}", task_start_time) else - resp = upsert_failed_task("sysctls", "✖️ FAILED: Restricted values for are being used for sysctls #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Restricted values are being used for sysctls #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) @@ -63,7 +65,10 @@ end desc "Check if the CNF has services with external IPs configured" task "external_ips" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "external_ips" } + task_start_time = Time.utc + testsuite_task = "external_ips" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "🔓🔑" policy_path = Kyverno.best_practice_policy("restrict-service-external-ips/restrict-service-external-ips.yaml") @@ -73,9 +78,9 @@ task "external_ips" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("external_ips", "✔️ PASSED: Services are not using external IPs #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ PASSED: Services are not using external IPs #{emoji_security}", task_start_time) else - resp = upsert_failed_task("external_ips", "✖️ FAILED: Services are using external IPs #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Services are using external IPs #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) @@ -88,7 +93,10 @@ end desc "Check if the CNF or the cluster resources have custom SELinux options" task "selinux_options" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "selinux_options" } + task_start_time = Time.utc + testsuite_task = "selinux_options" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "🔓🔑" @@ -107,14 +115,14 @@ task "selinux_options" do |_, args| if check_failures.size == 0 # upsert_skipped_task("selinux_options", "⏭️ 🏆 SKIPPED: Pods are not using SELinux options #{emoji_security}", Time.utc) - upsert_na_task("selinux_options", "⏭️ 🏆 N/A: Pods are not using SELinux #{emoji_security}", Time.utc) + upsert_na_task(testsuite_task, "⏭️ 🏆 N/A: Pods are not using SELinux #{emoji_security}", task_start_time) else failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, disallow_failures) if failures.size == 0 - resp = upsert_passed_task("selinux_options", "✔️ 🏆 PASSED: Pods are not using custom SELinux options that can be used for privilege escalations #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Pods are not using custom SELinux options that can be used for privilege escalations #{emoji_security}", task_start_time) else - resp = upsert_failed_task("selinux_options", "✖️ 🏆 FAILED: Pods are using custom SELinux options that can be used for privilege escalations #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Pods are using custom SELinux options that can be used for privilege escalations #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) @@ -130,16 +138,19 @@ end desc "Check if the CNF is running containers with container sock mounts" task "container_sock_mounts" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "container_sock_mounts" } + task_start_time = Time.utc + testsuite_task = "container_sock_mounts" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "🔓🔑" policy_path = Kyverno.best_practice_policy("disallow_cri_sock_mount/disallow_cri_sock_mount.yaml") failures = Kyverno::PolicyAudit.run(policy_path, EXCLUDE_NAMESPACES) if failures.size == 0 - resp = upsert_passed_task("container_sock_mounts", "✔️ 🏆 PASSED: Container engine daemon sockets are not mounted as volumes #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Container engine daemon sockets are not mounted as volumes #{emoji_security}", task_start_time) else - resp = upsert_failed_task("container_sock_mounts", "✖️ 🏆 FAILED: Container engine daemon sockets are mounted as volumes #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Container engine daemon sockets are mounted as volumes #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) @@ -151,67 +162,73 @@ end desc "Check if any containers are running in as root" task "non_root_user", ["install_falco"] do |_, args| - CNFManager::Task.task_runner(args) do |args,config| - - unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) - Log.info { "Falco Failed to Start" } - upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. 
Check Kernel Headers are installed on the Host Systems(K8s).", Time.utc) - node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) - pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") - - # Handle scenario when pod is not available when Falco is not installed. - if pods.size > 0 - falco_pod_name = pods[0].dig("metadata", "name").as_s - Log.info { "Falco Pod Name: #{falco_pod_name}" } - KubectlClient.logs(falco_pod_name, namespace: TESTSUITE_NAMESPACE) - end - next - end - - Log.for("verbose").info { "non_root_user" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } - fail_msgs = [] of String - task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| - test_passed = true - Log.info { "Falco is Running" } - kind = resource["kind"].downcase - case kind - when "deployment","statefulset","pod","replicaset", "daemonset" - resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - pods = KubectlClient::Get.pods_by_resource(resource_yaml) - # containers = KubectlClient::Get.resource_containers(kind, resource[:name]) - pods.map do |pod| - # containers.as_a.map do |container| - # container_name = container.dig("name") - pod_name = pod.dig("metadata", "name").as_s - # if Falco.find_root_pod(pod_name, container_name) - if Falco.find_root_pod(pod_name) - fail_msg = "resource: #{resource} and pod #{pod_name} uses a root user" - unless fail_msgs.find{|x| x== fail_msg} - puts fail_msg.colorize(:red) - fail_msgs << fail_msg - end - test_passed=false - end - end - end - test_passed - end - emoji_no_root="🚫√" - emoji_root="√" - - if task_response - upsert_passed_task("non_root_user", "✔️ PASSED: Root user not found #{emoji_no_root}", Time.utc) - else - upsert_failed_task("non_root_user", "✖️ FAILED: Root user found #{emoji_root}", Time.utc) - end - end + CNFManager::Task.task_runner(args) do |args,config| + task_start_time = Time.utc + 
testsuite_task = "non_root_user" + Log.for(testsuite_task).info { "Starting test" } + + unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) + Log.info { "Falco Failed to Start" } + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) + node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) + pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") + + # Handle scenario when pod is not available when Falco is not installed. + if pods.size > 0 + falco_pod_name = pods[0].dig("metadata", "name").as_s + Log.info { "Falco Pod Name: #{falco_pod_name}" } + KubectlClient.logs(falco_pod_name, namespace: TESTSUITE_NAMESPACE) + end + next + end + + Log.for("verbose").info { "non_root_user" } if check_verbose(args) + Log.debug { "cnf_config: #{config}" } + fail_msgs = [] of String + task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| + test_passed = true + Log.info { "Falco is Running" } + kind = resource["kind"].downcase + case kind + when "deployment","statefulset","pod","replicaset", "daemonset" + resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) + pods = KubectlClient::Get.pods_by_resource(resource_yaml) + # containers = KubectlClient::Get.resource_containers(kind, resource[:name]) + pods.map do |pod| + # containers.as_a.map do |container| + # container_name = container.dig("name") + pod_name = pod.dig("metadata", "name").as_s + # if Falco.find_root_pod(pod_name, container_name) + if Falco.find_root_pod(pod_name) + fail_msg = "resource: #{resource} and pod #{pod_name} uses a root user" + unless fail_msgs.find{|x| x== fail_msg} + puts fail_msg.colorize(:red) + fail_msgs << fail_msg + end + test_passed=false + end + end + end + test_passed + end + 
emoji_no_root="🚫√" + emoji_root="√" + + if task_response + upsert_passed_task(testsuite_task, "✔️ PASSED: Root user not found #{emoji_no_root}", task_start_time) + else + upsert_failed_task(testsuite_task, "✖️ FAILED: Root user found #{emoji_root}", task_start_time) + end + end end desc "Check if any containers are running in privileged mode" task "privileged" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "privileged" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "privileged" + Log.for(testsuite_task).info { "Starting test" } + white_list_container_names = config.cnf_config[:white_list_container_names] VERBOSE_LOGGING.info "white_list_container_names #{white_list_container_names.inspect}" if check_verbose(args) violation_list = [] of NamedTuple(kind: String, name: String, container: String, namespace: String) @@ -229,12 +246,12 @@ task "privileged" do |_, args| true end end - LOGGING.debug "violator list: #{violation_list.flatten}" + Log.debug { "violator list: #{violation_list.flatten}" } emoji_security="🔓🔑" if task_response - upsert_passed_task("privileged", "✔️ PASSED: No privileged containers #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No privileged containers #{emoji_security}", task_start_time) else - upsert_failed_task("privileged", "✖️ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", Time.utc) + upsert_failed_task(testsuite_task, "✖️ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", task_start_time) violation_list.each do |violation| stdout_failure("Privileged container #{violation[:container]} in #{violation[:kind]}/#{violation[:name]} in the #{violation[:namespace]} namespace") end @@ -245,7 +262,10 @@ end desc "Check if any containers are running in privileged mode" task "privilege_escalation", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - 
VERBOSE_LOGGING.info "privilege_escalation" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "privilege_escalation" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Allow privilege escalation") test_report = Kubescape.parse_test_report(test_json) @@ -254,9 +274,9 @@ task "privilege_escalation", ["kubescape_scan"] do |_, args| emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("privilege_escalation", "✔️ PASSED: No containers that allow privilege escalation were found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No containers that allow privilege escalation were found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("privilege_escalation", "✖️ FAILED: Found containers that allow privilege escalation #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found containers that allow privilege escalation #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -267,7 +287,10 @@ end desc "Check if an attacker can use symlink for arbitrary host file system access." 
task "symlink_file_system", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "symlink_file_system" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "symlink_file_system" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "CVE-2021-25741 - Using symlink for arbitrary host file system access.") test_report = Kubescape.parse_test_report(test_json) @@ -276,9 +299,9 @@ task "symlink_file_system", ["kubescape_scan"] do |_, args| emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("symlink_file_system", "✔️ PASSED: No containers allow a symlink attack #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No containers allow a symlink attack #{emoji_security}", task_start_time) else - resp = upsert_failed_task("symlink_file_system", "✖️ FAILED: Found containers that allow a symlink attack #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found containers that allow a symlink attack #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -289,7 +312,10 @@ end desc "Check if applications credentials are in configuration files." 
task "application_credentials", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "application_credentials" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "application_credentials" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Applications credentials in configuration files") test_report = Kubescape.parse_test_report(test_json) @@ -298,9 +324,9 @@ task "application_credentials", ["kubescape_scan"] do |_, args| emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("application_credentials", "✔️ PASSED: No applications credentials in configuration files #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No applications credentials in configuration files #{emoji_security}", task_start_time) else - resp = upsert_failed_task("application_credentials", "✖️ FAILED: Found applications credentials in configuration files #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found applications credentials in configuration files #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -311,7 +337,10 @@ end desc "Check if potential attackers may gain access to a POD and inherit access to the entire host network. For example, in AWS case, they will have access to the entire VPC." 
task "host_network", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "host_network" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "host_network" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "HostNetwork access") test_report = Kubescape.parse_test_report(test_json) @@ -320,9 +349,9 @@ task "host_network", ["kubescape_scan"] do |_, args| emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("host_network", "✔️ PASSED: No host network attached to pod #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No host network attached to pod #{emoji_security}", task_start_time) else - resp = upsert_failed_task("host_network", "✖️ FAILED: Found host network attached to pod #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found host network attached to pod #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -333,7 +362,10 @@ end desc "Potential attacker may gain access to a POD and steal its service account token. Therefore, it is recommended to disable automatic mapping of the service account tokens in service account configuration and enable it only for PODs that need to use them." 
task "service_account_mapping", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "service_account_mapping" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "service_account_mapping" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Automatic mapping of service account") test_report = Kubescape.parse_test_report(test_json) @@ -342,9 +374,9 @@ task "service_account_mapping", ["kubescape_scan"] do |_, args| emoji_security="🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("service_account_mapping", "✔️ PASSED: No service accounts automatically mapped #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No service accounts automatically mapped #{emoji_security}", task_start_time) else - resp = upsert_failed_task("service_account_mapping", "✖️ FAILED: Service accounts automatically mapped #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Service accounts automatically mapped #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -357,7 +389,10 @@ task "linux_hardening", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "linux_hardening" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "linux_hardening" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Linux hardening") test_report = Kubescape.parse_test_report(test_json) @@ -366,9 +401,9 @@ task "linux_hardening", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("linux_hardening", "✔️ ✨PASSED: Security services are being used to harden applications #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Security services are being used to harden applications #{emoji_security}", task_start_time) else - resp = upsert_failed_task("linux_hardening", "✖️ ✨FAILED: Found resources that do not use security services #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Found resources that do not use security services #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -381,7 +416,10 @@ task "insecure_capabilities", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "insecure_capabilities" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "insecure_capabilities" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Insecure capabilities") test_report = Kubescape.parse_test_report(test_json) @@ -390,9 +428,9 @@ task "insecure_capabilities", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("insecure_capabilities", "✔️ PASSED: Containers with insecure capabilities were not found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Containers with insecure capabilities were not found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("insecure_capabilities", "✖️ FAILED: Found containers with insecure capabilities #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found containers with insecure capabilities #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -405,7 +443,10 @@ task "resource_policies", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "resource_policies" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "resource_policies" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Resource policies") test_report = Kubescape.parse_test_report(test_json) @@ -414,9 +455,9 @@ task "resource_policies", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("resource_policies", "✔️ 🏆 PASSED: Containers have resource limits defined #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Containers have resource limits defined #{emoji_security}", task_start_time) else - resp = upsert_failed_task("resource_policies", "✖️ 🏆 FAILED: Found containers without resource limits defined #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Found containers without resource limits defined #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -429,7 +470,10 @@ task "ingress_egress_blocked", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "ingress_egress_blocked" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "ingress_egress_blocked" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Ingress and Egress blocked") test_report = Kubescape.parse_test_report(test_json) @@ -438,9 +482,9 @@ task "ingress_egress_blocked", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("ingress_egress_blocked", "✔️ ✨PASSED: Ingress and Egress traffic blocked on pods #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Ingress and Egress traffic blocked on pods #{emoji_security}", task_start_time) else - resp = upsert_failed_task("ingress_egress_blocked", "✖️ ✨FAILED: Ingress and Egress traffic not blocked on pods #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Ingress and Egress traffic not blocked on pods #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -453,7 +497,10 @@ task "host_pid_ipc_privileges", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "host_pid_ipc_privileges" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "host_pid_ipc_privileges" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Host PID/IPC privileges") test_report = Kubescape.parse_test_report(test_json) @@ -462,9 +509,9 @@ task "host_pid_ipc_privileges", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("host_pid_ipc_privileges", "✔️ PASSED: No containers with hostPID and hostIPC privileges #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: No containers with hostPID and hostIPC privileges #{emoji_security}", task_start_time) else - resp = upsert_failed_task("host_pid_ipc_privileges", "✖️ FAILED: Found containers with hostPID and hostIPC privileges #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found containers with hostPID and hostIPC privileges #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -477,7 +524,10 @@ task "non_root_containers", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "non_root_containers" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "non_root_containers" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Non-root containers") test_report = Kubescape.parse_test_report(test_json) @@ -486,9 +536,9 @@ task "non_root_containers", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("non_root_containers", "✔️ 🏆 PASSED: Containers are running with non-root user with non-root group membership #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: Containers are running with non-root user with non-root group membership #{emoji_security}", task_start_time) else - resp = upsert_failed_task("non_root_containers", "✖️ 🏆 FAILED: Found containers running with root user or user with root group membership #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Found containers running with root user or user with root group membership #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -501,7 +551,10 @@ task "privileged_containers", ["kubescape_scan" ] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "privileged_containers" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "privileged_containers" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Privileged container") test_report = Kubescape.parse_test_report(test_json) @@ -511,9 +564,9 @@ task "privileged_containers", ["kubescape_scan" ] do |_, args| emoji_security = "🔓🔑" #todo whitelist if test_report.failed_resources.size == 0 - upsert_passed_task("privileged_containers", "✔️ 🏆 PASSED: No privileged containers were found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ 🏆 PASSED: No privileged containers were found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("privileged_containers", "✖️ 🏆 FAILED: Found privileged containers #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ 🏆 FAILED: Found privileged containers #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -526,7 +579,10 @@ task "immutable_file_systems", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "immutable_file_systems" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "immutable_file_systems" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Immutable container filesystem") test_report = Kubescape.parse_test_report(test_json) @@ -535,9 +591,9 @@ task "immutable_file_systems", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("immutable_file_systems", "✔️ ✨PASSED: Containers have immutable file systems #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ ✨PASSED: Containers have immutable file systems #{emoji_security}", task_start_time) else - resp = upsert_failed_task("immutable_file_systems", "✖️ ✨FAILED: Found containers with mutable file systems #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ ✨FAILED: Found containers with mutable file systems #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -550,7 +606,10 @@ task "hostpath_mounts", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "hostpath_mounts" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "hostpath_mounts" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Allowed hostPath") test_report = Kubescape.parse_test_report(test_json) @@ -559,9 +618,9 @@ task "hostpath_mounts", ["kubescape_scan"] do |_, args| emoji_security = "🔓🔑" if test_report.failed_resources.size == 0 - upsert_passed_task("hostpath_mounts", "✔️ PASSED: Containers do not have hostPath mounts #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "✔️ PASSED: Containers do not have hostPath mounts #{emoji_security}", task_start_time) else - resp = upsert_failed_task("hostpath_mounts", "✖️ FAILED: Found containers with hostPath mounts #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "✖️ FAILED: Found containers with hostPath mounts #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp diff --git a/src/tasks/workload/state.cr b/src/tasks/workload/state.cr index a287b0695..40fba57b6 100644 --- a/src/tasks/workload/state.cr +++ b/src/tasks/workload/state.cr @@ -22,10 +22,10 @@ ELASTIC_PROVISIONING_DRIVERS_REGEX_SPEC = /kubernetes.io\/aws-ebs|kubernetes.io\ module Volume def self.elastic_by_volumes?(volumes : Array(JSON::Any), namespace : String? = nil) - Log.info {"elastic_by_volumes"} + Log.info {"Volume.elastic_by_volumes"} storage_class_names = storage_class_by_volumes(volumes, namespace) elastic = StorageClass.elastic_by_storage_class?(storage_class_names) - Log.info {"elastic_by_volumes elastic: #{elastic}"} + Log.info {"Volume.elastic_by_volumes elastic: #{elastic}"} elastic end # def self.elastic?(volumes, namespace : String? 
= nil) @@ -114,7 +114,7 @@ end module StorageClass def self.elastic_by_storage_class?(storage_class_names : Array(Hash(String, JSON::Any)), namespace : String? = nil) - Log.info {"elastic_by_storage_class"} + Log.info {"StorageClass.elastic_by_storage_class"} Log.for("elastic_volumes:storage_class_names").info { storage_class_names } #todo elastic_by_storage_class? @@ -153,7 +153,7 @@ end module VolumeClaimTemplate def self.pvc_name_by_vct_resource(resource) : String | Nil - Log.info {"vct_pvc_name"} + Log.info {"VolumeClaimTemplate.pvc_name_by_vct_resource"} resource_name = resource.dig("metadata", "name") vct = resource.dig?("spec", "volumeClaimTemplates") if vct && vct.size > 0 @@ -161,7 +161,7 @@ module VolumeClaimTemplate vct_name = vct[0].dig?("metadata", "name") name = "#{vct_name}-#{resource_name}-0" end - Log.info {"name: #{name}"} + Log.for("VolumeClaimTemplate.pvc_name_by_vct_resource").info {"name: #{name}"} name end @@ -218,8 +218,10 @@ end desc "Does the CNF crash when node-drain occurs" task "node_drain", ["install_litmus"] do |t, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_memory_hog" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "node_drain" + Log.for(testsuite_task).info { "Starting test" } + skipped = false Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -259,7 +261,7 @@ task "node_drain", ["install_litmus"] do |t, args| if spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -317,10 +319,10 @@ task "node_drain", ["install_litmus"] do 
|t, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/node-drain/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/node-drain/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -365,12 +367,12 @@ task "node_drain", ["install_litmus"] do |t, args| test_passed end if skipped - Log.for("verbose").warn{"The node_drain test needs minimum 2 schedulable nodes, current number of nodes: #{KubectlClient::Get.schedulable_nodes_list.size}"} if check_verbose(args) - resp = upsert_skipped_task("node_drain","⏭️ 🏆 SKIPPED: node_drain chaos test requires the cluster to have atleast two schedulable nodes 🗡️💀♻️", Time.utc) + Log.for(testsuite_task).warn{"The node_drain test needs minimum 2 schedulable nodes, current number of nodes: #{KubectlClient::Get.schedulable_nodes_list.size}"} + resp = upsert_skipped_task(testsuite_task,"⏭️ 🏆 SKIPPED: node_drain chaos test requires the cluster to have atleast two schedulable nodes 🗡️💀♻️", task_start_time) elsif task_response - resp = upsert_passed_task("node_drain","✔️ 🏆 PASSED: node_drain chaos test passed 🗡️💀♻️", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ 🏆 PASSED: node_drain chaos test passed 🗡️💀♻️", task_start_time) else - resp = upsert_failed_task("node_drain","✖️ 🏆 FAILED: node_drain chaos test failed 🗡️💀♻️", Time.utc) + resp = 
upsert_failed_task(testsuite_task,"✖️ 🏆 FAILED: node_drain chaos test failed 🗡️💀♻️", task_start_time) end end end @@ -378,8 +380,12 @@ end desc "Does the CNF use an elastic persistent volume" task "elastic_volumes" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "elastic_volumes" + Log.for(testsuite_task).info { "Starting test" } + Log.info {"cnf_config: #{config}"} - Log.for("verbose").info { "elastic_volumes" } if check_verbose(args) + emoji_probe="🧫" elastic_volumes_used = false volumes_used = false @@ -396,7 +402,7 @@ task "elastic_volumes" do |_, args| full_resource = KubectlClient::Get.resource(resource["kind"], resource["name"], namespace) elastic_result = WorkloadResource.elastic?(full_resource, volumes.as_a, namespace) - Log.for("elastic_volumes:elastic_result").info {elastic_result} + Log.for("#{testsuite_task}:elastic_result").info {elastic_result} if elastic_result elastic_volumes_used = true end @@ -404,11 +410,11 @@ task "elastic_volumes" do |_, args| Log.for("elastic_volumes:result").info { "Volumes used: #{volumes_used}; Elastic?: #{elastic_volumes_used}" } if volumes_used == false - resp = upsert_skipped_task("elastic_volumes","⏭️ ✨SKIPPED: No volumes used #{emoji_probe}", Time.utc) + resp = upsert_skipped_task(testsuite_task,"⏭️ ✨SKIPPED: No volumes used #{emoji_probe}", task_start_time) elsif elastic_volumes_used - resp = upsert_passed_task("elastic_volumes","✔️ ✨PASSED: Elastic Volumes Used #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"✔️ ✨PASSED: Elastic Volumes Used #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("elastic_volumes","✔️ ✨FAILED: Volumes used are not elastic volumes #{emoji_probe}", Time.utc) + resp = upsert_failed_task(testsuite_task,"✔️ ✨FAILED: Volumes used are not elastic volumes #{emoji_probe}", task_start_time) end resp end @@ -426,8 +432,11 @@ end desc "Does the CNF use a database which uses perisistence in a cloud 
native way" task "database_persistence" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "database_persistence" + Log.for(testsuite_task).info { "Starting test" } + Log.info {"cnf_config: #{config}"} - Log.info {"database_persistence"} # VERBOSE_LOGGING.info "database_persistence" if check_verbose(args) # todo K8s Database persistence test: if a mysql (or any popular database) image is installed: emoji_probe="🧫" @@ -463,17 +472,17 @@ task "database_persistence" do |_, args| end failed_emoji = "(ভ_ভ) ރ 💾" if elastic_statefulset - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Pass5, "✔️ PASSED: Elastic Volumes and Statefulsets Used #{emoji_probe}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Pass5, "✔️ PASSED: Elastic Volumes and Statefulsets Used #{emoji_probe}", task_start_time) elsif elastic_volume_used - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Pass3,"✔️ PASSED: Elastic Volumes Used #{emoji_probe}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Pass3,"✔️ PASSED: Elastic Volumes Used #{emoji_probe}", task_start_time) elsif statefulset_exists - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Neutral, "✖️ FAILED: Statefulset used without an elastic volume #{failed_emoji}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Neutral, "✖️ FAILED: Statefulset used without an elastic volume #{failed_emoji}", task_start_time) else - resp = upsert_failed_task("database_persistence","✖️ FAILED: Elastic Volumes Not Used #{failed_emoji}", Time.utc) + resp = upsert_failed_task(testsuite_task,"✖️ FAILED: Elastic Volumes Not Used #{failed_emoji}", task_start_time) end else - resp = upsert_skipped_task("database_persistence", 
"⏭️ SKIPPED: Mysql not installed #{emoji_probe}", Time.utc) + resp = upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Mysql not installed #{emoji_probe}", task_start_time) end resp end @@ -491,7 +500,10 @@ end desc "Does the CNF use a non-cloud native data store: hostPath volume" task "volume_hostpath_not_found" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "volume_hostpath_not_found" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "volume_hostpath_not_found" + Log.for(testsuite_task).info { "Starting test" } + failed_emoji = "(ভ_ভ) ރ 💾" passed_emoji = "🖥️ 💾" LOGGING.debug "cnf_config: #{config}" @@ -519,9 +531,9 @@ task "volume_hostpath_not_found" do |_, args| end if task_response.any?(false) - upsert_failed_task("volume_hostpath_not_found","✖️ FAILED: hostPath volumes found #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task,"✖️ FAILED: hostPath volumes found #{failed_emoji}", task_start_time) else - upsert_passed_task("volume_hostpath_not_found","✔️ PASSED: hostPath volumes not found #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ PASSED: hostPath volumes not found #{passed_emoji}", task_start_time) end end end @@ -531,7 +543,9 @@ task "no_local_volume_configuration" do |_, args| failed_emoji = "(ভ_ভ) ރ 💾" passed_emoji = "🖥️ 💾" CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "no_local_volume_configuration" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "no_local_volume_configuration" + Log.for(testsuite_task).info { "Starting test" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.cnf_workload_resources(args, config) do | resource| @@ -545,7 +559,7 @@ task "no_local_volume_configuration" do |_, args| if resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"]? 
volumes = resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"].as_a end - LOGGING.debug "volumes: #{volumes}" + Log.for(testsuite_task).debug { "volumes: #{volumes}" } persistent_volume_claim_names = volumes.map do |volume| # get persistent volume claim that matches persistent volume claim name if volume.as_h["persistentVolumeClaim"]? && volume.as_h["persistentVolumeClaim"].as_h["claimName"]? @@ -554,7 +568,7 @@ task "no_local_volume_configuration" do |_, args| nil end end.compact - LOGGING.debug "persistent volume claim names: #{persistent_volume_claim_names}" + Log.for(testsuite_task).debug { "persistent volume claim names: #{persistent_volume_claim_names}" } # TODO (optional) check storage class of persistent volume claim # loop through all pvc names @@ -569,13 +583,13 @@ task "no_local_volume_configuration" do |_, args| local_storage_not_found = false end rescue ex - LOGGING.info ex.message + Log.for(testsuite_task).info { ex.message } local_storage_not_found = true end end end rescue ex - VERBOSE_LOGGING.error ex.message if check_verbose(args) + Log.for(testsuite_task).error { ex.message } if check_verbose(args) puts "Rescued: On resource #{resource["metadata"]["name"]?} of kind #{resource["kind"]}, local storage configuration volumes not found #{passed_emoji}".colorize(:yellow) local_storage_not_found = true end @@ -583,9 +597,9 @@ task "no_local_volume_configuration" do |_, args| end if task_response.any?(false) - upsert_failed_task("no_local_volume_configuration","✖️ ✨FAILED: local storage configuration volumes found #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task,"✖️ ✨FAILED: local storage configuration volumes found #{failed_emoji}", task_start_time) else - upsert_passed_task("no_local_volume_configuration","✔️ ✨PASSED: local storage configuration volumes not found #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task,"✔️ ✨PASSED: local storage configuration volumes not found #{passed_emoji}", task_start_time) end end 
end