From cfa51ed5d362009c273fac2a22989d48e46859bb Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 05:53:50 +0700 Subject: [PATCH 01/33] Log runtime info for every task upserted into results file Signed-off-by: Akash Manohar --- src/tasks/utils/points.cr | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tasks/utils/points.cr b/src/tasks/utils/points.cr index 5eec74f4b..14d44f200 100644 --- a/src/tasks/utils/points.cr +++ b/src/tasks/utils/points.cr @@ -389,6 +389,8 @@ module CNFManager end_time = Time.utc task_runtime = (end_time - start_time).milliseconds + Log.for(task).info { "task_runtime=#{task_runtime}; start_time=#{start_time}; end_time:#{end_time}" } + # The task result info has to be appeneded to an array of YAML::Any # So encode it into YAML and parse it back again to assign it. # From f09df9bc67630b0eb1257a469e8e4c02a4cc40db Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 05:54:35 +0700 Subject: [PATCH 02/33] Init log for hardcoded_ip_addresses_in_k8s_runtime_configuration Signed-off-by: Akash Manohar --- src/tasks/workload/configuration.cr | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/configuration.cr b/src/tasks/workload/configuration.cr index 1aa471c0a..5de047da2 100644 --- a/src/tasks/workload/configuration.cr +++ b/src/tasks/workload/configuration.cr @@ -299,6 +299,8 @@ end desc "Does the CNF have hardcoded IPs in the K8s resource configuration" task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| task_response = CNFManager::Task.task_runner(args) do |args, config| + testsuite_task = "hardcoded_ip_addresses_in_k8s_runtime_configuration" + Log.for(testsuite_task).info { "Starting test" } VERBOSE_LOGGING.info "Task Name: hardcoded_ip_addresses_in_k8s_runtime_configuration" if check_verbose(args) helm_chart = config.cnf_config[:helm_chart] helm_directory = config.cnf_config[:helm_directory] @@ -330,12 +332,12 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| VERBOSE_LOGGING.info "IPs: #{ip_search}" if check_verbose(args) if ip_search.empty? 
- upsert_passed_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "βœ”οΈ πŸ† PASSED: No hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: No hard-coded IP addresses found in the runtime K8s configuration", Time.utc) else - upsert_failed_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "βœ–οΈ πŸ† FAILED: Hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Hard-coded IP addresses found in the runtime K8s configuration", Time.utc) end rescue - upsert_skipped_task("hardcoded_ip_addresses_in_k8s_runtime_configuration", "⏭️ πŸ† SKIPPED: unknown exception", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ πŸ† SKIPPED: unknown exception", Time.utc) ensure KubectlClient::Delete.command("namespace hardcoded-ip-test --force --grace-period 0") end From 0436d0c7c93228cc2a83c9217517e1fba0a6483f Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 10:24:51 +0700 Subject: [PATCH 03/33] Init log for privileged test Signed-off-by: Akash Manohar --- src/tasks/workload/security.cr | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index bd3a7dcc3..e3fe42389 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -211,7 +211,8 @@ end desc "Check if any containers are running in privileged mode" task "privileged" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "privileged" } if check_verbose(args) + testsuite_task = "privileged" + Log.for(testsuite_task).info { "Starting test" } white_list_container_names = config.cnf_config[:white_list_container_names] VERBOSE_LOGGING.info "white_list_container_names #{white_list_container_names.inspect}" if check_verbose(args) violation_list = [] of NamedTuple(kind: String, name: String, container: String, namespace: String) @@ -229,12 +230,12 @@ task "privileged" do |_, args| true end end - LOGGING.debug "violator list: #{violation_list.flatten}" + Log.debug { "violator list: #{violation_list.flatten}" } emoji_security="πŸ”“πŸ”‘" if task_response - upsert_passed_task("privileged", "βœ”οΈ PASSED: No privileged containers #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No privileged containers #{emoji_security}", Time.utc) else - upsert_failed_task("privileged", "βœ–οΈ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", Time.utc) violation_list.each do |violation| stdout_failure("Privileged container #{violation[:container]} in #{violation[:kind]}/#{violation[:name]} in the #{violation[:namespace]} namespace") end From 94de36f30cd40d820b106f27a6d1bd28a7df7d5a Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 10:25:26 +0700 Subject: [PATCH 04/33] Init log for privilege_escalation test Signed-off-by: Akash Manohar --- src/tasks/workload/security.cr | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index e3fe42389..2b70831f8 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -246,7 +246,8 @@ end desc "Check if any containers are running in privileged mode" task "privilege_escalation", 
["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "privilege_escalation" if check_verbose(args) + testsuite_task = "privilege_escalation" + Log.for(testsuite_task).info { "Starting test" } results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Allow privilege escalation") test_report = Kubescape.parse_test_report(test_json) @@ -255,9 +256,9 @@ task "privilege_escalation", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("privilege_escalation", "βœ”οΈ PASSED: No containers that allow privilege escalation were found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers that allow privilege escalation were found #{emoji_security}", Time.utc) else - resp = upsert_failed_task("privilege_escalation", "βœ–οΈ FAILED: Found containers that allow privilege escalation #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow privilege escalation #{emoji_security}", Time.utc) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp From e158ff1ad7b9f19ff1d7e30bd6d11eaf5392d4a4 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 10:26:11 +0700 Subject: [PATCH 05/33] Init log for symlink_file_system Signed-off-by: Akash Manohar --- src/tasks/workload/security.cr | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 2b70831f8..88742cbe4 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -269,7 +269,8 @@ end desc "Check if an attacker can use symlink for arbitrary host file system access." 
task "symlink_file_system", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "symlink_file_system" if check_verbose(args) + testsuite_task = "symlink_file_system" + Log.for(testsuite_task).info { "Starting test" } results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "CVE-2021-25741 - Using symlink for arbitrary host file system access.") test_report = Kubescape.parse_test_report(test_json) @@ -278,9 +279,9 @@ task "symlink_file_system", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("symlink_file_system", "βœ”οΈ PASSED: No containers allow a symlink attack #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers allow a symlink attack #{emoji_security}", Time.utc) else - resp = upsert_failed_task("symlink_file_system", "βœ–οΈ FAILED: Found containers that allow a symlink attack #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow a symlink attack #{emoji_security}", Time.utc) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp From 46040b47312898074da65ed88555663c34521093 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 10:26:48 +0700 Subject: [PATCH 06/33] Init log for application_credentials test Signed-off-by: Akash Manohar --- src/tasks/workload/security.cr | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 88742cbe4..e2a3ffb9b 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -292,7 +292,8 @@ end desc "Check if applications credentials are in configuration files." 
task "application_credentials", ["kubescape_scan"] do |_, args|
   CNFManager::Task.task_runner(args) do |args, config|
-    VERBOSE_LOGGING.info "application_credentials" if check_verbose(args)
+    testsuite_task = "application_credentials"
+    Log.for(testsuite_task).info { "Starting test" }
     results_json = Kubescape.parse
     test_json = Kubescape.test_by_test_name(results_json, "Applications credentials in configuration files")
     test_report = Kubescape.parse_test_report(test_json)
@@ -301,9 +302,9 @@ task "application_credentials", ["kubescape_scan"] do |_, args|
     emoji_security="πŸ”“πŸ”‘"
     if test_report.failed_resources.size == 0
-      upsert_passed_task("application_credentials", "βœ”οΈ PASSED: No applications credentials in configuration files #{emoji_security}", Time.utc)
+      upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No applications credentials in configuration files #{emoji_security}", Time.utc)
     else
-      resp = upsert_failed_task("application_credentials", "βœ–οΈ FAILED: Found applications credentials in configuration files #{emoji_security}", Time.utc)
+      resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found applications credentials in configuration files #{emoji_security}", Time.utc)
       test_report.failed_resources.map {|r| stdout_failure(r.alert_message) }
       stdout_failure("Remediation: #{test_report.remediation}")
       resp
From 7d6d9d471e4e487eb47d847f5e39945614b87a2b Mon Sep 17 00:00:00 2001
From: Akash Manohar
Date: Thu, 26 Oct 2023 13:09:30 +0700
Subject: [PATCH 07/33] Init log for sysctls

Also fix start_time value

---
 src/tasks/workload/security.cr | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr
index e2a3ffb9b..988fda7b7 100644
--- a/src/tasks/workload/security.cr
+++ b/src/tasks/workload/security.cr
@@ -38,7 +38,9 @@ end
 desc "Check if pods in the CNF use sysctls with restricted values"
 task "sysctls" do |_, args|
   CNFManager::Task.task_runner(args) do |args, config|
-    Log.for("verbose").info { "sysctls" }
+    task_start_time = Time.utc
+    testsuite_task = "sysctls"
+    Log.for(testsuite_task).info { "Starting test" }
     Kyverno.install

     emoji_security = "πŸ”“πŸ”‘"
@@ -48,9 +50,9 @@ task "sysctls" do |_, args|
     failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures)

     if failures.size == 0
-      resp = upsert_passed_task("sysctls", "βœ”οΈ PASSED: No restricted values found for sysctls #{emoji_security}", Time.utc)
+      resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No restricted values found for sysctls #{emoji_security}", task_start_time)
     else
-      resp = upsert_failed_task("sysctls", "βœ–οΈ FAILED: Restricted values for are being used for sysctls #{emoji_security}", Time.utc)
+      resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Restricted values are being used for sysctls #{emoji_security}", task_start_time)
       failures.each do |failure|
         failure.resources.each do |resource|
           puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) From 2d277b90eedd0b725e183a55c06d1b2e41a524d6 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:15:39 +0700 Subject: [PATCH 08/33] Init log for external_ips --- src/tasks/workload/security.cr | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 988fda7b7..2cfe791de 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -65,7 +65,10 @@ end desc "Check if the CNF has services with external IPs configured" task "external_ips" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "external_ips" } + task_start_time = Time.utc + testsuite_task = "external_ips" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "πŸ”“πŸ”‘" policy_path = Kyverno.best_practice_policy("restrict-service-external-ips/restrict-service-external-ips.yaml") @@ -75,9 +78,9 @@ task "external_ips" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("external_ips", "βœ”οΈ PASSED: Services are not using external IPs #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Services are not using external IPs #{emoji_security}", task_start_time) else - resp = upsert_failed_task("external_ips", "βœ–οΈ FAILED: Services are using external IPs #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Services are using external IPs #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) From bbd826aee7fe950625947bf1b9935151cceb33d9 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:16:44 +0700 Subject: [PATCH 09/33] Init log for selinux_options --- src/tasks/workload/security.cr | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 2cfe791de..b975ba295 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -93,7 +93,10 @@ end desc "Check if the CNF or the cluster resources have custom SELinux options" task "selinux_options" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "selinux_options" } + task_start_time = Time.utc + testsuite_task = "selinux_options" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "πŸ”“πŸ”‘" @@ -117,9 +120,9 @@ task "selinux_options" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, disallow_failures) if failures.size == 0 - resp = upsert_passed_task("selinux_options", "βœ”οΈ πŸ† PASSED: Pods are not using custom SELinux options that can be used for privilege escalations #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Pods are not using custom SELinux options that can be used for privilege escalations #{emoji_security}", task_start_time) else - resp = upsert_failed_task("selinux_options", "βœ–οΈ πŸ† FAILED: Pods are using custom SELinux options that can be used for privilege escalations #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Pods are using custom SELinux options that can be used for privilege escalations #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) From 370e0b512e8c5be47e19f499d2252619620de9a8 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:19:49 +0700 Subject: [PATCH 10/33] Init log for container_sock_mounts --- src/tasks/workload/security.cr | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index b975ba295..60184011e 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -138,16 +138,19 @@ end desc "Check if the CNF is running containers with container sock mounts" task "container_sock_mounts" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "container_sock_mounts" } + task_start_time = Time.utc + testsuite_task = "container_sock_mounts" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_security = "πŸ”“πŸ”‘" policy_path = Kyverno.best_practice_policy("disallow_cri_sock_mount/disallow_cri_sock_mount.yaml") failures = Kyverno::PolicyAudit.run(policy_path, EXCLUDE_NAMESPACES) if failures.size == 0 - resp = upsert_passed_task("container_sock_mounts", "βœ”οΈ πŸ† PASSED: Container engine daemon sockets are not mounted as volumes #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Container engine daemon sockets are not mounted as volumes #{emoji_security}", task_start_time) else - resp = upsert_failed_task("container_sock_mounts", "βœ–οΈ πŸ† FAILED: Container engine daemon sockets are mounted as volumes #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Container engine daemon sockets are mounted as volumes #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) From 4bc39c578e298cc50caf8c95d10998c205b93d1f Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:22:02 +0700 Subject: [PATCH 11/33] Init log for non_root_user --- src/tasks/workload/security.cr | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 60184011e..b2e9bea82 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -163,10 +163,13 @@ end desc "Check if any containers are running in as root" task "non_root_user", ["install_falco"] do |_, args| CNFManager::Task.task_runner(args) do |args,config| - + task_start_time = Time.utc + testsuite_task = "non_root_user" + Log.for(testsuite_task).info { "Starting test" } + unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) Log.info { "Falco Failed to Start" } - upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", Time.utc) + upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. 
Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") @@ -212,9 +215,9 @@ task "non_root_user", ["install_falco"] do |_, args| emoji_root="√" if task_response - upsert_passed_task("non_root_user", "βœ”οΈ PASSED: Root user not found #{emoji_no_root}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Root user not found #{emoji_no_root}", task_start_time) else - upsert_failed_task("non_root_user", "βœ–οΈ FAILED: Root user found #{emoji_root}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Root user found #{emoji_root}", task_start_time) end end end From c0569c097078f9154030149bdddc45a423915ad0 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:23:44 +0700 Subject: [PATCH 12/33] Fix indentation in non_root_user test --- src/tasks/workload/security.cr | 108 ++++++++++++++++----------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index b2e9bea82..1d5d57504 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -162,64 +162,64 @@ end desc "Check if any containers are running in as root" task "non_root_user", ["install_falco"] do |_, args| - CNFManager::Task.task_runner(args) do |args,config| + CNFManager::Task.task_runner(args) do |args,config| task_start_time = Time.utc testsuite_task = "non_root_user" Log.for(testsuite_task).info { "Starting test" } - unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) - Log.info { "Falco Failed to Start" } - upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) - node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) - pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") - - # Handle scenario when pod is not available when Falco is not installed. 
- if pods.size > 0 - falco_pod_name = pods[0].dig("metadata", "name").as_s - Log.info { "Falco Pod Name: #{falco_pod_name}" } - KubectlClient.logs(falco_pod_name, namespace: TESTSUITE_NAMESPACE) - end - next - end - - Log.for("verbose").info { "non_root_user" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } - fail_msgs = [] of String - task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| - test_passed = true - Log.info { "Falco is Running" } - kind = resource["kind"].downcase - case kind - when "deployment","statefulset","pod","replicaset", "daemonset" - resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - pods = KubectlClient::Get.pods_by_resource(resource_yaml) - # containers = KubectlClient::Get.resource_containers(kind, resource[:name]) - pods.map do |pod| - # containers.as_a.map do |container| - # container_name = container.dig("name") - pod_name = pod.dig("metadata", "name").as_s - # if Falco.find_root_pod(pod_name, container_name) - if Falco.find_root_pod(pod_name) - fail_msg = "resource: #{resource} and pod #{pod_name} uses a root user" - unless fail_msgs.find{|x| x== fail_msg} - puts fail_msg.colorize(:red) - fail_msgs << fail_msg - end - test_passed=false - end - end - end - test_passed - end - emoji_no_root="🚫√" - emoji_root="√" - - if task_response - upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Root user not found #{emoji_no_root}", task_start_time) - else - upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Root user found #{emoji_root}", task_start_time) - end - end + unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) + Log.info { "Falco Failed to Start" } + upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) + node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) + pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") + + # Handle scenario when pod is not available when Falco is not installed. 
+ if pods.size > 0 + falco_pod_name = pods[0].dig("metadata", "name").as_s + Log.info { "Falco Pod Name: #{falco_pod_name}" } + KubectlClient.logs(falco_pod_name, namespace: TESTSUITE_NAMESPACE) + end + next + end + + Log.for("verbose").info { "non_root_user" } if check_verbose(args) + Log.debug { "cnf_config: #{config}" } + fail_msgs = [] of String + task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| + test_passed = true + Log.info { "Falco is Running" } + kind = resource["kind"].downcase + case kind + when "deployment","statefulset","pod","replicaset", "daemonset" + resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) + pods = KubectlClient::Get.pods_by_resource(resource_yaml) + # containers = KubectlClient::Get.resource_containers(kind, resource[:name]) + pods.map do |pod| + # containers.as_a.map do |container| + # container_name = container.dig("name") + pod_name = pod.dig("metadata", "name").as_s + # if Falco.find_root_pod(pod_name, container_name) + if Falco.find_root_pod(pod_name) + fail_msg = "resource: #{resource} and pod #{pod_name} uses a root user" + unless fail_msgs.find{|x| x== fail_msg} + puts fail_msg.colorize(:red) + fail_msgs << fail_msg + end + test_passed=false + end + end + end + test_passed + end + emoji_no_root="🚫√" + emoji_root="√" + + if task_response + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Root user not found #{emoji_no_root}", task_start_time) + else + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Root user found #{emoji_root}", task_start_time) + end + end end desc "Check if any containers are running in privileged mode" From 34516820d01a9d0dad9469325d5ace061b3d9c66 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:26:23 +0700 Subject: [PATCH 13/33] Use common snippet across recent tests that were updated --- src/tasks/workload/security.cr | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 1d5d57504..2700e0fb9 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -225,8 +225,10 @@ end desc "Check if any containers are running in privileged mode" task "privileged" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc testsuite_task = "privileged" Log.for(testsuite_task).info { "Starting test" } + white_list_container_names = config.cnf_config[:white_list_container_names] VERBOSE_LOGGING.info "white_list_container_names #{white_list_container_names.inspect}" if check_verbose(args) violation_list = [] of NamedTuple(kind: String, name: String, container: String, namespace: String) @@ -247,9 +249,9 @@ task "privileged" do |_, args| Log.debug { "violator list: #{violation_list.flatten}" } emoji_security="πŸ”“πŸ”‘" if task_response - upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No privileged containers #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No privileged containers #{emoji_security}", task_start_time) else - upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found #{violation_list.size} privileged containers #{emoji_security}", task_start_time) violation_list.each do |violation| stdout_failure("Privileged container #{violation[:container]} in 
#{violation[:kind]}/#{violation[:name]} in the #{violation[:namespace]} namespace") end @@ -260,8 +262,10 @@ end desc "Check if any containers are running in privileged mode" task "privilege_escalation", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc testsuite_task = "privilege_escalation" Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Allow privilege escalation") test_report = Kubescape.parse_test_report(test_json) @@ -270,9 +274,9 @@ task "privilege_escalation", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers that allow privilege escalation were found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers that allow privilege escalation were found #{emoji_security}", task_start_time) else - resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow privilege escalation #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow privilege escalation #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -283,8 +287,10 @@ end desc "Check if an attacker can use symlink for arbitrary host file system access." task "symlink_file_system", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc testsuite_task = "symlink_file_system" Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "CVE-2021-25741 - Using symlink for arbitrary host file system access.") test_report = Kubescape.parse_test_report(test_json) @@ -293,9 +299,9 @@ task "symlink_file_system", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers allow a symlink attack #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers allow a symlink attack #{emoji_security}", task_start_time) else - resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow a symlink attack #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers that allow a symlink attack #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -306,8 +312,10 @@ end desc "Check if applications credentials are in configuration files." 
task "application_credentials", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc testsuite_task = "application_credentials" Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Applications credentials in configuration files") test_report = Kubescape.parse_test_report(test_json) @@ -316,9 +324,9 @@ task "application_credentials", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No applications credentials in configuration files #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No applications credentials in configuration files #{emoji_security}", task_start_time) else - resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found applications credentials in configuration files #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found applications credentials in configuration files #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp From 064b13327c82f8ca63da71e44978e29f740c1091 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:41:13 +0700 Subject: [PATCH 14/33] Init log for more security tests * host_network * service_account_mapping * linux_hardening * insecure_capabilities * resource_policies * ingress_egress_blocked * host_pid_ipc_privileges * non_root_containers * task_start_time * privileged_containers * hostpath_mounts --- src/tasks/workload/security.cr | 99 ++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 2700e0fb9..97496bfb4 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -337,7 +337,10 @@ end desc "Check if potential attackers may gain access to a POD and inherit access to the entire host network. For example, in AWS case, they will have access to the entire VPC." task "host_network", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "host_network" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "host_network" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "HostNetwork access") test_report = Kubescape.parse_test_report(test_json) @@ -346,9 +349,9 @@ task "host_network", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("host_network", "βœ”οΈ PASSED: No host network attached to pod #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No host network attached to pod #{emoji_security}", task_start_time) else - resp = upsert_failed_task("host_network", "βœ–οΈ FAILED: Found host network attached to pod #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found host network attached to pod #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -359,7 +362,10 @@ end desc "Potential attacker may gain access to a POD and steal its service account token. 
Therefore, it is recommended to disable automatic mapping of the service account tokens in service account configuration and enable it only for PODs that need to use them." task "service_account_mapping", ["kubescape_scan"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "service_account_mapping" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "service_account_mapping" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Automatic mapping of service account") test_report = Kubescape.parse_test_report(test_json) @@ -368,9 +374,9 @@ task "service_account_mapping", ["kubescape_scan"] do |_, args| emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("service_account_mapping", "βœ”οΈ PASSED: No service accounts automatically mapped #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No service accounts automatically mapped #{emoji_security}", task_start_time) else - resp = upsert_failed_task("service_account_mapping", "βœ–οΈ FAILED: Service accounts automatically mapped #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Service accounts automatically mapped #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -383,7 +389,10 @@ task "linux_hardening", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "linux_hardening" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "linux_hardening" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Linux hardening") test_report = Kubescape.parse_test_report(test_json) @@ -392,9 +401,9 @@ task "linux_hardening", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("linux_hardening", "βœ”οΈ ✨PASSED: Security services are being used to harden applications #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Security services are being used to harden applications #{emoji_security}", task_start_time) else - resp = upsert_failed_task("linux_hardening", "βœ–οΈ ✨FAILED: Found resources that do not use security services #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Found resources that do not use security services #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -407,7 +416,10 @@ task "insecure_capabilities", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "insecure_capabilities" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "insecure_capabilities" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Insecure capabilities") test_report = Kubescape.parse_test_report(test_json) @@ -416,9 +428,9 @@ task "insecure_capabilities", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("insecure_capabilities", "βœ”οΈ PASSED: Containers with insecure capabilities were not found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Containers with insecure capabilities were not found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("insecure_capabilities", "βœ–οΈ FAILED: Found containers with insecure capabilities #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers with insecure capabilities #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -431,7 +443,10 @@ task "resource_policies", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "resource_policies" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "resource_policies" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Resource policies") test_report = Kubescape.parse_test_report(test_json) @@ -440,9 +455,9 @@ task "resource_policies", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("resource_policies", "βœ”οΈ πŸ† PASSED: Containers have resource limits defined #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Containers have resource limits defined #{emoji_security}", task_start_time) else - resp = upsert_failed_task("resource_policies", "βœ–οΈ πŸ† FAILED: Found containers without resource limits defined #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Found containers without resource limits defined #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -455,7 +470,10 @@ task "ingress_egress_blocked", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "ingress_egress_blocked" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "ingress_egress_blocked" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Ingress and Egress blocked") test_report = Kubescape.parse_test_report(test_json) @@ -464,9 +482,9 @@ task "ingress_egress_blocked", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("ingress_egress_blocked", "βœ”οΈ ✨PASSED: Ingress and Egress traffic blocked on pods #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Ingress and Egress traffic blocked on pods #{emoji_security}", task_start_time) else - resp = upsert_failed_task("ingress_egress_blocked", "βœ–οΈ ✨FAILED: Ingress and Egress traffic not blocked on pods #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Ingress and Egress traffic not blocked on pods #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -479,7 +497,10 @@ task "host_pid_ipc_privileges", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "host_pid_ipc_privileges" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "host_pid_ipc_privileges" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Host PID/IPC privileges") test_report = Kubescape.parse_test_report(test_json) @@ -488,9 +509,9 @@ task "host_pid_ipc_privileges", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("host_pid_ipc_privileges", "βœ”οΈ PASSED: No containers with hostPID and hostIPC privileges #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No containers with hostPID and hostIPC privileges #{emoji_security}", task_start_time) else - resp = upsert_failed_task("host_pid_ipc_privileges", "βœ–οΈ FAILED: Found containers with hostPID and hostIPC privileges #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers with hostPID and hostIPC privileges #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -503,7 +524,10 @@ task "non_root_containers", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "non_root_containers" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "non_root_containers" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Non-root containers") test_report = Kubescape.parse_test_report(test_json) @@ -512,9 +536,9 @@ task "non_root_containers", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("non_root_containers", "βœ”οΈ πŸ† PASSED: Containers are running with non-root user with non-root group membership #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Containers are running with non-root user with non-root group membership #{emoji_security}", task_start_time) else - resp = upsert_failed_task("non_root_containers", "βœ–οΈ πŸ† FAILED: Found containers running with root user or user with root group membership #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Found containers running with root user or user with root group membership #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -527,7 +551,10 @@ task "privileged_containers", ["kubescape_scan" ] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "privileged_containers" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "privileged_containers" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Privileged container") test_report = Kubescape.parse_test_report(test_json) @@ -537,9 +564,9 @@ task "privileged_containers", ["kubescape_scan" ] do |_, args| emoji_security = "πŸ”“πŸ”‘" #todo whitelist if test_report.failed_resources.size == 0 - upsert_passed_task("privileged_containers", "βœ”οΈ πŸ† PASSED: No privileged containers were found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: No privileged containers were found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("privileged_containers", "βœ–οΈ πŸ† FAILED: Found privileged containers #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Found privileged containers #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -552,7 +579,10 @@ task "immutable_file_systems", ["kubescape_scan"] do |_, args| next if args.named["offline"]? 
CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "immutable_file_systems" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "immutable_file_systems" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Immutable container filesystem") test_report = Kubescape.parse_test_report(test_json) @@ -561,9 +591,9 @@ task "immutable_file_systems", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("immutable_file_systems", "βœ”οΈ ✨PASSED: Containers have immutable file systems #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Containers have immutable file systems #{emoji_security}", task_start_time) else - resp = upsert_failed_task("immutable_file_systems", "βœ–οΈ ✨FAILED: Found containers with mutable file systems #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Found containers with mutable file systems #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -576,7 +606,10 @@ task "hostpath_mounts", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "hostpath_mounts" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "hostpath_mounts" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Allowed hostPath") test_report = Kubescape.parse_test_report(test_json) @@ -585,9 +618,9 @@ task "hostpath_mounts", ["kubescape_scan"] do |_, args| emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("hostpath_mounts", "βœ”οΈ PASSED: Containers do not have hostPath mounts #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Containers do not have hostPath mounts #{emoji_security}", task_start_time) else - resp = upsert_failed_task("hostpath_mounts", "βœ–οΈ FAILED: Found containers with hostPath mounts #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found containers with hostPath mounts #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp From aca9feff569ade84a25c6f20ac658bc6e66b8647 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 13:44:01 +0700 Subject: [PATCH 15/33] Init log for platform:hardware_and_scheduling tests --- src/tasks/platform/hardware_and_scheduling.cr | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tasks/platform/hardware_and_scheduling.cr b/src/tasks/platform/hardware_and_scheduling.cr index 6ef5cf5e0..ce0ac5261 100644 --- a/src/tasks/platform/hardware_and_scheduling.cr +++ b/src/tasks/platform/hardware_and_scheduling.cr @@ -15,6 +15,10 @@ namespace "platform" do desc "Does the Platform use a runtime that is oci compliant" task "oci_compliant" do |_, args| task_response = CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args| + task_start_time = Time.utc + testsuite_task = "oci_compliant" + Log.for(testsuite_task).info { "Starting test" } + resp = KubectlClient::Get.container_runtimes all_oci_runtimes = 
true resp.each do |x| @@ -25,10 +29,10 @@ namespace "platform" do LOGGING.info "all_oci_runtimes: #{all_oci_runtimes}" if all_oci_runtimes emoji_chaos_oci_compliant="πŸ“Άβ˜ οΈ" - upsert_passed_task("oci_compliant","βœ”οΈ PASSED: Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes #{emoji_chaos_oci_compliant}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Your platform is using the following runtimes: [#{KubectlClient::Get.container_runtimes.join(",")}] which are OCI compliant runtimes #{emoji_chaos_oci_compliant}", task_start_time) else emoji_chaos_oci_compliant="πŸ“Άβ˜ οΈ" - upsert_failed_task("oci_compliant", "βœ–οΈ FAILED: Platform has at least one node that uses a non OCI compliant runtime #{emoji_chaos_oci_compliant}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Platform has at least one node that uses a non OCI compliant runtime #{emoji_chaos_oci_compliant}", task_start_time) end end end From 23557e495b74ba5d755a992e9f2d577af762f3c9 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 15:14:11 +0700 Subject: [PATCH 16/33] Init log for state tests * elastic_volumes * node_drain * database_persistence * volume_hostpath_not_found * no_local_volume_configuration --- src/tasks/workload/state.cr | 79 ++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/src/tasks/workload/state.cr b/src/tasks/workload/state.cr index a287b0695..1dc5a9ed3 100644 --- a/src/tasks/workload/state.cr +++ b/src/tasks/workload/state.cr @@ -22,10 +22,10 @@ ELASTIC_PROVISIONING_DRIVERS_REGEX_SPEC = /kubernetes.io\/aws-ebs|kubernetes.io\ module Volume def self.elastic_by_volumes?(volumes : Array(JSON::Any), namespace : String? = nil) - Log.info {"elastic_by_volumes"} + Log.info {"Volume.elastic_by_volumes"} storage_class_names = storage_class_by_volumes(volumes, namespace) elastic = StorageClass.elastic_by_storage_class?(storage_class_names) - Log.info {"elastic_by_volumes elastic: #{elastic}"} + Log.info {"Volume.elastic_by_volumes elastic: #{elastic}"} elastic end # def self.elastic?(volumes, namespace : String? = nil) @@ -114,7 +114,7 @@ end module StorageClass def self.elastic_by_storage_class?(storage_class_names : Array(Hash(String, JSON::Any)), namespace : String? = nil) - Log.info {"elastic_by_storage_class"} + Log.info {"StorageClass.elastic_by_storage_class"} Log.for("elastic_volumes:storage_class_names").info { storage_class_names } #todo elastic_by_storage_class? 
@@ -153,7 +153,7 @@ end module VolumeClaimTemplate def self.pvc_name_by_vct_resource(resource) : String | Nil - Log.info {"vct_pvc_name"} + Log.info {"VolumeClaimTemplate.pvc_name_by_vct_resource"} resource_name = resource.dig("metadata", "name") vct = resource.dig?("spec", "volumeClaimTemplates") if vct && vct.size > 0 @@ -161,7 +161,7 @@ module VolumeClaimTemplate vct_name = vct[0].dig?("metadata", "name") name = "#{vct_name}-#{resource_name}-0" end - Log.info {"name: #{name}"} + Log.for("VolumeClaimTemplate.pvc_name_by_vct_resource").info {"name: #{name}"} name end @@ -218,7 +218,10 @@ end desc "Does the CNF crash when node-drain occurs" task "node_drain", ["install_litmus"] do |t, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_memory_hog" + task_start_time = Time.utc + testsuite_task = "node_drain" + Log.for(testsuite_task).info { "Starting test" } + Log.for(test_name).info { "Starting test" } if check_verbose(args) skipped = false Log.debug { "cnf_config: #{config}" } @@ -259,7 +262,7 @@ task "node_drain", ["install_litmus"] do |t, args| if spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -365,12 +368,12 @@ task "node_drain", ["install_litmus"] do |t, args| test_passed end if skipped - Log.for("verbose").warn{"The node_drain test needs minimum 2 schedulable nodes, current number of nodes: #{KubectlClient::Get.schedulable_nodes_list.size}"} if check_verbose(args) - resp = upsert_skipped_task("node_drain","⏭️ πŸ† SKIPPED: node_drain chaos test requires the cluster to have atleast two schedulable nodes πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + Log.for(testsuite_task).warn{"The node_drain test needs minimum 2 schedulable nodes, current number of nodes: #{KubectlClient::Get.schedulable_nodes_list.size}"} + resp = upsert_skipped_task(testsuite_task,"⏭️ πŸ† SKIPPED: node_drain chaos test requires the cluster to have atleast two schedulable nodes πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) elsif task_response - resp = upsert_passed_task("node_drain","βœ”οΈ πŸ† PASSED: node_drain chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ πŸ† PASSED: node_drain chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("node_drain","βœ–οΈ πŸ† FAILED: node_drain chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ πŸ† FAILED: node_drain chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -378,8 +381,12 @@ end desc "Does the CNF use an elastic persistent volume" task "elastic_volumes" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "elastic_volumes" + Log.for(testsuite_task).info { "Starting test" } + Log.info {"cnf_config: #{config}"} - Log.for("verbose").info { "elastic_volumes" } if check_verbose(args) + emoji_probe="🧫" elastic_volumes_used = false volumes_used = false @@ -396,7 +403,7 @@ task "elastic_volumes" do |_, args| full_resource = KubectlClient::Get.resource(resource["kind"], resource["name"], namespace) elastic_result = WorkloadResource.elastic?(full_resource, volumes.as_a, namespace) - 
Log.for("elastic_volumes:elastic_result").info {elastic_result} + Log.for("#{testsuite_task}:elastic_result").info {elastic_result} if elastic_result elastic_volumes_used = true end @@ -404,11 +411,11 @@ task "elastic_volumes" do |_, args| Log.for("elastic_volumes:result").info { "Volumes used: #{volumes_used}; Elastic?: #{elastic_volumes_used}" } if volumes_used == false - resp = upsert_skipped_task("elastic_volumes","⏭️ ✨SKIPPED: No volumes used #{emoji_probe}", Time.utc) + resp = upsert_skipped_task(testsuite_task,"⏭️ ✨SKIPPED: No volumes used #{emoji_probe}", task_start_time) elsif elastic_volumes_used - resp = upsert_passed_task("elastic_volumes","βœ”οΈ ✨PASSED: Elastic Volumes Used #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: Elastic Volumes Used #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("elastic_volumes","βœ”οΈ ✨FAILED: Volumes used are not elastic volumes #{emoji_probe}", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ”οΈ ✨FAILED: Volumes used are not elastic volumes #{emoji_probe}", task_start_time) end resp end @@ -426,8 +433,11 @@ end desc "Does the CNF use a database which uses perisistence in a cloud native way" task "database_persistence" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "database_persistence" + Log.for(testsuite_task).info { "Starting test" } + Log.info {"cnf_config: #{config}"} - Log.info {"database_persistence"} # VERBOSE_LOGGING.info "database_persistence" if check_verbose(args) # todo K8s Database persistence test: if a mysql (or any popular database) image is installed: emoji_probe="🧫" @@ -463,17 +473,17 @@ task "database_persistence" do |_, args| end failed_emoji = "(ΰ¦­_ΰ¦­) ήƒ πŸ’Ύ" if elastic_statefulset - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Pass5, "βœ”οΈ PASSED: Elastic Volumes and Statefulsets Used #{emoji_probe}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Pass5, "βœ”οΈ PASSED: Elastic Volumes and Statefulsets Used #{emoji_probe}", task_start_time) elsif elastic_volume_used - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Pass3,"βœ”οΈ PASSED: Elastic Volumes Used #{emoji_probe}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Pass3,"βœ”οΈ PASSED: Elastic Volumes Used #{emoji_probe}", task_start_time) elsif statefulset_exists - resp = upsert_dynamic_task("database_persistence",CNFManager::Points::Results::ResultStatus::Neutral, "βœ–οΈ FAILED: Statefulset used without an elastic volume #{failed_emoji}", Time.utc) + resp = upsert_dynamic_task(testsuite_task,CNFManager::Points::Results::ResultStatus::Neutral, "βœ–οΈ FAILED: Statefulset used without an elastic volume #{failed_emoji}", task_start_time) else - resp = upsert_failed_task("database_persistence","βœ–οΈ FAILED: Elastic Volumes Not Used #{failed_emoji}", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: Elastic Volumes Not Used #{failed_emoji}", task_start_time) end else - resp = upsert_skipped_task("database_persistence", "⏭️ SKIPPED: Mysql not installed #{emoji_probe}", Time.utc) + resp = upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Mysql not installed #{emoji_probe}", task_start_time) end resp end @@ -491,7 +501,10 @@ end desc "Does the CNF use a non-cloud native data store: hostPath volume" task 
"volume_hostpath_not_found" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "volume_hostpath_not_found" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "volume_hostpath_not_found" + Log.for(testsuite_task).info { "Starting test" } + failed_emoji = "(ΰ¦­_ΰ¦­) ήƒ πŸ’Ύ" passed_emoji = "πŸ–₯️ πŸ’Ύ" LOGGING.debug "cnf_config: #{config}" @@ -519,9 +532,9 @@ task "volume_hostpath_not_found" do |_, args| end if task_response.any?(false) - upsert_failed_task("volume_hostpath_not_found","βœ–οΈ FAILED: hostPath volumes found #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: hostPath volumes found #{failed_emoji}", task_start_time) else - upsert_passed_task("volume_hostpath_not_found","βœ”οΈ PASSED: hostPath volumes not found #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: hostPath volumes not found #{passed_emoji}", task_start_time) end end end @@ -531,7 +544,9 @@ task "no_local_volume_configuration" do |_, args| failed_emoji = "(ΰ¦­_ΰ¦­) ήƒ πŸ’Ύ" passed_emoji = "πŸ–₯️ πŸ’Ύ" CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "no_local_volume_configuration" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "no_local_volume_configuration" + Log.for(testsuite_task).info { "Starting test" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.cnf_workload_resources(args, config) do | resource| @@ -545,7 +560,7 @@ task "no_local_volume_configuration" do |_, args| if resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"]? volumes = resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"].as_a end - LOGGING.debug "volumes: #{volumes}" + Log.for(testsuite_task).debug "volumes: #{volumes}" persistent_volume_claim_names = volumes.map do |volume| # get persistent volume claim that matches persistent volume claim name if volume.as_h["persistentVolumeClaim"]? && volume.as_h["persistentVolumeClaim"].as_h["claimName"]? 
@@ -554,7 +569,7 @@ task "no_local_volume_configuration" do |_, args|
             nil
           end
         end.compact
-        LOGGING.debug "persistent volume claim names: #{persistent_volume_claim_names}"
+        Log.for(testsuite_task).debug { "persistent volume claim names: #{persistent_volume_claim_names}" }
 
         # TODO (optional) check storage class of persistent volume claim
         # loop through all pvc names
@@ -569,13 +584,13 @@ task "no_local_volume_configuration" do |_, args|
               local_storage_not_found = false 
             end
           rescue ex
-            LOGGING.info ex.message 
+            Log.for(testsuite_task).info { ex.message }
             local_storage_not_found = true 
           end
         end
       end
     rescue ex
-      VERBOSE_LOGGING.error ex.message if check_verbose(args)
+      Log.for(testsuite_task).error { ex.message } if check_verbose(args)
       puts "Rescued: On resource #{resource["metadata"]["name"]?} of kind #{resource["kind"]}, local storage configuration volumes not found #{passed_emoji}".colorize(:yellow)
       local_storage_not_found = true
     end
@@ -583,9 +598,9 @@ task "no_local_volume_configuration" do |_, args|
   end
 
   if task_response.any?(false)
-    upsert_failed_task("no_local_volume_configuration","βœ–οΈ  ✨FAILED: local storage configuration volumes found #{failed_emoji}", Time.utc)
+    upsert_failed_task(testsuite_task,"βœ–οΈ  ✨FAILED: local storage configuration volumes found #{failed_emoji}", task_start_time)
   else
-    upsert_passed_task("no_local_volume_configuration","βœ”οΈ  ✨PASSED: local storage configuration volumes not found #{passed_emoji}", Time.utc)
+    upsert_passed_task(testsuite_task,"βœ”οΈ  ✨PASSED: local storage configuration volumes not found #{passed_emoji}", task_start_time)
   end
   end
 end

From da4f3522ae137c5060109d8ee478e897366c0e9e Mon Sep 17 00:00:00 2001
From: Akash Manohar 
Date: Thu, 26 Oct 2023 19:25:57 +0700
Subject: [PATCH 17/33] Init log for compatibility tests

* rolling_update
* rolling_downgrade
* rolling_version_change
* rollback
* increase_decrease_capacity
* helm_deploy
* helm_chart_published
* helm_chart_valid
* cni_compatible
---
 src/tasks/workload/compatibility.cr | 78 +++++++++++++++++------------
 1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr
index 9304e7ed5..9787efbc6 100644
--- a/src/tasks/workload/compatibility.cr
+++ b/src/tasks/workload/compatibility.cr
@@ -24,10 +24,13 @@ rolling_version_change_test_names.each do |tn|
   desc "Test if the CNF containers are loosely coupled by performing a #{pretty_test_name}"
   task "#{tn}" do |_, args|
     CNFManager::Task.task_runner(args) do |args, config|
-      LOGGING.debug "cnf_config: #{config}"
-      VERBOSE_LOGGING.info "#{tn}" if check_verbose(args)
+      task_start_time = Time.utc
+      testsuite_task = tn
+      Log.for(testsuite_task).info { "Starting test" }
+
+      Log.for(testsuite_task).debug { "cnf_config: #{config}" }
       container_names = config.cnf_config[:container_names]
-      LOGGING.debug "container_names: #{container_names}"
+      Log.for(testsuite_task).debug { "container_names: #{container_names}" }
       update_applied = true
       unless container_names
         puts "Please add a container names set of entries into your cnf-testsuite.yml".colorize(:red)
@@ -43,8 +46,8 @@ rolling_version_change_test_names.each do |tn|
         namespace = resource["namespace"] || config.cnf_config[:helm_install_namespace]
         test_passed = true
         valid_cnf_testsuite_yml = true
-        LOGGING.debug "#{tn} container: #{container}"
-        LOGGING.debug "container_names: #{container_names}"
+        Log.for(testsuite_task).debug { "container: #{container}" }
+        Log.for(testsuite_task).debug { "container_names: #{container_names}" }
 
#todo use skopeo to get the next and previous versions of the cnf image dynamically config_container = container_names.find{|x| x["name"]==container.as_h["name"]} if container_names LOGGING.debug "config_container: #{config_container}" @@ -97,11 +100,14 @@ end desc "Test if the CNF can perform a rollback" task "rollback" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "rollback" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "rollback" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } container_names = config.cnf_config[:container_names] - LOGGING.debug "container_names: #{container_names}" + Log.for(testsuite_task).debug { "container_names: #{container_names}" } update_applied = true rollout_status = true @@ -122,11 +128,9 @@ task "rollback" do |_, args| image_name = full_image_name_tag[0] image_tag = full_image_name_tag[2] - VERBOSE_LOGGING.debug "resource: #{resource_kind}/#{resource_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "container_name: #{container_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "image_name: #{image_name}" if check_verbose(args) - VERBOSE_LOGGING.debug "image_tag: #{image_tag}" if check_verbose(args) - LOGGING.debug "rollback: setting new version" + Log.for(testsuite_task).debug { + "Rollback: setting new version; resource=#{resource_kind}/#{resource_name}; container_name=#{container_name}; image_name=#{image_name}; image_tag: #{image_tag}" + } #do_update = `kubectl set image deployment/coredns-coredns coredns=coredns/coredns:latest --record` version_change_applied = true @@ -144,7 +148,9 @@ task "rollback" do |_, args| version_change_applied=false end - VERBOSE_LOGGING.debug "rollback: update #{resource_kind}/#{resource_name}, container: #{container_name}, image: #{image_name}, tag: #{rollback_from_tag}" if check_verbose(args) + Log.for(testsuite_task).debug { + "rollback: update #{resource_kind}/#{resource_name}, container: #{container_name}, image: #{image_name}, tag: #{rollback_from_tag}" + } # set a temporary image/tag, so that we can rollback to the current (original) tag later version_change_applied = KubectlClient::Set.image( resource_kind, @@ -156,25 +162,25 @@ task "rollback" do |_, args| ) end - LOGGING.info "rollback version change successful? #{version_change_applied}" + Log.for(testsuite_task).info { "rollback version change successful? 
#{version_change_applied}" } - VERBOSE_LOGGING.debug "rollback: checking status new version" if check_verbose(args) + Log.for(testsuite_task).debug { "rollback: checking status new version" } rollout_status = KubectlClient::Rollout.status(resource_kind, resource_name, namespace: namespace, timeout: "180s") if rollout_status == false stdout_failure("Rollback failed on resource: #{resource_kind}/#{resource_name} and container: #{container_name}") end # https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#rolling-back-to-a-previous-revision - VERBOSE_LOGGING.debug "rollback: rolling back to old version" if check_verbose(args) + Log.for(testsuite_task).debug { "rollback: rolling back to old version" } rollback_status = KubectlClient::Rollout.undo(resource_kind, resource_name, namespace: namespace) end if task_response && version_change_applied && rollout_status && rollback_status - upsert_passed_task("rollback","βœ”οΈ PASSED: CNF Rollback Passed", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: CNF Rollback Passed", task_start_time) else - upsert_failed_task("rollback", "βœ–οΈ FAILED: CNF Rollback Failed", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: CNF Rollback Failed", task_start_time) end end end @@ -183,7 +189,11 @@ desc "Test increasing/decreasing capacity" task "increase_decrease_capacity" do |t, args| VERBOSE_LOGGING.info "increase_decrease_capacity" if check_verbose(args) CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "increase_capacity" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "increase_decrease_capacity" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).info { "increase_capacity" } increase_test_base_replicas = "1" increase_test_target_replicas = "3" @@ -219,9 +229,9 @@ task "increase_decrease_capacity" do |t, args| if increase_task_response.none?(false) && decrease_task_response.none?(false) pass_msg = "βœ”οΈ πŸ† PASSED: Replicas increased to #{increase_test_target_replicas} and decreased to #{decrease_test_target_replicas} #{emoji_capacity}" - upsert_passed_task("increase_decrease_capacity", pass_msg, Time.utc) + upsert_passed_task(testsuite_task, pass_msg, task_start_time) else - upsert_failed_task("increase_decrease_capacity", "βœ–οΈ FAILURE: Capacity change failed #{emoji_capacity}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILURE: Capacity change failed #{emoji_capacity}", task_start_time) # If increased capacity failed if increase_task_response.any?(false) @@ -485,8 +495,11 @@ end task "helm_chart_valid", ["helm_local_install"] do |_, args| CNFManager::Task.task_runner(args) do |args| + task_start_time = Time.utc + testsuite_task = "helm_chart_valid" + Log.for(testsuite_task).info { "Starting test" } + if check_verbose(args) - Log.for("verbose").info { "helm_chart_valid" } Log.for("verbose").debug { "helm_chart_valid args.raw: #{args.raw}" } Log.for("verbose").debug { "helm_chart_valid args.named: #{args.named}" } end @@ -509,7 +522,7 @@ task "helm_chart_valid", ["helm_local_install"] do |_, args| Log.for("verbose").debug { "working_chart_directory: #{working_chart_directory}" } if check_verbose(args) current_dir = FileUtils.pwd - Log.for("verbose").debug { current_dir } if check_verbose(args) + Log.for(testsuite_task).debug { "current dir: #{current_dir}" } helm = Helm::BinarySingleton.helm emoji_helm_lint="βŽˆπŸ“β˜‘οΈ" @@ -523,12 +536,12 @@ task "helm_chart_valid", ["helm_local_install"] do |_, args| error: 
helm_link_stderr = IO::Memory.new ) helm_lint = helm_lint_stdout.to_s - Log.for("verbose").debug { "helm_lint: #{helm_lint}" } if check_verbose(args) + Log.for(testsuite_task).debug { "helm_lint: #{helm_lint}" } if check_verbose(args) if helm_lint_status.success? - upsert_passed_task("helm_chart_valid", "βœ”οΈ PASSED: Helm Chart #{working_chart_directory} Lint Passed #{emoji_helm_lint}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Helm Chart #{working_chart_directory} Lint Passed #{emoji_helm_lint}", task_start_time) else - upsert_failed_task("helm_chart_valid", "βœ–οΈ FAILED: Helm Chart #{working_chart_directory} Lint Failed #{emoji_helm_lint}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Helm Chart #{working_chart_directory} Lint Failed #{emoji_helm_lint}", task_start_time) end end end @@ -625,7 +638,10 @@ end desc "CNFs should work with any Certified Kubernetes product and any CNI-compatible network that meet their functionality requirements." task "cni_compatible" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "cni_compatible" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "cni_compatible" + Log.for(testsuite_task).info { "Starting test" } + emoji_security="πŸ”“πŸ”‘" docker_version = DockerClient.version_info() @@ -650,9 +666,9 @@ task "cni_compatible" do |_, args| puts "CNF failed to install on Cilium CNI cluster".colorize(:red) unless cilium_cnf_passed if calico_cnf_passed && cilium_cnf_passed - upsert_passed_task("cni_compatible", "βœ”οΈ PASSED: CNF compatible with both Calico and Cilium #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: CNF compatible with both Calico and Cilium #{emoji_security}", task_start_time) else - upsert_failed_task("cni_compatible", "βœ–οΈ FAILED: CNF not compatible with either Calico or Cillium #{emoji_security}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: CNF not compatible with either Calico or Cillium #{emoji_security}", task_start_time) end ensure kind_manager = KindManager.new @@ -661,7 +677,7 @@ task "cni_compatible" do |_, args| ENV["KUBECONFIG"]="#{kubeconfig_orig}" end else - upsert_skipped_task("cni_compatible", "βœ–οΈ SKIPPED: Docker not installed #{emoji_security}", Time.utc) + upsert_skipped_task(testsuite_task, "βœ–οΈ SKIPPED: Docker not installed #{emoji_security}", task_start_time) end end end From cc27ad401de849224b044eade14ad25e2550e1bd Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:31:40 +0700 Subject: [PATCH 18/33] Init log for platform observability tests * kube_state_metrics * node_exporter * prometheus_adapter * metrics_server --- src/tasks/platform/observability.cr | 32 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/tasks/platform/observability.cr b/src/tasks/platform/observability.cr index 08b8cbe7c..84de34954 100644 --- a/src/tasks/platform/observability.cr +++ b/src/tasks/platform/observability.cr @@ -16,6 +16,10 @@ namespace "platform" do desc "Does the Platform have Kube State Metrics installed" task "kube_state_metrics", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "kube_state_metrics" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping kube_state_metrics: not in poc mode" } puts "SKIPPED: Kube State Metrics".colorize(:yellow) @@ -32,15 +36,19 @@ namespace "platform" do if found 
emoji_kube_state_metrics="πŸ“Άβ˜ οΈ" - upsert_passed_task("kube_state_metrics","βœ”οΈ PASSED: Your platform is using the release for kube state metrics #{emoji_kube_state_metrics}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Your platform is using the release for kube state metrics #{emoji_kube_state_metrics}", task_start_time) else emoji_kube_state_metrics="πŸ“Άβ˜ οΈ" - upsert_failed_task("kube_state_metrics", "βœ–οΈ FAILED: Your platform does not have kube state metrics installed #{emoji_kube_state_metrics}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Your platform does not have kube state metrics installed #{emoji_kube_state_metrics}", task_start_time) end end desc "Does the Platform have a Node Exporter installed" task "node_exporter", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "node_exporter" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping node_exporter: not in poc mode" } puts "SKIPPED: Node Exporter".colorize(:yellow) @@ -57,16 +65,20 @@ namespace "platform" do Log.info { "Found Process: #{found}" } if found emoji_node_exporter="πŸ“Άβ˜ οΈ" - upsert_passed_task("node_exporter","βœ”οΈ PASSED: Your platform is using the node exporter #{emoji_node_exporter}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Your platform is using the node exporter #{emoji_node_exporter}", task_start_time) else emoji_node_exporter="πŸ“Άβ˜ οΈ" - upsert_failed_task("node_exporter", "βœ–οΈ FAILED: Your platform does not have the node exporter installed #{emoji_node_exporter}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Your platform does not have the node exporter installed #{emoji_node_exporter}", task_start_time) end end desc "Does the Platform have the prometheus adapter installed" task "prometheus_adapter", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "prometheus_adapter" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping prometheus_adapter: not in poc mode" } puts "SKIPPED: Prometheus Adapter".colorize(:yellow) @@ -83,15 +95,19 @@ namespace "platform" do if found emoji_prometheus_adapter="πŸ“Άβ˜ οΈ" - upsert_passed_task("prometheus_adapter","βœ”οΈ PASSED: Your platform is using the prometheus adapter #{emoji_prometheus_adapter}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Your platform is using the prometheus adapter #{emoji_prometheus_adapter}", task_start_time) else emoji_prometheus_adapter="πŸ“Άβ˜ οΈ" - upsert_failed_task("prometheus_adapter", "βœ–οΈ FAILED: Your platform does not have the prometheus adapter installed #{emoji_prometheus_adapter}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Your platform does not have the prometheus adapter installed #{emoji_prometheus_adapter}", task_start_time) end end desc "Does the Platform have the K8s Metrics Server installed" task "metrics_server", ["install_cluster_tools"] do |_, args| + task_start_time = Time.utc + testsuite_task = "metrics_server" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping metrics_server: not in poc mode" } puts "SKIPPED: Metrics Server".colorize(:yellow) @@ -108,10 +124,10 @@ namespace "platform" do found = KernelIntrospection::K8s.find_first_process(CloudNativeIntrospection::METRICS_SERVER) if found emoji_metrics_server="πŸ“Άβ˜ οΈ" - upsert_passed_task("metrics_server","βœ”οΈ PASSED: 
Your platform is using the metrics server #{emoji_metrics_server}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Your platform is using the metrics server #{emoji_metrics_server}", task_start_time) else emoji_metrics_server="πŸ“Άβ˜ οΈ" - upsert_failed_task("metrics_server", "βœ–οΈ FAILED: Your platform does not have the metrics server installed #{emoji_metrics_server}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Your platform does not have the metrics server installed #{emoji_metrics_server}", task_start_time) end end end From c108fcdaf97c8f9bb861ce5aa6daa5af0d43d41f Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:38:03 +0700 Subject: [PATCH 19/33] Init log for platform tests * k8s_conformance * clusterapi_enabled --- src/tasks/platform/platform.cr | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/tasks/platform/platform.cr b/src/tasks/platform/platform.cr index 53217ef27..96b2d797c 100644 --- a/src/tasks/platform/platform.cr +++ b/src/tasks/platform/platform.cr @@ -20,10 +20,13 @@ end desc "Does the platform pass the K8s conformance tests?" task "k8s_conformance" do |_, args| - VERBOSE_LOGGING.info "k8s_conformance" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "k8s_conformance" + Log.for(testsuite_task).info { "Starting test" } + begin current_dir = FileUtils.pwd - VERBOSE_LOGGING.debug current_dir if check_verbose(args) + Log.for(testsuite_task).debug { "current dir: #{current_dir}" } sonobuoy = "#{tools_path}/sonobuoy/sonobuoy" # Clean up old results @@ -34,7 +37,7 @@ task "k8s_conformance" do |_, args| output: delete_stdout = IO::Memory.new, error: delete_stderr = IO::Memory.new ) - Log.for("verbose").info { delete_stdout } if check_verbose(args) + Log.for(testsuite_task).debug { "sonobuoy delete output: #{delete_stdout}" } # Run the tests testrun_stdout = IO::Memory.new @@ -70,10 +73,10 @@ task "k8s_conformance" do |_, args| failed_count = ((results.match(/Failed: (.*)/)).try &.[1]) if failed_count.to_s.to_i > 0 - upsert_failed_task("k8s_conformance", "βœ–οΈ FAILED: K8s conformance test has #{failed_count} failure(s)!", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: K8s conformance test has #{failed_count} failure(s)!", task_start_time) else - upsert_passed_task("k8s_conformance", "βœ”οΈ PASSED: K8s conformance test has no failures", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: K8s conformance test has no failures", task_start_time) end rescue ex Log.error { ex.message } @@ -88,6 +91,10 @@ end desc "Is Cluster Api available and managing a cluster?" task "clusterapi_enabled" do |_, args| CNFManager::Task.task_runner(args, check_cnf_installed=false) do + task_start_time = Time.utc + testsuite_task = "clusterapi_enabled" + Log.for(testsuite_task).info { "Starting test" } + unless check_poc(args) Log.info { "skipping clusterapi_enabled: not in poc mode" } puts "SKIPPED: ClusterAPI Enabled".colorize(:yellow) @@ -134,9 +141,9 @@ task "clusterapi_enabled" do |_, args| emoji_control="✨" if clusterapi_namespaces_json["items"]? && clusterapi_namespaces_json["items"].as_a.size > 0 && clusterapi_control_planes_json["items"]? 
&& clusterapi_control_planes_json["items"].as_a.size > 0 - resp = upsert_passed_task("clusterapi_enabled", "βœ”οΈ Cluster API is enabled #{emoji_control}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ Cluster API is enabled #{emoji_control}", task_start_time) else - resp = upsert_failed_task("clusterapi_enabled", "βœ–οΈ Cluster API NOT enabled #{emoji_control}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ Cluster API NOT enabled #{emoji_control}", task_start_time) end resp From 47def1a745ca4478c03a6b61f83cffa76b2e3a67 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:39:59 +0700 Subject: [PATCH 20/33] Init log for platform resilience tests * worker_reboot_recovery --- src/tasks/platform/resilience.cr | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/tasks/platform/resilience.cr b/src/tasks/platform/resilience.cr index f5ed5c594..138268401 100644 --- a/src/tasks/platform/resilience.cr +++ b/src/tasks/platform/resilience.cr @@ -14,6 +14,10 @@ namespace "platform" do desc "Does the Platform recover the node and reschedule pods when a worker node fails" task "worker_reboot_recovery" do |_, args| + task_start_time = Time.utc + testsuite_task = "worker_reboot_recovery" + Log.for(testsuite_task).info { "Starting test" } + unless check_destructive(args) Log.info { "skipping node_failure: not in destructive mode" } puts "SKIPPED: Node Failure".colorize(:yellow) @@ -43,7 +47,7 @@ namespace "platform" do pod_ready = KubectlClient::Get.pod_status("reboot", "--field-selector spec.nodeName=#{worker_node}").split(",")[2] pod_ready_timeout = pod_ready_timeout - 1 if pod_ready_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "βœ–οΈ FAILED: Failed to install reboot daemon", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Failed to install reboot daemon", task_start_time) exit 1 end sleep 1 @@ -67,7 +71,7 @@ namespace "platform" do Log.info { "Node Ready Status: #{node_ready}" } node_failure_timeout = node_failure_timeout - 1 if node_failure_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "βœ–οΈ FAILED: Node failed to go offline", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Node failed to go offline", task_start_time) exit 1 end sleep 1 @@ -85,14 +89,14 @@ namespace "platform" do Log.info { "Node Ready Status: #{node_ready}" } node_online_timeout = node_online_timeout - 1 if node_online_timeout == 0 - upsert_failed_task("worker_reboot_recovery", "βœ–οΈ FAILED: Node failed to come back online", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Node failed to come back online", task_start_time) exit 1 end sleep 1 end emoji_worker_reboot_recovery="" - resp = upsert_passed_task("worker_reboot_recovery","βœ”οΈ PASSED: Node came back online #{emoji_worker_reboot_recovery}", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Node came back online #{emoji_worker_reboot_recovery}", task_start_time) ensure From 68c4fe4cbbc7e6492d93a98da9dd4811a8d306d1 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:43:24 +0700 Subject: [PATCH 21/33] Init log for platform security tests * control_plane_hardening * cluster_admin * exposed_dashboard --- src/tasks/platform/security.cr | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/tasks/platform/security.cr b/src/tasks/platform/security.cr index 76cb1a7b5..075843c52 100644 --- a/src/tasks/platform/security.cr +++ 
b/src/tasks/platform/security.cr @@ -13,16 +13,19 @@ namespace "platform" do desc "Is the platform control plane hardened" task "control_plane_hardening", ["kubescape_scan"] do |_, args| task_response = CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args| - VERBOSE_LOGGING.info "control_plane_hardening" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "control_plane_hardening" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Control plane hardening") test_report = Kubescape.parse_test_report(test_json) emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("control_plane_hardening", "βœ”οΈ PASSED: Control plane hardened #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Control plane hardened #{emoji_security}", task_start_time) else - resp = upsert_failed_task("control_plane_hardening", "βœ–οΈ FAILED: Control plane not hardened #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Control plane not hardened #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -34,16 +37,19 @@ namespace "platform" do task "cluster_admin", ["kubescape_scan"] do |_, args| next if args.named["offline"]? CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| - VERBOSE_LOGGING.info "cluster_admin" if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "cluster_admin" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Cluster-admin binding") test_report = Kubescape.parse_test_report(test_json) emoji_security="πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("cluster_admin", "βœ”οΈ PASSED: No users with cluster admin role found #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No users with cluster admin role found #{emoji_security}", task_start_time) else - resp = upsert_failed_task("cluster_admin", "βœ–οΈ FAILED: Users with cluster admin role found #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Users with cluster admin role found #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp @@ -56,16 +62,19 @@ namespace "platform" do next if args.named["offline"]? 
CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| - Log.for("verbose").info { "exposed_dashboard" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "exposed_dashboard" + Log.for(testsuite_task).info { "Starting test" } + results_json = Kubescape.parse test_json = Kubescape.test_by_test_name(results_json, "Exposed dashboard") test_report = Kubescape.parse_test_report(test_json) emoji_security = "πŸ”“πŸ”‘" if test_report.failed_resources.size == 0 - upsert_passed_task("exposed_dashboard", "βœ”οΈ PASSED: No exposed dashboard found in the cluster #{emoji_security}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No exposed dashboard found in the cluster #{emoji_security}", task_start_time) else - resp = upsert_failed_task("exposed_dashboard", "βœ–οΈ FAILED: Found exposed dashboard in the cluster #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found exposed dashboard in the cluster #{emoji_security}", task_start_time) test_report.failed_resources.map {|r| stdout_failure(r.alert_message) } stdout_failure("Remediation: #{test_report.remediation}") resp From f81243c2e5dda3a6494d29202c2e4280c244ee67 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:44:57 +0700 Subject: [PATCH 22/33] Add init log for helm_tiller test (platform security) --- src/tasks/platform/security.cr | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tasks/platform/security.cr b/src/tasks/platform/security.cr index 075843c52..c4fdb8a50 100644 --- a/src/tasks/platform/security.cr +++ b/src/tasks/platform/security.cr @@ -85,7 +85,10 @@ namespace "platform" do desc "Check if the CNF is running containers with name tiller in their image name?" task "helm_tiller" do |_, args| emoji_security="πŸ”“πŸ”‘" - Log.for("verbose").info { "platform:helm_tiller" } + task_start_time = Time.utc + testsuite_task = "helm_tiller" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install CNFManager::Task.task_runner(args, check_cnf_installed=false) do |args, config| @@ -93,9 +96,9 @@ namespace "platform" do failures = Kyverno::PolicyAudit.run(policy_path, EXCLUDE_NAMESPACES) if failures.size == 0 - resp = upsert_passed_task("helm_tiller", "βœ”οΈ PASSED: No Helm Tiller containers are running #{emoji_security}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No Helm Tiller containers are running #{emoji_security}", task_start_time) else - resp = upsert_failed_task("helm_tiller", "βœ–οΈ FAILED: Containers with the Helm Tiller image are running #{emoji_security}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Containers with the Helm Tiller image are running #{emoji_security}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. 
#{failure.message}".colorize(:red) From 3aebfac6a2e304931776416ae26ed1d2c1357a22 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 19:51:41 +0700 Subject: [PATCH 23/33] Init log for workload observability tests * open_metrics * log_output * prometheus_traffic * routed_logs * tracing --- src/tasks/workload/observability.cr | 49 +++++++++++++++++++---------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/tasks/workload/observability.cr b/src/tasks/workload/observability.cr index dd58baecd..ebf01ccd3 100644 --- a/src/tasks/workload/observability.cr +++ b/src/tasks/workload/observability.cr @@ -19,7 +19,9 @@ end desc "Check if the CNF outputs logs to stdout or stderr" task "log_output" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "log_output" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "log_output" + Log.for(testsuite_task).info { "Starting test" } task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = false @@ -38,9 +40,9 @@ task "log_output" do |_, args| emoji_observability="πŸ“Άβ˜ οΈ" if task_response - upsert_passed_task("log_output", "βœ”οΈ πŸ† PASSED: Resources output logs to stdout and stderr #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Resources output logs to stdout and stderr #{emoji_observability}", task_start_time) else - upsert_failed_task("log_output", "βœ–οΈ πŸ† FAILED: Resources do not output logs to stdout and stderr #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Resources do not output logs to stdout and stderr #{emoji_observability}", task_start_time) end end end @@ -50,6 +52,9 @@ task "prometheus_traffic" do |_, args| Log.info { "Running: prometheus_traffic" } next if args.named["offline"]? task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "prometheus_traffic" + Log.for(testsuite_task).info { "Starting test" } release_name = config.cnf_config[:release_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -157,12 +162,12 @@ task "prometheus_traffic" do |_, args| # -- match ip address to cnf ip addresses # todo check if scrape_url is not an ip, assume it is a service, then do task (2) if prom_cnf_match - upsert_passed_task("prometheus_traffic","βœ”οΈ ✨PASSED: Your cnf is sending prometheus traffic #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: Your cnf is sending prometheus traffic #{emoji_observability}", task_start_time) else - upsert_failed_task("prometheus_traffic", "βœ–οΈ ✨FAILED: Your cnf is not sending prometheus traffic #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Your cnf is not sending prometheus traffic #{emoji_observability}", task_start_time) end else - upsert_skipped_task("prometheus_traffic", "⏭️ ✨SKIPPED: Prometheus server not found #{emoji_observability}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Prometheus server not found #{emoji_observability}", task_start_time) end end end @@ -172,6 +177,10 @@ task "open_metrics", ["prometheus_traffic"] do |_, args| Log.info { "Running: open_metrics" } next if args.named["offline"]? 
task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "open_metrics" + Log.for(testsuite_task).info { "Starting test" } + release_name = config.cnf_config[:release_name] configmap = KubectlClient::Get.configmap("cnf-testsuite-#{release_name}-open-metrics") emoji_observability="πŸ“Άβ˜ οΈ" @@ -179,14 +188,14 @@ task "open_metrics", ["prometheus_traffic"] do |_, args| open_metrics_validated = configmap["data"].as_h["open_metrics_validated"].as_s if open_metrics_validated == "true" - upsert_passed_task("open_metrics","βœ”οΈ ✨PASSED: Your cnf's metrics traffic is OpenMetrics compatible #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: Your cnf's metrics traffic is OpenMetrics compatible #{emoji_observability}", task_start_time) else open_metrics_response = configmap["data"].as_h["open_metrics_response"].as_s puts "OpenMetrics Failed: #{open_metrics_response}".colorize(:red) - upsert_failed_task("open_metrics", "βœ–οΈ ✨FAILED: Your cnf's metrics traffic is not OpenMetrics compatible #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Your cnf's metrics traffic is not OpenMetrics compatible #{emoji_observability}", task_start_time) end else - upsert_skipped_task("open_metrics", "⏭️ ✨SKIPPED: Prometheus traffic not configured #{emoji_observability}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Prometheus traffic not configured #{emoji_observability}", task_start_time) end end end @@ -197,6 +206,10 @@ task "routed_logs", ["install_cluster_tools"] do |_, args| next if args.named["offline"]? emoji_observability="πŸ“Άβ˜ οΈ" task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "routed_logs" + Log.for(testsuite_task).info { "Starting test" } + fluentd_match = FluentD.match() fluentbit_match = FluentBit.match() fluentbitBitnami_match = FluentDBitnami.match() @@ -227,12 +240,12 @@ task "routed_logs", ["install_cluster_tools"] do |_, args| end Log.info { "all_resourced_logged: #{all_resourced_logged}" } if all_resourced_logged - upsert_passed_task("routed_logs","βœ”οΈ ✨PASSED: Your cnf's logs are being captured #{emoji_observability}", Time.utc) + upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: Your cnf's logs are being captured #{emoji_observability}", task_start_time) else - upsert_failed_task("routed_logs", "βœ–οΈ ✨FAILED: Your cnf's logs are not being captured #{emoji_observability}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Your cnf's logs are not being captured #{emoji_observability}", task_start_time) end else - upsert_skipped_task("routed_logs", "⏭️ ✨SKIPPED: Fluentd or FluentBit not configured #{emoji_observability}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Fluentd or FluentBit not configured #{emoji_observability}", task_start_time) end end end @@ -246,6 +259,10 @@ task "tracing" do |_, args| if check_cnf_config(args) || CNFManager.destination_cnfs_exist? 
CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "tracing" + Log.for(testsuite_task).info { "Starting test" } + match = JaegerManager.match() Log.info { "jaeger match: #{match}" } if match[:found] @@ -258,16 +275,16 @@ task "tracing" do |_, args| tracing_used = configmap["data"].as_h["tracing_used"].as_s if tracing_used == "true" - upsert_passed_task("tracing", "βœ”οΈ ✨PASSED: Tracing used #{emoji_tracing_deploy}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Tracing used #{emoji_tracing_deploy}", task_start_time) else - upsert_failed_task("tracing", "βœ–οΈ ✨FAILED: Tracing not used #{emoji_tracing_deploy}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: Tracing not used #{emoji_tracing_deploy}", task_start_time) end else - upsert_skipped_task("tracing", "⏭️ ✨SKIPPED: Jaeger not configured #{emoji_tracing_deploy}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ ✨SKIPPED: Jaeger not configured #{emoji_tracing_deploy}", task_start_time) end end else - upsert_failed_task("tracing", "βœ–οΈ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", task_start_time) end end From 97dd2273e33e0c4858b1b56f85c0489c07b973a2 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 20:12:57 +0700 Subject: [PATCH 24/33] Init log for workload compatiblity tests Made some changes that I missed out earlier --- src/tasks/workload/compatibility.cr | 40 +++++++++++++++++------------ 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr index 9787efbc6..fdd8b01dd 100644 --- a/src/tasks/workload/compatibility.cr +++ b/src/tasks/workload/compatibility.cr @@ -86,9 +86,9 @@ rolling_version_change_test_names.each do |tn| end VERBOSE_LOGGING.debug "#{tn}: task_response=#{task_response}" if check_verbose(args) if task_response - resp = upsert_passed_task("#{tn}","βœ”οΈ PASSED: CNF for #{pretty_test_name_capitalized} Passed", Time.utc) + resp = upsert_passed_task("#{tn}","βœ”οΈ PASSED: CNF for #{pretty_test_name_capitalized} Passed", task_start_time) else - resp = upsert_failed_task("#{tn}", "βœ–οΈ FAILED: CNF for #{pretty_test_name_capitalized} Failed", Time.utc) + resp = upsert_failed_task("#{tn}", "βœ–οΈ FAILED: CNF for #{pretty_test_name_capitalized} Failed", task_start_time) end resp # TODO should we roll the image back to original version in an ensure? 
@@ -187,7 +187,7 @@ end desc "Test increasing/decreasing capacity" task "increase_decrease_capacity" do |t, args| - VERBOSE_LOGGING.info "increase_decrease_capacity" if check_verbose(args) + CNFManager::Task.task_runner(args) do |args, config| task_start_time = Time.utc testsuite_task = "increase_decrease_capacity" @@ -281,7 +281,7 @@ end # if task_response.none?(false) # upsert_passed_task("increase_capacity", "βœ”οΈ PASSED: Replicas increased to #{target_replicas} #{emoji_increase_capacity}") # else -# upsert_failed_task("increase_capacity", increase_decrease_capacity_failure_msg(target_replicas, emoji_increase_capacity)) +# upsert_failed_task(testsuite_task, increase_decrease_capacity_failure_msg(target_replicas, emoji_increase_capacity)) # end # end # end @@ -310,7 +310,7 @@ end # if task_response.none?(false) # ret = upsert_passed_task("decrease_capacity", "βœ”οΈ PASSED: Replicas decreased to #{target_replicas} #{emoji_decrease_capacity}") # else -# ret = upsert_failed_task("decrease_capacity", increase_decrease_capacity_failure_msg(target_replicas, emoji_decrease_capacity)) +# ret = upsert_failed_task(testsuite_task, increase_decrease_capacity_failure_msg(target_replicas, emoji_decrease_capacity)) # end # puts "1 ret: #{ret}" # ret @@ -426,10 +426,15 @@ end desc "Will the CNF install using helm with helm_deploy?" task "helm_deploy" do |_, args| - Log.for("helm_deploy").info { "Starting test" } - Log.info { "helm_deploy args: #{args.inspect}" } if check_verbose(args) + testsuite_task = "helm_deploy" + Log.for(testsuite_task).info { "Running #{testsuite_task}" } + Log.for(testsuite_task).info { "helm_deploy args: #{args.inspect}" } if check_verbose(args) + if check_cnf_config(args) || CNFManager.destination_cnfs_exist? CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + Log.for(testsuite_task).info { "Starting test" } + emoji_helm_deploy="βŽˆπŸš€" helm_chart = config.cnf_config[:helm_chart] helm_directory = config.cnf_config[:helm_directory] @@ -440,20 +445,23 @@ task "helm_deploy" do |_, args| helm_used = configmap["data"].as_h["helm_used"].as_s if helm_used == "true" - upsert_passed_task("helm_deploy", "βœ”οΈ PASSED: Helm deploy successful #{emoji_helm_deploy}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Helm deploy successful #{emoji_helm_deploy}", task_start_time) else - upsert_failed_task("helm_deploy", "βœ–οΈ FAILED: Helm deploy failed #{emoji_helm_deploy}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Helm deploy failed #{emoji_helm_deploy}", task_start_time) end end else - upsert_failed_task("helm_deploy", "βœ–οΈ FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: No cnf_testsuite.yml found! Did you run the setup task?", task_start_time) end end task "helm_chart_published", ["helm_local_install"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "helm_chart_published" + Log.for(testsuite_task).info { "Starting test" } + if check_verbose(args) - Log.for("verbose").info { "helm_chart_published" } Log.for("verbose").debug { "helm_chart_published args.raw: #{args.raw}" } Log.for("verbose").debug { "helm_chart_published args.named: #{args.named}" } end @@ -470,7 +478,7 @@ task "helm_chart_published", ["helm_local_install"] do |_, args| if CNFManager.helm_repo_add(args: args) unless helm_chart.empty? 
helm_search_cmd = "#{helm} search repo #{helm_chart}" - Log.info { "helm search command: #{helm_search_cmd}" } + Log.for(testsuite_task).info { "helm search command: #{helm_search_cmd}" } Process.run( helm_search_cmd, shell: true, @@ -480,15 +488,15 @@ task "helm_chart_published", ["helm_local_install"] do |_, args| helm_search = helm_search_stdout.to_s Log.for("verbose").debug { "#{helm_search}" } if check_verbose(args) unless helm_search =~ /No results found/ - upsert_passed_task("helm_chart_published", "βœ”οΈ PASSED: Published Helm Chart Found #{emoji_published_helm_chart}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Published Helm Chart Found #{emoji_published_helm_chart}", task_start_time) else - upsert_failed_task("helm_chart_published", "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end else - upsert_failed_task("helm_chart_published", "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end else - upsert_failed_task("helm_chart_published", "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Published Helm Chart Not Found #{emoji_published_helm_chart}", task_start_time) end end end From 0d81735495f819c384d4a9096e5fc215008bbc38 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:02:06 +0700 Subject: [PATCH 25/33] Init log for reliability tests * liveness * readiness * pod_network_latency * pod_network_corruption * pod_network_duplication * disk_fill * pod_delete * pod_memory_hog * pod_io_stress * pod_dns_error --- src/tasks/workload/reliability.cr | 171 +++++++++++++++++------------- 1 file changed, 97 insertions(+), 74 deletions(-) diff --git a/src/tasks/workload/reliability.cr b/src/tasks/workload/reliability.cr index 171e86c92..659205038 100644 --- a/src/tasks/workload/reliability.cr +++ b/src/tasks/workload/reliability.cr @@ -30,29 +30,32 @@ end desc "Is there a liveness entry in the helm chart?" 
task "liveness" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("liveness").info { "Starting test" } - Log.for("liveness").debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "liveness" + Log.for(testsuite_task).info { "Starting test" } + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + resp = "" emoji_probe="⎈🧫" task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = true resource_ref = "#{resource[:kind]}/#{resource[:name]}" begin - Log.for("liveness").debug { container.as_h["name"].as_s } if check_verbose(args) + Log.for(testsuite_task).debug { container.as_h["name"].as_s } if check_verbose(args) container.as_h["livenessProbe"].as_h rescue ex - Log.for("liveness").error { ex.message } if check_verbose(args) + Log.for(testsuite_task).error { ex.message } if check_verbose(args) test_passed = false stdout_failure("No livenessProbe found for container #{container.as_h["name"].as_s} part of #{resource_ref} in #{resource[:namespace]} namespace") end - Log.for("liveness").info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } + Log.for(testsuite_task).info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } test_passed end - Log.for("liveness").info { "Workload resource task response: #{task_response}" } + Log.for(testsuite_task).info { "Workload resource task response: #{task_response}" } if task_response - resp = upsert_passed_task("liveness","βœ”οΈ πŸ† PASSED: Helm liveness probe found #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ πŸ† PASSED: Helm liveness probe found #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("liveness","βœ–οΈ πŸ† FAILED: No livenessProbe found #{emoji_probe}", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ πŸ† FAILED: No livenessProbe found #{emoji_probe}", task_start_time) end resp end @@ -61,6 +64,10 @@ end desc "Is there a readiness entry in the helm chart?" 
task "readiness" do |_, args| CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "readiness" + Log.for(testsuite_task).info { "Starting test" } + Log.for("readiness").info { "Starting test" } Log.for("readiness").debug { "cnf_config: #{config}" } resp = "" @@ -69,21 +76,21 @@ task "readiness" do |_, args| test_passed = true resource_ref = "#{resource[:kind]}/#{resource[:name]}" begin - Log.for("readiness").debug { container.as_h["name"].as_s } if check_verbose(args) + Log.for(testsuite_task).debug { container.as_h["name"].as_s } if check_verbose(args) container.as_h["readinessProbe"].as_h rescue ex - Log.for("readiness").error { ex.message } if check_verbose(args) + Log.for(testsuite_task).error { ex.message } if check_verbose(args) test_passed = false stdout_failure("No readinessProbe found for container #{container.as_h["name"].as_s} part of #{resource_ref} in #{resource[:namespace]} namespace") end - Log.for("readiness").info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } + Log.for(testsuite_task).info { "Resource #{resource_ref} passed liveness?: #{test_passed}" } test_passed end - Log.for("readiness").info { "Workload resource task response: #{task_response}" } + Log.for(testsuite_task).info { "Workload resource task response: #{task_response}" } if task_response - resp = upsert_passed_task("readiness","βœ”οΈ πŸ† PASSED: Helm readiness probe found #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ πŸ† PASSED: Helm readiness probe found #{emoji_probe}", task_start_time) else - resp = upsert_failed_task("readiness","βœ–οΈ πŸ† FAILED: No readinessProbe found #{emoji_probe}", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ πŸ† FAILED: No readinessProbe found #{emoji_probe}", task_start_time) end resp end @@ -93,8 +100,10 @@ end desc "Does the CNF crash when network latency occurs" task "pod_network_latency", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_latency" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_network_latency" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -121,10 +130,10 @@ task "pod_network_latency", ["install_litmus"] do |_, args| # https://raw.githubusercontent.com/litmuschaos/chaos-charts/v2.14.x/charts/generic/pod-network-latency/rbac.yaml rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-latency/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -134,11 +143,11 @@ task "pod_network_latency", ["install_litmus"] do |_, args| chaos_experiment_name = "pod-network-latency" total_chaos_duration = "60" - test_name = "#{resource["name"]}-#{Random.rand(99)}" + 
test_name = "#{resource["name"]}-#{Random::Secure.hex(4)}" chaos_result_name = "#{test_name}-#{chaos_experiment_name}" spec_labels = KubectlClient::Get.resource_spec_labels(resource["kind"], resource["name"], resource["namespace"]).as_h - Log.for("#{test_name}:spec_labels").info { "Spec labels for chaos template. Key: #{spec_labels.first_key}; Value: #{spec_labels.first_value}" } + Log.for("#{testsuite_task}:spec_labels").info { "Spec labels for chaos template. Key: #{spec_labels.first_key}; Value: #{spec_labels.first_value}" } template = ChaosTemplates::PodNetworkLatency.new( test_name, "#{chaos_experiment_name}", @@ -154,9 +163,9 @@ task "pod_network_latency", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_latency","βœ”οΈ ✨PASSED: pod_network_latency chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: pod_network_latency chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_network_latency","βœ–οΈ ✨FAILED: pod_network_latency chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ ✨FAILED: pod_network_latency chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -164,9 +173,11 @@ end desc "Does the CNF crash when network corruption occurs" task "pod_network_corruption", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_corruption" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "pod_network_corruption" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -191,10 +202,10 @@ task "pod_network_corruption", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-network-corruption/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-corruption/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -223,9 +234,9 @@ task "pod_network_corruption", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_corruption","βœ”οΈ ✨PASSED: pod_network_corruption chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: pod_network_corruption chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_network_corruption","βœ–οΈ ✨FAILED: pod_network_corruption chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = 
upsert_failed_task(testsuite_task,"βœ–οΈ ✨FAILED: pod_network_corruption chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -233,9 +244,11 @@ end desc "Does the CNF crash when network duplication occurs" task "pod_network_duplication", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_network_duplication" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "pod_network_duplication" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } #TODO tests should fail if cnf not installed destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -260,10 +273,10 @@ task "pod_network_duplication", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-network-duplication/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-network-duplication/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -292,9 +305,9 @@ task "pod_network_duplication", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_network_duplication","βœ”οΈ ✨PASSED: pod_network_duplication chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: pod_network_duplication chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_network_duplication","βœ–οΈ ✨FAILED: pod_network_duplication chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ ✨FAILED: pod_network_duplication chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -302,9 +315,11 @@ end desc "Does the CNF crash when disk fill occurs" task "disk_fill", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "disk_fill" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "disk_fill" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| app_namespace = resource[:namespace] || config.cnf_config[:helm_install_namespace] @@ -312,7 +327,7 @@ task "disk_fill", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -327,10 +342,10 @@ task "disk_fill", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/disk-fill/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/disk-fill/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -361,9 +376,9 @@ task "disk_fill", ["install_litmus"] do |_, args| test_passed end if task_response - resp = upsert_passed_task("disk_fill","βœ”οΈ PASSED: disk_fill chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: disk_fill chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("disk_fill","βœ–οΈ FAILED: disk_fill chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: disk_fill chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -371,8 +386,10 @@ end desc "Does the CNF crash when pod-delete occurs" task "pod_delete", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_delete" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_delete" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -381,7 +398,7 @@ task "pod_delete", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -393,11 +410,11 @@ task "pod_delete", ["install_litmus"] do |_, args| else # experiment_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-delete/experiment.yaml" experiment_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-delete/experiment.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-delete/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-delete/rbac.yaml" - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -431,9 +448,9 @@ task "pod_delete", ["install_litmus"] do |_, args| test_passed=LitmusManager.check_chaos_verdict(chaos_result_name,chaos_experiment_name,args, namespace: app_namespace) end if task_response - resp = upsert_passed_task("pod_delete","βœ”οΈ PASSED: pod_delete chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: pod_delete chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_delete","βœ–οΈ FAILED: pod_delete chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: pod_delete chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -441,8 +458,10 @@ end desc "Does the CNF crash when pod-memory-hog occurs" task "pod_memory_hog", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_memory_hog" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_memory_hog" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -451,7 +470,7 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -466,10 +485,10 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-memory-hog/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-memory-hog/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -502,9 +521,9 @@ task "pod_memory_hog", ["install_litmus"] do |_, args| test_passed end if task_response - resp = upsert_passed_task("pod_memory_hog","βœ”οΈ PASSED: pod_memory_hog chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: pod_memory_hog chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_memory_hog","βœ–οΈ FAILED: pod_memory_hog chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: pod_memory_hog chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end @@ -512,9 +531,11 @@ end desc "Does the CNF crash when pod-io-stress occurs" task "pod_io_stress", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_io_stress" - Log.for(test_name).info { "Starting test" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "pod_io_stress" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| app_namespace = resource[:namespace] || config.cnf_config[:helm_install_namespace] @@ -522,7 +543,7 @@ task "pod_io_stress", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["name"]} in #{resource["namespace"]}") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["name"]} in #{resource["namespace"]}") test_passed = false end if test_passed @@ -537,10 +558,10 @@ task "pod_io_stress", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-io-stress/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-io-stress/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -572,9 +593,9 @@ task "pod_io_stress", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task(test_name,"βœ”οΈ ✨PASSED: #{test_name} chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: #{testsuite_task} chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task(test_name,"βœ–οΈ ✨FAILED: #{test_name} chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ ✨FAILED: #{testsuite_task} chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end ensure @@ -587,8 +608,10 @@ end desc "Does the CNF crash when pod-dns-error occurs" task "pod_dns_error", ["install_litmus"] do |_, args| CNFManager::Task.task_runner(args) do |args, config| - test_name = "pod_dns_error" - Log.for(test_name).info { "Starting test" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "pod_dns_error" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] runtimes = KubectlClient::Get.container_runtimes @@ -600,7 +623,7 @@ task "pod_dns_error", ["install_litmus"] do |_, args| if spec_labels.as_h? 
&& spec_labels.as_h.size > 0 test_passed = true else - stdout_failure("No resource label found for #{test_name} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") + stdout_failure("No resource label found for #{testsuite_task} test for resource: #{resource["kind"]}/#{resource["name"]} in #{resource["namespace"]} namespace") test_passed = false end if test_passed @@ -615,10 +638,10 @@ task "pod_dns_error", ["install_litmus"] do |_, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/pod-dns-error/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/pod-dns-error/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -649,12 +672,12 @@ task "pod_dns_error", ["install_litmus"] do |_, args| end end if task_response - resp = upsert_passed_task("pod_dns_error","βœ”οΈ ✨PASSED: pod_dns_error chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ ✨PASSED: pod_dns_error chaos test passed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) else - resp = upsert_failed_task("pod_dns_error","βœ–οΈ ✨FAILED: pod_dns_error chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ ✨FAILED: pod_dns_error chaos test failed πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end else - resp = upsert_skipped_task("pod_dns_error","⏭️ ✨SKIPPED: pod_dns_error docker runtime not found πŸ—‘οΈπŸ’€β™»οΈ", Time.utc) + resp = upsert_skipped_task(testsuite_task,"⏭️ ✨SKIPPED: pod_dns_error docker runtime not found πŸ—‘οΈπŸ’€β™»οΈ", task_start_time) end end end From cd292ad92d7f4d6dca2f2a374b57de99a0f966c1 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:42:13 +0700 Subject: [PATCH 26/33] Use testsuite_task var instead of tn --- src/tasks/workload/compatibility.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr index fdd8b01dd..ce19e890c 100644 --- a/src/tasks/workload/compatibility.cr +++ b/src/tasks/workload/compatibility.cr @@ -86,9 +86,9 @@ rolling_version_change_test_names.each do |tn| end VERBOSE_LOGGING.debug "#{tn}: task_response=#{task_response}" if check_verbose(args) if task_response - resp = upsert_passed_task("#{tn}","βœ”οΈ PASSED: CNF for #{pretty_test_name_capitalized} Passed", task_start_time) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: CNF for #{pretty_test_name_capitalized} Passed", task_start_time) else - resp = upsert_failed_task("#{tn}", "βœ–οΈ FAILED: CNF for #{pretty_test_name_capitalized} Failed", task_start_time) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: CNF for #{pretty_test_name_capitalized} Failed", task_start_time) end resp # TODO should we roll the image back to original version in an ensure? 
From 8a43b45c73fa2ebbbb5cdb3b75a97ef5c3f6c940 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:42:41 +0700 Subject: [PATCH 27/33] Fix missed init log updates in workload security tests --- src/tasks/workload/security.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tasks/workload/security.cr b/src/tasks/workload/security.cr index 97496bfb4..a58be246e 100644 --- a/src/tasks/workload/security.cr +++ b/src/tasks/workload/security.cr @@ -115,7 +115,7 @@ task "selinux_options" do |_, args| if check_failures.size == 0 # upsert_skipped_task("selinux_options", "⏭️ πŸ† SKIPPED: Pods are not using SELinux options #{emoji_security}", Time.utc) - upsert_na_task("selinux_options", "⏭️ πŸ† N/A: Pods are not using SELinux #{emoji_security}", Time.utc) + upsert_na_task(testsuite_task, "⏭️ πŸ† N/A: Pods are not using SELinux #{emoji_security}", task_start_time) else failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, disallow_failures) @@ -169,7 +169,7 @@ task "non_root_user", ["install_falco"] do |_, args| unless KubectlClient::Get.resource_wait_for_install("Daemonset", "falco", namespace: TESTSUITE_NAMESPACE) Log.info { "Falco Failed to Start" } - upsert_skipped_task("non_root_user", "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Skipping non_root_user: Falco failed to install. Check Kernel Headers are installed on the Host Systems(K8s).", task_start_time) node_pods = KubectlClient::Get.pods_by_nodes(KubectlClient::Get.schedulable_nodes_list) pods = KubectlClient::Get.pods_by_label(node_pods, "app", "falco") From ef10619977a6b18b686336a95101688a80ce911d Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:43:12 +0700 Subject: [PATCH 28/33] Init log for 5g RAN tests --- src/tasks/workload/ran.cr | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tasks/workload/ran.cr b/src/tasks/workload/ran.cr index 419295cc2..1bfae5ca5 100644 --- a/src/tasks/workload/ran.cr +++ b/src/tasks/workload/ran.cr @@ -8,7 +8,10 @@ require "../utils/utils.cr" desc "Test if a 5G core supports SUCI Concealment" task "suci_enabled" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running suci_enabled test" } + task_start_time = Time.utc + testsuite_task = "suci_enabled" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } suci_found : Bool | Nil core = config.cnf_config[:core_label]? 
@@ -52,9 +55,9 @@ task "suci_enabled" do |_, args| if suci_found - resp = upsert_passed_task("suci_enabled","βœ”οΈ PASSED: Core uses SUCI 5g authentication", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: Core uses SUCI 5g authentication", task_start_time) else - resp = upsert_failed_task("suci_enabled", "βœ–οΈ FAILED: Core does not use SUCI 5g authentication", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Core does not use SUCI 5g authentication", task_start_time) end resp ensure @@ -68,7 +71,10 @@ end desc "Test if RAN uses the ORAN e2 interface" task "oran_e2_connection" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running oran_e2_connection test" } + task_start_time = Time.utc + testsuite_task = "oran_e2_connection" + Log.for(testsuite_task).info { "Starting test" } + Log.debug { "cnf_config: #{config}" } release_name = config.cnf_config[:release_name] if ORANMonitor.isCNFaRIC?(config.cnf_config) @@ -77,13 +83,13 @@ task "oran_e2_connection" do |_, args| if e2_found == "true" - resp = upsert_passed_task("oran_e2_connection","βœ”οΈ PASSED: RAN connects to a RIC using the e2 standard interface", Time.utc) + resp = upsert_passed_task(testsuite_task,"βœ”οΈ PASSED: RAN connects to a RIC using the e2 standard interface", task_start_time) else - resp = upsert_failed_task("e2_established", "βœ–οΈ FAILED: RAN does not connect to a RIC using the e2 standard interface", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: RAN does not connect to a RIC using the e2 standard interface", task_start_time) end resp else - upsert_na_task("oran_e2_connection", "⏭️ N/A: [oran_e2_connection] No ric designated in cnf_testsuite.yml", Time.utc) + upsert_na_task(testsuite_task, "⏭️ N/A: [oran_e2_connection] No ric designated in cnf_testsuite.yml", task_start_time) next end end From 82fe936dc04ac3cd831b22cda640437d82199162 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:45:20 +0700 Subject: [PATCH 29/33] Init log updates for workload config tests --- src/tasks/workload/configuration.cr | 197 ++++++++++++++++------------ 1 file changed, 115 insertions(+), 82 deletions(-) diff --git a/src/tasks/workload/configuration.cr b/src/tasks/workload/configuration.cr index 5de047da2..6e8515c43 100644 --- a/src/tasks/workload/configuration.cr +++ b/src/tasks/workload/configuration.cr @@ -34,7 +34,10 @@ end desc "Check if the CNF is running containers with labels configured?" task "require_labels" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "require-labels" } + task_start_time = Time.utc + testsuite_task = "require_labels" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "πŸ·οΈβœ”οΈ" emoji_failed = "🏷️❌" @@ -45,9 +48,9 @@ task "require_labels" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("require_labels", "βœ”οΈ PASSED: Pods have the app.kubernetes.io/name label #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Pods have the app.kubernetes.io/name label #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("require_labels", "βœ–οΈ FAILED: Pods should have the app.kubernetes.io/name label. #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Pods should have the app.kubernetes.io/name label. 
#{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) @@ -60,7 +63,10 @@ end desc "Check if the CNF installs resources in the default namespace" task "default_namespace" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "default_namespace" } + task_start_time = Time.utc + testsuite_task = "default_namespace" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "πŸ·οΈβœ”οΈ" emoji_failed = "🏷️❌" @@ -71,9 +77,9 @@ task "default_namespace" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("default_namespace", "βœ”οΈ PASSED: default namespace is not being used #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: default namespace is not being used #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("default_namespace", "βœ–οΈ FAILED: Resources are created in the default namespace #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Resources are created in the default namespace #{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) @@ -86,7 +92,10 @@ end desc "Check if the CNF uses container images with the latest tag" task "latest_tag" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "latest_tag" } + task_start_time = Time.utc + testsuite_task = "latest_tag" + Log.for(testsuite_task).info { "Starting test" } + Kyverno.install emoji_passed = "πŸ·οΈβœ”οΈ" @@ -98,9 +107,9 @@ task "latest_tag" do |_, args| failures = Kyverno.filter_failures_for_cnf_resources(resource_keys, failures) if failures.size == 0 - resp = upsert_passed_task("latest_tag", "βœ”οΈ πŸ† PASSED: Container images are not using the latest tag #{emoji_passed}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Container images are not using the latest tag #{emoji_passed}", task_start_time) else - resp = upsert_failed_task("latest_tag", "βœ–οΈ πŸ† FAILED: Container images are using the latest tag #{emoji_failed}", Time.utc) + resp = upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Container images are using the latest tag #{emoji_failed}", task_start_time) failures.each do |failure| failure.resources.each do |resource| puts "#{resource.kind} #{resource.name} in #{resource.namespace} namespace failed. #{failure.message}".colorize(:red) @@ -113,8 +122,10 @@ end desc "Does a search for IP addresses or subnets come back as negative?" 
task "ip_addresses" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "ip_addresses" if check_verbose(args) - LOGGING.info("ip_addresses args #{args.inspect}") + task_start_time = Time.utc + testsuite_task = "ip_addresses" + Log.for(testsuite_task).info { "Starting test" } + cdir = FileUtils.pwd() response = String::Builder.new helm_directory = config.cnf_config[:helm_directory] @@ -123,7 +134,7 @@ task "ip_addresses" do |_, args| # Switch to the helm chart directory Dir.cd(helm_chart_path) # Look for all ip addresses that are not comments - LOGGING.info "current directory: #{ FileUtils.pwd()}" + Log.for(testsuite_task).info { "current directory: #{ FileUtils.pwd()}" } # should catch comments (# // or /*) and ignore 0.0.0.0 # note: grep wants * escaped twice Process.run("grep -r -P '^(?!.+0\.0\.0\.0)(?![[:space:]]*0\.0\.0\.0)(?!#)(?![[:space:]]*#)(?!\/\/)(?![[:space:]]*\/\/)(?!\/\\*)(?![[:space:]]*\/\\*)(.+([0-9]{1,3}[\.]){3}[0-9]{1,3})' --exclude=*.txt", shell: true) do |proc| @@ -143,16 +154,16 @@ task "ip_addresses" do |_, args| matching_line = line_parts.join(":").strip() stdout_failure(" * In file #{file_name}: #{matching_line}") end - resp = upsert_failed_task("ip_addresses","βœ–οΈ FAILED: IP addresses found", Time.utc) + resp = upsert_failed_task(testsuite_task,"βœ–οΈ FAILED: IP addresses found", task_start_time) else - resp = upsert_passed_task("ip_addresses", "βœ”οΈ PASSED: No IP addresses found", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No IP addresses found", task_start_time) end resp else # TODO If no helm chart directory, exit with 0 points # ADD SKIPPED tag for points.yml to allow for 0 points Dir.cd(cdir) - resp = upsert_passed_task("ip_addresses", "βœ”οΈ PASSED: No IP addresses found", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No IP addresses found", task_start_time) end end end @@ -173,8 +184,11 @@ task "versioned_tag", ["install_opa"] do |_, args| # end # CNFManager::Task.task_runner(args) do |args,config| - VERBOSE_LOGGING.info "versioned_tag" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "versioned_tag" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } fail_msgs = [] of String task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| test_passed = true @@ -204,9 +218,9 @@ task "versioned_tag", ["install_opa"] do |_, args| emoji_non_versioned_tag="🏷️❌" if task_response - upsert_passed_task("versioned_tag", "βœ”οΈ PASSED: Container images use versioned tags #{emoji_versioned_tag}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Container images use versioned tags #{emoji_versioned_tag}", task_start_time) else - upsert_failed_task("versioned_tag", "βœ–οΈ FAILED: Container images do not use versioned tags #{emoji_non_versioned_tag}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Container images do not use versioned tags #{emoji_non_versioned_tag}", task_start_time) fail_msgs.each do |msg| stdout_failure(msg) end @@ -218,20 +232,23 @@ desc "Does the CNF use NodePort" task "nodeport_not_used" do |_, args| # TODO rename task_runner to multi_cnf_task_runner CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "nodeport_not_used" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = 
"nodeport_not_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + release_name = config.cnf_config[:release_name] service_name = config.cnf_config[:service_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config, check_containers:false, check_service: true) do |resource, container, initialized| - LOGGING.info "nodeport_not_used resource: #{resource}" + Log.for(testsuite_task).info { "nodeport_not_used resource: #{resource}" } if resource["kind"].downcase == "service" - LOGGING.info "resource kind: #{resource}" + Log.for(testsuite_task).info { "resource kind: #{resource}" } service = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - LOGGING.debug "service: #{service}" + Log.for(testsuite_task).debug { "service: #{service}" } service_type = service.dig?("spec", "type") - LOGGING.info "service_type: #{service_type}" - VERBOSE_LOGGING.debug service_type if check_verbose(args) + Log.for(testsuite_task).info { "service_type: #{service_type}" } if service_type == "NodePort" #TODO make a service selector and display the related resources # that are tied to this service @@ -242,9 +259,9 @@ task "nodeport_not_used" do |_, args| end end if task_response - upsert_passed_task("nodeport_not_used", "βœ”οΈ PASSED: NodePort is not used", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: NodePort is not used", task_start_time) else - upsert_failed_task("nodeport_not_used", "βœ–οΈ FAILED: NodePort is being used", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: NodePort is being used", task_start_time) end end end @@ -252,32 +269,35 @@ end desc "Does the CNF use HostPort" task "hostport_not_used" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "hostport_not_used" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "hostport_not_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } release_name = config.cnf_config[:release_name] service_name = config.cnf_config[:service_name] destination_cnf_dir = config.cnf_config[:destination_cnf_dir] task_response = CNFManager.workload_resource_test(args, config, check_containers:false, check_service: true) do |resource, container, initialized| - LOGGING.info "hostport_not_used resource: #{resource}" + Log.for(testsuite_task).info { "hostport_not_used resource: #{resource}" } test_passed=true - LOGGING.info "resource kind: #{resource}" + Log.for(testsuite_task).info { "resource kind: #{resource}" } k8s_resource = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) - LOGGING.debug "resource: #{k8s_resource}" + Log.for(testsuite_task).debug { "resource: #{k8s_resource}" } # per examaple https://github.com/cncf/cnf-testsuite/issues/164#issuecomment-904890977 containers = k8s_resource.dig?("spec", "template", "spec", "containers") - LOGGING.debug "containers: #{containers}" + Log.for(testsuite_task).debug { "containers: #{containers}" } containers && containers.as_a.each do |single_container| ports = single_container.dig?("ports") ports && ports.as_a.each do |single_port| - LOGGING.debug "single_port: #{single_port}" + Log.for(testsuite_task).debug { "single_port: #{single_port}" } hostport = single_port.dig?("hostPort") - LOGGING.debug "DAS hostPort: #{hostport}" + 
Log.for(testsuite_task).debug { "DAS hostPort: #{hostport}" } if hostport stdout_failure("Resource #{resource[:kind]}/#{resource[:name]} in #{resource[:namespace]} namespace is using a HostPort") @@ -289,9 +309,9 @@ task "hostport_not_used" do |_, args| test_passed end if task_response - upsert_passed_task("hostport_not_used", "βœ”οΈ πŸ† PASSED: HostPort is not used", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: HostPort is not used", task_start_time) else - upsert_failed_task("hostport_not_used", "βœ–οΈ πŸ† FAILED: HostPort is being used", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: HostPort is being used", task_start_time) end end end @@ -299,9 +319,10 @@ end desc "Does the CNF have hardcoded IPs in the K8s resource configuration" task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| task_response = CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc testsuite_task = "hardcoded_ip_addresses_in_k8s_runtime_configuration" Log.for(testsuite_task).info { "Starting test" } - VERBOSE_LOGGING.info "Task Name: hardcoded_ip_addresses_in_k8s_runtime_configuration" if check_verbose(args) + helm_chart = config.cnf_config[:helm_chart] helm_directory = config.cnf_config[:helm_directory] release_name = config.cnf_config[:release_name] @@ -314,7 +335,7 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| unless helm_chart.empty? if args.named["offline"]? info = AirGap.tar_info_by_config_src(helm_chart) - LOGGING.info "hardcoded_ip_addresses_in_k8s_runtime_configuration airgapped mode info: #{info}" + Log.for(testsuite_task).info { "airgapped mode info: #{info}" } helm_chart = info[:tar_name] end helm_install = Helm.install("--namespace hardcoded-ip-test hardcoded-ip-test #{helm_chart} --dry-run --debug > #{destination_cnf_dir}/helm_chart.yml") @@ -332,12 +353,12 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| VERBOSE_LOGGING.info "IPs: #{ip_search}" if check_verbose(args) if ip_search.empty? - upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: No hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: No hard-coded IP addresses found in the runtime K8s configuration", task_start_time) else - upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Hard-coded IP addresses found in the runtime K8s configuration", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Hard-coded IP addresses found in the runtime K8s configuration", task_start_time) end rescue - upsert_skipped_task(testsuite_task, "⏭️ πŸ† SKIPPED: unknown exception", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ πŸ† SKIPPED: unknown exception", task_start_time) ensure KubectlClient::Delete.command("namespace hardcoded-ip-test --force --grace-period 0") end @@ -346,29 +367,33 @@ end desc "Does the CNF use K8s Secrets?" 
task "secrets_used" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.debug { "cnf_config: #{config}" } - Log.for("verbose").info { "secrets_used" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "secrets_used" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } + # Parse the cnf-testsuite.yml resp = "" emoji_probe="🧫" task_response = CNFManager.workload_resource_test(args, config, check_containers=false) do |resource, containers, volumes, initialized| - Log.info { "resource: #{resource}" } - Log.info { "volumes: #{volumes}" } + Log.for(testsuite_task).info { "resource: #{resource}" } + Log.for(testsuite_task).info { "volumes: #{volumes}" } volume_test_passed = false container_secret_mounted = false # Check to see any volume secrets are actually used volumes.as_a.each do |secret_volume| if secret_volume["secret"]? - LOGGING.info "secret_volume: #{secret_volume["name"]}" + Log.for(testsuite_task).info { "secret_volume: #{secret_volume["name"]}" } container_secret_mounted = false containers.as_a.each do |container| if container["volumeMounts"]? vmount = container["volumeMounts"].as_a - LOGGING.info "vmount: #{vmount}" - LOGGING.debug "container[env]: #{container["env"]}" + Log.for(testsuite_task).info { "vmount: #{vmount}" } + Log.for(testsuite_task).debug { "container[env]: #{container["env"]}" } if (vmount.find { |x| x["name"] == secret_volume["name"]? }) - LOGGING.debug secret_volume["name"] + Log.for(testsuite_task).debug { secret_volume["name"] } container_secret_mounted = true volume_test_passed = true end @@ -393,26 +418,26 @@ task "secrets_used" do |_, args| s_name = s["metadata"]["name"] s_type = s["type"] s_namespace = s.dig("metadata", "namespace") - Log.for("verbose").info {"secret name: #{s_name}, type: #{s_type}, namespace: #{s_namespace}"} if check_verbose(args) + Log.for(testsuite_task).info {"secret name: #{s_name}, type: #{s_type}, namespace: #{s_namespace}"} if check_verbose(args) end secret_keyref_found_and_not_ignored = false containers.as_a.each do |container| c_name = container["name"] - Log.for("verbose").info { "container: #{c_name} envs #{container["env"]?}" } if check_verbose(args) + Log.for(testsuite_task).info { "container: #{c_name} envs #{container["env"]?}" } if check_verbose(args) if container["env"]? Log.for("container_info").info { container["env"] } container["env"].as_a.find do |env| - Log.for("verbose").debug { "checking container: #{c_name}" } if check_verbose(args) + Log.for(testsuite_task).debug { "checking container: #{c_name}" } if check_verbose(args) secret_keyref_found_and_not_ignored = secrets["items"].as_a.find do |s| s_name = s["metadata"]["name"] if IGNORED_SECRET_TYPES.includes?(s["type"]) Log.for("verbose").info { "container: #{c_name} ignored secret: #{s_name}" } if check_verbose(args) next end - Log.for("checking_secret").info { s_name } + Log.for(testsuite_task).info { "Checking secret: #{s_name}" } found = (s_name == env.dig?("valueFrom", "secretKeyRef", "name")) if found - Log.for("secret_reference_found").info { "container: #{c_name} found secret reference: #{s_name}" } + Log.for(testsuite_task).info { "secret_reference_found. 
container: #{c_name} found secret reference: #{s_name}" } end found end end end end secret_keyref_found_and_not_ignored end test_passed = false if volume_test_passed == false && secret_keyref_found_and_not_ignored == false puts "No Secret Volumes or Container secretKeyRefs found for resource: #{resource}".colorize(:red) test_passed end if task_response - resp = upsert_passed_task("secrets_used","βœ”οΈ ✨PASSED: Secrets defined and used #{emoji_probe}", Time.utc) + resp = upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Secrets defined and used #{emoji_probe}", task_start_time) else - resp = upsert_skipped_task("secrets_used","⏭ ✨#{secrets_used_skipped_msg(emoji_probe)}", Time.utc) + resp = upsert_skipped_task(testsuite_task, "⏭ ✨#{secrets_used_skipped_msg(emoji_probe)}", task_start_time) end resp end @@ -547,8 +572,11 @@ task "immutable_configmap" do |_, args| emoji_probe="βš–οΈ" task_response = CNFManager::Task.task_runner(args) do |args, config| - VERBOSE_LOGGING.info "immutable_configmap" if check_verbose(args) - LOGGING.debug "cnf_config: #{config}" + task_start_time = Time.utc + testsuite_task = "immutable_configmap" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -560,14 +588,14 @@ task "immutable_configmap" do |_, args| test_config_map_filename = "#{destination_cnf_dir}/config_maps/test_config_map.yml"; template = ImmutableConfigMapTemplate.new("doesnt_matter").to_s - Log.debug { "test immutable_configmap template: #{template}" } + Log.for(testsuite_task).debug { "test immutable_configmap template: #{template}" } File.write(test_config_map_filename, template) KubectlClient::Apply.file(test_config_map_filename) # now we change then apply again template = ImmutableConfigMapTemplate.new("doesnt_matter_again").to_s - Log.debug { "test immutable_configmap change template: #{template}" } + Log.for(testsuite_task).debug { "test immutable_configmap change template: #{template}" } File.write(test_config_map_filename, template) immutable_configmap_supported = true @@ -581,14 +609,14 @@ task "immutable_configmap" do |_, args| KubectlClient::Delete.file(test_config_map_filename) if apply_result[:status].success? - Log.info { "kubectl apply on immutable configmap succeeded for: #{test_config_map_filename}" } + Log.for(testsuite_task).info { "kubectl apply on immutable configmap succeeded for: #{test_config_map_filename}" } k8s_ver = KubectlClient.server_version if version_less_than(k8s_ver, "1.19.0") resp = " ⏭️ SKIPPED: immmutable configmaps are not supported in this k8s cluster.".colorize(:yellow) - upsert_skipped_task("immutable_configmap", resp, Time.utc) + upsert_skipped_task(testsuite_task, resp, task_start_time) else resp = "βœ–οΈ FAILED: immmutable configmaps are not enabled in this k8s cluster.".colorize(:red) - upsert_failed_task("immutable_configmap", resp, Time.utc) + upsert_failed_task(testsuite_task, resp, task_start_time) end else @@ -596,8 +624,8 @@ task "immutable_configmap" do |_, args| envs_with_mutable_configmap = [] of MutableConfigMapsInEnvResult cnf_manager_workload_resource_task_response = CNFManager.workload_resource_test(args, config, check_containers=false, check_service=true) do |resource, containers, volumes, initialized| - Log.info { "resource: #{resource}" } - Log.info { "volumes: #{volumes}" } + Log.for(testsuite_task).info { "resource: #{resource}" } + Log.for(testsuite_task).info { "volumes: #{volumes}" } # If the install type is manifest, the namesapce would be in the manifest. 
# Else rely on config for helm-based install @@ -622,10 +650,10 @@ task "immutable_configmap" do |_, args| if cnf_manager_workload_resource_task_response resp = "βœ”οΈ ✨PASSED: All volume or container mounted configmaps immutable #{emoji_probe}".colorize(:green) - upsert_passed_task("immutable_configmap", resp, Time.utc) + upsert_passed_task(testsuite_task, resp, task_start_time) elsif immutable_configmap_supported resp = "βœ–οΈ ✨FAILED: Found mutable configmap(s) #{emoji_probe}".colorize(:red) - upsert_failed_task("immutable_configmap", resp, Time.utc) + upsert_failed_task(testsuite_task, resp, task_start_time) # Print out any mutable configmaps mounted as volumes volumes_test_results.each do |result| @@ -651,7 +679,9 @@ end desc "Check if CNF uses Kubernetes alpha APIs" task "alpha_k8s_apis" do |_, args| CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "alpha_k8s_apis" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "alpha_k8s_apis" + Log.for(testsuite_task).info { "Starting test" } unless check_poc(args) Log.info { "Skipping alpha_k8s_apis: not in poc mode" } @@ -665,7 +695,7 @@ task "alpha_k8s_apis" do |_, args| # No offline support for this task for now if args.named["offline"]? && args.named["offline"]? != "false" - upsert_skipped_task("alpha_k8s_apis","⏭️ SKIPPED: alpha_k8s_apis chaos test skipped #{emoji}", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: alpha_k8s_apis chaos test skipped #{emoji}", task_start_time) next end @@ -689,7 +719,7 @@ task "alpha_k8s_apis" do |_, args| # CNF setup failed on kind cluster. Inform in test output. unless cnf_setup_complete puts "CNF failed to install on apisnoop cluster".colorize(:red) - upsert_failed_task("alpha_k8s_apis", "βœ–οΈ FAILED: Could not check CNF for usage of Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Could not check CNF for usage of Kubernetes alpha APIs #{emoji}", task_start_time) next end @@ -706,9 +736,9 @@ task "alpha_k8s_apis" do |_, args| api_count = result[:output].split("\n")[2].to_i if api_count == 0 - upsert_passed_task("alpha_k8s_apis", "βœ”οΈ PASSED: CNF does not use Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: CNF does not use Kubernetes alpha APIs #{emoji}", task_start_time) else - upsert_failed_task("alpha_k8s_apis", "βœ–οΈ FAILED: CNF uses Kubernetes alpha APIs #{emoji}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: CNF uses Kubernetes alpha APIs #{emoji}", task_start_time) end ensure if cluster_name != nil @@ -731,8 +761,11 @@ end desc "Does the CNF install an Operator with OLM?" task "operator_installed" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "operator_installed" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "operator_installed" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } subscription_names = CNFManager.cnf_resources(args, config) do |resource| kind = resource.dig("kind").as_s @@ -741,7 +774,7 @@ task "operator_installed" do |_, args| end end.compact - Log.info { "Subscription Names: #{subscription_names}" } + Log.for(testsuite_task).info { "Subscription Names: #{subscription_names}" } #TODO Warn if csv is not found for a subscription. 
@@ -759,7 +792,7 @@ task "operator_installed" do |_, args| end end.compact - Log.info { "CSV Names: #{csv_names}" } + Log.for(testsuite_task).info { "CSV Names: #{csv_names}" } succeeded = csv_names.map do |csv| @@ -769,12 +802,12 @@ task "operator_installed" do |_, args| csv_succeeded end - Log.info { "Succeeded CSV Names: #{succeeded}" } + Log.for(testsuite_task).info { "Succeeded CSV Names: #{succeeded}" } test_passed = false if succeeded.size > 0 && succeeded.all?(true) - Log.info { "Succeeded All True?" } + Log.for(testsuite_task).info { "Succeeded All True?" } test_passed = true end @@ -785,9 +818,9 @@ task "operator_installed" do |_, args| emoji_big="πŸ¦–" if test_passed - upsert_passed_task("operator_installed", "βœ”οΈ PASSED: Operator is installed: #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Operator is installed: #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_na_task("operator_installed", "βœ–οΈ NA: No Operators Found #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_na_task(testsuite_task, "βœ–οΈ NA: No Operators Found #{emoji_big} #{emoji_image_size}", task_start_time) end end end From c54cea655f8f334aec4a7161fa93f7825f63e511 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 21:47:24 +0700 Subject: [PATCH 30/33] Init log updates for workload microservice tests --- src/tasks/workload/microservice.cr | 167 ++++++++++++++++------------- 1 file changed, 95 insertions(+), 72 deletions(-) diff --git a/src/tasks/workload/microservice.cr b/src/tasks/workload/microservice.cr index 8014d0993..54d5ac833 100644 --- a/src/tasks/workload/microservice.cr +++ b/src/tasks/workload/microservice.cr @@ -24,13 +24,17 @@ REASONABLE_STARTUP_BUFFER = 10.0 desc "To check if the CNF has multiple microservices that share a database" task "shared_database", ["install_cluster_tools"] do |_, args| - Log.info { "Running shared_database test" } + CNFManager::Task.task_runner(args) do |args, config| + task_start_time = Time.utc + testsuite_task = "shared_database" + Log.for(testsuite_task).info { "Starting test" } + # todo loop through local resources and see if db match found db_match = Netstat::Mariadb.match if db_match[:found] == false - upsert_na_task("shared_database", "⏭️ N/A: [shared_database] No MariaDB containers were found", Time.utc) + upsert_na_task(testsuite_task, "⏭️ N/A: [shared_database] No MariaDB containers were found", task_start_time) next end @@ -98,9 +102,9 @@ task "shared_database", ["install_cluster_tools"] do |_, args| failed_emoji = "(ΰ¦­_ΰ¦­) ήƒ πŸ’Ύ" passed_emoji = "πŸ–₯️ πŸ’Ύ" if integrated_database_found - upsert_failed_task("shared_database", "βœ–οΈ FAILED: Found a shared database #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Found a shared database #{failed_emoji}", task_start_time) else - upsert_passed_task("shared_database", "βœ”οΈ PASSED: No shared database found #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: No shared database found #{passed_emoji}", task_start_time) end end end @@ -109,8 +113,11 @@ desc "Does the CNF have a reasonable startup time (< 30 seconds)?" 
task "reasonable_startup_time" do |_, args| Log.info { "Running reasonable_startup_time test" } CNFManager::Task.task_runner(args) do |args, config| - Log.for("verbose").info { "reasonable_startup_time" } if check_verbose(args) - Log.debug { "cnf_config: #{config.cnf_config}" } + task_start_time = Time.utc + testsuite_task = "reasonable_startup_time" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config.cnf_config}" } yml_file_path = config.cnf_config[:yml_file_path] helm_chart = config.cnf_config[:helm_chart] @@ -181,9 +188,9 @@ task "reasonable_startup_time" do |_, args| Log.info { "startup_time: #{startup_time.to_i}" } if startup_time.to_i <= startup_time_limit - upsert_passed_task("reasonable_startup_time", "βœ”οΈ PASSED: CNF had a reasonable startup time #{emoji_fast}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: CNF had a reasonable startup time #{emoji_fast}", task_start_time) else - upsert_failed_task("reasonable_startup_time", "βœ–οΈ FAILED: CNF had a startup time of #{startup_time} seconds #{emoji_slow}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: CNF had a startup time of #{startup_time} seconds #{emoji_slow}", task_start_time) end end @@ -199,16 +206,20 @@ end desc "Does the CNF have a reasonable container image size (< 5GB)?" task "reasonable_image_size" do |_, args| CNFManager::Task.task_runner(args) do |args,config| + task_start_time = Time.utc + testsuite_task = "reasonable_image_size" + Log.for(testsuite_task).info { "Starting test" } + docker_insecure_registries = [] of String if config.cnf_config[:docker_insecure_registries]? && !config.cnf_config[:docker_insecure_registries].nil? docker_insecure_registries = config.cnf_config[:docker_insecure_registries].not_nil! end unless Dockerd.install(docker_insecure_registries) - upsert_skipped_task("reasonable_image_size", "⏭️ SKIPPED: Skipping reasonable_image_size: Dockerd tool failed to install", Time.utc) + upsert_skipped_task(testsuite_task, "⏭️ SKIPPED: Skipping reasonable_image_size: Dockerd tool failed to install", task_start_time) next end - Log.for("verbose").info { "reasonable_image_size" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } task_response = CNFManager.workload_resource_test(args, config) do |resource, container, initialized| yml_file_path = config.cnf_config[:yml_file_path] @@ -284,7 +295,7 @@ task "reasonable_image_size" do |_, args| test_passed=false end rescue ex - Log.error { "invalid compressed_size: #{fqdn_image} = '#{compressed_size.to_s}', #{ex.message}".colorize(:red) } + Log.for(testsuite_task).error { "invalid compressed_size: #{fqdn_image} = '#{compressed_size.to_s}', #{ex.message}".colorize(:red) } test_passed = false end else @@ -298,9 +309,9 @@ task "reasonable_image_size" do |_, args| emoji_big="πŸ¦–" if task_response - upsert_passed_task("reasonable_image_size", "βœ”οΈ PASSED: Image size is good #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Image size is good #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("reasonable_image_size", "βœ–οΈ FAILED: Image size too large #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Image size too large #{emoji_big} #{emoji_image_size}", task_start_time) end end end @@ -317,8 +328,11 @@ end desc "Do the containers in a pod have only one process type?" 
task "single_process_type" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "single_process_type" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "single_process_type" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } fail_msgs = [] of String all_node_proc_statuses = [] of NamedTuple(node_name: String, proc_statuses: Array(String)) @@ -333,15 +347,15 @@ task "single_process_type" do |_, args| containers = KubectlClient::Get.resource_containers(kind, resource[:name], resource[:namespace]) pods.map do |pod| pod_name = pod.dig("metadata", "name") - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } status = pod["status"] if status["containerStatuses"]? container_statuses = status["containerStatuses"].as_a - Log.info { "container_statuses: #{container_statuses}" } - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "container_statuses: #{container_statuses}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } nodes = KubectlClient::Get.nodes_by_pod(pod) - Log.info { "nodes_by_resource done" } + Log.for(testsuite_task).info { "nodes_by_resource done" } node = nodes.first container_statuses.map do |container_status| container_name = container_status.dig("name") @@ -349,15 +363,15 @@ task "single_process_type" do |_, args| container_id = container_status.dig("containerID").as_s ready = container_status.dig("ready").as_bool next unless ready - Log.info { "containerStatuses container_id #{container_id}" } + Log.for(testsuite_task).info { "containerStatuses container_id #{container_id}" } pid = ClusterTools.node_pid_by_container_id(container_id, node) - Log.info { "node pid (should never be pid 1): #{pid}" } + Log.for(testsuite_task).info { "node pid (should never be pid 1): #{pid}" } next unless pid node_name = node.dig("metadata", "name").as_s - Log.info { "node name : #{node_name}" } + Log.for(testsuite_task).info { "node name : #{node_name}" } # filtered_proc_statuses = all_node_proc_statuses.find {|x| x[:node_name] == node_name} # proc_statuses = filtered_proc_statuses ? 
filtered_proc_statuses[:proc_statuses] : nil # Log.debug { "node statuses : #{proc_statuses}" } @@ -375,12 +389,12 @@ task "single_process_type" do |_, args| proc_statuses) statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip ppid = status["PPid"].strip - Log.info { "status name: #{status_name}" } - Log.info { "previous status name: #{previous_process_type}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "previous status name: #{previous_process_type}" } # Fail if more than one process type #todo make work if processes out of order if status_name != previous_process_type && @@ -390,7 +404,7 @@ task "single_process_type" do |_, args| status_name, statuses) unless verified - Log.info { "multiple proc types detected verified: #{verified}" } + Log.for(testsuite_task).info { "multiple proc types detected verified: #{verified}" } fail_msg = "resource: #{resource}, pod #{pod_name} and container: #{container_name} has more than one process type (#{statuses.map{|x|x["cmdline"]?}.compact.uniq.join(", ")})" unless fail_msgs.find{|x| x== fail_msg} puts fail_msg.colorize(:red) @@ -412,9 +426,9 @@ task "single_process_type" do |_, args| emoji_big="πŸ¦–" if task_response - upsert_passed_task("single_process_type", "βœ”οΈ πŸ† PASSED: Only one process type used #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Only one process type used #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("single_process_type", "βœ–οΈ πŸ† FAILED: More than one process type used #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: More than one process type used #{emoji_big} #{emoji_image_size}", task_start_time) end end end @@ -422,15 +436,18 @@ end desc "Are the SIGTERM signals handled?" 
task "zombie_handled" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "zombie_handled" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "zombie_handled" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } task_response = CNFManager.workload_resource_test(args, config, check_containers:false ) do |resource, container, initialized| ClusterTools.all_containers_by_resource?(resource, resource[:namespace]) do | container_id, container_pid_on_node, node, container_proctree_statuses, container_status| resp = ClusterTools.exec_by_node("runc --root /run/containerd/runc/k8s.io/ state #{container_id}", node) - Log.info { "resp[:output] #{resp[:output]}" } + Log.for(testsuite_task).info { "resp[:output] #{resp[:output]}" } bundle_path = JSON.parse(resp[:output].to_s) - Log.info { "bundle path: #{bundle_path["bundle"]} "} + Log.for(testsuite_task).info { "bundle path: #{bundle_path["bundle"]} "} ClusterTools.exec_by_node("nerdctl --namespace=k8s.io cp /zombie #{container_id}:/zombie", node) ClusterTools.exec_by_node("nerdctl --namespace=k8s.io cp /sleep #{container_id}:/sleep", node) # ClusterTools.exec_by_node("ctools --bundle_path --container_id ") @@ -444,15 +461,15 @@ task "zombie_handled" do |_, args| ClusterTools.all_containers_by_resource?(resource, resource[:namespace]) do | container_id, container_pid_on_node, node, container_proctree_statuses, container_status| zombies = container_proctree_statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip current_pid = status["Pid"].strip state = status["State"].strip - Log.info { "pid: #{current_pid}" } - Log.info { "status name: #{status_name}" } - Log.info { "state: #{state}" } - Log.info { "(state =~ /zombie/): #{(state =~ /zombie/)}" } + Log.for(testsuite_task).info { "pid: #{current_pid}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "state: #{state}" } + Log.for(testsuite_task).info { "(state =~ /zombie/): #{(state =~ /zombie/)}" } if (state =~ /zombie/) != nil puts "Process #{status_name} has a state of #{state}".colorize(:red) true @@ -460,7 +477,7 @@ task "zombie_handled" do |_, args| nil end end - Log.info { "zombies.all?(nil): #{zombies.all?(nil)}" } + Log.for(testsuite_task).info { "zombies.all?(nil): #{zombies.all?(nil)}" } zombies.all?(nil) end end @@ -470,9 +487,9 @@ task "zombie_handled" do |_, args| emoji_big="πŸ¦–" if task_response - upsert_passed_task("zombie_handled", "βœ”οΈ πŸ† PASSED: Zombie handled #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Zombie handled #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("zombie_handled", "βœ–οΈ πŸ† FAILED: Zombie not handled #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Zombie not handled #{emoji_big} #{emoji_image_size}", task_start_time) end end @@ -483,8 +500,11 @@ end desc "Are the SIGTERM signals handled?" 
task "sig_term_handled" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "sig_term_handled" } if check_verbose(args) - Log.debug { "cnf_config: #{config}" } + task_start_time = Time.utc + testsuite_task = "sig_term_handled" + Log.for(testsuite_task).info { "Starting test" } + + Log.for(testsuite_task).debug { "cnf_config: #{config}" } # test_status can be "skipped" or "failed". # Only collecting containers that failed or were skipped. @@ -525,10 +545,10 @@ task "sig_term_handled" do |_, args| status = pod["status"] if status["containerStatuses"]? container_statuses = status["containerStatuses"].as_a - Log.info { "container_statuses: #{container_statuses}" } - Log.info { "pod_name: #{pod_name}" } + Log.for(testsuite_task).info { "container_statuses: #{container_statuses}" } + Log.for(testsuite_task).info { "pod_name: #{pod_name}" } nodes = KubectlClient::Get.nodes_by_pod(pod) - Log.info { "nodes_by_resource done" } + Log.for(testsuite_task).info { "nodes_by_resource done" } node = nodes.first # there should only be one node returned for one pod sig_result = container_statuses.map do |container_status| container_name = container_status.dig("name") @@ -537,7 +557,7 @@ task "sig_term_handled" do |_, args| # Check if the container status is ready. # If this container is not ready, move on to next. container_name = container_status.dig("name").as_s - Log.info { "before ready containerStatuses pod:#{pod_name} container:#{container_name}" } + Log.for(testsuite_task).info { "before ready containerStatuses pod:#{pod_name} container:#{container_name}" } ready = container_status.dig("ready").as_bool if !ready Log.info { "container status: #{container_status} "} @@ -554,7 +574,7 @@ task "sig_term_handled" do |_, args| end container_id = container_status.dig("containerID").as_s - Log.info { "containerStatuses container_id #{container_id}" } + Log.for(testsuite_task).info { "containerStatuses container_id #{container_id}" } #get container id's pid on the node (different from inside the container) pid = "#{ClusterTools.node_pid_by_container_id(container_id, node)}" @@ -572,7 +592,7 @@ task "sig_term_handled" do |_, args| end # next if pid.empty? - Log.info { "node pid (should never be pid 1): #{pid}" } + Log.for(testsuite_task).info { "node pid (should never be pid 1): #{pid}" } # need to do the next line. how to kill the current cnf? 
# this was one of the reason why we did stuff like this durring the cnf install and saved it as a configmap @@ -585,9 +605,9 @@ task "sig_term_handled" do |_, args| #todo 2.1 loop through all child processes that are not threads (only include proceses where tgid = pid) #todo 2.1.1 ignore the parent pid (we are on the host so it wont be pid 1) node_name = node.dig("metadata", "name").as_s - Log.info { "node name : #{node_name}" } + Log.for(testsuite_task).info { "node name : #{node_name}" } pids = KernelIntrospection::K8s::Node.pids(node) - Log.info { "proctree_by_pid pids: #{pids}" } + Log.for(testsuite_task).info { "proctree_by_pid pids: #{pids}" } proc_statuses = KernelIntrospection::K8s::Node.all_statuses_by_pids(pids, node) statuses = KernelIntrospection::K8s::Node.proctree_by_pid(pid, node, proc_statuses) @@ -605,16 +625,16 @@ task "sig_term_handled" do |_, args| end end non_thread_statuses.map do |status| - Log.debug { "status: #{status}" } - Log.info { "status cmdline: #{status["cmdline"]}" } + Log.for(testsuite_task).debug { "status: #{status}" } + Log.for(testsuite_task).info { "status cmdline: #{status["cmdline"]}" } status_name = status["Name"].strip ppid = status["PPid"].strip current_pid = status["Pid"].strip tgid = status["Tgid"].strip # check if 'g' is uppercase - Log.info { "Pid: #{current_pid}" } - Log.info { "Tgid: #{tgid}" } - Log.info { "status name: #{status_name}" } - Log.info { "previous status name: #{previous_process_type}" } + Log.for(testsuite_task).info { "Pid: #{current_pid}" } + Log.for(testsuite_task).info { "Tgid: #{tgid}" } + Log.for(testsuite_task).info { "status name: #{status_name}" } + Log.for(testsuite_task).info { "previous status name: #{previous_process_type}" } # do not count the top pid if there are children if non_thread_statuses.size > 1 && pid == current_pid next @@ -637,7 +657,7 @@ task "sig_term_handled" do |_, args| #todo 2.2 wait for 30 seconds end ClusterTools.exec_by_node("bash -c 'sleep 10 && kill #{pid} && sleep 5 && kill -9 #{pid}'", node) - Log.info { "pid_log_names: #{pid_log_names}" } + Log.for(testsuite_task).info { "pid_log_names: #{pid_log_names}" } #todo 2.3 parse the logs #todo get the log sleep 5 @@ -656,7 +676,7 @@ task "sig_term_handled" do |_, args| false end end - Log.info { "SigTerm Found: #{sig_term_found}" } + Log.for(testsuite_task).info { "SigTerm Found: #{sig_term_found}" } # per all containers container_sig_term_check = sig_term_found.all?(true) if container_sig_term_check == false @@ -690,9 +710,9 @@ task "sig_term_handled" do |_, args| emoji_big="πŸ¦–" if task_response - upsert_passed_task("sig_term_handled", "βœ”οΈ πŸ† PASSED: Sig Term handled #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ πŸ† PASSED: Sig Term handled #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("sig_term_handled", "βœ–οΈ πŸ† FAILED: Sig Term not handled #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Sig Term not handled #{emoji_big} #{emoji_image_size}", task_start_time) failed_containers.map do |failure_info| resource_output = "Pod: #{failure_info["pod"]}, Container: #{failure_info["container"]}, Result: #{failure_info["test_status"]}" if failure_info["test_status"] == "skipped" @@ -708,7 +728,9 @@ end desc "Are any of the containers exposed as a service?" 
task "service_discovery" do |_, args| CNFManager::Task.task_runner(args) do |args,config| - Log.for("verbose").info { "service_discovery" } if check_verbose(args) + task_start_time = Time.utc + testsuite_task = "service_discovery" + Log.for(testsuite_task).info { "Starting test" } # Get all resources for the CNF resource_ymls = CNFManager.cnf_workload_resources(args, config) { |resource| resource } @@ -755,18 +777,19 @@ task "service_discovery" do |_, args| emoji_big="πŸ¦–" if test_passed - upsert_passed_task("service_discovery", "βœ”οΈ ✨PASSED: Some containers exposed as a service #{emoji_small} #{emoji_image_size}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ ✨PASSED: Some containers exposed as a service #{emoji_small} #{emoji_image_size}", task_start_time) else - upsert_failed_task("service_discovery", "βœ–οΈ ✨FAILED: No containers exposed as a service #{emoji_big} #{emoji_image_size}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: No containers exposed as a service #{emoji_big} #{emoji_image_size}", task_start_time) end end end desc "To check if the CNF uses a specialized init system" task "specialized_init_system", ["install_cluster_tools"] do |_, args| - test_name = "specialized_init_system" CNFManager::Task.task_runner(args) do |args, config| - Log.info { "Running #{test_name} test" } + task_start_time = Time.utc + testsuite_task = "specialized_init_system" + Log.for(testsuite_task).info { "Starting test" } failed_cnf_resources = [] of InitSystems::InitSystemInfo CNFManager.workload_resource_test(args, config) do |resource, container, initialized| @@ -774,10 +797,10 @@ task "specialized_init_system", ["install_cluster_tools"] do |_, args| case kind when "deployment","statefulset","pod","replicaset", "daemonset" namespace = resource[:namespace] - Log.for(test_name).info { "Checking resource #{resource[:kind]}/#{resource[:name]} in #{namespace}" } + Log.for(testsuite_task).info { "Checking resource #{resource[:kind]}/#{resource[:name]} in #{namespace}" } resource_yaml = KubectlClient::Get.resource(resource[:kind], resource[:name], resource[:namespace]) pods = KubectlClient::Get.pods_by_resource(resource_yaml, namespace) - Log.for(test_name).info { "Pod count for resource #{resource[:kind]}/#{resource[:name]} in #{namespace}: #{pods.size}" } + Log.for(testsuite_task).info { "Pod count for resource #{resource[:kind]}/#{resource[:name]} in #{namespace}: #{pods.size}" } pods.each do |pod| results = InitSystems.scan(pod) failed_cnf_resources = failed_cnf_resources + results @@ -789,12 +812,12 @@ task "specialized_init_system", ["install_cluster_tools"] do |_, args| passed_emoji = "πŸ–₯️ πŸš€" if failed_cnf_resources.size > 0 - upsert_failed_task(test_name, "βœ–οΈ FAILED: Containers do not use specialized init systems #{failed_emoji}", Time.utc) + upsert_failed_task(testsuite_task, "βœ–οΈ FAILED: Containers do not use specialized init systems #{failed_emoji}", task_start_time) failed_cnf_resources.each do |init_info| stdout_failure "#{init_info.kind}/#{init_info.name} has container '#{init_info.container}' with #{init_info.init_cmd} as init process" end else - upsert_passed_task(test_name, "βœ”οΈ PASSED: Containers use specialized init systems #{passed_emoji}", Time.utc) + upsert_passed_task(testsuite_task, "βœ”οΈ PASSED: Containers use specialized init systems #{passed_emoji}", task_start_time) end end From aefb1647e3cd4ab59db5e8fad46d8f72ded63b01 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 22:22:34 +0700 Subject: [PATCH 31/33] Fix 
typos for init log and timestamp --- src/tasks/utils/points.cr | 2 +- src/tasks/workload/compatibility.cr | 2 +- src/tasks/workload/configuration.cr | 4 ++-- src/tasks/workload/observability.cr | 11 ++++++----- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/tasks/utils/points.cr b/src/tasks/utils/points.cr index 14d44f200..8ad37c9d8 100644 --- a/src/tasks/utils/points.cr +++ b/src/tasks/utils/points.cr @@ -389,7 +389,7 @@ module CNFManager end_time = Time.utc task_runtime = (end_time - start_time).milliseconds - Log.for(task).info { "task_runtime=#{task_runtime}; start_time=#{start_time}; end_time:#{end_time}" } + Log.for("#{task}").info { "task_runtime=#{task_runtime}; start_time=#{start_time}; end_time:#{end_time}" } # The task result info has to be appeneded to an array of YAML::Any # So encode it into YAML and parse it back again to assign it. diff --git a/src/tasks/workload/compatibility.cr b/src/tasks/workload/compatibility.cr index ce19e890c..145c869d0 100644 --- a/src/tasks/workload/compatibility.cr +++ b/src/tasks/workload/compatibility.cr @@ -426,13 +426,13 @@ end desc "Will the CNF install using helm with helm_deploy?" task "helm_deploy" do |_, args| + task_start_time = Time.utc testsuite_task = "helm_deploy" Log.for(testsuite_task).info { "Running #{testsuite_task}" } Log.for(testsuite_task).info { "helm_deploy args: #{args.inspect}" } if check_verbose(args) if check_cnf_config(args) || CNFManager.destination_cnfs_exist? CNFManager::Task.task_runner(args) do |args, config| - task_start_time = Time.utc Log.for(testsuite_task).info { "Starting test" } emoji_helm_deploy="βŽˆπŸš€" diff --git a/src/tasks/workload/configuration.cr b/src/tasks/workload/configuration.cr index 6e8515c43..95d99cb7b 100644 --- a/src/tasks/workload/configuration.cr +++ b/src/tasks/workload/configuration.cr @@ -358,7 +358,7 @@ task "hardcoded_ip_addresses_in_k8s_runtime_configuration" do |_, args| upsert_failed_task(testsuite_task, "βœ–οΈ πŸ† FAILED: Hard-coded IP addresses found in the runtime K8s configuration", task_start_time) end rescue - upsert_skipped_task(testsuite_task, "⏭️ πŸ† SKIPPED: unknown exception", task_start_time) + upsert_skipped_task(testsuite_task, "⏭️ πŸ† SKIPPED: unknown exception", Time.utc) ensure KubectlClient::Delete.command("namespace hardcoded-ip-test --force --grace-period 0") end @@ -595,7 +595,7 @@ task "immutable_configmap" do |_, args| # now we change then apply again template = ImmutableConfigMapTemplate.new("doesnt_matter_again").to_s - Log.for(testsuite_task)debug { "test immutable_configmap change template: #{template}" } + Log.for(testsuite_task).debug { "test immutable_configmap change template: #{template}" } File.write(test_config_map_filename, template) immutable_configmap_supported = true diff --git a/src/tasks/workload/observability.cr b/src/tasks/workload/observability.cr index ebf01ccd3..3924058ab 100644 --- a/src/tasks/workload/observability.cr +++ b/src/tasks/workload/observability.cr @@ -252,16 +252,17 @@ end desc "Does the CNF install use tracing?" task "tracing" do |_, args| - Log.for("verbose").info { "tracing" } if check_verbose(args) - Log.info { "tracing args: #{args.inspect}" } + testsuite_task = "tracing" + Log.for(testsuite_task).info { "Running test" } + Log.for(testsuite_task).info { "tracing args: #{args.inspect}" } + next if args.named["offline"]? - emoji_tracing_deploy="βŽˆπŸš€" + emoji_tracing_deploy="βŽˆπŸš€" if check_cnf_config(args) || CNFManager.destination_cnfs_exist? 
CNFManager::Task.task_runner(args) do |args, config| task_start_time = Time.utc - testsuite_task = "tracing" - Log.for(testsuite_task).info { "Starting test" } + Log.for(testsuite_task).info { "Starting test for CNF" } match = JaegerManager.match() Log.info { "jaeger match: #{match}" } From 93d0f830e2888d9fa5992c55e2cb3c1c64f5cb1c Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 22:24:14 +0700 Subject: [PATCH 32/33] Use Time.utc if the task is skipped before starting the task_runner block --- src/tasks/workload/observability.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tasks/workload/observability.cr b/src/tasks/workload/observability.cr index 3924058ab..c0ad7bafe 100644 --- a/src/tasks/workload/observability.cr +++ b/src/tasks/workload/observability.cr @@ -285,7 +285,7 @@ task "tracing" do |_, args| end end else - upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", task_start_time) + upsert_failed_task(testsuite_task, "βœ–οΈ ✨FAILED: No cnf_testsuite.yml found! Did you run the setup task?", Time.utc) end end From 921de6a0ed9eba8b8471fc2fd22b51cb96fe4052 Mon Sep 17 00:00:00 2001 From: Akash Manohar Date: Thu, 26 Oct 2023 22:28:43 +0700 Subject: [PATCH 33/33] Fix typos related to init log changes in chaos tests --- src/tasks/workload/state.cr | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/tasks/workload/state.cr b/src/tasks/workload/state.cr index 1dc5a9ed3..40fba57b6 100644 --- a/src/tasks/workload/state.cr +++ b/src/tasks/workload/state.cr @@ -222,7 +222,6 @@ task "node_drain", ["install_litmus"] do |t, args| testsuite_task = "node_drain" Log.for(testsuite_task).info { "Starting test" } - Log.for(test_name).info { "Starting test" } if check_verbose(args) skipped = false Log.debug { "cnf_config: #{config}" } destination_cnf_dir = config.cnf_config[:destination_cnf_dir] @@ -320,10 +319,10 @@ task "node_drain", ["install_litmus"] do |t, args| # rbac_url = "https://hub.litmuschaos.io/api/chaos/#{LitmusManager::Version}?file=charts/generic/node-drain/rbac.yaml" rbac_url = "https://raw.githubusercontent.com/litmuschaos/chaos-charts/#{LitmusManager::Version}/charts/generic/node-drain/rbac.yaml" - experiment_path = LitmusManager.download_template(experiment_url, "#{test_name}_experiment.yaml") + experiment_path = LitmusManager.download_template(experiment_url, "#{testsuite_task}_experiment.yaml") KubectlClient::Apply.file(experiment_path, namespace: app_namespace) - rbac_path = LitmusManager.download_template(rbac_url, "#{test_name}_rbac.yaml") + rbac_path = LitmusManager.download_template(rbac_url, "#{testsuite_task}_rbac.yaml") rbac_yaml = File.read(rbac_path) rbac_yaml = rbac_yaml.gsub("namespace: default", "namespace: #{app_namespace}") File.write(rbac_path, rbac_yaml) @@ -560,7 +559,7 @@ task "no_local_volume_configuration" do |_, args| if resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"]? volumes = resource["spec"].as_h["template"].as_h["spec"].as_h["volumes"].as_a end - Log.for(testsuite_task).debug "volumes: #{volumes}" + Log.for(testsuite_task).debug { "volumes: #{volumes}" } persistent_volume_claim_names = volumes.map do |volume| # get persistent volume claim that matches persistent volume claim name if volume.as_h["persistentVolumeClaim"]? && volume.as_h["persistentVolumeClaim"].as_h["claimName"]? 
@@ -569,7 +568,7 @@ task "no_local_volume_configuration" do |_, args| nil end end.compact - Log.debug.for(testsuite_task) { "persistent volume claim names: #{persistent_volume_claim_names}" } + Log.for(testsuite_task).debug { "persistent volume claim names: #{persistent_volume_claim_names}" } # TODO (optional) check storage class of persistent volume claim # loop through all pvc names