Skip to content

Commit

Permalink
Fixing issues 32 and 33 (#34)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Napsty authored Jul 29, 2022
1 parent 7e17faf commit 9ae380f
Showing 1 changed file with 38 additions and 39 deletions.
77 changes: 38 additions & 39 deletions check_rancher2.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,6 +48,7 @@
# 20211021 1.7.0 Check for additional node (pressure) conditions (#27) #
# 20211201 1.7.1 Fix cluster state detection (#26) #
# 20220610 1.8.0 More performance data, long parameters, other improvements (#31) #
# 20220729 1.9.0 Output improvements (#32), show workload namespace (#33) #
##########################################################################################
# (Pre-)Define some fixed variables
STATE_OK=0 # define the exit code if status is OK
Expand All @@ -56,8 +57,8 @@ STATE_CRITICAL=2 # define the exit code if status is Critical
STATE_UNKNOWN=3 # define the exit code if status is Unknown
export PATH=/usr/local/bin:/usr/bin:/bin:$PATH # Set path
proto=http # Protocol to use, default is http, can be overwritten with -S parameter
version=1.8.0

version=1.9.0
##########################################################################################
# functions

# https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/quantity/
Expand Down Expand Up @@ -180,23 +181,20 @@ Check Types:
"
exit ${STATE_UNKNOWN}
}

#########################################################################
# Check for necessary commands
# Abort with UNKNOWN if one of the external tools this plugin relies on
# cannot be resolved through PATH.
for cmd in jq curl; do
  # `command -v` is the shell builtin for command lookup. Unlike wrapping
  # `which` in backticks, it does not depend on an external binary and does
  # not execute the (empty) output of a command substitution as a command.
  # Both stdout and stderr are discarded so the plugin output stays clean.
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
    exit ${STATE_UNKNOWN}
  fi
done

#########################################################################

# Normalize the command line with getopt (short and long options; -a also
# accepts long options written with a single dash). "help" is part of the
# long option list so that --help reaches the "-h | --help" arm of the
# option loop instead of being rejected by getopt as an unrecognized option.
PARSED_ARGUMENTS=$(getopt -a -n check_rancher2 -o H:U:P:t:c:p:n:w:o:Ssi:h --long apihost:,apiuser:,apipass:,type:,clustername:,projectname:,namespacename:,workloadname:,podname:,secure,selfsigned,ignore:,cpu-warn:,cpu-crit:,memory-warn:,memory-crit:,pods-warn:,pods-crit:,help -- "$@")
VALID_ARGUMENTS=$?
# A non-zero getopt status means an invalid option or a missing argument:
# show the usage text (usage terminates the script).
if [ "$VALID_ARGUMENTS" != "0" ]; then
  usage
fi

#########################################################################
# Get user-given variables
eval set -- "$PARSED_ARGUMENTS"
Expand All @@ -222,7 +220,7 @@ while :; do
--pods-crit) pods_crit=${2} ; shift 2 ;;
--) shift; break ;;
-h | --help) usage;;
*) echo "Unexpected option: $1 - this should not happen."
*) echo "Unexpected option: $1 - this should not happen. Please consult --help for valid options."
usage;;
esac
done
Expand Down Expand Up @@ -322,7 +320,7 @@ for entry in ${project_ids[*]}; do
done


printf "CHECK_RANCHER2 OK - Found ${#cluster_ids[*]} clusters: ${pretty_clusters[*]} and ${#project_ids[*]} projects: ${pretty_projects[*]}|'clusters'=${#cluster_ids[*]};;;; 'projects'=${#project_ids[*]};;;;"
echo "CHECK_RANCHER2 OK - Found ${#cluster_ids[*]} clusters: ${pretty_clusters[*]} and ${#project_ids[*]} projects: ${pretty_projects[*]}|'clusters'=${#cluster_ids[*]};;;; 'projects'=${#project_ids[*]};;;;"
exit ${STATE_OK}
;;

Expand Down Expand Up @@ -457,43 +455,43 @@ else
# cpu
if [ ! -z $cpu_warn ] || [ ! -z $cpu_crit ]; then
if [[ "$usage_cpu" -gt "$cpu_crit" ]]; then
resourceerrors+="CPU usage ${usage_cpu} higher than crit threshold of ${cpu_crit} \n"
resourceerrors+="CPU usage ${usage_cpu}% > threshold of ${cpu_crit}% "
elif [[ "$usage_cpu" -gt "$cpu_warn" ]]; then
resourceerrors+="CPU usage ${usage_cpu} higher than warn threshold of ${cpu_warn} \n"
resourceerrors+="CPU usage ${usage_cpu}% > threshold of ${cpu_warn}% "
fi
fi

# memory
if [ ! -z $memory_warn ] || [ ! -z $memory_crit ]; then
if [[ "$usage_memory" -gt "$memory_crit" ]]; then
resourceerrors+="MEMORY usage ${usage_memory} higher than crit threshold of ${memory_crit} \n"
resourceerrors+="MEMORY usage ${usage_memory}% > threshold of ${memory_crit}% "
elif [[ "$usage_memory" -gt "$memory_warn" ]]; then
resourceerrors+="MEMORY usage ${usage_memory} higher than warn threshold of ${memory_warn} \n"
resourceerrors+="MEMORY usage ${usage_memory}% > threshold of ${memory_warn}% "
fi
fi

# pods
if [ ! -z $pods_warn ] || [ ! -z $pods_crit ]; then
if [[ "$usage_pods" -gt "$pods_crit" ]]; then
resourceerrors+="PODS Usage ${usage_pods} higher than crit threshold of ${pods_crit} \n"
resourceerrors+="PODS Usage ${usage_pods} > threshold of ${pods_crit} "
elif [[ "$usage_pods" -gt "$pods_warn" ]]; then
resourceerrors+="PODS Usage ${usage_pods} higher than warn threshold of ${pods_warn} \n"
resourceerrors+="PODS Usage ${usage_pods} > threshold of ${pods_warn} "
fi
fi

# Performance data for the single cluster check. The string is emitted with
# echo (not as a printf format), so the percent unit must be a single "%";
# a printf-style "%%" escape would be printed literally and corrupt the unit.
perf_output="'component_errors'=${#componenterrors[*]};;;; 'cpu'=${requested_cpu};;;;${capacity_cpu} 'memory'=${requested_memory}B;;;0;${capacity_memory} 'pods'=${requested_pods};;;;${capacity_pods} 'usage_cpu'=${usage_cpu}%;${cpu_warn};${cpu_crit};0;100 'usage_memory'=${usage_memory}%;${memory_warn};${memory_crit};0;100 'usage_pods'=${usage_pods}%;${pods_warn};${pods_crit};0;100"

if [[ ${#componenterrors[*]} -gt 0 && ! -z ${resourceerrors} ]]; then
printf "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems and component errors|'cluster_healthy'=0;;;; ${perf_output}\n${resourceerrors} ${componenterrors[*]}"
echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems and component errors: ${resourceerrors} ${componenterrors[*]}|'cluster_healthy'=0;;;; ${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ${#componenterrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias: ${componenterrors[*]}|'cluster_healthy'=0;;;; ${perf_output}\n${componenterrors[*]}"
echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias: ${componenterrors[*]}|'cluster_healthy'=0;;;; ${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ! -z ${resourceerrors} ]]; then
printf "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems|'cluster_healthy'=0;;;; ${perf_output}\n${resourceerrors}"
echo "CHECK_RANCHER2 CRITICAL - Cluster $clusteralias has resource problems: ${resourceerrors}|'cluster_healthy'=0;;;; ${perf_output}"
exit ${STATE_CRITICAL}
else
printf "CHECK_RANCHER2 OK - Cluster $clusteralias is healthy|'cluster_healthy'=1;;;; ${perf_output}"
echo "CHECK_RANCHER2 OK - Cluster $clusteralias is healthy|'cluster_healthy'=1;;;; ${perf_output}"
exit ${STATE_OK}
fi

Expand Down Expand Up @@ -646,13 +644,13 @@ if [[ -z $clustername ]]; then
perf_output="'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;; 'nodes_cpu_total'=${nodes_requested_cpu_total};;;0;${nodes_capacity_cpu_total} 'nodes_memory_total'=${nodes_requested_memory_total}B;;;0;${nodes_capacity_memory_total} 'nodes_pods_total'=${nodes_requested_pods_total};;;0;${nodes_capacity_pods_total}"

if [[ ${#nodeerrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states|${perf_output}\n${nodeerrors[*]}${nodeignored[*]}"
echo "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states: ${nodeerrors[*]}${nodeignored[*]}|${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ${#nodeignored[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 OK - All nodes OK - Info: ${#nodeignored[*]} node errors ignored|${perf_output}\n${nodeerrors[*]}${nodeignored[*]}"
echo "CHECK_RANCHER2 OK - All nodes OK - Info: ${#nodeignored[*]} node errors ignored: ${nodeerrors[*]}${nodeignored[*]}|${perf_output}"
exit ${STATE_OK}
else
printf "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|${perf_output}\n${nodeerrors[*]}${nodeignored[*]}"
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|${perf_output}"
exit ${STATE_OK}
fi

Expand Down Expand Up @@ -887,19 +885,19 @@ else
perf_output="'nodes_total'=${#node_names[*]};;;; 'node_errors'=${#nodeerrors[*]};;;; 'node_ignored'=${#nodeignored[*]};;;; 'nodes_cpu_total'=${nodes_requested_cpu_total};;;0;${nodes_capacity_cpu_total} 'nodes_memory_total'=${nodes_requested_memory_total}B;;;0;${nodes_capacity_memory_total} 'nodes_pods_total'=${nodes_requested_pods_total};;;0;${nodes_capacity_pods_total} ${node_perf_output}"

if [[ ${#nodeerrors[*]} -gt 0 && ! -z ${resourceerrors} ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states and resource problems|${perf_output}\n${nodeerrors[*]}${resourceerrors}${nodeignored[*]}"
echo "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states and resource problems: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ${#nodeerrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states|${perf_output}\n${nodeerrors[*]}${resourceerrors}${nodeignored[*]}"
echo "CHECK_RANCHER2 CRITICAL - ${#nodeerrors[*]} abnormal node states: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ! -z ${resourceerrors} ]]; then
printf "CHECK_RANCHER2 CRITICAL - Nodes with resource problems|${perf_output}\n${nodeerrors[*]}${resourceerrors}${nodeignored[*]}"
echo "CHECK_RANCHER2 CRITICAL - Nodes with resource problems: ${nodeerrors[*]}${resourceerrors}${nodeignored[*]}|${perf_output}"
exit ${STATE_CRITICAL}
elif [[ ${#nodeignored[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 OK - All nodes OK - Info: ${nodeignored[*]}|${perf_output}"
echo "CHECK_RANCHER2 OK - All nodes OK - Info: ${nodeignored[*]}|${perf_output}"
exit ${STATE_OK}
else
printf "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|${perf_output}"
echo "CHECK_RANCHER2 OK - All ${#node_names[*]} nodes are active|${perf_output}"
exit ${STATE_OK}
fi

Expand Down Expand Up @@ -927,10 +925,10 @@ if [[ -z $projectname ]]; then
done

if [[ ${#projecterrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${projecterrors[*]}|'projects_total'=${#project_ids[*]};;;; 'project_errors'=${#projecterrors[*]};;;;"
echo "CHECK_RANCHER2 CRITICAL - ${projecterrors[*]}|'projects_total'=${#project_ids[*]};;;; 'project_errors'=${#projecterrors[*]};;;;"
exit ${STATE_CRITICAL}
else
printf "CHECK_RANCHER2 OK - All projects (${#project_ids[*]}) are healthy|'projects_total'=${#project_ids[*]};;;; 'project_errors'=${#projecterrors[*]};;;;"
echo "CHECK_RANCHER2 OK - All projects (${#project_ids[*]}) are healthy|'projects_total'=${#project_ids[*]};;;; 'project_errors'=${#projecterrors[*]};;;;"
exit ${STATE_OK}
fi

Expand All @@ -947,10 +945,10 @@ else
healthstatus=$(echo "$api_out_single_project" | jq -r '.state')

if [[ ${healthstatus} != active ]]; then
printf "CHECK_RANCHER2 CRITICAL - Project $projectname is not active|'project_active'=0;;;; 'project_error'=1;;;;"
echo "CHECK_RANCHER2 CRITICAL - Project $projectname is not active|'project_active'=0;;;; 'project_error'=1;;;;"
exit ${STATE_CRITICAL}
else
printf "CHECK_RANCHER2 OK - Project $projectname is active|'project_active'=1;;;; 'project_error'=0;;;;"
echo "CHECK_RANCHER2 OK - Project $projectname is active|'project_active'=1;;;; 'project_error'=0;;;;"
exit ${STATE_OK}
fi

Expand All @@ -976,7 +974,7 @@ if [[ -z $workloadname ]]; then

if [[ -n $(echo "$api_out_workloads" | grep -i "ClusterUnavailable") ]]; then
clustername=$(echo ${projectname} | awk -F':' '{print $1}')
printf "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."
echo "CHECK_RANCHER2 CRITICAL - Cluster $clustername not found. Hint: Use '-t info' to identify cluster and project names."
exit ${STATE_CRITICAL}
fi

Expand All @@ -986,7 +984,7 @@ if [[ -z $workloadname ]]; then

# We rather WARN than silently return OK for zero workloads
if [[ ${#workload_names} -eq 0 ]]; then
printf "CHECK_RANCHER2 WARNING - No workloads found in project ${projectname}."
echo "CHECK_RANCHER2 WARNING - No workloads found in project ${projectname}."
exit ${STATE_WARNING}
fi

Expand All @@ -1008,10 +1006,10 @@ if [[ -z $workloadname ]]; then
done

if [[ ${#workloaderrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${#workloaderrors[*]} workload(s) in error state|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;\n${workloaderrors[*]}"
echo "CHECK_RANCHER2 CRITICAL - ${#workloaderrors[*]} workload(s) in error state: ${workloaderrors[*]}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;"
exit ${STATE_CRITICAL}
elif [[ ${#workloadwarnings[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 WARNING - ${#workloadwarnings[*]} workload(s) in warning state|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;\n${workloadwarnings[*]}"
echo "CHECK_RANCHER2 WARNING - ${#workloadwarnings[*]} workload(s) in warning state: ${workloadwarnings[*]}|'workloads_total'=${#workload_names[*]};;;; 'workloads_errors'=${#workloaderrors[*]};;;; 'workloads_warnings'=${#workloadwarnings[*]};;;; 'workloads_paused'=${#workloadpaused[*]};;;;"
exit ${STATE_WARNING}
else
if [[ ${#workloadpaused[*]} -gt 0 ]]; then
Expand All @@ -1027,6 +1025,7 @@ else
# Check status of a single workload
if [[ -n $namespacename && $namespacename != "" ]]; then
nsappend="&namespaceId=$namespacename"
nsoutputappend="in namespace $namespacename "
fi

api_out_single_workload=$(curl -s ${selfsigned} -u "${apiuser}:${apipass}" "${proto}://${apihost}/v3/project/${projectname}/workloads/?name=${workloadname}${nsappend}")
Expand All @@ -1038,7 +1037,7 @@ else

# Check if that given project name exists
if [[ -z $(echo "$api_out_single_workload" | grep -i "containers") ]]; then
echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname not found."; exit ${STATE_CRITICAL}
echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}not found."; exit ${STATE_CRITICAL}
fi

# Check if there are multiple workloads with the same name
Expand All @@ -1051,13 +1050,13 @@ else
healthstatus=$(echo "$api_out_single_workload" | jq -r '.data[].state')

if [[ ${healthstatus} = updating ]]; then
echo "CHECK_RANCHER2 WARNING - Workload $workloadname is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;"
echo "CHECK_RANCHER2 WARNING - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=0;;;; 'workload_warning'=1;;;;"
exit ${STATE_WARNING}
elif [[ ${healthstatus} != active ]]; then
echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;;"
echo "CHECK_RANCHER2 CRITICAL - Workload $workloadname ${nsoutputappend}is ${healthstatus}|'workload_active'=0;;;; 'workload_error'=1;;;; 'workload_warning'=0;;;;"
exit ${STATE_CRITICAL}
else
echo "CHECK_RANCHER2 OK - Workload $workloadname is active|'workload_active'=1;;;; 'workload_error'=0;;;; 'workload_warning'=0;;;;"
echo "CHECK_RANCHER2 OK - Workload $workloadname ${nsoutputappend}is active|'workload_active'=1;;;; 'workload_error'=0;;;; 'workload_warning'=0;;;;"
exit ${STATE_OK}
fi

Expand Down Expand Up @@ -1107,7 +1106,7 @@ if [[ -z $podname ]]; then
done

if [[ ${#poderrors[*]} -gt 0 ]]; then
printf "CHECK_RANCHER2 CRITICAL - ${#poderrors[*]} pod(s) in project ${projectname} ${outputappend}in abnormal state|'pods_total'=${#pod_names[*]};;;; 'pods_errors'=${#poderrors[*]};;;;\n${poderrors[*]}"
echo "CHECK_RANCHER2 CRITICAL - ${#poderrors[*]} pod(s) in project ${projectname} ${outputappend}in abnormal state: ${poderrors[*]}|'pods_total'=${#pod_names[*]};;;; 'pods_errors'=${#poderrors[*]};;;;"
exit ${STATE_CRITICAL}
else
echo "CHECK_RANCHER2 OK - All pods (${#pod_names[*]}) in project ${projectname} ${outputappend}are running|'pods_total'=${#pod_names[*]};;;; 'pods_errors'=${#poderrors[*]};;;;"
Expand Down

0 comments on commit 9ae380f

Please sign in to comment.