From 97930af8c0ea630f22095b001b4c311b7446fec0 Mon Sep 17 00:00:00 2001 From: eskimo Date: Thu, 7 Jul 2022 22:48:24 +0200 Subject: [PATCH] Implemented kubernetes housekeeping script --- packages_dev/common/common.sh | 2 +- ...etup-kubesched.sh => kube-housekeeping.sh} | 61 +++++++++++------ services_setup/kube-master/setup-kubectrl.sh | 68 ------------------- services_setup/kube-master/setup.sh | 18 +++-- 4 files changed, 53 insertions(+), 96 deletions(-) rename services_setup/kube-master/{setup-kubesched.sh => kube-housekeeping.sh} (57%) delete mode 100644 services_setup/kube-master/setup-kubectrl.sh diff --git a/packages_dev/common/common.sh b/packages_dev/common/common.sh index e11f2abc..0598ba98 100755 --- a/packages_dev/common/common.sh +++ b/packages_dev/common/common.sh @@ -83,7 +83,7 @@ export SPARK_STREAMING_KAFKA_CON_VERSION=0-10 export SPARK_STREAMING_KAFKA_CLIENT_VERSION=2.0.0 export SPARK_UNUSED_VERSION=1.0.0 -export EGMI_VERSION=0.2.0 +export EGMI_VERSION=0.2.1 export GRAFANA_VERSION=8.5.2 export PROMETHEUS_VERSION=2.35.0 diff --git a/services_setup/kube-master/setup-kubesched.sh b/services_setup/kube-master/kube-housekeeping.sh similarity index 57% rename from services_setup/kube-master/setup-kubesched.sh rename to services_setup/kube-master/kube-housekeeping.sh index aac90b79..2ffbcc94 100644 --- a/services_setup/kube-master/setup-kubesched.sh +++ b/services_setup/kube-master/kube-housekeeping.sh @@ -36,34 +36,53 @@ echoerr() { echo "$@" 1>&2; } -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -. $SCRIPT_DIR/common.sh "$@" -# CHange current folder to script dir (important !) -cd $SCRIPT_DIR || exit 199 +function delete_kube_housekeeping_lock_file() { + rm -Rf /var/lib/kubernetes/kube_houskeeping_lock +} -# Loading topology -if [[ ! -f /etc/k8s/env.sh ]]; then - echo "Could not find /etc/k8s/env.sh" - exit 1 +# From here we will be messing with kubernetes and hence we need to take a lock +if [[ -f /var/lib/kubernetes/kube_houskeeping_lock ]] ; then + echo `date +"%Y-%m-%d %H:%M:%S"`" - kube-housekeeping.sh is in execution already. Skipping ..." + exit 0 fi +trap delete_kube_housekeeping_lock_file 15 +trap delete_kube_housekeeping_lock_file EXIT +trap delete_kube_housekeeping_lock_file ERR + +mkdir -p /var/lib/kubernetes/ +touch /var/lib/kubernetes/kube_houskeeping_lock + + +echo " + Sourcing kubernetes environment" . /etc/k8s/env.sh -sudo rm -Rf /tmp/kubesched_setup -mkdir /tmp/kubesched_setup -cd /tmp/kubesched_setup +export HOME=/root -# Defining topology variables -if [[ $SELF_NODE_NUMBER == "" ]]; then - echo " - No Self Node Number found in topology" - exit 1 -fi +export PATH=/usr/local/bin:$PATH -if [[ $SELF_IP_ADDRESS == "" ]]; then - echo " - No Self IP address found in topology for node $SELF_NODE_NUMBER" - exit 2 -fi +echo `date +"%Y-%m-%d %H:%M:%S"`" + Searching for failed PODs" \ + >> /var/log/kubernetes/kube-houskeeping.log + +for failedPod in `kubectl get pods --all-namespaces --field-selector 'status.phase=Failed' -o name`; do + + echo `date +"%Y-%m-%d %H:%M:%S"`"Found failed POD $failedPod" \ + >> /var/log/kubernetes/kube-houskeeping.log + + echo `date +"%Y-%m-%d %H:%M:%S"`"------------- POD $failedPod details are as follows : ---------------" \ + >> /var/log/kubernetes/kube-houskeeping.log + + kubectl get $failedPod -o json \ + >> /var/log/kubernetes/kube-houskeeping.log + + echo `date +"%Y-%m-%d %H:%M:%S"`"Now deleting POD $failedPod" \ + >> /var/log/kubernetes/kube-houskeeping.log + + kubectl delete $failedPod \ + >> /var/log/kubernetes/kube-houskeeping.log + +done -rm -Rf /tmp/kubesched_setup \ No newline at end of file +delete_kube_housekeeping_lock_file \ No newline at end of file diff --git a/services_setup/kube-master/setup-kubectrl.sh b/services_setup/kube-master/setup-kubectrl.sh deleted file mode 100644 index a0d9f676..00000000 --- a/services_setup/kube-master/setup-kubectrl.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash - -# -# This file is part of the eskimo project referenced at www.eskimo.sh. The licensing information below apply just as -# well to this individual file than to the Eskimo Project as a whole. -# -# Copyright 2019 - 2022 eskimo.sh / https://www.eskimo.sh - All rights reserved. -# Author : eskimo.sh / https://www.eskimo.sh -# -# Eskimo is available under a dual licensing model : commercial and GNU AGPL. -# If you did not acquire a commercial licence for Eskimo, you can still use it and consider it free software under the -# terms of the GNU Affero Public License. You can redistribute it and/or modify it under the terms of the GNU Affero -# Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) -# any later version. -# Compliance to each and every aspect of the GNU Affero Public License is mandatory for users who did no acquire a -# commercial license. -# -# Eskimo is distributed as a free software under GNU AGPL in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Affero Public License for more details. -# -# You should have received a copy of the GNU Affero Public License along with Eskimo. If not, -# see or write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA, 02110-1301 USA. -# -# You can be released from the requirements of the license by purchasing a commercial license. Buying such a -# commercial license is mandatory as soon as : -# - you develop activities involving Eskimo without disclosing the source code of your own product, software, -# platform, use cases or scripts. -# - you deploy eskimo as part of a commercial product, platform or software. -# For more information, please contact eskimo.sh at https://www.eskimo.sh -# -# The above copyright notice and this licensing notice shall be included in all copies or substantial portions of the -# Software. -# - -echoerr() { echo "$@" 1>&2; } - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -. $SCRIPT_DIR/common.sh "$@" - -# CHange current folder to script dir (important !) -cd $SCRIPT_DIR || exit 199 - -# Loading topology -if [[ ! -f /etc/k8s/env.sh ]]; then - echo "Could not find /etc/k8s/env.sh" - exit 1 -fi - -. /etc/k8s/env.sh - -sudo rm -Rf /tmp/kubectrl_setup -mkdir /tmp/kubectrl_setup -cd /tmp/kubectrl_setup - -# Defining topology variables -if [[ $SELF_NODE_NUMBER == "" ]]; then - echo " - No Self Node Number found in topology" - exit 1 -fi - -if [[ $SELF_IP_ADDRESS == "" ]]; then - echo " - No Self IP address found in topology for node $SELF_NODE_NUMBER" - exit 2 -fi - -rm -Rf /tmp/kubectrl_setup \ No newline at end of file diff --git a/services_setup/kube-master/setup.sh b/services_setup/kube-master/setup.sh index 9214c7ac..290e76a6 100644 --- a/services_setup/kube-master/setup.sh +++ b/services_setup/kube-master/setup.sh @@ -121,12 +121,6 @@ fail_if_error $? /dev/null 304 bash ./setup-kubeapi.sh fail_if_error $? /dev/null 305 -bash ./setup-kubectrl.sh -fail_if_error $? /dev/null 306 - -bash ./setup-kubesched.sh -fail_if_error $? /dev/null 307 - bash ./setup-kube-services.sh fail_if_error $? /dev/null 308 @@ -138,5 +132,17 @@ sudo chmod 755 /usr/local/sbin/start-kube-master.sh sudo cp stop-kube-master.sh /usr/local/sbin/ sudo chmod 755 /usr/local/sbin/stop-kube-master.sh +echo " - Copying kube-housekeeping.sh to /usr/local/sbin" +sudo cp kube-housekeeping.sh /usr/local/sbin/ +sudo chmod 755 /usr/local/sbin/kube-housekeeping.sh + +if [[ `sudo crontab -u root -l 2>/dev/null | grep kube-housekeeping.sh` == "" ]]; then + echo " - Scheduling periodic execution of kube-housekeeping.sh using crontab" + sudo rm -f /tmp/crontab + sudo bash -c "crontab -u root -l >> /tmp/crontab 2>/dev/null" + sudo bash -c "echo \"* * * * * /bin/bash /usr/local/sbin/kube-housekeeping.sh\" >> /tmp/crontab" + sudo crontab -u root /tmp/crontab +fi + echo " - Installing and checking systemd service file" install_and_check_service_file kube-master k8s-master_install_log