Skip to content

Commit

Permalink
Implemented kubernetes housekeeping script
Browse files Browse the repository at this point in the history
  • Loading branch information
eskimo committed Jul 7, 2022
1 parent 38d59e1 commit 97930af
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 96 deletions.
2 changes: 1 addition & 1 deletion packages_dev/common/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ export SPARK_STREAMING_KAFKA_CON_VERSION=0-10
export SPARK_STREAMING_KAFKA_CLIENT_VERSION=2.0.0
export SPARK_UNUSED_VERSION=1.0.0

export EGMI_VERSION=0.2.0
export EGMI_VERSION=0.2.1

export GRAFANA_VERSION=8.5.2
export PROMETHEUS_VERSION=2.35.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,34 +36,53 @@

# Print all arguments on standard error instead of standard output.
echoerr() {
    echo "$@" >&2
}

# Resolve the absolute directory that holds this script, independently of the
# directory the caller started it from.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Source common.sh from that directory, forwarding this script's arguments.
# The path is quoted so that a directory name containing whitespace still works.
. "$SCRIPT_DIR/common.sh" "$@"

# Change current folder to script dir (important !)
cd "$SCRIPT_DIR" || exit 199
# Release the housekeeping lock by deleting its marker file.
# Succeeds silently when the marker is already gone.
delete_kube_housekeeping_lock_file() {
    local lock_path=/var/lib/kubernetes/kube_houskeeping_lock
    rm -Rf "$lock_path"
}

# Loading topology
if [[ ! -f /etc/k8s/env.sh ]]; then
echo "Could not find /etc/k8s/env.sh"
exit 1
# From here we will be messing with kubernetes and hence we need to take a lock
mkdir -p /var/lib/kubernetes/

# Take the lock atomically: with noclobber set, the redirection fails when the
# file already exists, so "check" and "create" are a single step and two
# instances started at the same moment cannot both get past this point.
if ! ( set -o noclobber; : > /var/lib/kubernetes/kube_houskeeping_lock ) 2>/dev/null; then
    if [[ -e /var/lib/kubernetes/kube_houskeeping_lock ]]; then
        echo "$(date +"%Y-%m-%d %H:%M:%S") - kube-housekeeping.sh is in execution already. Skipping ..."
        exit 0
    fi
    echo "$(date +"%Y-%m-%d %H:%M:%S") - Could not create /var/lib/kubernetes/kube_houskeeping_lock" >&2
    exit 1
fi

# The traps are installed only once the lock is ours, so an instance that was
# skipped above never removes the lock of the instance that is running.
# The lock is released on EXIT only: an ERR trap would drop it after any
# failing command while the script keeps running unprotected.
trap delete_kube_housekeeping_lock_file EXIT
# A handler that merely cleans up would let the script resume after SIGTERM;
# exit instead (128 + 15) and let the EXIT trap release the lock.
trap 'exit 143' TERM


# Load the definitions of /etc/k8s/env.sh into the current shell.
echo " + Sourcing kubernetes environment"
. /etc/k8s/env.sh

# Recreate an empty working directory under /tmp and move into it.
sudo rm -Rf /tmp/kubesched_setup
mkdir /tmp/kubesched_setup
cd /tmp/kubesched_setup
# Force HOME to /root.
# NOTE(review): presumably so that kubectl picks up root's configuration when
# started from cron - confirm.
export HOME=/root

# Defining topology variables
# Abort with status 1 when SELF_NODE_NUMBER is empty or unset.
# NOTE(review): the variable is not assigned in the visible code; presumably
# it comes from a sourced file - confirm.
if [[ $SELF_NODE_NUMBER == "" ]]; then
echo " - No Self Node Number found in topology"
exit 1
fi
# Put /usr/local/bin first on PATH so that binaries there take precedence.
export PATH=/usr/local/bin:$PATH

# Abort with status 2 when SELF_IP_ADDRESS is empty or unset.
if [[ $SELF_IP_ADDRESS == "" ]]; then
echo " - No Self IP address found in topology for node $SELF_NODE_NUMBER"
exit 2
fi

# Housekeeping log file. The default is the historical location; the path can
# be overridden through the environment.
KUBE_HOUSEKEEPING_LOG="${KUBE_HOUSEKEEPING_LOG:-/var/log/kubernetes/kube-houskeeping.log}"

# Append one line to the housekeeping log, prefixed with the current timestamp.
# Arguments: the message words (joined with single spaces).
log_housekeeping() {
    printf '%s %s\n' "$(date +"%Y-%m-%d %H:%M:%S")" "$*" >> "$KUBE_HOUSEKEEPING_LOG"
}

# Search every namespace for pods in phase "Failed"; for each one, dump its
# JSON definition to the housekeeping log and then delete it.
purge_failed_pods() {
    local pod_namespace pod_name

    log_housekeeping "+ Searching for failed PODs"

    # "-o name" prints "pod/<name>" without the namespace, so a later get or
    # delete would only ever act on the current namespace. List namespace and
    # name explicitly and pass both to every subsequent kubectl call.
    while read -r pod_namespace pod_name; do
        [[ -n "$pod_name" ]] || continue

        log_housekeeping "Found failed POD $pod_namespace/$pod_name"
        log_housekeeping "------------- POD $pod_namespace/$pod_name details are as follows : ---------------"

        # stdin is redirected so that kubectl can never consume the pod list
        # feeding this loop.
        kubectl get pod "$pod_name" --namespace "$pod_namespace" -o json \
            < /dev/null >> "$KUBE_HOUSEKEEPING_LOG"

        log_housekeeping "Now deleting POD $pod_namespace/$pod_name"

        kubectl delete pod "$pod_name" --namespace "$pod_namespace" \
            < /dev/null >> "$KUBE_HOUSEKEEPING_LOG"
    done < <(kubectl get pods --all-namespaces \
        --field-selector 'status.phase=Failed' \
        -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name' \
        --no-headers)
}

purge_failed_pods

# Remove the temporary working directory.
rm -Rf /tmp/kubesched_setup
# Delete the housekeeping lock file now that the run is over.
delete_kube_housekeeping_lock_file
68 changes: 0 additions & 68 deletions services_setup/kube-master/setup-kubectrl.sh

This file was deleted.

18 changes: 12 additions & 6 deletions services_setup/kube-master/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,6 @@ fail_if_error $? /dev/null 304
bash ./setup-kubeapi.sh
fail_if_error $? /dev/null 305

bash ./setup-kubectrl.sh
fail_if_error $? /dev/null 306

bash ./setup-kubesched.sh
fail_if_error $? /dev/null 307

bash ./setup-kube-services.sh
fail_if_error $? /dev/null 308

Expand All @@ -138,5 +132,17 @@ sudo chmod 755 /usr/local/sbin/start-kube-master.sh
sudo cp stop-kube-master.sh /usr/local/sbin/
sudo chmod 755 /usr/local/sbin/stop-kube-master.sh

echo " - Copying kube-housekeeping.sh to /usr/local/sbin"
sudo cp kube-housekeeping.sh /usr/local/sbin/
sudo chmod 755 /usr/local/sbin/kube-housekeeping.sh

# Register kube-housekeeping.sh in root's crontab (every minute) unless an
# entry mentioning it is already present, so re-running the setup never
# duplicates the line.
if ! sudo crontab -u root -l 2>/dev/null | grep -qF kube-housekeeping.sh; then
    echo " - Scheduling periodic execution of kube-housekeeping.sh using crontab"
    # Feed the new table to crontab on stdin instead of staging it in a
    # predictable, root-written /tmp/crontab (symlink/race risk, and the file
    # was never removed afterwards). Existing entries are kept; a missing
    # crontab simply contributes nothing.
    {
        sudo crontab -u root -l 2>/dev/null
        echo "* * * * * /bin/bash /usr/local/sbin/kube-housekeeping.sh"
    } | sudo crontab -u root -
fi

echo " - Installing and checking systemd service file"
install_and_check_service_file kube-master k8s-master_install_log

0 comments on commit 97930af

Please sign in to comment.