From 299227c0aad4c468ab9065885158b2a48b546379 Mon Sep 17 00:00:00 2001 From: Maarten Beeckmans Date: Thu, 30 Jun 2022 15:54:24 +0200 Subject: [PATCH] Add check_patroni script --- README.md | 5 ++ build.txt | 1 + check_patroni.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 check_patroni.py diff --git a/README.md b/README.md index fcc0c21..5522986 100644 --- a/README.md +++ b/README.md @@ -468,6 +468,11 @@ An example Vagrant project has been included to get you started right away. Maarten Beeckmans upstream + + check_patroni + Maarten Beeckmans + + ### Contributions diff --git a/build.txt b/build.txt index 3a76c7d..2a7bacf 100644 --- a/build.txt +++ b/build.txt @@ -113,4 +113,5 @@ check_ceph_osd_db 1.0.0 check_ceph_osd_df 1.0.0 check_ceph_rgw 1.5.1 check_ceph_rgw_api 1.7.2 +check_patroni.py 0.1 # vim: set ts=2 sw=2 et : # diff --git a/check_patroni.py b/check_patroni.py new file mode 100644 index 0000000..15c32ef --- /dev/null +++ b/check_patroni.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2022 Maarten Beeckmans + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of mosquitto nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
'''
check_patroni.py - v0.1 - Copyright (c) 2022 Maarten Beeckmans

Nagios check script for checking patroni status and cluster lag

OPTIONS:

-s : scheme used to connect to the patroni api (default http)
-H : hostname/ip of the patroni server we want to query (default localhost)
-p : tcp port patroni api is listening on (default 8008)
-w : replication lag between primary and replica, to warn on (default 50)
-c : replication lag between primary and replica, to crit on (default 100)

EXAMPLE: ./check_patroni.py -H localhost -p 8008 -w 50 -c 100

'''

import sys
import argparse
import requests

# Constants
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3

# Seconds to wait for the patroni api; without a timeout requests blocks
# forever and the monitoring system never gets an answer from this check.
REQUEST_TIMEOUT = 10

# Member roles as reported by the patroni /cluster endpoint.
# NOTE(review): role names taken from the patroni REST API documentation;
# confirm against the patroni version in use.
LEADER_ROLES = ("leader", "standby_leader")
REPLICA_ROLES = ("replica", "sync_standby", "quorum_standby")


def main(input_args):
    '''Run the patroni check and exit with the matching Nagios status.

    input_args: list of command line arguments, without the program name.

    This function does not return. It always ends in sys.exit() with one of
    OK, WARNING, CRITICAL or UNKNOWN. UNKNOWN is used when the api cannot be
    reached or answers with something this script cannot interpret.
    '''
    args = _parse_args(input_args)
    baseurl = f"{args.scheme}://{args.host}:{args.port}"

    # OK < WARNING < CRITICAL, so the worst finding is simply the maximum.
    status = OK

    if get_status_code(baseurl + "/health") != 200:
        print("Postgresql is not running")
        status = CRITICAL

    response = _fetch(baseurl + "/cluster")
    if response.status_code != 200:
        print('cluster not healthy, exiting')
        sys.exit(UNKNOWN)

    try:
        members = response.json()['members']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: JSON without a
        # "members" entry.
        print('Unexpected answer from the patroni cluster endpoint')
        sys.exit(UNKNOWN)

    # A default is required here: a bare next() raises StopIteration when the
    # cluster has no leader, and the resulting traceback exits with code 1,
    # which Nagios would read as WARNING.
    leader = next((m for m in members if m.get("role") in LEADER_ROLES), None)
    if leader is None:
        print("No leader found in the cluster")
        sys.exit(CRITICAL)

    if leader.get("state") != "running":
        print(f'Leader ({leader.get("host")}) is not running')
        print()
        status = CRITICAL

    for replica in (m for m in members if m.get("role") in REPLICA_ROLES):
        replica_status = _check_replica(
            replica, leader, args.warning_lag, args.critical_lag)
        status = max(status, replica_status)

    sys.exit(status)


def _parse_args(input_args):
    '''Parse the command line arguments and return the argparse namespace.'''
    parser = argparse.ArgumentParser(
        description="Nagios check script for checking patroni")
    parser.add_argument("-s", "--scheme",
                        dest="scheme", default="http",
                        help="Scheme used to connect to the patroni api. "
                             "http or https")
    parser.add_argument("-H", "--host",
                        dest="host", default="localhost",
                        help="Hostname/ip of the patroni server we want to query")
    parser.add_argument("-p", "--port",
                        dest="port", default="8008",
                        help="Tcp port patroni api is listening on")
    parser.add_argument("-c", "--critical-lag",
                        dest="critical_lag", default=100, type=int,
                        help="replication lag between primary and replica, "
                             "to crit on")
    parser.add_argument("-w", "--warning-lag",
                        dest="warning_lag", default=50, type=int,
                        help="replication lag between primary and replica, "
                             "to warn on")
    return parser.parse_args(input_args)


def _check_replica(replica, leader, warning_lag, critical_lag):
    '''Print the findings for one replica and return its Nagios status.

    replica, leader: member dicts taken from the /cluster answer.
    warning_lag, critical_lag: lag thresholds from the command line.

    Returns OK, WARNING or CRITICAL.
    '''
    host = replica.get("host")
    status = OK

    # .get() on both sides: a member may lack the "timeline" key, and a plain
    # lookup would end the check with a KeyError.
    if replica.get("timeline") != leader.get("timeline"):
        print(f'Replica ({host}) timeline "{replica.get("timeline")}" different')
        print(f' from leader ({leader.get("host")}) timeline "{leader.get("timeline")}"')
        print()
        status = CRITICAL

    lag = replica.get("lag")
    # The lag may be absent or non-numeric; comparing such a value with the
    # integer thresholds would raise TypeError.
    # NOTE(review): treated as CRITICAL because the replica cannot be judged;
    # confirm this is the desired severity.
    if isinstance(lag, bool) or not isinstance(lag, (int, float)):
        print(f'CRIT: Replica ({host}) lag is not available ({lag!r})')
        print()
        return CRITICAL

    if lag >= critical_lag:
        print(f'CRIT: Replica ({host}) lag {lag} >= {critical_lag}')
        print()
        return CRITICAL
    if lag >= warning_lag:
        print(f'WARN: Replica ({host}) lag {lag} >= {warning_lag}')
        print()
        return max(status, WARNING)

    print(f'Replica ({host}) has a lag {lag}')
    print()
    return status


def _fetch(url):
    '''GET url and return the response; exit UNKNOWN if the request fails.'''
    try:
        return requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # RequestException also covers timeouts, which the narrower
        # ConnectionError does not.
        print(err)
        sys.exit(UNKNOWN)


def get_status_code(url):
    '''Gets the http status code of a given url.

    Exits the script with UNKNOWN when the request itself fails.
    '''
    return _fetch(url).status_code


if __name__ == "__main__":
    main(sys.argv[1:])