-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathost_stats.py
executable file
·89 lines (73 loc) · 2.62 KB
/
ost_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python3.9
# Script to spit out lustre stats towards an influxdb server
# Andrew Elwell <[email protected]>, Sept 2017. Available under GPL2+
# Hints from http://wiki.lustre.org/Lustre_Monitoring_and_Statistics_Guide
import requests
import os
import socket
import time
import subprocess
import daemon
# InfluxDB connection settings. The server has authentication enabled, so a
# username and password must accompany every write.
# Each value may be overridden from the environment (INFLUX_USERNAME,
# INFLUX_PASSWORD, INFLUX_URL) so real credentials need not be committed to
# this file; the literals below are only the fallbacks.
username = os.environ.get('INFLUX_USERNAME', 'FIXME')
password = os.environ.get('INFLUX_PASSWORD', 'FIXME')
url = os.environ.get('INFLUX_URL', 'https://influx.example.com:8086')
# Name of this machine, attached as the `host` tag on the loadavg measurement.
hostname = socket.gethostname()
# True: run a single collection in the foreground and exit.
# False: detach as a daemon and collect in a loop.
debug = True
def _lctl_get_param(pattern):
    """Run ``lctl get_param <pattern>`` and return its output as a list of lines."""
    raw = subprocess.check_output(["lctl", "get_param", pattern])
    return raw.decode('UTF-8').splitlines()


def _split_target(target):
    """Split a ``<fs>-<ost>`` target name into its ``(fs, ost)`` parts.

    Raises ValueError when the name contains no hyphen.
    """
    # Split on the LAST hyphen so a hyphen inside the filesystem part does not
    # break the two-way unpack (assumes the OST part itself has none).
    fs, ost = target.rsplit("-", 1)
    return fs, ost


def _snapshot_to_ns(value):
    """Turn a ``secs.fraction`` timestamp string into nanosecond-integer text.

    The fraction is padded (or cut) to nine digits, so a nine-digit fraction
    gives the same result as simply deleting the dot, while a shorter one
    (e.g. microseconds) is scaled up instead of yielding a too-small number.
    """
    secs, _, frac = value.partition(".")
    return secs + frac.ljust(9, "0")[:9]


def _space_points(lines, ts):
    """Build one ``usage`` point per OST from ``kbytes*`` parameter lines.

    Each line is expected to look like ``<x>.<fs>-<ost>.<metric>=<value>``.
    Metrics are grouped by (fs, ost) so that a server exporting several OSTs
    produces one correctly tagged point for each of them.

    Returns ``(points, fs)`` where ``fs`` is the filesystem name taken from
    the last line parsed, or None when there were no lines.
    """
    per_ost = {}
    fs = None
    for line in lines:
        if not line.strip():
            continue
        name, val = line.split("=", 1)
        _, target, metric = name.split(".", 2)
        fs, ost = _split_target(target)
        per_ost.setdefault((fs, ost), {})[metric] = val

    points = []
    for (ost_fs, ost), metrics in per_ost.items():
        joined = ','.join(f'{key}={value}' for key, value in metrics.items())
        points.append(f'usage,fs={ost_fs},ost={ost} {joined} {ts}\n')
    return points, fs


def _stats_points(lines, ts):
    """Build one ``usage`` point per OST from ``obdfilter.*.stats`` output.

    The input is a sequence of sections: a header line ending in ``stats=``
    followed by counter lines. For every ``*_bytes`` counter the 7th column
    (the byte sum) becomes a field; a ``snapshot_time`` line replaces the
    timestamp used for that section and any later section lacking its own.
    All other counters are ignored.

    Returns ``(points, fs, ts)``: the formatted points, the filesystem name
    from the last header seen (None if there was none) and the last
    timestamp in effect.
    """
    points = []
    fs = None
    prefix = None
    fields = []
    for line in lines:
        if line.endswith("stats="):
            # A new section starts: emit the one just finished, but only if
            # it collected any fields (an empty field set is not valid).
            if prefix is not None and fields:
                points.append(f'{prefix} {",".join(fields)} {ts}\n')
            fs, ost = _split_target(line.split(".", 2)[1])
            prefix = f'usage,fs={fs},ost={ost}'
            fields = []
            continue

        parts = line.split()
        if not parts:
            continue
        if "_bytes" in line:
            # Columns: name, count, "samples", unit, min, max, sum[, sumsq].
            # Index by position so both the 7- and 8-column forms are accepted.
            if len(parts) >= 7:
                fields.append(f'{parts[0]}={parts[6]}')
        elif parts[0] == "snapshot_time" and len(parts) >= 2:
            ts = _snapshot_to_ns(parts[1])

    if prefix is not None and fields:
        points.append(f'{prefix} {",".join(fields)} {ts}\n')
    return points, fs, ts


def grabbit(timeout=10):
    """Collect OST space, OST byte counters and load average, and POST them.

    Runs ``lctl get_param`` twice (``obdfilter.*OST*.kbytes*`` and
    ``obdfilter.*.stats``), formats the results as ``usage`` points tagged
    with ``fs`` and ``ost``, appends a ``loadavg`` point tagged with ``fs``
    and ``host``, and sends everything in one request to
    ``<url>/write?db=lustre`` using the module-level credentials.

    Args:
        timeout: seconds to wait for the HTTP request before giving up.

    Raises:
        subprocess.CalledProcessError: if ``lctl`` exits non-zero.
        ValueError: if a parameter line does not have the expected shape.
        requests.RequestException: on connection failure or timeout.

    A non-success HTTP status does not raise; it is reported on stderr.
    Nothing is sent when no OST was found in either ``lctl`` output.
    """
    now_ns = time.time_ns()

    # Overall OST space
    points, fs = _space_points(_lctl_get_param("obdfilter.*OST*.kbytes*"), now_ns)

    # Operations per OST. Read and write data is particularly interesting
    stat_points, stats_fs, last_ts = _stats_points(
        _lctl_get_param("obdfilter.*.stats"), now_ns)
    points.extend(stat_points)
    if stats_fs is not None:
        fs = stats_fs

    if fs is None:
        # No OST seen at all: there is no filesystem to tag the load with and
        # nothing else to report.
        return

    # we may as well grab loadavg at the same time
    load1, load5, load15 = os.getloadavg()
    points.append(
        f'loadavg,fs={fs},host={hostname} '
        f'load1={load1},load5={load5},load15={load15} {last_ts}\n')

    # A timeout keeps a stalled server from blocking the collector forever.
    response = requests.post(url + '/write?db=lustre', data=''.join(points),
                             auth=(username, password), timeout=timeout)
    if not response.ok:
        import sys
        print(f'influx write failed: HTTP {response.status_code} {response.text}',
              file=sys.stderr)
# Entry point: with `debug` set, collect once in the foreground; otherwise
# detach from the terminal and collect every 10 seconds until something fails.
if debug:
    grabbit()
else:
    import sys

    with daemon.DaemonContext():
        while True:
            try:
                grabbit()
            except Exception as err:
                # Catch real errors only. A bare `except:` would also trap
                # SystemExit and KeyboardInterrupt, turning a requested
                # shutdown that arrives mid-collection into this error exit.
                # The cause is included so the failure can be diagnosed.
                sys.exit(f"Whoa, that went a bit Pete Tong! ({err!r})")
            time.sleep(10)