-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathculprits.sh
executable file
·116 lines (100 loc) · 3.85 KB
/
culprits.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
#
# Performs a simple Solr log analysis to find suspicious entries.
# Used to sanity check for long response times and queries that might
# lead to OOM.
#
###############################################################################
# CONFIG
###############################################################################
# All settings can be overridden from the environment. The expansions are
# quoted so that assigning a default never word-splits or glob-expands the
# resulting value as arguments to ':'.

# Log files to analyse. Defaults to the script arguments joined into a single
# string (by the first IFS character, a space by default). The string is
# word-split again when it is handed to zcat.
: "${LOGS:=$*}"
# Thresholds: a parameter whose value is greater than its limit is reported.
: "${ROWS_MAX:=1000}"
: "${START_MAX:=1000}"
: "${GROUP_LIMIT_MAX:=1000}"
: "${FACET_LIMIT_MAX:=1000}"
: "${FACET_OFFSET_MAX:=1000}"
# Used to filter log entries, e.g. "grep 2020-06-15" to only process logs from that date
: "${FILTER:=cat}"
# Prints the one-line usage text to stdout and terminates the shell.
# Arguments: $1 - exit status to terminate with
usage() {
  printf '%s\n' "Usage: ./culprits.sh <logfile> <logfile>*"
  # $1 is left unquoted on purpose: when no status is passed this becomes a
  # bare 'exit', which reuses the status of the previous command.
  # shellcheck disable=SC2086
  exit $1
}
# Verifies that at least one log file has been configured.
# Globals:   LOGS (read)
# Outputs:   an error message on stderr when LOGS is empty
# Returns:   0 when LOGS is non-empty; otherwise hands over to 'usage 2'
check_parameters() {
  # Nothing to complain about: return early.
  if [[ -n "$LOGS" ]]; then
    return 0
  fi
  echo "Error: No logfiles specified" 1>&2
  usage 2
}
################################################################################
# FUNCTIONS
################################################################################
# Succeeds (status 0) when the digit string $2 is strictly greater than the
# limit $1, fails otherwise.
# Arguments: $1 - limit, evaluated as a bash arithmetic value
#            $2 - candidate value; anything but a run of digits never exceeds
_grouped_print_exceeds() {
  local limit="$1" digits="$2"
  [[ "$digits" =~ ^[0-9]+$ ]] || return 1
  # Strip leading zeros: bash arithmetic treats "0123" as octal and rejects
  # "08"/"09" with an error, which would keep such values out of the report.
  digits="${digits#"${digits%%[!0]*}"}"
  digits="${digits:-0}"
  # More than 18 significant digits may not fit into 64-bit arithmetic and
  # could wrap around to a small number; such a value is treated as exceeding
  # the limit rather than being compared.
  if (( ${#digits} > 18 )); then
    return 0
  fi
  (( digits > limit ))
}

# Reads "name=value ..." lines from stdin and echoes them to stdout, inserting
# a blank line whenever the parameter name changes and appending a
# "***** SUSPICIOUS ... *****" marker to lines that break a rule. Ends with a
# blank line and the number of marked lines.
# Globals:   ROWS_MAX, START_MAX, GROUP_LIMIT_MAX, FACET_LIMIT_MAX,
#            FACET_OFFSET_MAX (read)
# Inputs:    stdin - one entry per line; the first whitespace-delimited token
#            is split on '=' into parameter name and value
# Outputs:   annotated report on stdout
grouped_print() {
  local warnings=0 seen_key=0 last_key=""
  local line pair key value note

  # '|| [[ -n ... ]]' keeps a final line that lacks a trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # Only the first token carries the pair; the rest is the instance count.
    pair="" key="" value=""
    read -r pair _ <<< "$line"
    IFS='=' read -r key value _ <<< "$pair"

    # Visually separate the groups of identical parameter names.
    if (( ! seen_key )) || [[ "$key" != "$last_key" ]]; then
      printf '\n'
      last_key="$key"
      seen_key=1
    fi

    note=""
    case "$key" in
      rows)
        _grouped_print_exceeds "$ROWS_MAX" "$value" \
          && note="Value $value > $ROWS_MAX"
        ;;
      start)
        _grouped_print_exceeds "$START_MAX" "$value" \
          && note="Value $value > $START_MAX"
        ;;
      *group.limit)
        _grouped_print_exceeds "$GROUP_LIMIT_MAX" "$value" \
          && note="Value $value > $GROUP_LIMIT_MAX"
        ;;
      *facet.limit)
        _grouped_print_exceeds "$FACET_LIMIT_MAX" "$value" \
          && note="Value $value > $FACET_LIMIT_MAX"
        ;;
      *facet.offset)
        _grouped_print_exceeds "$FACET_OFFSET_MAX" "$value" \
          && note="Value $value > $FACET_OFFSET_MAX"
        ;;
      # These names are reported regardless of their value.
      *group.size)
        note="group.size should probably be group.limit"
        ;;
      *facet.size)
        note="facet.size should probably be facet.limit"
        ;;
    esac

    if [[ -n "$note" ]]; then
      printf '%s\n' "${line} ***** SUSPICIOUS: ${note} *****"
      warnings=$((warnings + 1))
    else
      printf '%s\n' "${line}"
    fi
  done

  printf '\n'
  printf '%s\n' "- Total unique suspicious entries: $warnings"
}
# Collects every "name=<digits>" pair found in the logs and writes one line
# per unique pair, formatted "name=value (N instances)", to a temporary file.
# Pairs containing NOW, QTime or hits are left out. Lines are ordered by
# parameter name, then by value in descending numeric order.
# Globals:   LOGS (read)   - log files, handed to 'zcat -f'
#            FILTER (read) - command every log line is piped through
# Outputs:   the filename of the temporary file holding the stats (stdout)
# Returns:   non-zero when the temporary file cannot be created
base_numeric_stats() {
  local dest
  # Declared and assigned separately so a mktemp failure is not masked by
  # the exit status of 'local'.
  if ! dest=$(mktemp); then
    echo "Error: Unable to create temporary file" >&2
    return 1
  fi

  # LOGS and FILTER are deliberately unquoted: LOGS is a whitespace-separated
  # file list and FILTER is a command with its arguments, so both rely on
  # word splitting. The stream is already decompressed by zcat, hence plain
  # grep is sufficient for the extraction.
  # shellcheck disable=SC2086
  zcat -f $LOGS | $FILTER |
    grep -o "[a-zA-Z0-9_.-]\+=[0-9]\+" |
    grep -v "NOW\|QTime\|hits" |
    # Sort & uniqueify
    sort | uniq -c |
    # Swap count and param-pair for ordered output
    sed 's/[^0-9]*\([0-9]\+\) \(.*\)/\2 (\1 instances)/' |
    # sort by [primary param-name, secondary param-value] for even more ordered output
    sort -t= -k1,1 -k2rn > "$dest"

  echo "$dest"
}
# Prints the "Unique numeric params" heading followed by the grouped report
# that grouped_print produces for the given stats file.
# Arguments: $1 - file with the stats lines to report on
# Outputs:   heading and report on stdout
# Returns:   non-zero when the stats file cannot be opened; in that case no
#            report (and no misleading zero-warnings summary) is printed
numeric_param() {
  local stats_file="$1"
  echo "- Unique numeric params:"
  # Print grouped by param-name. The file is redirected instead of piped
  # through cat so an unreadable file is reported as a failure.
  grouped_print < "$stats_file"
}
# Runs the analysis: extracts the numeric stats into a temporary file, prints
# the report for it and removes the file again.
# Outputs:   progress line and report on stdout, errors on stderr
# Returns:   non-zero when the stats could not be extracted, otherwise the
#            status of the report step
find_culprits() {
  local stats_file status
  echo "- Extracting numeric stats"
  # Assigned separately from 'local' so a failing extraction is noticed; an
  # empty filename is treated as a failure as well.
  if ! stats_file=$(base_numeric_stats) || [[ -z "$stats_file" ]]; then
    echo "Error: Unable to extract numeric stats" >&2
    if [[ -n "$stats_file" ]]; then
      rm -f -- "$stats_file"
    fi
    return 1
  fi
  numeric_param "$stats_file"
  status=$?
  # Always clean up the temporary file, then report how the analysis went.
  rm -f -- "$stats_file"
  return "$status"
}
###############################################################################
# CODE
###############################################################################
# Entry point: validate the configuration, then run the analysis. The exit
# status of the script is the status of the analysis step.
main() {
  check_parameters "$@"
  find_culprits
}

main "$@"