#!/bin/bash
#
# Entry point of hprofiler (Hadoop Profiler).
#
# Author: Joeri Hermans
# Version: 0.1
# Since: 17 March 2016
#
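#
# Example invocation (hypothetical job id and ResourceManager address):
#   ./hprofiler.sh -j job_1458198061416_0001 -c http://resourcemanager:8088 -t 10 -e
#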
## BEGIN Functions. ####################################################
function usage {
cat <<-END >&2
hprofiler 0.1.0
Basic usage:
./hprofiler -j [job id]            Target a specific Hadoop job on the cluster.
./hprofiler -h [hosts]             Target specific hosts running YARN containers.
./hprofiler -r                     Remove all old profiling files.
Options:
-f [Hz]   sampling frequency       Sets the sampling frequency of the profiler, default 99 Hz.
-t [s]    sampling duration        Sets the sampling duration when sampling a mapper, default 5 seconds.
-c [url]  cluster address          Sets the YARN REST API address.
-i        I/O                      Profile context switches to identify I/O methods.
-o [dir]  output directory         Sets the directory where the results are written, default ".".
-d        detect anomalies         Identifies nodes which are inherently different from the majority.
-e        extract Hadoop           Aggregates the FlameGraphs such that only the Hadoop part is visualized.
END
exit
}
## END Functions. ######################################################
# Initialize the default profiling parameters.
sampling_duration=5
sampling_frequency=99
job=""
hosts=""
num_hosts=0
cluster=""
enable_io=false
extract_hadoop=false
detect_anomalies=false
output_directory=.
# Parse the arguments specified by the user.
while getopts "o:j:h:f:t:c:irde" opt; do
    case ${opt} in
        j)
            job=${OPTARG}
            ;;
        h)
            IFS=',' read -ra hosts <<< "${OPTARG}"
            num_hosts=${#hosts[@]}
            ;;
        f)
            sampling_frequency=${OPTARG}
            ;;
        o)
            output_directory=${OPTARG}
            ;;
        t)
            sampling_duration=${OPTARG}
            ;;
        c)
            cluster=${OPTARG}
            ;;
        d)
            detect_anomalies=true
            ;;
        i)
            enable_io=true
            ;;
        r)
            rm -rf profiler_*
            rm -rf aggregated
            exit 0
            ;;
        e)
            extract_hadoop=true
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done
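# At this point ${hosts[@]} holds the hosts given via -h (if any); when it is
# empty, the host list is fetched from the YARN REST API below.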
# Clean up the old mess first :)
rm -rf profiler_*
rm -rf aggregated
# Check if a job identifier has been specified.
if [[ -z $job ]]; then
    echo "No job identifier has been specified (-j)."
    echo ""
    usage
    exit 1
fi
# Check if a sufficient number of hosts has been specified, else
# fetch the cluster nodes from YARN.
if [[ $num_hosts -eq 0 ]]; then
    # Check if a YARN address has been specified.
    if [[ -z $cluster ]]; then
        echo "No YARN REST API address has been specified (-c)."
        exit 1
    fi
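    # fetch_yarn_nodes.py is expected to print a single comma-separated list
    # of the node addresses running containers for the given job.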
    output=$(python2 src/fetch_yarn_nodes.py "$cluster" "$job")
    IFS="," read -ra hosts <<< "$output"
    num_hosts=${#hosts[@]}
    # Check if a sufficient number has been fetched from YARN.
    if [[ $num_hosts -eq 0 ]]; then
        echo "No nodes could be fetched from YARN with the specified parameters."
        exit 1
    fi
fi
# Create the output directory for the background processes.
# Guard against removing the working directory when the default "." is used.
if [[ $output_directory != "." ]]; then
    rm -rf "$output_directory"
fi
mkdir -p "$output_directory"
# Iterate through all acquired hosts, profiling each in a background process.
for i in "${hosts[@]}"; do
    bash src/host_executor.sh "$i" "$job" "$sampling_frequency" "$sampling_duration" "$enable_io" "$output_directory" &
done
# Wait for host processes to finish.
wait
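# Each host executor presumably writes its results into a profiler_<host>
# directory inside the output directory; those files are aggregated below.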
# Prepare for file aggregation.
current_directory=$(pwd -P)
cd "$output_directory"
rm -rf aggregated
mkdir aggregated
destination_file=aggregated/stackcollapse.data
# Collect the per-host stack data files; the list is intentionally left
# unquoted below so that it expands into separate arguments.
data_files=$(ls profiler_*/*/stackcollapse.data | tr "\n" " ")
python2 "$current_directory"/src/stack_aggregation.py $destination_file $data_files
"$current_directory"/src/flamegraph/flamegraph.pl --colors=java --hash < "$destination_file" > aggregated/flamegraph.svg
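# The aggregated FlameGraph is a standalone interactive SVG and can be opened
# directly in a browser, e.g.: xdg-open aggregated/flamegraph.svg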
# Clear the empty directories.
find profiler_* -type d -empty -delete
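# Select the FlameGraph count label and palette: I/O (context-switch) profiles
# are counted in nanoseconds, regular CPU profiles in samples.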
if [[ $enable_io == true ]]; then
    countname=ns
    color=io
else
    countname=samples
    color=java
fi
# Check if the Hadoop extraction needs to be done.
if [[ $extract_hadoop == true ]]; then
# Move into the aggregated folder.
cd aggregated
# Filter out non Hadoop related information.
cat stackcollapse.data | grep JavaThread::run | sed -r 's/^.{29}//' > stackcollapse_hadoop.data
# Generate the associated FlameGraph.
cat stackcollapse_hadoop.data | $current_directory/src/flamegraph/flamegraph.pl --countname=$countname --colors=$color --hash > aggregated/flamegraph_hadoop.svg
# Move back to the upper folder.
cd ..
fi
# Go back to the entry point (the output directory may not be a direct child
# of it, so return by absolute path rather than with "cd ..").
cd "$current_directory"
# Check if anomalies need to be detected.
if [[ $detect_anomalies == true ]]; then
    echo "Detecting anomalies..."
    echo "Listing anomalies:"
    echo "------------------"
    # Execute the outlier detection script, which identifies the majority
    # and anomaly sets in the results folder.
    python2 "$current_directory"/src/outlier_detection.py "$output_directory"
    echo "------------------"
    echo "Done"
fi