-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathtool.cc
190 lines (156 loc) · 5.55 KB
/
tool.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
//==============================================================
// Copyright (C) Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================
#include <chrono>
#include <stack>
#include <omp-tools.h>
#include "omp_region_collector.h"
// Clock sample type used for the interval measurements in this file.
using TimePoint = std::chrono::time_point<std::chrono::steady_clock>;
// Per-thread stack of begin timestamps: PushTimestamp() pushes onto it and
// PopTimestamp() pops from it.
static thread_local std::stack<TimePoint> time_point;
// Region collector; created in Initialize() and deleted in Finalize().
static OmpRegionCollector* collector = nullptr;
// Timestamp taken at the end of Initialize(); PrintResults() reports the
// total execution time relative to it.
static std::chrono::steady_clock::time_point start;
// Internal Tool Functionality ////////////////////////////////////////////////
static void PushTimestamp() {
time_point.push(std::chrono::steady_clock::now());
}
static uint64_t PopTimestamp() {
TimePoint end = std::chrono::steady_clock::now();
PTI_ASSERT(time_point.size() > 0);
TimePoint start = time_point.top();
time_point.pop();
std::chrono::duration<uint64_t, std::nano> time = end - start;
return time.count();
}
// Callback registered for ompt_callback_parallel_begin in Initialize().
// Opens a timing interval for the region by pushing a timestamp; the interval
// is closed in ParallelEnd(). None of the callback arguments are used.
static void ParallelBegin(
    ompt_data_t* task_data,
    const ompt_frame_t* task_frame,
    ompt_data_t* parallel_data,
    unsigned int requested_parallelism,
    int flags,
    const void* codeptr_ra) {
  PushTimestamp();
}
// Callback registered for ompt_callback_parallel_end in Initialize().
// Closes the timing interval opened by ParallelBegin() and hands it to the
// collector as a REGION_TYPE_PARALLEL region, passing codeptr_ra (cast to an
// integer) as the first AddRegion() argument and 0 as the last one.
static void ParallelEnd(
    ompt_data_t* parallel_data,
    ompt_data_t* task_data,
    int flags,
    const void* codeptr_ra) {
  const uint64_t elapsed_ns = PopTimestamp();
  const uint64_t region_addr = reinterpret_cast<uint64_t>(codeptr_ra);

  PTI_ASSERT(collector != nullptr);
  collector->AddRegion(region_addr, REGION_TYPE_PARALLEL, elapsed_ns, 0);
}
// Callback registered for ompt_callback_target in Initialize().
// Only events whose kind is ompt_target are handled; every other kind is
// ignored. A begin endpoint pushes a timestamp; any other endpoint pops it and
// reports the elapsed time to the collector as a REGION_TYPE_TARGET region.
static void Target(
    ompt_target_t kind,
    ompt_scope_endpoint_t endpoint,
    int device_num,
    ompt_data_t* task_data,
    ompt_id_t target_id,
    const void* codeptr_ra) {
  if (kind != ompt_target) {
    return;
  }

  if (endpoint == ompt_scope_begin) {
    PushTimestamp();
    return;
  }

  // Any endpoint other than "begin" closes the interval.
  const uint64_t elapsed_ns = PopTimestamp();
  const uint64_t region_addr = reinterpret_cast<uint64_t>(codeptr_ra);

  PTI_ASSERT(collector != nullptr);
  collector->AddRegion(region_addr, REGION_TYPE_TARGET, elapsed_ns, 0);
}
// Callback registered for ompt_callback_target_data_op in Initialize().
// Only transfers to and from a device are timed; all other data operations
// are ignored. A begin endpoint pushes a timestamp; any other endpoint pops it
// and reports the elapsed time together with the transferred byte count as a
// REGION_TYPE_TRANSFER_TO_DEVICE or REGION_TYPE_TRANSFER_FROM_DEVICE region.
static void TargetDataOp(
    ompt_scope_endpoint_t endpoint,
    ompt_id_t target_id,
    ompt_id_t host_op_id,
    ompt_target_data_op_t optype,
    void *src_addr,
    int src_device_num,
    void *dest_addr,
    int dest_device_num,
    size_t bytes,
    const void *codeptr_ra) {
  const bool to_device = (optype == ompt_target_data_transfer_to_device);
  const bool from_device = (optype == ompt_target_data_transfer_from_device);
  if (!to_device && !from_device) {
    return;
  }

  if (endpoint == ompt_scope_begin) {
    PushTimestamp();
    return;
  }

  const uint64_t elapsed_ns = PopTimestamp();
  const uint64_t region_addr = reinterpret_cast<uint64_t>(codeptr_ra);

  PTI_ASSERT(collector != nullptr);
  if (to_device) {
    collector->AddRegion(
        region_addr, REGION_TYPE_TRANSFER_TO_DEVICE, elapsed_ns, bytes);
  } else {
    // The filter above guarantees this is a transfer from the device.
    collector->AddRegion(
        region_addr, REGION_TYPE_TRANSFER_FROM_DEVICE, elapsed_ns, bytes);
  }
}
static void PrintResults() {
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::chrono::duration<uint64_t, std::nano> time = end - start;
PTI_ASSERT(collector != nullptr);
const RegionMap& region_map = collector->GetRegionMap();
if (region_map.size() == 0) {
return;
}
uint64_t total_duration = 0;
for (auto& value : region_map) {
total_duration += value.second.total_time;
}
std::cerr << std::endl;
std::cerr << "=== OpenMP Timing Results: ===" << std::endl;
std::cerr << std::endl;
std::cerr << "Total Execution Time (ns): " << time.count() << std::endl;
std::cerr << "Total Region Time (ns): " << total_duration << std::endl;
std::cerr << std::endl;
if (total_duration > 0) {
OmpRegionCollector::PrintRegionTable(region_map);
}
std::cerr << std::endl;
}
// Tool initializer handed to the OpenMP runtime by ompt_start_tool().
// Resolves the runtime's "ompt_set_callback" entry point through `lookup`,
// registers the parallel-begin, parallel-end, target and target-data-op
// callbacks (each registration is checked to return ompt_set_always), creates
// the region collector and records the start timestamp.
// Returns 1 on success, or 0 after printing a warning when the
// "ompt_set_callback" entry point cannot be resolved.
static int Initialize(
    ompt_function_lookup_t lookup,
    int initial_device_num,
    ompt_data_t* data) {
  const ompt_set_callback_t set_callback =
      reinterpret_cast<ompt_set_callback_t>(lookup("ompt_set_callback"));
  if (set_callback == nullptr) {
    std::cerr << "[WARNING] Unable to create OpenMP region collector" <<
      std::endl;
    return 0;
  }

  const ompt_set_result_t parallel_begin_status = set_callback(
      ompt_callback_parallel_begin,
      reinterpret_cast<ompt_callback_t>(ParallelBegin));
  PTI_ASSERT(parallel_begin_status == ompt_set_always);

  const ompt_set_result_t parallel_end_status = set_callback(
      ompt_callback_parallel_end,
      reinterpret_cast<ompt_callback_t>(ParallelEnd));
  PTI_ASSERT(parallel_end_status == ompt_set_always);

  const ompt_set_result_t target_status = set_callback(
      ompt_callback_target,
      reinterpret_cast<ompt_callback_t>(Target));
  PTI_ASSERT(target_status == ompt_set_always);

  const ompt_set_result_t data_op_status = set_callback(
      ompt_callback_target_data_op,
      reinterpret_cast<ompt_callback_t>(TargetDataOp));
  PTI_ASSERT(data_op_status == ompt_set_always);

  // The collector must not exist yet; it is created exactly once here.
  PTI_ASSERT(collector == nullptr);
  collector = OmpRegionCollector::Create();
  PTI_ASSERT(collector != nullptr);

  // Start the overall execution timer only after setup is complete.
  start = std::chrono::steady_clock::now();
  return 1;
}
// Tool finalizer handed to the OpenMP runtime by ompt_start_tool().
// Prints the collected results, destroys the collector and frees the
// ompt_start_tool_result_t that ompt_start_tool() allocated and stored in
// its own tool_data.ptr.
static void Finalize(ompt_data_t* data) {
  if (collector != nullptr) {
    PrintResults();
    delete collector;
    // Reset the pointer so a late callback or a repeated Finalize() cannot
    // touch freed memory, and so Initialize()'s
    // PTI_ASSERT(collector == nullptr) precondition holds again.
    collector = nullptr;
  }

  // Release the start-tool result last.
  // NOTE(review): `data` presumably points at the tool_data member inside
  // this very object, so `data` must not be dereferenced after the delete.
  if (data->ptr != nullptr) {
    ompt_start_tool_result_t* result =
      static_cast<ompt_start_tool_result_t*>(data->ptr);
    delete result;
  }
}
// Internal Tool Interface ////////////////////////////////////////////////////
// OMPT tool entry point. Logs the runtime version string to std::cerr and
// returns a heap-allocated ompt_start_tool_result_t whose initialize and
// finalize members are Initialize() and Finalize(). The result stores a
// pointer to itself in tool_data.ptr so that Finalize() can free it.
ompt_start_tool_result_t* ompt_start_tool(
    unsigned int omp_version, const char* runtime_version) {
  // Inserting a null const char* into a stream is undefined behaviour (and in
  // practice puts std::cerr into a failed state, which would suppress all
  // later output of this tool), so substitute a placeholder.
  const char* version_text =
    (runtime_version != nullptr) ? runtime_version : "<unknown>";
  std::cerr << "[INFO] OMP Runtime Version: " << version_text << std::endl;

  ompt_start_tool_result_t* result = new ompt_start_tool_result_t;
  result->initialize = Initialize;
  result->finalize = Finalize;
  result->tool_data.ptr = result;
  return result;
}