# utils.py

import csv
import json
import math
import os
import time
from collections import namedtuple

import numpy as np
import psutil
import pyRAPL
import requests
import ipinfo
from matplotlib import pyplot as plt

from core.tracker import Tracker
from core.rapl_config import rapl_reader

# Grid carbon intensity in gCO₂eq/kWh; populated by get_latest_carbon_intensity().
carbon_intensity = 0

# Mirrors the fields of psutil's per-disk IO counters on Linux, so that counter
# differences can be stored in the same shape as the raw snapshots.
LinuxDiskIOStats = namedtuple('DiskIOStats',
                              ['read_count', 'write_count', 'read_bytes', 'write_bytes',
                               'read_time', 'write_time', 'read_merged_count',
                               'write_merged_count', 'busy_time'])


def print_tracker_results(job, tracker_results):
    """Print the energy, carbon, and duration statistics collected by the tracker."""
    print("Results for " + job + ":")
    if tracker_results['dram_total_energy'] != 0:
        print("Total energy consumed by RAM is: " + str(tracker_results['dram_total_energy']) + " J")
        print("Average energy consumed by RAM is: " + str(tracker_results['avg_dram_energy']) + " J/sec")
        # Carbon is derived from the total energy, matching the CPU calculation
        # below and the CSV export.
        print("Total carbon emitted by RAM is: " + str(
            calculate_kwh(tracker_results['dram_total_energy']) * carbon_intensity) + " gCO₂eq")
    print("Total energy consumed by CPU is " + str(tracker_results['total_cpu_energy']) + " J")
    print("Average energy consumed by CPU is " + str(tracker_results['avg_cpu_energy']) + " J/sec")
    print("Total carbon emitted by CPU is " + str(
        calculate_kwh(tracker_results['total_cpu_energy']) * carbon_intensity) + " gCO₂eq")
    for i in range(len(tracker_results['cpu_total_energy_per_Soc'])):
        print("Total energy consumed by package " + str(i) + " is " + str(
            tracker_results['cpu_total_energy_per_Soc'][i]) + " J")
        print("Average energy consumed by package " + str(i) + " is " + str(
            tracker_results['avg_cpu_energy_per_Soc'][i]) + " J/sec")
        print("Total carbon emitted by package " + str(i) + " is " + str(
            calculate_kwh(tracker_results['cpu_total_energy_per_Soc'][i]) * carbon_intensity) + " gCO₂eq")
    print("Total execution time is: " + str(tracker_results['total_duration']) + " sec")


def get_ip():
    response = requests.get('https://api64.ipify.org?format=json').json()
    return response["ip"]


def get_location():
    """Resolve the machine's public IP to a geographic location via ipinfo."""
    ip_address = get_ip()
    access_token = '23d3ea0900771d'  # ipinfo token; ideally read from configuration
    handler = ipinfo.getHandler(access_token)
    details = handler.getDetails(ip_address)
    location_data = {
        "ip": ip_address,
        "city": details.city,
        "country": details.country,
        "latitude": details.latitude,
        "longitude": details.longitude
    }
    return location_data


def get_latest_carbon_intensity():
    """Fetch the latest grid carbon intensity (gCO₂eq/kWh) for the machine's
    location from the Electricity Maps API and store it in the module-level
    carbon_intensity global."""
    location = get_location()
    print(location)
    latitude = location.get("latitude")
    longitude = location.get("longitude")
    url = ("https://api-access.electricitymaps.com/free-tier/carbon-intensity/latest"
           "?lat=" + str(latitude) + "&lon=" + str(longitude))
    headers = {
        "auth-token": "9oKv71Ozq9kcZSOw9UrFK8UZL9ahMNXM"  # Electricity Maps free-tier token
    }
    response = requests.get(url, headers=headers)
    print(response.text)
    data = json.loads(response.text)
    global carbon_intensity
    carbon_intensity = data.get("carbonIntensity")


def calculate_kwh(energy_uJ):
    """Convert energy from microjoules to kilowatt-hours (1 kWh = 3.6e12 μJ)."""
    power_kwh = energy_uJ / (3.6 * 10 ** 12)
    return power_kwh
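
# Worked example of the conversion (illustrative numbers): an energy reading of
# 7.2e9 μJ is 7200 J, i.e. 7.2e9 / 3.6e12 = 0.002 kWh; at a grid intensity of
# 400 gCO₂eq/kWh that run would emit 0.002 * 400 = 0.8 gCO₂eq.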


def initializeOutput(benchmark, database):
    """Create the results CSV for this benchmark/database pair and write its header row."""
    csv_header = ['Label', 'dram_avgE(J/sec)', 'dram_totalE(J)', 'dram_carbon(gCO₂eq)',
                  'cpu_avgE(J/sec)', 'cpu_totalE(J)', 'cpu_carbon(gCO₂eq)']
    if not os.path.exists(benchmark.path + "results/csv/"):
        os.makedirs(benchmark.path + "results/csv/")
    with open(benchmark.path + "results/csv/" + benchmark.type + "_" + benchmark.sf + "_" + database + ".csv",
              'w', encoding='UTF8') as f:
        # One column triple per physical CPU package reported by RAPL
        cpu_counter = 0
        for package in rapl_reader.packages:
            if 'package' not in package.name:
                continue
            csv_header.append('cpu_' + str(cpu_counter) + '_avgE(J/sec)')
            csv_header.append('cpu_' + str(cpu_counter) + '_totalE(J)')
            csv_header.append('cpu_' + str(cpu_counter) + '_carbon(gCO₂eq)')
            cpu_counter += 1
        csv_header.append('Latency(sec)')
        # IO headers
        csv_header.extend(['Device', 'Read Count', 'Write Count', 'Read Bytes', 'Write Bytes',
                           'Read Time(ms)', 'Write Time(ms)', 'Busy Time(ms)'])
        # CPU and memory headers
        csv_header.extend(['cpu_utilization(%)', 'dram_usage(GB)', 'dram_cache(GB)', 'dram_total(GB)'])
        writer = csv.writer(f)
        # write the header
        writer.writerow(csv_header)
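
# For reference, on a hypothetical single-package machine the resulting header
# row would read:
#   Label, dram_avgE(J/sec), dram_totalE(J), dram_carbon(gCO₂eq),
#   cpu_avgE(J/sec), cpu_totalE(J), cpu_carbon(gCO₂eq),
#   cpu_0_avgE(J/sec), cpu_0_totalE(J), cpu_0_carbon(gCO₂eq), Latency(sec),
#   Device, Read Count, Write Count, Read Bytes, Write Bytes, Read Time(ms),
#   Write Time(ms), Busy Time(ms), cpu_utilization(%), dram_usage(GB),
#   dram_cache(GB), dram_total(GB)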


def export_query_stats(benchmark, database, label, results, latency, io_counters):
    """Append one row of energy, carbon, latency, IO, and memory statistics to the results CSV."""
    data = [label]
    if not os.path.exists(benchmark.path + "results/csv/"):
        os.makedirs(benchmark.path + "results/csv/")
    with open(benchmark.path + "results/csv/" + benchmark.type + "_" + benchmark.sf + "_" + database + ".csv",
              'a', encoding='UTF8') as f:
        writer = csv.writer(f)
        data.append(results['avg_dram_energy'])
        data.append(results['dram_total_energy'])
        data.append(calculate_kwh(results['dram_total_energy']) * carbon_intensity)
        data.append(results['avg_cpu_energy'])
        data.append(results['total_cpu_energy'])
        data.append(calculate_kwh(results['total_cpu_energy']) * carbon_intensity)
        cpu_counter = 0
        for package in rapl_reader.packages:
            if 'package' not in package.name:
                continue
            data.append(results['avg_cpu_energy_per_Soc'][cpu_counter])
            data.append(results['cpu_total_energy_per_Soc'][cpu_counter])
            data.append(calculate_kwh(results['cpu_total_energy_per_Soc'][cpu_counter]) * carbon_intensity)
            cpu_counter += 1
        data.append(latency)
        if not io_counters:
            # No device saw any IO: fill the IO columns with placeholders
            data.append("None")
            data.extend([0] * 7)
        else:
            # Collect per-device counter lists, skipping loopback devices
            devices = []
            read_count = []
            write_count = []
            read_bytes = []
            write_bytes = []
            read_time = []
            write_time = []
            busy_time = []
            for device, io_counter in io_counters.items():
                if 'loop' in device:
                    continue
                devices.append(device)
                read_count.append(io_counter.read_count)
                write_count.append(io_counter.write_count)
                read_bytes.append(io_counter.read_bytes)
                write_bytes.append(io_counter.write_bytes)
                read_time.append(io_counter.read_time)
                write_time.append(io_counter.write_time)
                busy_time.append(io_counter.busy_time)
            data.append(devices)
            data.append(read_count)
            data.append(write_count)
            data.append(read_bytes)
            data.append(write_bytes)
            data.append(read_time)
            data.append(write_time)
            data.append(busy_time)
        data.append(results['cpu_usage'])
        data.append(results['mem_usage'])
        data.append(results['mem_cache'])
        data.append(results['total_mem'])
        writer.writerow(data)


def export_throughput(benchmark, database, throughput):
    csv_header = ['Throughput(queries/sec)']
    if not os.path.exists(benchmark.path + "results/csv/"):
        os.makedirs(benchmark.path + "results/csv/")
    with open(benchmark.path + "results/csv/" + benchmark.type + "_" + benchmark.sf + "_" + database + ".csv",
              'a', encoding='UTF8') as f:
        writer = csv.writer(f)
        # write the header, then the value
        writer.writerow(csv_header)
        writer.writerow([throughput])


def measure_io_operations(initial_io_counters, final_io_counters):
    """Return per-device IO counter differences between two psutil snapshots."""
    io_counters_diff = {}
    for device, final_counter in final_io_counters.items():
        initial_counter = initial_io_counters.get(device)
        diff_counter = LinuxDiskIOStats(
            read_count=final_counter.read_count - initial_counter.read_count,
            write_count=final_counter.write_count - initial_counter.write_count,
            read_bytes=final_counter.read_bytes - initial_counter.read_bytes,
            write_bytes=final_counter.write_bytes - initial_counter.write_bytes,
            read_time=final_counter.read_time - initial_counter.read_time,
            write_time=final_counter.write_time - initial_counter.write_time,
            read_merged_count=final_counter.read_merged_count - initial_counter.read_merged_count,
            write_merged_count=final_counter.write_merged_count - initial_counter.write_merged_count,
            busy_time=final_counter.busy_time - initial_counter.busy_time
        )
        io_counters_diff[device] = diff_counter
    return io_counters_diff
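
# Illustration of the diff semantics with hypothetical counter values:
#   before = LinuxDiskIOStats(10, 5, 4096, 2048, 30, 20, 0, 0, 50)
#   after = LinuxDiskIOStats(15, 9, 8192, 4096, 45, 33, 0, 0, 80)
#   measure_io_operations({'sda': before}, {'sda': after})['sda']
#   -> DiskIOStats(read_count=5, write_count=4, read_bytes=4096,
#                  write_bytes=2048, read_time=15, write_time=13,
#                  read_merged_count=0, write_merged_count=0, busy_time=30)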


def filter_non_zero_counters(io_counters_diff):
    """Drop devices with no IO operations. `any(counter)` iterates the namedtuple's
    fields, so a device is kept if any of its counters is non-zero."""
    non_zero_counters = {device: counter for device, counter in io_counters_diff.items() if any(counter)}
    return non_zero_counters


def plotMemoryUsage(memory_cache, memory_usage_gb, benchmark, database, job):
    """Plot memory usage and memory cache over time and save both figures as PNGs."""
    x_axis = range(len(memory_usage_gb))
    # Plot the actual gigabytes of occupied memory
    plt.plot(x_axis, memory_usage_gb, label='Memory Usage (GB)')
    # Set labels and title
    plt.xlabel('Measurements')
    plt.ylabel('Memory Usage')
    plt.title('Memory Usage Over Time - ' + job)
    # Add legend
    plt.legend()
    # Thin the x-axis ticks to at most ten labels, based on the measurement count
    step_size = max(1, 10 ** (math.ceil(math.log10(len(memory_usage_gb))) - 1))
    plt.xticks(x_axis[::step_size])
    # Create the figures directory if it does not exist
    if not os.path.exists(benchmark.path + "results/figures/"):
        os.makedirs(benchmark.path + "results/figures/")
    plt.savefig(benchmark.path + "results/figures/" + benchmark.type + "_" + benchmark.sf + "_"
                + database + "_" + job + "_dram" + ".png")
    # Clear and close the figure to avoid accumulating open figures
    plt.clf()
    plt.close()

    # Plot the gigabytes of cached memory
    plt.plot(x_axis, memory_cache, label='Memory Cache (GB)')
    # Set labels and title
    plt.xlabel('Measurements')
    plt.ylabel('Memory Cache')
    plt.title('Memory Cache Over Time - ' + job)
    # Add legend
    plt.legend()
    plt.xticks(x_axis[::step_size])
    if not os.path.exists(benchmark.path + "results/figures/"):
        os.makedirs(benchmark.path + "results/figures/")
    plt.savefig(benchmark.path + "results/figures/" + benchmark.type + "_" + benchmark.sf + "_"
                + database + "_" + job + "_cache" + ".png")
    # Clear and close the figure to avoid accumulating open figures
    plt.clf()
    plt.close()


def plotCPUUsage(cpu_utilization, benchmark, database, job):
    # Generate thread indices
    thread_indices = np.arange(len(cpu_utilization))
    # Create a bar plot
    plt.figure(figsize=(10, 6))  # Adjust figure size as needed
    plt.bar(thread_indices, cpu_utilization, color='skyblue')
    # Add labels and title
    plt.xlabel('CPU Thread')
    plt.ylabel('Average CPU Utilization')
    plt.title('Average CPU Utilization Per Thread - ' + job)
    # Add value labels on top of each bar
    for i, util in enumerate(cpu_utilization):
        plt.text(i, util, str(round(util, 2)), ha='center', va='bottom')
    plt.grid(axis='y')  # Show grid on y-axis
    plt.xticks(thread_indices)  # Set x ticks to match thread indices
    plt.tight_layout()  # Adjust layout
    # Create the figures directory if it does not exist
    if not os.path.exists(benchmark.path + "results/figures/"):
        os.makedirs(benchmark.path + "results/figures/")
    plt.savefig(benchmark.path + "results/figures/" + benchmark.type + "_" + benchmark.sf + "_"
                + database + "_" + job + "_cpu" + ".png")
    # Clear and close the figure for the next plot
    plt.clf()
    plt.close()


def calculate_run_stats(benchmark, database, job, tracker_results, io_counters_start, io_counters_end, latency):
    """Compute IO deltas for the run, then print, export, and plot all collected statistics."""
    # Measure IO operations over the run
    io_operations_result = measure_io_operations(io_counters_start, io_counters_end)
    # Filter out disk devices with no IO operations
    non_zero_counters = filter_non_zero_counters(io_operations_result)
    print_tracker_results(job, tracker_results)
    export_query_stats(benchmark, database, job, tracker_results, latency, non_zero_counters)
    plotMemoryUsage(tracker_results["mem_cache"], tracker_results["mem_usage"], benchmark, database, job)
    plotCPUUsage(tracker_results["cpu_usage"], benchmark, database, job)


# Extract the scale factor from a data path, assuming a base name of the form
# '<benchmark>_<scale factor>_...'
def extract_sf_from_path(path):
    # Get the base name of the path
    base_name = os.path.basename(path)
    # The scale factor is the second underscore-separated token
    parts = base_name.split('_')
    return parts[1]
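
# Example (hypothetical path): extract_sf_from_path("/data/tpch_100_parquet")
# returns "100".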


def calculate_idle_average_energy(duration, benchmark, db_type):
    """Measure the average energy consumption of the idle system before executing
    anything, and append the results to the CSV."""
    print("Measuring the energy consumption of the system in idle state for " + str(duration) + " seconds\n")
    tracker = Tracker(100000, "machine", "avgEnergy")
    tracker.start()
    io_counters_start = psutil.disk_io_counters(perdisk=True)
    # Sleep instead of busy-waiting, so the measurement reflects a truly idle system
    time.sleep(duration)
    tracker.stop()
    io_counters_end = psutil.disk_io_counters(perdisk=True)
    print_tracker_results("idle state", tracker.results)
    data = ["idle state"]
    if not os.path.exists(benchmark.path + "results/csv/"):
        os.makedirs(benchmark.path + "results/csv/")
    with open(benchmark.path + "results/csv/" + benchmark.type + "_" + benchmark.sf + "_" + db_type + ".csv",
              'a', encoding='UTF8') as f:
        writer = csv.writer(f)
        data.append(tracker.results['avg_dram_energy'])
        data.append(tracker.results['dram_total_energy'])
        data.append(calculate_kwh(tracker.results['dram_total_energy']) * carbon_intensity)
        data.append(tracker.results['avg_cpu_energy'])
        data.append(tracker.results['total_cpu_energy'])
        data.append(calculate_kwh(tracker.results['total_cpu_energy']) * carbon_intensity)
        cpu_counter = 0
        for package in rapl_reader.packages:
            if 'package' not in package.name:
                continue
            data.append(tracker.results['avg_cpu_energy_per_Soc'][cpu_counter])
            data.append(tracker.results['cpu_total_energy_per_Soc'][cpu_counter])
            data.append(calculate_kwh(tracker.results['cpu_total_energy_per_Soc'][cpu_counter]) * carbon_intensity)
            cpu_counter += 1
        data.append(duration)
        writer.writerow(data)
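

# A minimal usage sketch, assuming only the modules imported above and a short
# synthetic workload. The CSV and plotting helpers additionally require a
# benchmark object exposing `path`, `type`, and `sf`, so they are not exercised
# here; this block is illustrative rather than part of the benchmark drivers.
if __name__ == "__main__":
    get_latest_carbon_intensity()  # populates the module-level carbon_intensity
    tracker = Tracker(100000, "machine", "avgEnergy")
    io_start = psutil.disk_io_counters(perdisk=True)
    tracker.start()
    time.sleep(5)  # stand-in for the measured workload
    tracker.stop()
    io_end = psutil.disk_io_counters(perdisk=True)
    io_diff = filter_non_zero_counters(measure_io_operations(io_start, io_end))
    print_tracker_results("demo workload", tracker.results)
    print(io_diff)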