Skip to content

Commit

Permalink
feat: better overhead power integration (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
JuanPedroGHM authored Oct 25, 2023
1 parent e14b8a8 commit 4a0c984
Show file tree
Hide file tree
Showing 12 changed files with 132 additions and 44 deletions.
13 changes: 7 additions & 6 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ Options
.. csv-table:: Configuration Options
:header: "Name", "Default", "Description"

"pue", 1.58, "Power Usage Effectiveness: A measure of a data centers efficiency, calculated as
PUE = Total facilitty energy / IT equipment energy"
"emissions_factor", 417.80, "Average carbon intensity of electricity (gCO2e/kWh). Source: https://ourworldindata.org/grapher/carbon-intensity-electricity"
"price_factor", 0.3251, "Power to Currency conversion factor (Currency/kWh). Source : https://www.stromauskunft.de/strompreise/"
"power_overhead", 0.0, "Estimated power consumption of non-measured hardware components in Watts. Will be added to the power draw and energy consumed of individual nodes. Defaults to 0 Watts"
"pue", 1.0, "Power Usage Effectiveness: A measure of a data center's efficiency, calculated as
PUE = Total facility energy / IT equipment energy. Calculated for each run."
"emissions_factor", 417.80, "Average carbon intensity of electricity (gCO2e/kWh). Calculated for each run. Source: https://ourworldindata.org/grapher/carbon-intensity-electricity"
"price_factor", 0.3251, "Power to Currency conversion factor (Currency/kWh). Calculated for each run. Source : https://www.stromauskunft.de/strompreise/"
"price_unit", €, "Currency Icon"
"sampling_rate", 1, "Seconds between measurements"
"app_name", None, "Name to identify the app. If **None**, name will be based on the file or function name."
"run_id", None, "ID of the current run. If **None**, the current date and time will be used. If **SLURM**, perun will look for the environment variable **SLURM_JOB_ID** and use that."
"format", "text", "Output report format [text, pickle, csv, hdf5, json, bench]"
"data_out", "./perun_results", "perun output location"
"rounds", 5, "Number of times a the application is run"
"warmup_rounds", 1, "Number of warmup rounds to run before starting the benchmarks."
"rounds", 1, "Number of times the application is run"
"warmup_rounds", 0, "Number of warmup rounds to run before starting the benchmarks."
"log_lvl", "WARNING", "Change logging output [DEBUG, INFO, WARNING, ERROR, CRITICAL]"
4 changes: 3 additions & 1 deletion example.perun.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
[post-processing]
pue = 1.58
pue = 1
power_overhead = 0
emissions_factor = 417.8
price_factor = 0.3251
price_unit=€

[monitor]
sampling_rate = 1
Expand Down
47 changes: 34 additions & 13 deletions perun/api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,23 +102,40 @@ def _get_arg_parser() -> argparse.ArgumentParser:
help="Directory where output files are saved. Defaults to ./perun_results",
)
monitor_parser.add_argument(
"--sampling_rate", type=float, help="Sampling rate in seconds"
"--sampling_rate",
type=float,
help="Sampling rate in seconds. Defaults to 1 second.",
)
monitor_parser.add_argument(
"--power_overhead",
type=float,
help="Estimated power consumption of non-measured hardware components in Watts. Will be added to measured power consumption on the text report summary. Defaults to 0 Watts",
)
monitor_parser.add_argument(
"--pue", type=float, help="Data center Power Usage Effectiveness"
"--pue", type=float, help="Data center Power Usage Effectiveness. Defaults to 1"
)
monitor_parser.add_argument(
"--price_factor",
type=float,
help="Electricity to Currency convertion factor in the form of Currency/kWh",
help="Electricity to Currency convertion factor in the form of Currency/kWh. Defaults to 0.3251 €/kWh",
)
monitor_parser.add_argument(
"--price_unit",
type=str,
help="Currency character to use on the text report summary. Defaults to €",
)
monitor_parser.add_argument(
"--emission_factor",
type=float,
help="Average carbon intensity of electricity (gCO2e/kWh). Defaults to 417.80 gC02e/kWh",
)
monitor_parser.add_argument(
"--rounds", type=int, help="Number of warmup rounds to run app."
"--rounds", type=int, help="Number of warmup rounds to run app. Defaults to 1"
)
monitor_parser.add_argument(
"--warmup_rounds",
type=int,
help="Number of warmup rounds to run the app. A warmup round is a full run of the application without gathering performance data.",
help="Number of warmup rounds to run the app. A warmup round is a full run of the application without gathering performance data. Defaults to 0",
)
monitor_parser.add_argument("script", type=str)
monitor_parser.add_argument("script_args", nargs=argparse.REMAINDER)
Expand Down Expand Up @@ -157,20 +174,24 @@ def showconf(args: argparse.Namespace):
"""Print current perun configuration in INI format."""
from perun.configuration import _default_config

if args.showconf_default:
config.read_dict(_default_config)
config.write(sys.stdout)
else:
config.write(sys.stdout)
perun = Perun(config)
if perun.comm.Get_rank() == 0:
if args.showconf_default:
config.read_dict(_default_config)
config.write(sys.stdout)
else:
config.write(sys.stdout)


def sensors(args: argparse.Namespace):
"""Print sensors assigned to each rank by perun."""
perun = Perun(config)
log.debug(f"Rank {perun.comm.Get_rank()}: Sensors initialized perun object")
sensor_config = perun.sensors_config
host_rank = perun.host_rank
log.debug(f"Rank {perun.comm.Get_rank()}: Sensors gather global configuration")
if perun.comm.Get_rank() == 0:
printableConfig = printableSensorConfiguration(
perun.sensors_config, perun.host_rank
)
printableConfig = printableSensorConfiguration(sensor_config, host_rank)
print(printableConfig)


Expand Down
3 changes: 2 additions & 1 deletion perun/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

_default_config: Mapping[str, Mapping[str, Any]] = {
"post-processing": {
"pue": 1.58,
"power_overhead": 0, # Watt
"pue": 1.0, # Global Average Power Usage Effectiveness (2022 or something)
"emissions_factor": 417.80, # gCO2eq/kWh
"price_factor": 0.3251, # Currency/kWh
"price_unit": "€",
Expand Down
2 changes: 2 additions & 0 deletions perun/data_model/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class MetricType(str, enum.Enum):
OTHER_ENERGY = "other_energy"
OTHER_MEM = "other_memory"
N_RUNS = "n_runs"
MONEY = "money"
CO2 = "co2"


class AggregateType(str, enum.Enum):
Expand Down
1 change: 1 addition & 0 deletions perun/data_model/measurement_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Unit(str, enum.Enum):
SECOND = "s"
PERCENT = "%"
SCALAR = ""
GRAM = "g"

@property
def symbol(self) -> str:
Expand Down
18 changes: 8 additions & 10 deletions perun/io/text_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
tableMetrics = [
MetricType.RUNTIME,
MetricType.ENERGY,
MetricType.POWER,
MetricType.CPU_POWER,
MetricType.CPU_UTIL,
MetricType.GPU_POWER,
Expand Down Expand Up @@ -101,20 +102,17 @@ def textReport(dataNode: DataNode, mr_id: str) -> str:
else:
region_report_str = ""

# Summary
n_runs = len(dataNode.nodes)
if MetricType.ENERGY in dataNode.metrics:
# Application Summary
total_energy = dataNode.metrics[MetricType.ENERGY].sum # type: ignore
e_pue = total_energy * config.getfloat("post-processing", "pue")
e_kWh = e_pue / (3600 * 1e3)
kgCO2 = e_kWh * config.getfloat("post-processing", "emissions_factor") / 1e3
money = e_kWh * config.getfloat(
"post-processing", "price_factor"
) # Currency / kWh
e_kWh = total_energy / (3600 * 1e3)
kgCO2 = dataNode.metrics[MetricType.CO2].sum # type: ignore
money = dataNode.metrics[MetricType.MONEY].sum # type: ignore
money_icon = config.get("post-processing", "price_unit")

summary_str = f"The application has been run {n_runs} times. Throughout its runtime, it has used {e_kWh:.3f} kWh, released a total of {kgCO2:.3f} kgCO2e into the atmosphere, and you paid {money:.2f} {money_icon} in electricity for it.\n"
app_summary_str = f"Application Summary\n\nThe application has been run {n_runs} times. Throughout its runtime, it has used {e_kWh:.3f} kWh, released a total of {kgCO2:.3f} kgCO2e into the atmosphere, and you paid {money:.2f} {money_icon} in electricity for it."
else:
summary_str = f"The application has been run {n_runs} times."
app_summary_str = f"The application has been run {n_runs} times."

return report_header + mr_report_str + region_report_str + summary_str
return report_header + mr_report_str + region_report_str + app_summary_str
7 changes: 4 additions & 3 deletions perun/perun.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def monitor_application(
nodes=multirun_nodes,
processed=False,
)
multirun_node = processDataNode(multirun_node)
multirun_node = processDataNode(multirun_node, self.config)

app_data_file = data_out / f"{app_name}.{IOFormat.HDF5.suffix}"
app_data = None
Expand All @@ -273,7 +273,7 @@ def monitor_application(
nodes={multirun_id: multirun_node},
processed=False,
)
app_data = processDataNode(app_data)
app_data = processDataNode(app_data, self.config)

self.export_to(data_out, app_data, IOFormat.HDF5)
if out_format != IOFormat.HDF5:
Expand Down Expand Up @@ -308,6 +308,7 @@ def _run_application(
self.comm.Get_rank(),
self.backends,
self.l_sensors_config,
self.config,
sp_ready_event,
start_event,
stop_event,
Expand Down Expand Up @@ -389,7 +390,7 @@ def _run_application(
nodes={node.id: node for node in dataNodes if node},
)
runNode.addRegionData(globalRegions, starttime_ns)
runNode = processDataNode(runNode)
runNode = processDataNode(runNode, self.config)

return runNode
return None
Expand Down
57 changes: 55 additions & 2 deletions perun/processing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Processing Module."""
import copy
import logging
from configparser import ConfigParser
from datetime import datetime
from itertools import chain
from typing import Any, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -306,13 +307,17 @@ def processSensorData(sensorData: DataNode) -> DataNode:
return sensorData


def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
def processDataNode(
dataNode: DataNode, perunConfig: ConfigParser, force_process=False
) -> DataNode:
"""Recursively calculate metrics on the dataNode tree.
Parameters
----------
dataNode : DataNode
Root data node tree.
perunConfig: ConfigParser
Perun configuration
force_process : bool, optional
Force recomputation of child node metrics, by default False
Expand Down Expand Up @@ -342,7 +347,9 @@ def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
if subNode.type == NodeType.SENSOR:
subNode = processSensorData(subNode)
else:
subNode = processDataNode(subNode, force_process=force_process)
subNode = processDataNode(
subNode, perunConfig=perunConfig, force_process=force_process
)

if dataNode.type == NodeType.APP:
for subSubNode in subNode.nodes.values():
Expand Down Expand Up @@ -380,6 +387,52 @@ def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
metricType, aggregatedValue, metric_md, aggType
)

# Apply power overhead to each computational node if there is power data available.
if dataNode.type == NodeType.NODE and MetricType.POWER in dataNode.metrics:
power_overhead = perunConfig.getfloat("post-processing", "power_overhead")
dataNode.metrics[MetricType.POWER].value += power_overhead # type: ignore
runtime = dataNode.metrics[MetricType.RUNTIME].value
dataNode.metrics[MetricType.ENERGY].value += runtime * power_overhead # type: ignore

# If there is energy data, apply PUE, and convert to currency and CO2 emissions.
if dataNode.type == NodeType.RUN and MetricType.ENERGY in dataNode.metrics:
pue = perunConfig.getfloat("post-processing", "pue")
emissions_factor = perunConfig.getfloat("post-processing", "emissions_factor")
price_factor = perunConfig.getfloat("post-processing", "price_factor")
total_energy = dataNode.metrics[MetricType.ENERGY].value * pue
dataNode.metrics[MetricType.ENERGY].value = total_energy # type: ignore
e_kWh = total_energy / (3600 * 1e3)

costMetric = Metric(
MetricType.MONEY,
e_kWh * price_factor,
MetricMetaData(
Unit.SCALAR,
Magnitude.ONE,
np.dtype("float32"),
np.float32(0),
np.finfo("float32").max,
np.float32(0),
),
AggregateType.SUM,
)

co2Emissions = Metric(
MetricType.CO2,
e_kWh * emissions_factor,
MetricMetaData(
Unit.GRAM,
Magnitude.ONE,
np.dtype("float32"),
np.float32(0),
np.finfo("float32").max,
np.float32(0),
),
AggregateType.SUM,
)
dataNode.metrics[MetricType.MONEY] = costMetric
dataNode.metrics[MetricType.CO2] = co2Emissions

dataNode.processed = True
return dataNode

Expand Down
6 changes: 4 additions & 2 deletions perun/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import platform
import time
from configparser import ConfigParser
from multiprocessing import Queue
from typing import Dict, List, Set

Expand All @@ -21,6 +22,7 @@ def perunSubprocess(
rank: int,
backends: Dict[str, Backend],
l_sensors_config: Dict[str, Set[str]],
perunConfig: ConfigParser,
sp_ready_event,
start_event,
stop_event,
Expand Down Expand Up @@ -123,7 +125,7 @@ def perunSubprocess(
deviceType=deviceType,
)

dn = processDataNode(dn)
dn = processDataNode(dn, perunConfig)
deviceGroupNodes.append(dn)
else:
deviceGroupNodes.extend(sensorNodes)
Expand All @@ -136,7 +138,7 @@ def perunSubprocess(
metadata={},
nodes={node.id: node for node in deviceGroupNodes},
)
processDataNode(hostNode)
processDataNode(hostNode, perunConfig)

# This should send a single processed node for the current computational node
queue.put(hostNode, block=True)
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from perun.perun import Perun


@pytest.fixture(scope="package")
@pytest.fixture()
def defaultConfig():
defaultConfig = configparser.ConfigParser(allow_no_value=True)
defaultConfig.read_dict(_default_config)
return defaultConfig


@pytest.fixture(scope="package")
@pytest.fixture()
def perun(defaultConfig):
return Perun(defaultConfig)
14 changes: 10 additions & 4 deletions tests/perun/api/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,25 @@ def test_showconf_command_with_default(
defaultConfig.write(configFile)

processorOut = subprocess.run(
["perun", "--log_lvl", "INFO", "--configuration", str(confPath), "showconf"],
["perun", "--log_lvl", "ERROR", "--configuration", str(confPath), "showconf"],
capture_output=True,
text=True,
).stdout
print(processorOut)
parser = configparser.ConfigParser(allow_no_value=True)
parser.read_string(processorOut)
assert parser.get("debug", "log_lvl") == "INFO"
assert defaultConfig.get("monitor", "sampling_rate") == "2"
assert defaultConfig.get("debug", "log_lvl") == "WARNING"
assert parser.get("monitor", "sampling_rate") == "2"
assert parser.get("debug", "log_lvl") == "ERROR"
assert parser != defaultConfig

defaultConfig.set("monitor", "sampling_rate", "1")
processorOut = subprocess.run(
[
"perun",
"--log_lvl",
"INFO",
"ERROR",
"--configuration",
str(confPath),
"showconf",
Expand All @@ -101,12 +104,15 @@ def test_showconf_command_with_default(
capture_output=True,
text=True,
).stdout
print(processorOut)
parser = configparser.ConfigParser(allow_no_value=True)
parser.read_string(processorOut)

assert defaultConfig.get("monitor", "sampling_rate") == "1"
assert defaultConfig.get("debug", "log_lvl") == "WARNING"
assert parser.get("debug", "log_lvl") == "WARNING"
assert parser.get("monitor", "sampling_rate") == "1"
assert parser != defaultConfig
assert parser == defaultConfig


def test_metadata_command(perun: Perun):
Expand Down

0 comments on commit 4a0c984

Please sign in to comment.