Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: better overhead power integration #95

Merged
merged 7 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ Options
.. csv-table:: Configuration Options
:header: "Name", "Default", "Description"

"pue", 1.58, "Power Usage Effectiveness: A measure of a data center's efficiency, calculated as
PUE = Total facility energy / IT equipment energy"
"emissions_factor", 417.80, "Average carbon intensity of electricity (gCO2e/kWh). Source: https://ourworldindata.org/grapher/carbon-intensity-electricity"
"price_factor", 0.3251, "Power to Currency conversion factor (Currency/kWh). Source : https://www.stromauskunft.de/strompreise/"
"power_overhead", 0.0, "Estimated power consumption of non-measured hardware components in Watts. Will be added to the power draw and energy consumed of individual nodes. Defaults to 0 Watts"
"pue", 1.0, "Power Usage Effectiveness: A measure of a data center's efficiency, calculated as
PUE = Total facility energy / IT equipment energy. Calculated for each run."
"emissions_factor", 417.80, "Average carbon intensity of electricity (gCO2e/kWh). Calculated for each run. Source: https://ourworldindata.org/grapher/carbon-intensity-electricity"
"price_factor", 0.3251, "Power to Currency conversion factor (Currency/kWh). Calculated for each run. Source : https://www.stromauskunft.de/strompreise/"
"price_unit", €, "Currency Icon"
"sampling_rate", 1, "Seconds between measurements"
"app_name", None, "Name to identify the app. If **None**, name will be based on the file or function name."
"run_id", None, "ID of the current run. If **None**, the current date and time will be used. If **SLURM**, perun will look for the environment variable **SLURM_JOB_ID** and use that."
"format", "text", "Output report format [text, pickle, csv, hdf5, json, bench]"
"data_out", "./perun_results", "perun output location"
"rounds", 5, "Number of times a the application is run"
"warmup_rounds", 1, "Number of warmup rounds to run before starting the benchmarks."
"rounds", 1, "Number of times the application is run"
"warmup_rounds", 0, "Number of warmup rounds to run before starting the benchmarks."
"log_lvl", "WARNING", "Change logging output [DEBUG, INFO, WARNING, ERROR, CRITICAL]"
4 changes: 3 additions & 1 deletion example.perun.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
[post-processing]
pue = 1.58
pue = 1
power_overhead = 0
emissions_factor = 417.8
price_factor = 0.3251
price_unit=€

[monitor]
sampling_rate = 1
Expand Down
47 changes: 34 additions & 13 deletions perun/api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,23 +102,40 @@ def _get_arg_parser() -> argparse.ArgumentParser:
help="Directory where output files are saved. Defaults to ./perun_results",
)
monitor_parser.add_argument(
"--sampling_rate", type=float, help="Sampling rate in seconds"
"--sampling_rate",
type=float,
help="Sampling rate in seconds. Defaults to 1 second.",
)
monitor_parser.add_argument(
"--power_overhead",
type=float,
help="Estimated power consumption of non-measured hardware components in Watts. Will be added to measured power consumption on the text report summary. Defaults to 0 Watts",
)
monitor_parser.add_argument(
"--pue", type=float, help="Data center Power Usage Effectiveness"
"--pue", type=float, help="Data center Power Usage Effectiveness. Defaults to 1"
)
monitor_parser.add_argument(
"--price_factor",
type=float,
help="Electricity to Currency convertion factor in the form of Currency/kWh",
help="Electricity to Currency convertion factor in the form of Currency/kWh. Defaults to 0.3251 €/kWh",
)
monitor_parser.add_argument(
"--price_unit",
type=str,
help="Currency character to use on the text report summary. Defaults to €",
)
monitor_parser.add_argument(
"--emission_factor",
type=float,
help="Average carbon intensity of electricity (gCO2e/kWh). Defaults to 417.80 gC02e/kWh",
)
monitor_parser.add_argument(
"--rounds", type=int, help="Number of warmup rounds to run app."
"--rounds", type=int, help="Number of warmup rounds to run app. Defaults to 1"
)
monitor_parser.add_argument(
"--warmup_rounds",
type=int,
help="Number of warmup rounds to run the app. A warmup round is a full run of the application without gathering performance data.",
help="Number of warmup rounds to run the app. A warmup round is a full run of the application without gathering performance data. Defaults to 0",
)
monitor_parser.add_argument("script", type=str)
monitor_parser.add_argument("script_args", nargs=argparse.REMAINDER)
Expand Down Expand Up @@ -157,20 +174,24 @@ def showconf(args: argparse.Namespace):
"""Print current perun configuration in INI format."""
from perun.configuration import _default_config

if args.showconf_default:
config.read_dict(_default_config)
config.write(sys.stdout)
else:
config.write(sys.stdout)
perun = Perun(config)
if perun.comm.Get_rank() == 0:
if args.showconf_default:
config.read_dict(_default_config)
config.write(sys.stdout)
else:
config.write(sys.stdout)


def sensors(args: argparse.Namespace):
"""Print sensors assigned to each rank by perun."""
perun = Perun(config)
log.debug(f"Rank {perun.comm.Get_rank()}: Sensors initialized perun object")
sensor_config = perun.sensors_config
host_rank = perun.host_rank
log.debug(f"Rank {perun.comm.Get_rank()}: Sensors gather global configuration")
if perun.comm.Get_rank() == 0:
printableConfig = printableSensorConfiguration(
perun.sensors_config, perun.host_rank
)
printableConfig = printableSensorConfiguration(sensor_config, host_rank)
print(printableConfig)


Expand Down
3 changes: 2 additions & 1 deletion perun/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

_default_config: Mapping[str, Mapping[str, Any]] = {
"post-processing": {
"pue": 1.58,
"power_overhead": 0, # Watt
"pue": 1.0, # Power Usage Effectiveness; energy is multiplied by this factor, so 1.0 adds no data-center overhead
"emissions_factor": 417.80, # gCO2eq/kWh
"price_factor": 0.3251, # Currency/kWh
"price_unit": "€",
Expand Down
2 changes: 2 additions & 0 deletions perun/data_model/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class MetricType(str, enum.Enum):
OTHER_ENERGY = "other_energy"
OTHER_MEM = "other_memory"
N_RUNS = "n_runs"
MONEY = "money"
CO2 = "co2"


class AggregateType(str, enum.Enum):
Expand Down
1 change: 1 addition & 0 deletions perun/data_model/measurement_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Unit(str, enum.Enum):
SECOND = "s"
PERCENT = "%"
SCALAR = ""
GRAM = "g"

@property
def symbol(self) -> str:
Expand Down
18 changes: 8 additions & 10 deletions perun/io/text_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
tableMetrics = [
MetricType.RUNTIME,
MetricType.ENERGY,
MetricType.POWER,
MetricType.CPU_POWER,
MetricType.CPU_UTIL,
MetricType.GPU_POWER,
Expand Down Expand Up @@ -101,20 +102,17 @@ def textReport(dataNode: DataNode, mr_id: str) -> str:
else:
region_report_str = ""

# Summary
n_runs = len(dataNode.nodes)
if MetricType.ENERGY in dataNode.metrics:
# Application Summary
total_energy = dataNode.metrics[MetricType.ENERGY].sum # type: ignore
e_pue = total_energy * config.getfloat("post-processing", "pue")
e_kWh = e_pue / (3600 * 1e3)
kgCO2 = e_kWh * config.getfloat("post-processing", "emissions_factor") / 1e3
money = e_kWh * config.getfloat(
"post-processing", "price_factor"
) # Currency / kWh
e_kWh = total_energy / (3600 * 1e3)
kgCO2 = dataNode.metrics[MetricType.CO2].sum # type: ignore
money = dataNode.metrics[MetricType.MONEY].sum # type: ignore
money_icon = config.get("post-processing", "price_unit")

summary_str = f"The application has been run {n_runs} times. Throughout its runtime, it has used {e_kWh:.3f} kWh, released a total of {kgCO2:.3f} kgCO2e into the atmosphere, and you paid {money:.2f} {money_icon} in electricity for it.\n"
app_summary_str = f"Application Summary\n\nThe application has been run {n_runs} times. Throughout its runtime, it has used {e_kWh:.3f} kWh, released a total of {kgCO2:.3f} kgCO2e into the atmosphere, and you paid {money:.2f} {money_icon} in electricity for it."
else:
summary_str = f"The application has been run {n_runs} times."
app_summary_str = f"The application has been run {n_runs} times."

return report_header + mr_report_str + region_report_str + summary_str
return report_header + mr_report_str + region_report_str + app_summary_str
7 changes: 4 additions & 3 deletions perun/perun.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def monitor_application(
nodes=multirun_nodes,
processed=False,
)
multirun_node = processDataNode(multirun_node)
multirun_node = processDataNode(multirun_node, self.config)

app_data_file = data_out / f"{app_name}.{IOFormat.HDF5.suffix}"
app_data = None
Expand All @@ -273,7 +273,7 @@ def monitor_application(
nodes={multirun_id: multirun_node},
processed=False,
)
app_data = processDataNode(app_data)
app_data = processDataNode(app_data, self.config)

self.export_to(data_out, app_data, IOFormat.HDF5)
if out_format != IOFormat.HDF5:
Expand Down Expand Up @@ -308,6 +308,7 @@ def _run_application(
self.comm.Get_rank(),
self.backends,
self.l_sensors_config,
self.config,
sp_ready_event,
start_event,
stop_event,
Expand Down Expand Up @@ -389,7 +390,7 @@ def _run_application(
nodes={node.id: node for node in dataNodes if node},
)
runNode.addRegionData(globalRegions, starttime_ns)
runNode = processDataNode(runNode)
runNode = processDataNode(runNode, self.config)

return runNode
return None
Expand Down
57 changes: 55 additions & 2 deletions perun/processing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Processing Module."""
import copy
import logging
from configparser import ConfigParser
from datetime import datetime
from itertools import chain
from typing import Any, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -306,13 +307,17 @@ def processSensorData(sensorData: DataNode) -> DataNode:
return sensorData


def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
def processDataNode(
dataNode: DataNode, perunConfig: ConfigParser, force_process=False
) -> DataNode:
"""Recursively calculate metrics on the dataNode tree.

Parameters
----------
dataNode : DataNode
Root data node tree.
perunConfig: ConfigParser
Perun configuration
force_process : bool, optional
Force recomputation of child node metrics, by default False

Expand Down Expand Up @@ -342,7 +347,9 @@ def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
if subNode.type == NodeType.SENSOR:
subNode = processSensorData(subNode)
else:
subNode = processDataNode(subNode, force_process=force_process)
subNode = processDataNode(
subNode, perunConfig=perunConfig, force_process=force_process
)

if dataNode.type == NodeType.APP:
for subSubNode in subNode.nodes.values():
Expand Down Expand Up @@ -380,6 +387,52 @@ def processDataNode(dataNode: DataNode, force_process=False) -> DataNode:
metricType, aggregatedValue, metric_md, aggType
)

# Apply power overhead to each computational node if there is power data available.
if dataNode.type == NodeType.NODE and MetricType.POWER in dataNode.metrics:
power_overhead = perunConfig.getfloat("post-processing", "power_overhead")
dataNode.metrics[MetricType.POWER].value += power_overhead # type: ignore
runtime = dataNode.metrics[MetricType.RUNTIME].value
dataNode.metrics[MetricType.ENERGY].value += runtime * power_overhead # type: ignore

# If there is energy data, apply PUE, and convert to currency and CO2 emissions.
if dataNode.type == NodeType.RUN and MetricType.ENERGY in dataNode.metrics:
pue = perunConfig.getfloat("post-processing", "pue")
emissions_factor = perunConfig.getfloat("post-processing", "emissions_factor")
price_factor = perunConfig.getfloat("post-processing", "price_factor")
total_energy = dataNode.metrics[MetricType.ENERGY].value * pue
dataNode.metrics[MetricType.ENERGY].value = total_energy # type: ignore
e_kWh = total_energy / (3600 * 1e3)

costMetric = Metric(
MetricType.MONEY,
e_kWh * price_factor,
MetricMetaData(
Unit.SCALAR,
Magnitude.ONE,
np.dtype("float32"),
np.float32(0),
np.finfo("float32").max,
np.float32(0),
),
AggregateType.SUM,
)

co2Emissions = Metric(
MetricType.CO2,
e_kWh * emissions_factor,
MetricMetaData(
Unit.GRAM,
Magnitude.ONE,
np.dtype("float32"),
np.float32(0),
np.finfo("float32").max,
np.float32(0),
),
AggregateType.SUM,
)
dataNode.metrics[MetricType.MONEY] = costMetric
dataNode.metrics[MetricType.CO2] = co2Emissions

dataNode.processed = True
return dataNode

Expand Down
6 changes: 4 additions & 2 deletions perun/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import platform
import time
from configparser import ConfigParser
from multiprocessing import Queue
from typing import Dict, List, Set

Expand All @@ -21,6 +22,7 @@ def perunSubprocess(
rank: int,
backends: Dict[str, Backend],
l_sensors_config: Dict[str, Set[str]],
perunConfig: ConfigParser,
sp_ready_event,
start_event,
stop_event,
Expand Down Expand Up @@ -123,7 +125,7 @@ def perunSubprocess(
deviceType=deviceType,
)

dn = processDataNode(dn)
dn = processDataNode(dn, perunConfig)
deviceGroupNodes.append(dn)
else:
deviceGroupNodes.extend(sensorNodes)
Expand All @@ -136,7 +138,7 @@ def perunSubprocess(
metadata={},
nodes={node.id: node for node in deviceGroupNodes},
)
processDataNode(hostNode)
processDataNode(hostNode, perunConfig)

# This should send a single processed node for the current computational node
queue.put(hostNode, block=True)
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from perun.perun import Perun


@pytest.fixture(scope="package")
@pytest.fixture()
def defaultConfig():
defaultConfig = configparser.ConfigParser(allow_no_value=True)
defaultConfig.read_dict(_default_config)
return defaultConfig


@pytest.fixture(scope="package")
@pytest.fixture()
def perun(defaultConfig):
return Perun(defaultConfig)
14 changes: 10 additions & 4 deletions tests/perun/api/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,25 @@ def test_showconf_command_with_default(
defaultConfig.write(configFile)

processorOut = subprocess.run(
["perun", "--log_lvl", "INFO", "--configuration", str(confPath), "showconf"],
["perun", "--log_lvl", "ERROR", "--configuration", str(confPath), "showconf"],
capture_output=True,
text=True,
).stdout
print(processorOut)
parser = configparser.ConfigParser(allow_no_value=True)
parser.read_string(processorOut)
assert parser.get("debug", "log_lvl") == "INFO"
assert defaultConfig.get("monitor", "sampling_rate") == "2"
assert defaultConfig.get("debug", "log_lvl") == "WARNING"
assert parser.get("monitor", "sampling_rate") == "2"
assert parser.get("debug", "log_lvl") == "ERROR"
assert parser != defaultConfig

defaultConfig.set("monitor", "sampling_rate", "1")
processorOut = subprocess.run(
[
"perun",
"--log_lvl",
"INFO",
"ERROR",
"--configuration",
str(confPath),
"showconf",
Expand All @@ -101,12 +104,15 @@ def test_showconf_command_with_default(
capture_output=True,
text=True,
).stdout
print(processorOut)
parser = configparser.ConfigParser(allow_no_value=True)
parser.read_string(processorOut)

assert defaultConfig.get("monitor", "sampling_rate") == "1"
assert defaultConfig.get("debug", "log_lvl") == "WARNING"
assert parser.get("debug", "log_lvl") == "WARNING"
assert parser.get("monitor", "sampling_rate") == "1"
assert parser != defaultConfig
assert parser == defaultConfig


def test_metadata_command(perun: Perun):
Expand Down