Merge branch 'main' into pre-commit-ci-update-config
JuanPedroGHM authored Jun 7, 2024
2 parents 7533171 + 9f0bc46 commit 86bdb6d
Showing 8 changed files with 172 additions and 104 deletions.
13 changes: 12 additions & 1 deletion perun/api/cli.py
@@ -140,6 +140,17 @@ def _get_arg_parser() -> argparse.ArgumentParser:
type=int,
help="Number of warmup rounds to run the app. A warmup round is a full run of the application without gathering performance data. Defaults to 0",
)
monitor_parser.add_argument(
"--bench_metrics",
dest="metrics",
type=str,
help="List of metrics to add to the benchmark results. Only relevant when using the 'bench' format. Defaults to 'runtime,energy'",
)
monitor_parser.add_argument(
"--region_metrics",
type=str,
help="List of metrics to add to the benchmark results that are associated with individual regions. Only relevant when using the 'bench' format. Defaults to 'runtime,energy'",
)
monitor_parser.add_argument(
"-b",
"--binary",
@@ -253,7 +264,7 @@ def monitor(args: argparse.Namespace):
argIndex = sys.argv.index(args.cmd)
sys.argv = sys.argv[argIndex:]
cmd_args: List[str] = sys.argv.copy()
log.debug(f"Cmd args: { cmd_args }")
log.debug(f"Cmd args: {cmd_args}")
if not args.binary:
scriptPath = Path(cmd)
assert scriptPath.exists()
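For reference, a minimal sketch of how the comma-separated values passed to the new flags map onto MetricType members; this mirrors the parsing added in perun/io/bench.py further down, and the example string is just the documented default, not output from a real run:

from perun.data_model.data import MetricType

raw_metrics = "runtime,energy"  # e.g. the value given to --bench_metrics
metrics = [MetricType(value) for value in raw_metrics.split(",")]
print(metrics)  # the new __repr__ prints the plain enum values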
10 changes: 9 additions & 1 deletion perun/configuration.py
@@ -27,8 +27,16 @@
"benchmarking": {
"rounds": 1,
"warmup_rounds": 0,
"metrics": "runtime,energy",
"region_metrics": "runtime,power",
},
"benchmarking.units": {
"joule": "k",
"second": "",
"percent": "",
"watt": "",
"byte": "G",
},
"benchmarking.units": {"joule": "k", "second": "", "percent": "", "power": ""},
"debug": {"log_lvl": "WARNING"},
# "horeka": {"enabled": False, "url": "", "token": "", "org": ""},
}
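As an illustration, overriding the new defaults from a user configuration could look like the sketch below; the section and option names mirror the defaults shown above, and reading them through a plain configparser here is only for demonstration:

import configparser

user_cfg = """
[benchmarking]
metrics = runtime,energy
region_metrics = runtime,power

[benchmarking.units]
joule = k
byte = G
"""
config = configparser.ConfigParser()
config.read_string(user_cfg)
print(config.get("benchmarking", "region_metrics"))  # runtime,power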
45 changes: 31 additions & 14 deletions perun/data_model/data.py
@@ -37,7 +37,8 @@ class MetricType(str, enum.Enum):
OTHER_POWER = "other_power"
CPU_UTIL = "cpu_util"
GPU_UTIL = "gpu_util"
MEM_UTIL = "mem_util"
OTHER_UTIL = "other_util"
DRAM_MEM = "dram_mem"
GPU_MEM = "gpu_mem"
NET_READ = "net_read"
NET_WRITE = "net_write"
@@ -53,6 +54,29 @@ class MetricType(str, enum.Enum):
MONEY = "money"
CO2 = "co2"

def __str__(self):
"""Return string representation of MetricType."""
return self.value

def __repr__(self):
"""Return string representation of MetricType."""
return self.value

def fromString(self, value: str):
"""Create MetricType from string.
Parameters
----------
value : str
MetricType value.
Returns
-------
MetricType
MetricType object.
"""
return MetricType(value)


class AggregateType(str, enum.Enum):
"""Types of data aggregation."""
@@ -249,10 +273,7 @@ class Region:
id: str = ""
raw_data: Dict[int, np.ndarray] = dataclasses.field(default_factory=dict)
runs_per_rank: Optional[Stats] = None
runtime: Optional[Stats] = None
power: Optional[Stats] = None
cpu_util: Optional[Stats] = None
gpu_util: Optional[Stats] = None
metrics: Dict[MetricType, Stats] = dataclasses.field(default_factory=dict)
processed: bool = False

def toDict(self) -> Dict[str, Any]:
@@ -271,10 +292,7 @@ def toDict(self) -> Dict[str, Any]:
result["runs_per_rank"] = (
asdict(self.runs_per_rank) if self.runs_per_rank else None
)
result["runtime"] = asdict(self.runtime) if self.runtime else None
result["power"] = asdict(self.power) if self.power else None
result["cpu_util"] = asdict(self.cpu_util) if self.cpu_util else None
result["gpu_util"] = asdict(self.gpu_util) if self.gpu_util else None
result["metrics"] = [asdict(metric) for metric in self.metrics.values()]

return result

@@ -297,11 +315,10 @@ def fromDict(cls, regionDictionary: Dict[str, Any]):
regionObj.raw_data = regionDictionary["raw_data"]
regionObj.processed = regionDictionary["processed"]
if regionObj.processed:
regionObj.runs_per_rank = Stats.fromDict(regionDictionary["runs_per_rank"])
regionObj.runtime = Stats.fromDict(regionDictionary["runtime"])
regionObj.power = Stats.fromDict(regionDictionary["power"])
regionObj.cpu_util = Stats.fromDict(regionDictionary["cpu_util"])
regionObj.gpu_util = Stats.fromDict(regionDictionary["gpu_util"])
regionObj.metrics = {
MetricType(metric["type"]): Stats.fromDict(metric)
for metric in regionDictionary["metrics"]
}
return regionObj


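A small sketch of how downstream code consumes the reworked Region: the fixed runtime/power/cpu_util/gpu_util attributes are replaced by a single metrics mapping keyed by MetricType. The region argument is assumed to be a processed Region as produced by perun:

from perun.data_model.data import Region

def summarize_region(region: Region) -> None:
    # Each value is a Stats object; mean and std are the fields the
    # reporters touched by this commit rely on.
    for metric_type, stats in region.metrics.items():
        print(f"{region.id} {metric_type}: {stats.mean} ± {stats.std}")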
61 changes: 26 additions & 35 deletions perun/io/bench.py
@@ -13,9 +13,6 @@
log = logging.getLogger("perun")


lessIsBetterMetrics = [MetricType.RUNTIME, MetricType.ENERGY]


def exportBench(dataNode: DataNode, mr_id: str) -> str:
"""Export data node to json format based on the github continuous benchmark action.
@@ -36,15 +33,21 @@ def exportBench(dataNode: DataNode, mr_id: str) -> str:
metricDict = []
mrNode = dataNode.nodes[mr_id]

scriptMetrics = [
MetricType(value)
for value in mrNode.metadata["benchmarking.metrics"].split(",")
]

bench_units: Dict[str, Magnitude] = {
"JOULE": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.joule"]),
"SECOND": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.second"]),
"WATT": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.power"]),
"WATT": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.watt"]),
"PERCENT": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.percent"]),
"BYTE": Magnitude.fromSymbol(mrNode.metadata["benchmarking.units.byte"]),
}

for metricType, metric in mrNode.metrics.items():
if metricType in lessIsBetterMetrics:
if metricType in scriptMetrics:
metric_md: MetricMetaData = metric.metric_md
if metric_md.unit.name in bench_units:
mag = bench_units[metric_md.unit.name]
@@ -77,49 +80,37 @@ def exportBench(dataNode: DataNode, mr_id: str) -> str:
"When generating benchmarks for regions, it is preferable to if each function only runs a single time."
)

regionMetrics = [
MetricType(value)
for value in mrNode.metadata["benchmarking.region_metrics"].split(",")
]

for runNode in mrNode.nodes.values():
if runNode.regions:
for region_name, region in runNode.regions.items():
if region_name not in region_data:
region_data[region_name] = {
MetricType.RUNTIME.name: (
[region.runtime.mean],
region.runtime.metric_md,
),
MetricType.POWER.name: (
[region.power.mean],
region.power.metric_md,
),
MetricType.CPU_UTIL.name: (
[region.cpu_util.mean],
region.cpu_util.metric_md,
),
MetricType.GPU_UTIL.name: (
[region.gpu_util.mean],
region.gpu_util.metric_md,
),
metricType.name: (
[stats.mean],
stats.metric_md,
)
for metricType, stats in region.metrics.items()
if metricType in regionMetrics
}
else:
region_data[region_name][MetricType.RUNTIME.name][0].append(
region.runtime.mean
)
region_data[region_name][MetricType.POWER.name][0].append(
region.power.mean
)
region_data[region_name][MetricType.CPU_UTIL.name][0].append(
region.cpu_util.mean
)
region_data[region_name][MetricType.GPU_UTIL.name][0].append(
region.gpu_util.mean
)
for metricType, stats in region.metrics.items():
if metricType in regionMetrics:
region_data[region_name][metricType.name][0].append(
stats.mean
)

for region_name, region in region_data.items():
for metric_name, data in region.items():
values = data[0]
metadata = data[1]
if len(values) > 1:
mean = np.mean(values)
std = np.std(values)
mean = np.mean(values) # type: ignore
std = np.std(values) # type: ignore
if metadata.unit.name in bench_units:
mag = bench_units[metadata.unit.name]
old_mag = metadata.mag
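For orientation, the 'bench' format feeds the JSON list consumed by the github continuous benchmark action; a single hypothetical entry might look roughly like the following, where the field names are assumed from that action's custom-metric schema and the values are purely illustrative:

example_entry = {
    "name": "ENERGY",
    "unit": "kJ",
    "value": 42.7,
}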
17 changes: 8 additions & 9 deletions perun/io/hdf5.py
@@ -230,13 +230,13 @@ def _addRegions(h5Group: h5py.Group, regions: Dict[str, Region]):

def _addRegion(h5Group: h5py.Group, region: Region):
region_group = h5Group.create_group(region.id)
_addMetric(region_group, region.cpu_util) # type: ignore
_addMetric(region_group, region.gpu_util) # type: ignore
_addMetric(region_group, region.power) # type: ignore
_addMetric(region_group, region.runs_per_rank) # type: ignore
_addMetric(region_group, region.runtime) # type: ignore
region_group.attrs["id"] = region.id
region_group.attrs["processed"] = region.processed

region_metrics = region_group.create_group("metrics")
_addMetric(region_group, region.runs_per_rank) # type: ignore
for metricType, stat in region.metrics.items():
_addMetric(region_metrics, stat)
raw_data_group = region_group.create_group("raw_data")
for rank, data in region.raw_data.items():
raw_data_group.create_dataset(str(rank), data=data)
@@ -254,10 +254,9 @@ def _readRegion(group: h5py.Group) -> Region:
regionObj.id = group.attrs["id"] # type: ignore
regionObj.processed = group.attrs["processed"] # type: ignore

regionObj.cpu_util = _readMetric(group["CPU_UTIL"]) # type: ignore
regionObj.gpu_util = _readMetric(group["GPU_UTIL"]) # type: ignore
regionObj.power = _readMetric(group["POWER"]) # type: ignore
regionObj.runtime = _readMetric(group["RUNTIME"]) # type: ignore
for metric_group in group["metrics"].values(): # type: ignore
stat: Stats = _readMetric(metric_group) # type: ignore
regionObj.metrics[stat.type] = stat
regionObj.runs_per_rank = _readMetric(group["N_RUNS"]) # type: ignore

raw_data_group = group["raw_data"]
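The on-disk layout produced by _addRegion above is roughly the following sketch; the per-stat group names come from _addMetric and are assumed to match the metric type, which is what _readRegion reads back:

# region_group/
#   raw_data/   one dataset per rank
#   metrics/    one Stats entry per MetricType in region.metrics
#   N_RUNS      runs_per_rank, still stored at the region level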
46 changes: 33 additions & 13 deletions perun/io/text_report.py
@@ -15,9 +15,17 @@
MetricType.GPU_POWER,
MetricType.GPU_MEM,
MetricType.DRAM_POWER,
MetricType.MEM_UTIL,
MetricType.DRAM_MEM,
]

regionMetrics = {
MetricType.RUNTIME: "Avg Runtime",
MetricType.POWER: "Avg Power",
MetricType.CPU_UTIL: "Avg CPU Util",
MetricType.DRAM_MEM: "Avg RAM Mem Util",
MetricType.GPU_MEM: "Avg GPU Mem Util",
}


def textReport(dataNode: DataNode, mr_id: str) -> str:
"""Create text report from selected MULTI_RUN node.
@@ -61,17 +69,19 @@ def textReport(dataNode: DataNode, mr_id: str) -> str:
if run_node.regions:
for region_name, region in run_node.regions.items():
if region.processed:
region_rows.append(
row = {
"Round #": run_node.id,
"Function": region_name,
"Avg Calls / Rank": region.runs_per_rank.mean,
}
row.update(
{
"Round #": run_node.id,
"Function": region_name,
"Avg Calls / Rank": region.runs_per_rank.mean,
"Avg Runtime": value2MeanStdStr(region.runtime),
"Avg Power": value2MeanStdStr(region.power),
"Avg CPU Util": value2MeanStdStr(region.cpu_util),
"Avg GPU Mem Util": value2MeanStdStr(region.gpu_util),
regionMetrics[metric_type]: value2MeanStdStr(stats)
for metric_type, stats in region.metrics.items()
if metric_type in regionMetrics
}
)
region_rows.append(row)
for host_name, host_node in run_node.nodes.items():
entry = {
"Round #": run_number,
@@ -91,14 +101,24 @@ def textReport(dataNode: DataNode, mr_id: str) -> str:

host_device_rows.append(entry)

mr_table = pd.DataFrame.from_records(host_device_rows).sort_values(by="Host")
mr_report_str = f"RUN ID: {mr_id}\n\n" + mr_table.to_markdown(index=False) + "\n\n"
mr_table = pd.DataFrame.from_records(host_device_rows).sort_values(
by=["Host", "Round #"]
)
mr_report_str = (
f"RUN ID: {mr_id}\n\n"
+ mr_table.to_markdown(index=False, stralign="right")
+ "\n\n"
)

# Regions
if len(region_rows) > 0:
region_table = pd.DataFrame.from_records(region_rows).sort_values("Function")
region_table = pd.DataFrame.from_records(region_rows).sort_values(
by=["Function", "Round #"]
)
region_report_str = (
"Monitored Functions\n\n" + region_table.to_markdown(index=False) + "\n\n"
"Monitored Functions\n\n"
+ region_table.to_markdown(index=False, stralign="right")
+ "\n\n"
)
else:
region_report_str = ""
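To illustrate the right-aligned markdown output now used for both tables, a minimal standalone pandas example; the row values are placeholders, not real measurements:

import pandas as pd

rows = [
    {"Round #": 1, "Function": "train", "Avg Runtime": "1.21 ± 0.05 s"},
    {"Round #": 0, "Function": "train", "Avg Runtime": "1.23 ± 0.04 s"},
]
table = pd.DataFrame.from_records(rows).sort_values(by=["Function", "Round #"])
print(table.to_markdown(index=False, stralign="right"))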
2 changes: 1 addition & 1 deletion perun/io/util.py
@@ -82,4 +82,4 @@ def value2MeanStdStr(stats: Stats) -> str:
String represenation
"""
tfactor, new_mag = getTFactorMag(stats.mean, stats.metric_md)
return f"{stats.mean/tfactor:.3f}±{stats.std/tfactor:.3f} {new_mag.symbol}{stats.metric_md.unit.value}"
return f"{stats.mean/tfactor:.2f} ± {stats.std/tfactor:.2f} {new_mag.symbol}{stats.metric_md.unit.value}"