Skip to content

Commit

Permalink
enh: Add con-duct ls
Browse files Browse the repository at this point in the history
Fixes: #185
  • Loading branch information
asmacdo committed Feb 6, 2025
1 parent 50a711e commit 2bd83e4
Show file tree
Hide file tree
Showing 8 changed files with 444 additions and 7 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,14 @@ usage: con-duct <command> [options]
A suite of commands to manage or manipulate con-duct logs.
positional arguments:
{pp,plot} Available subcommands
pp Pretty print a JSON log.
plot Plot resource usage for an execution.
{pp,plot,ls} Available subcommands
pp Pretty print a JSON log.
plot Plot resource usage for an execution.
ls Print execution information for all runs matching
DUCT_OUTPUT_PREFIX.
options:
-h, --help show this help message and exit
-h, --help show this help message and exit
```
<!-- END EXTRAS HELP -->
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ where = src
[options.extras_require]
all =
matplotlib
PyYAML
pyout


[options.entry_points]
Expand Down
7 changes: 4 additions & 3 deletions src/con_duct/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
# Module-level logger for the duct CLI.
lgr = logging.getLogger("con-duct")
# Log level is overridable via the DUCT_LOG_LEVEL environment variable.
DEFAULT_LOG_LEVEL = os.environ.get("DUCT_LOG_LEVEL", "INFO").upper()

# Default output-file prefix, overridable via DUCT_OUTPUT_PREFIX.
# Understood template fields: {datetime}, {datetime_filesafe}, {pid}.
DUCT_OUTPUT_PREFIX = os.getenv(
"DUCT_OUTPUT_PREFIX", ".duct/logs/{datetime_filesafe}-{pid}_"
)
# Environment-variable prefixes captured from common batch schedulers.
ENV_PREFIXES = ("PBS_", "SLURM_", "OSG")
SUFFIXES = {
"stdout": "stdout",
Expand Down Expand Up @@ -712,9 +715,7 @@ def from_argv(
"-p",
"--output-prefix",
type=str,
default=os.getenv(
"DUCT_OUTPUT_PREFIX", ".duct/logs/{datetime_filesafe}-{pid}_"
),
default=DUCT_OUTPUT_PREFIX,
help="File string format to be used as a prefix for the files -- the captured "
"stdout and stderr and the resource usage logs. The understood variables are "
"{datetime}, {datetime_filesafe}, and {pid}. "
Expand Down
173 changes: 173 additions & 0 deletions src/con_duct/suite/ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import argparse
from collections import OrderedDict
import json
import logging
from typing import Any, Dict, List, Optional
from packaging.version import Version

try:
import pyout # type: ignore
except ImportError:
pyout = None

Check warning on line 11 in src/con_duct/suite/ls.py

View check run for this annotation

Codecov / codecov/patch

src/con_duct/suite/ls.py#L10-L11

Added lines #L10 - L11 were not covered by tests
import yaml
from con_duct.__main__ import SummaryFormatter

lgr = logging.getLogger(__name__)

# Per-field format templates applied by SummaryFormatter in _format_row.
# NOTE(review): !E / !S / !N appear to be SummaryFormatter's custom
# conversions (exit-code coloring, size humanization, numeric) -- confirm
# against SummaryFormatter before relying on them.
VALUE_TRANSFORMATION_MAP: Dict[str, str] = {
    "exit_code": "{value!E}",
    "wall_clock_time": "{value:.3f} sec",
    "peak_rss": "{value!S}",
    "memory_total": "{value!S}",
    "average_rss": "{value!S}",
    "peak_vsz": "{value!S}",
    "average_vsz": "{value!S}",
    "peak_pmem": "{value:.2f!N}%",
    "average_pmem": "{value:.2f!N}%",
    "peak_pcpu": "{value:.2f!N}%",
    "average_pcpu": "{value:.2f!N}%",
    "start_time": "{value:.2f!N}",
    "end_time": "{value:.2f!N}",
}

# Fields rendered verbatim, with no format template applied.
# ("prefix" was previously listed twice; the duplicate produced a repeated
# entry in LS_FIELD_CHOICES and in the --fields help text.)
NON_TRANSFORMED_FIELDS: List[str] = [
    "hostname",
    "uid",
    "user",
    "gpu",
    "duct_version",
    "schema_version",
    "command",
    "prefix",
    "num_samples",
    "num_reports",
    "stderr",
    "usage",
    "info",
]

# Every valid value for `con-duct ls --fields`.
LS_FIELD_CHOICES: List[str] = (
    list(VALUE_TRANSFORMATION_MAP.keys()) + NON_TRANSFORMED_FIELDS
)
# Records with an older schema_version are skipped by load_duct_runs.
MINIMUM_SCHEMA_VERSION: str = "0.2.0"


def load_duct_runs(info_files: List[str]) -> List[Dict[str, Any]]:
    """Load duct ``info.json`` records, skipping invalid or outdated ones.

    Parameters
    ----------
    info_files
        Paths to ``*info.json`` files.

    Returns
    -------
    Parsed records whose ``schema_version`` is at least
    MINIMUM_SCHEMA_VERSION.  Each record's ``"prefix"`` is rewritten from
    the file's actual location, since the prefix stored at execution time
    may be stale if the logs were moved.
    """
    loaded: List[Dict[str, Any]] = []
    for info_file in info_files:
        # The open() itself is inside the try so that a missing or
        # unreadable file is skipped with a warning instead of aborting
        # the whole listing.
        try:
            with open(info_file) as file:
                this: Dict[str, Any] = json.load(file)
            # this["prefix"] is the path at execution time, could have moved
            this["prefix"] = info_file.split("info.json")[0]
            if Version(this["schema_version"]) >= Version(MINIMUM_SCHEMA_VERSION):
                loaded.append(this)
            else:
                # TODO lower log level once --log-level is respected
                lgr.warning(
                    f"Skipping {this['prefix']}, schema version {this['schema_version']} "
                    f"is below minimum schema version {MINIMUM_SCHEMA_VERSION}."
                )
        except Exception as exc:
            # Log the path, not the file object (whose repr is unhelpful).
            lgr.warning("Failed to load file %s: %s", info_file, exc)
    return loaded


def process_run_data(
    run_data_list: List[Dict[str, Any]], fields: List[str], formatter: SummaryFormatter
) -> List[OrderedDict[str, Any]]:
    """Flatten, field-restrict, and value-format each run record.

    Records from which the requested fields cannot be picked are skipped
    with a warning rather than failing the whole listing.
    """
    rows: List[OrderedDict[str, Any]] = []
    for record in run_data_list:
        flat = _flatten_dict(record)
        try:
            picked = _restrict_row(fields, flat)
        except KeyError:
            lgr.warning(
                "Failed to pick fields of interest from a record, skipping. Record was: %s",
                list(flat),
            )
        else:
            rows.append(_format_row(picked, formatter))
    return rows


def _flatten_dict(d: Dict[str, Any]) -> Dict[str, Any]:
items: List[tuple[str, Any]] = []
for k, v in d.items():
if isinstance(v, dict):
items.extend(_flatten_dict(v).items())
else:
items.append((k, v))
return dict(items)


def _restrict_row(field_list: List[str], row: Dict[str, Any]) -> OrderedDict[str, Any]:
restricted: OrderedDict[str, Any] = OrderedDict()
# prefix is the "primary key", its the only field guaranteed to be unique.
restricted["prefix"] = row["prefix"]
for field in field_list:
if field != "prefix" and field in row:
restricted[field.split(".")[-1]] = row[field]
return restricted


def _format_row(
    row: OrderedDict[str, Any], formatter: SummaryFormatter
) -> OrderedDict[str, Any]:
    """Apply per-field format templates; untemplated fields pass through."""
    rendered: OrderedDict[str, Any] = OrderedDict()
    for key, raw in row.items():
        template: Optional[str] = VALUE_TRANSFORMATION_MAP.get(key)
        rendered[key] = (
            raw if template is None else formatter.format(template, value=raw)
        )
    return rendered


def pyout_ls(run_data_list: List[OrderedDict[str, Any]]) -> None:
    """Render the rows as a tabular report via pyout.

    Raises RuntimeError when the optional pyout dependency is missing.
    """
    if pyout is None:
        raise RuntimeError("pyout is required for this output format.")

    table_style = dict(header_=dict(bold=True, transform=str.upper))
    with pyout.Tabular(style=table_style, mode="final") as table:
        for row in run_data_list:
            table(row)


def ls(args: argparse.Namespace) -> int:
    """Entry point for ``con-duct ls``: load matching runs and print them.

    Returns 0 on success; raises RuntimeError for an unexpected format
    (which argparse should have rejected earlier).

    NOTE(review): args.paths entries are used verbatim -- the default glob
    pattern appears to rely on shell expansion; confirm intended behavior.
    """
    info_files = [p for p in args.paths if p.endswith("info.json")]
    runs = load_duct_runs(info_files)
    formatter = SummaryFormatter(enable_colors=args.colors)
    rows = process_run_data(runs, args.fields, formatter)

    if args.format == "auto":
        # Prefer the rich pyout table when the library is available.
        args.format = "summaries" if pyout is None else "pyout"

    if args.format == "summaries":
        for row in rows:
            for col, value in row.items():
                # Indent every field under its run's prefix line.
                label = col if col == "prefix" else f"\t{col}"
                print(f"{label.replace('_', ' ').title()}: {value}")
    elif args.format == "pyout":
        if pyout is None:
            raise RuntimeError("Install pyout for pyout output")
        pyout_ls(rows)
    elif args.format == "json":
        print(json.dumps(rows))
    elif args.format == "json_pp":
        print(json.dumps(rows, indent=2))
    elif args.format == "yaml":
        plain = [dict(row) for row in rows]
        print(yaml.dump(plain, default_flow_style=False))
    else:
        raise RuntimeError(
            f"Unexpected format encountered: {args.format}. This should have been caught by argparse.",
        )
    return 0
44 changes: 44 additions & 0 deletions src/con_duct/suite/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import argparse
import os
import sys
from typing import List, Optional
from con_duct.__main__ import DUCT_OUTPUT_PREFIX
from con_duct.suite.ls import LS_FIELD_CHOICES, ls
from con_duct.suite.plot import matplotlib_plot
from con_duct.suite.pprint_json import pprint_json

Expand Down Expand Up @@ -46,6 +49,47 @@ def main(argv: Optional[List[str]] = None) -> None:
# )
parser_plot.set_defaults(func=matplotlib_plot)

parser_ls = subparsers.add_parser(
    "ls",
    help="Print execution information for all runs matching DUCT_OUTPUT_PREFIX.",
)
parser_ls.add_argument(
    "-f",
    "--format",
    choices=("auto", "pyout", "summaries", "json", "json_pp", "yaml"),
    default="auto",  # TODO dry
    # "TODO Fixme" previously leaked into the user-facing help text.
    help="Output format. 'auto' chooses 'pyout' if the pyout library is installed,"
    " 'summaries' otherwise.",
)
parser_ls.add_argument(
    "-F",
    "--fields",
    nargs="+",
    metavar="FIELD",
    help=f"List of fields to show. Prefix is always included implicitly as the first field. "
    f"Available choices: {', '.join(LS_FIELD_CHOICES)}.",
    choices=LS_FIELD_CHOICES,
    default=[
        "command",
        "exit_code",
        "wall_clock_time",
        "peak_rss",
    ],
)
parser_ls.add_argument(
    "--colors",
    action="store_true",
    # NOTE(review): when DUCT_COLORS is set the default is a *string*, so
    # any non-empty value (even "0") is truthy -- confirm intended semantics.
    default=os.getenv("DUCT_COLORS", False),
    help="Use colors in duct output.",
)
parser_ls.add_argument(
    "paths",
    nargs="*",
    # split() rather than index() so a custom DUCT_OUTPUT_PREFIX with no
    # "{" placeholder does not raise ValueError while building the parser.
    default=[f"{DUCT_OUTPUT_PREFIX.split('{', 1)[0]}*"],
    help="Path to duct report files, only `info.json` would be considered.",
)
parser_ls.set_defaults(func=ls)

args = parser.parse_args(argv)

if args.command is None:
Expand Down
73 changes: 73 additions & 0 deletions test/test_ls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
from unittest.mock import mock_open, patch
from con_duct.__main__ import SummaryFormatter
from con_duct.suite.ls import (
_flatten_dict,
_restrict_row,
load_duct_runs,
process_run_data,
)


def test_load_duct_runs_sanity() -> None:
    """A valid record loads and is keyed by the file-derived prefix."""
    record = {
        "schema_version": "0.2.1",
        "prefix": "/test/path_",
        "command": "echo hello",
    }
    with patch("builtins.open", mock_open(read_data=json.dumps(record))):
        loaded = load_duct_runs(["/test/path_info.json"])
    assert len(loaded) == 1
    assert loaded[0]["prefix"] == "/test/path_"


def test_load_duct_runs_skips_unsupported_schema() -> None:
    """Records older than the minimum schema version are dropped."""
    record = {
        "schema_version": "0.1.1",
        "prefix": "/test/path_",
        "command": "echo hello",
    }
    with patch("builtins.open", mock_open(read_data=json.dumps(record))):
        loaded = load_duct_runs(["/test/path_info.json"])
    assert len(loaded) == 0


def test_load_duct_runs_uses_filenames_not_stored_prefix() -> None:
    """The stale stored prefix is replaced by one derived from the path."""
    record = {
        "schema_version": "0.2.1",
        "prefix": "/test/not_anymore_",
        "command": "echo hello",
    }
    with patch("builtins.open", mock_open(read_data=json.dumps(record))):
        loaded = load_duct_runs(["/actual_filepath_info.json"])
    assert len(loaded) == 1
    assert loaded[0]["prefix"] == "/actual_filepath_"


def test_flatten_dict() -> None:
    """Nested keys are hoisted to the top level; parent keys are dropped."""
    assert _flatten_dict({"a": {"b": 1, "c": 2}, "d": 3}) == {"b": 1, "c": 2, "d": 3}


def test_restrict_row() -> None:
    """Only requested fields survive; prefix is always included."""
    restricted = _restrict_row(
        ["exit_code"], {"prefix": "/test/path", "exit_code": 0, "extra": "ignore"}
    )
    assert "prefix" in restricted
    assert "exit_code" in restricted
    assert "extra" not in restricted


def test_process_run_data() -> None:
    """Rows are restricted to requested fields and value-formatted."""
    runs = [
        {
            "prefix": "/test/path",
            "exit_code": 0,
            "wall_clock_time": 0.12345678,
        }
    ]
    formatter = SummaryFormatter(enable_colors=False)
    rows = process_run_data(runs, ["wall_clock_time"], formatter)
    assert isinstance(rows, list)
    assert rows[0]["prefix"] == "/test/path"
    assert "exit_code" not in rows[0]
    assert rows[0]["wall_clock_time"] == "0.123 sec"
Loading

0 comments on commit 2bd83e4

Please sign in to comment.