From 896972a8d537ff9a376a12024640e109cbecc84c Mon Sep 17 00:00:00 2001
From: Austin Macdonald
Date: Mon, 11 Nov 2024 15:20:50 -0600
Subject: [PATCH] enh: Add con-duct ls

Fixes: https://github.com/con/duct/issues/185

Note: Ignore pyout typing; +1 upstream, but fix too big
https://github.com/pyout/pyout/issues/142
https://github.com/pyout/pyout/pull/151
---
 README.md                  |  10 ++-
 setup.cfg                  |   2 +
 src/con_duct/__main__.py   |   7 +-
 src/con_duct/suite/ls.py   | 167 +++++++++++++++++++++++++++++++++++++
 src/con_duct/suite/main.py |  43 ++++++++++
 tox.ini                    |   1 +
 6 files changed, 223 insertions(+), 7 deletions(-)
 create mode 100644 src/con_duct/suite/ls.py

diff --git a/README.md b/README.md
index c3fede9..8fd2b0c 100644
--- a/README.md
+++ b/README.md
@@ -139,12 +139,14 @@ usage: con-duct [options]
 
 A suite of commands to manage or manipulate con-duct logs.
 
 positional arguments:
-  {pp,plot}   Available subcommands
-    pp        Pretty print a JSON log.
-    plot      Plot resource usage for an execution.
+  {pp,plot,ls}  Available subcommands
+    pp          Pretty print a JSON log.
+    plot        Plot resource usage for an execution.
+    ls          Print execution information for all runs matching
+                DUCT_OUTPUT_PREFIX.
 
 options:
-  -h, --help  show this help message and exit
+  -h, --help    show this help message and exit
 
 ```
diff --git a/setup.cfg b/setup.cfg
index 318ddd8..520ff91 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -58,6 +58,8 @@ where = src
 
 [options.extras_require]
 all =
     matplotlib
+    PyYAML
+    pyout
 
 [options.entry_points]
diff --git a/src/con_duct/__main__.py b/src/con_duct/__main__.py
index 402aa76..8a6ce3d 100755
--- a/src/con_duct/__main__.py
+++ b/src/con_duct/__main__.py
@@ -28,6 +28,9 @@
 
 lgr = logging.getLogger("con-duct")
 DEFAULT_LOG_LEVEL = os.environ.get("DUCT_LOG_LEVEL", "INFO").upper()
+DUCT_OUTPUT_PREFIX = os.getenv(
+    "DUCT_OUTPUT_PREFIX", ".duct/logs/{datetime_filesafe}-{pid}_"
+)
 ENV_PREFIXES = ("PBS_", "SLURM_", "OSG")
 SUFFIXES = {
     "stdout": "stdout",
@@ -712,9 +715,7 @@ def from_argv(
             "-p",
             "--output-prefix",
             type=str,
-            default=os.getenv(
-                "DUCT_OUTPUT_PREFIX", ".duct/logs/{datetime_filesafe}-{pid}_"
-            ),
+            default=DUCT_OUTPUT_PREFIX,
             help="File string format to be used as a prefix for the files -- the captured "
             "stdout and stderr and the resource usage logs. The understood variables are "
             "{datetime}, {datetime_filesafe}, and {pid}. "
" diff --git a/src/con_duct/suite/ls.py b/src/con_duct/suite/ls.py new file mode 100644 index 0000000..bec5745 --- /dev/null +++ b/src/con_duct/suite/ls.py @@ -0,0 +1,167 @@ +import argparse +from collections import OrderedDict +import glob +import json +import logging +from typing import List +from packaging.version import Version + +try: + import pyout # type: ignore +except ImportError: + pyout = None +import yaml +from con_duct.__main__ import SummaryFormatter + +lgr = logging.getLogger(__name__) + + +VALUE_TRANSFORMATION_MAP = { + "exit_code": "{value!E}", + "wall_clock_time": "{value:.3f} sec", + "peak_rss": "{value!S}", + "memory_total": "{value!S}", + "average_rss": "{value!S}", + "peak_vsz": "{value!S}", + "average_vsz": "{value!S}", + "peak_pmem": "{value:.2f!N}%", + "average_pmem": "{value:.2f!N}%", + "peak_pcpu": "{value:.2f!N}%", + "average_pcpu": "{value:.2f!N}%", + "start_time": "{value:.2f!N}", + "end_time": "{value:.2f!N}", +} +NON_TRANSFORMED_FIELDS = [ + "hostname", + "uid", + "user", + "gpu", + "duct_version", + "schema_version", + "command", + "prefix", + "num_samples", + "num_reports", + "stderr", + "usage", + "info", + "prefix", +] +LS_FIELD_CHOICES = list(VALUE_TRANSFORMATION_MAP.keys()) + NON_TRANSFORMED_FIELDS +MINIMUM_SCHEMA_VERSION = "0.2.0" + + +def load_duct_runs(info_files: List[str]) -> List[dict]: + loaded = [] + for info_file in info_files: + with open(info_file) as file: + try: + this = json.load(file) + # this["prefix"] is the path at execution time, could have moved + this["prefix"] = info_file.split("info.json")[0] + if Version(this["schema_version"]) >= Version(MINIMUM_SCHEMA_VERSION): + loaded.append(this) + else: + # TODO lower log level once --log-level is respected + lgr.warning( + f"Skipping {this['prefix']}, schema version {this['schema_version']} " + f"is below minimum schema version {MINIMUM_SCHEMA_VERSION}." + ) + except Exception as exc: + lgr.warning("Failed to load file %s: %s", file, exc) + return loaded + + +def process_run_data( + run_data_list: List[str], fields: List[str], formatter +) -> List[OrderedDict]: + output_rows = [] + for row in run_data_list: + flattened = _flatten_dict(row) + try: + restricted = _restrict_row(fields, flattened) + except KeyError: + lgr.warning( + "Failed to pick fields of interest from a record, skipping. Record was: %s", + list(flattened), + ) + continue + formatted = _format_row(restricted, formatter) + output_rows.append(formatted) + return output_rows + + +def _flatten_dict(d): + items = [] + for k, v in d.items(): + if isinstance(v, dict): + items.extend(_flatten_dict(v).items()) + else: + items.append((k, v)) + return dict(items) + + +def _restrict_row(field_list, row): + restricted = OrderedDict() + # prefix is the "primary key", its the only field guaranteed to be unique. 
+ restricted["prefix"] = row["prefix"] + for field in field_list: + if field != "prefix" and field in row: + restricted[field.split(".")[-1]] = row[field] + return restricted + + +def _format_row(row, formatter): + transformed = OrderedDict() + for col, value in row.items(): + if transformation := VALUE_TRANSFORMATION_MAP.get(col): + value = formatter.format(transformation, value=value) + transformed[col] = value + return transformed + + +def pyout_ls(run_data_list): + # Generate Tabular table to output + with pyout.Tabular( + style=dict( + header_=dict(bold=True, transform=str.upper), + ), + mode="final", + ) as table: + for row in run_data_list: + table(row) + + +def ls(args: argparse.Namespace) -> int: + info_files = [] + for each in args.pattern: + matches = glob.glob(f"{each}_info.json") + info_files.extend(matches) + run_data_raw = load_duct_runs(info_files) + formatter = SummaryFormatter(enable_colors=args.colors) + output_rows = process_run_data(run_data_raw, args.fields, formatter) + + if args.format == "auto": + args.format = "summaries" if pyout is None else "pyout" + + if args.format == "summaries": + for row in output_rows: + for col, value in row.items(): + if not col == "prefix": + col = f"\t{col}" + print(f"{col.replace('_', ' ').title()}: {value}") + elif args.format == "pyout": + if pyout is None: + raise RuntimeError("Install pyout for pyout output") + pyout_ls(output_rows) + elif args.format == "json": + print(json.dumps(output_rows)) + elif args.format == "json_pp": + print(json.dumps(output_rows, indent=True)) + elif args.format == "yaml": + print(yaml.dump(output_rows, default_flow_style=False)) + else: + raise RuntimeError( + f"Unexpected format encountered: {args.format}. This should have been caught by argparse.", + ) + return 0 diff --git a/src/con_duct/suite/main.py b/src/con_duct/suite/main.py index f5baf9b..370643a 100644 --- a/src/con_duct/suite/main.py +++ b/src/con_duct/suite/main.py @@ -1,6 +1,9 @@ import argparse +import os import sys from typing import List, Optional +from con_duct.__main__ import DUCT_OUTPUT_PREFIX +from con_duct.suite.ls import LS_FIELD_CHOICES, ls from con_duct.suite.plot import matplotlib_plot from con_duct.suite.pprint_json import pprint_json @@ -46,6 +49,46 @@ def main(argv: Optional[List[str]] = None) -> None: # ) parser_plot.set_defaults(func=matplotlib_plot) + parser_ls = subparsers.add_parser( + "ls", + help="Print execution information for all runs matching DUCT_OUTPUT_PREFIX.", + ) + parser_ls.add_argument( + "-f", + "--format", + choices=("auto", "pyout", "summaries", "json", "json_pp", "yaml"), + default="auto", + help="Output format. 'auto' chooses 'pyout' if pyout library is installed, 'summaries' otherwise.", + ) + parser_ls.add_argument( + "-F", + "--fields", + nargs="+", + metavar="FIELD", + help=f"List of fields to show. Prefix is always included implicitly as the first field. 
" + f"Available choices: {', '.join(LS_FIELD_CHOICES)}.", + choices=LS_FIELD_CHOICES, + default=[ + "command", + "exit_code", + "wall_clock_time", + "peak_rss", + ], + ) + parser_ls.add_argument( + "--colors", + action="store_true", + default=os.getenv("DUCT_COLORS", False), + help="Use colors in duct output.", + ) + parser_ls.add_argument( + "pattern", + nargs="*", + default=[f"{DUCT_OUTPUT_PREFIX[:DUCT_OUTPUT_PREFIX.index('{')]}*"], + help="Path(s) to list, supports globbing (defaults to the non-dynamic portion of DUCT_OUTPUT_PREFIX", + ) + parser_ls.set_defaults(func=ls) + args = parser.parse_args(argv) if args.command is None: diff --git a/tox.ini b/tox.ini index 4868b96..4e58001 100644 --- a/tox.ini +++ b/tox.ini @@ -26,6 +26,7 @@ commands = deps = mypy data-science-types # TODO replace archived, https://github.com/wearepal/data-science-types + types-PyYAML {[testenv]deps} commands = mypy src test