From 56963a962724884eee883ef9fd6a3291a52dbf16 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 22 Nov 2024 12:40:01 -0500 Subject: [PATCH 1/2] ENH: add --fail-time option and by default remove all outputs if fails fast The most annoying UX for me to use duct "by default" is the fact that while preparing target command invocation, I can make mistakes. And then duct keeps generating those reports which are inconsueqntial and need cleaning (removal) etc. And such reports have no value really. Hence, by default, I decided that it is not worth keeping them. If it is desired, user can use this added option to set --fail-time to 0. But also it might be that command takes awhile to start to only to fail, and for such cases users could raise it also higher and also have logs cleared for any failed command (set to -1). --- src/con_duct/__main__.py | 36 +++++++++++++++++++++++++++++++----- test/test_execution.py | 24 ++++++++++++++++-------- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/con_duct/__main__.py b/src/con_duct/__main__.py index 402aa76..9fb5efc 100755 --- a/src/con_duct/__main__.py +++ b/src/con_duct/__main__.py @@ -672,6 +672,7 @@ class Arguments: output_prefix: str sample_interval: float report_interval: float + fail_time: float clobber: bool capture_outputs: Outputs outputs: Outputs @@ -772,6 +773,16 @@ def from_argv( default=float(os.getenv("DUCT_REPORT_INTERVAL", "60.0")), help="Interval in seconds at which to report aggregated data.", ) + parser.add_argument( + "--fail-time", + "--f-t", + type=float, + default=float(os.getenv("DUCT_FAIL_TIME", "3.0")), + help="If command fails in less than this specified time, duct would remove logs. " + "Set to 0 if you would like to keep logs for a failing command regardless of its run time. " + "Set to negative (e.g. -1) if you would like to not keep logs for any failing command.", + ) + parser.add_argument( "-c", "--capture-outputs", @@ -807,6 +818,7 @@ def from_argv( output_prefix=args.output_prefix, sample_interval=args.sample_interval, report_interval=args.report_interval, + fail_time=args.fail_time, capture_outputs=args.capture_outputs, outputs=args.outputs, record_types=args.record_types, @@ -954,6 +966,14 @@ def safe_close_files(file_list: Iterable[Any]) -> None: pass +def remove_files(log_paths: LogPaths, assert_empty: bool = False) -> None: + for _, file_path in log_paths: + if os.path.exists(file_path): + if assert_empty: + assert os.stat(file_path).st_size == 0 + os.remove(file_path) + + def main() -> None: logging.basicConfig( format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s", @@ -1013,10 +1033,7 @@ def execute(args: Arguments) -> int: # We should remove log etc files since they are 0-sized # degenerates etc safe_close_files(files_to_close) - for _, file_path in log_paths: - if os.path.exists(file_path): - assert os.stat(file_path).st_size == 0 - os.remove(file_path) + remove_files(log_paths, assert_empty=True) # mimicking behavior of bash and zsh. print(f"{args.command}: command not found", file=sys.stderr) return 127 # seems what zsh and bash return then @@ -1081,7 +1098,16 @@ def execute(args: Arguments) -> int: report.run_time_seconds = f"{report.end_time - report.start_time}" system_logs.write(report.dump_json()) safe_close_files(files_to_close) - lgr.info(report.execution_summary_formatted) + if process.returncode != 0 and ( + report.elapsed_time < args.fail_time or args.fail_time < 0 + ): + lgr.info( + "Removing log files since command failed%s.", + f" in less than {args.fail_time} seconds" if args.fail_time > 0 else "", + ) + remove_files(log_paths) + else: + lgr.info(report.execution_summary_formatted) return report.process.returncode diff --git a/test/test_execution.py b/test/test_execution.py index 2042cdf..24dac3c 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -67,6 +67,7 @@ def test_sanity_red( args = Arguments.from_argv( ["sh", "-c", f"exit {exit_code}"], output_prefix=temp_output_dir, + fail_time=0, # keep log files regardless of exit code ) caplog.set_level("INFO") assert execute(args) == exit_code @@ -202,12 +203,15 @@ def test_execute_unknown_command( assert_expected_files(temp_output_dir, exists=False) -def test_signal_exit(temp_output_dir: str) -> None: +@pytest.mark.parametrize("fail_time", [None, 0, 10, -1, -3.14]) +def test_signal_exit(temp_output_dir: str, fail_time: float | None) -> None: def runner() -> int: + kws = {} + if fail_time is not None: + kws["fail_time"] = fail_time args = Arguments.from_argv( - ["sleep", "60.74016230000801"], - output_prefix=temp_output_dir, + ["sleep", "60.74016230000801"], output_prefix=temp_output_dir, **kws ) return execute(args) @@ -231,12 +235,16 @@ def runner() -> int: raise RuntimeError("Failed to find sleep process") thread.join() - # Cannot retrieve the exit code from the thread, it is written to the file - with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info: - info_data = json.loads(info.read()) - exit_code = info_data["execution_summary"]["exit_code"] - assert exit_code == 128 + 15 + if fail_time is None or fail_time != 0: + assert_expected_files(temp_output_dir, exists=False) + else: + # Cannot retrieve the exit code from the thread, it is written to the file + with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info: + info_data = json.loads(info.read()) + + exit_code = info_data["execution_summary"]["exit_code"] + assert exit_code == 128 + 15 def test_duct_as_executable(temp_output_dir: str) -> None: From 4fcdcc4a6a72f6c6a68eb16aa72c2484293b9e41 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 3 Dec 2024 16:13:45 -0500 Subject: [PATCH 2/2] [DATALAD RUNCMD] Autoupdate readme === Do not change lines below === { "chain": [ "a34a0633e999f95ddfe6710e11f123dbd4b1c5bc", "26f4f0eec76de27fea98e153460cae502bd25b61" ], "cmd": "./.update-readme-help.py", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b32dd4d..952b38a 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ usage: duct [-h] [--version] [-p OUTPUT_PREFIX] [--summary-format SUMMARY_FORMAT] [--colors] [--clobber] [-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q] [--sample-interval SAMPLE_INTERVAL] - [--report-interval REPORT_INTERVAL] [-c {all,none,stdout,stderr}] - [-o {all,none,stdout,stderr}] + [--report-interval REPORT_INTERVAL] [--fail-time FAIL_TIME] + [-c {all,none,stdout,stderr}] [-o {all,none,stdout,stderr}] [-t {all,system-summary,processes-samples}] command [command_args ...] ... @@ -105,6 +105,12 @@ options: --report-interval REPORT_INTERVAL, --r-i REPORT_INTERVAL Interval in seconds at which to report aggregated data. (default: 60.0) + --fail-time FAIL_TIME, --f-t FAIL_TIME + If command fails in less than this specified time, + duct would remove logs. Set to 0 if you would like to + keep logs for a failing command regardless of its run + time. Set to negative (e.g. -1) if you would like to + not keep logs for any failing command. (default: 3.0) -c {all,none,stdout,stderr}, --capture-outputs {all,none,stdout,stderr} Record stdout, stderr, all, or none to log files. You can also provide value via DUCT_CAPTURE_OUTPUTS env