From 56963a962724884eee883ef9fd6a3291a52dbf16 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Nov 2024 12:40:01 -0500
Subject: [PATCH 1/2] ENH: add --fail-time option and by default remove all
 outputs if fails fast

The most annoying UX for me to use duct "by default" is the fact that while
preparing target command invocation, I can make mistakes.  And then duct keeps
generating those reports which are inconsueqntial and need cleaning (removal)
etc. And such reports have no value really.  Hence, by default, I decided that
it is not worth keeping them.  If it is desired, user can use this added option
to set --fail-time to 0.  But also it might be that command takes awhile to
start to only to fail, and for such cases users could raise it also higher and
also have logs cleared for any failed command (set to -1).
---
 src/con_duct/__main__.py | 36 +++++++++++++++++++++++++++++++-----
 test/test_execution.py   | 24 ++++++++++++++++--------
 2 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/con_duct/__main__.py b/src/con_duct/__main__.py
index 402aa76..9fb5efc 100755
--- a/src/con_duct/__main__.py
+++ b/src/con_duct/__main__.py
@@ -672,6 +672,7 @@ class Arguments:
     output_prefix: str
     sample_interval: float
     report_interval: float
+    fail_time: float
     clobber: bool
     capture_outputs: Outputs
     outputs: Outputs
@@ -772,6 +773,16 @@ def from_argv(
             default=float(os.getenv("DUCT_REPORT_INTERVAL", "60.0")),
             help="Interval in seconds at which to report aggregated data.",
         )
+        parser.add_argument(
+            "--fail-time",
+            "--f-t",
+            type=float,
+            default=float(os.getenv("DUCT_FAIL_TIME", "3.0")),
+            help="If command fails in less than this specified time, duct would remove logs. "
+            "Set to 0 if you would like to keep logs for a failing command regardless of its run time. "
+            "Set to negative (e.g. -1) if you would like to not keep logs for any failing command.",
+        )
+
         parser.add_argument(
             "-c",
             "--capture-outputs",
@@ -807,6 +818,7 @@ def from_argv(
             output_prefix=args.output_prefix,
             sample_interval=args.sample_interval,
             report_interval=args.report_interval,
+            fail_time=args.fail_time,
             capture_outputs=args.capture_outputs,
             outputs=args.outputs,
             record_types=args.record_types,
@@ -954,6 +966,14 @@ def safe_close_files(file_list: Iterable[Any]) -> None:
             pass
 
 
+def remove_files(log_paths: LogPaths, assert_empty: bool = False) -> None:
+    for _, file_path in log_paths:
+        if os.path.exists(file_path):
+            if assert_empty:
+                assert os.stat(file_path).st_size == 0
+            os.remove(file_path)
+
+
 def main() -> None:
     logging.basicConfig(
         format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s",
@@ -1013,10 +1033,7 @@ def execute(args: Arguments) -> int:
         # We should remove log etc files since they are 0-sized
         # degenerates etc
         safe_close_files(files_to_close)
-        for _, file_path in log_paths:
-            if os.path.exists(file_path):
-                assert os.stat(file_path).st_size == 0
-                os.remove(file_path)
+        remove_files(log_paths, assert_empty=True)
         # mimicking behavior of bash and zsh.
         print(f"{args.command}: command not found", file=sys.stderr)
         return 127  # seems what zsh and bash return then
@@ -1081,7 +1098,16 @@ def execute(args: Arguments) -> int:
             report.run_time_seconds = f"{report.end_time - report.start_time}"
             system_logs.write(report.dump_json())
     safe_close_files(files_to_close)
-    lgr.info(report.execution_summary_formatted)
+    if process.returncode != 0 and (
+        report.elapsed_time < args.fail_time or args.fail_time < 0
+    ):
+        lgr.info(
+            "Removing log files since command failed%s.",
+            f" in less than {args.fail_time} seconds" if args.fail_time > 0 else "",
+        )
+        remove_files(log_paths)
+    else:
+        lgr.info(report.execution_summary_formatted)
     return report.process.returncode
 
 
diff --git a/test/test_execution.py b/test/test_execution.py
index 2042cdf..24dac3c 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -67,6 +67,7 @@ def test_sanity_red(
     args = Arguments.from_argv(
         ["sh", "-c", f"exit {exit_code}"],
         output_prefix=temp_output_dir,
+        fail_time=0,  # keep log files regardless of exit code
     )
     caplog.set_level("INFO")
     assert execute(args) == exit_code
@@ -202,12 +203,15 @@ def test_execute_unknown_command(
     assert_expected_files(temp_output_dir, exists=False)
 
 
-def test_signal_exit(temp_output_dir: str) -> None:
+@pytest.mark.parametrize("fail_time", [None, 0, 10, -1, -3.14])
+def test_signal_exit(temp_output_dir: str, fail_time: float | None) -> None:
 
     def runner() -> int:
+        kws = {}
+        if fail_time is not None:
+            kws["fail_time"] = fail_time
         args = Arguments.from_argv(
-            ["sleep", "60.74016230000801"],
-            output_prefix=temp_output_dir,
+            ["sleep", "60.74016230000801"], output_prefix=temp_output_dir, **kws
         )
         return execute(args)
 
@@ -231,12 +235,16 @@ def runner() -> int:
         raise RuntimeError("Failed to find sleep process")
 
     thread.join()
-    # Cannot retrieve the exit code from the thread, it is written to the file
-    with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
-        info_data = json.loads(info.read())
 
-    exit_code = info_data["execution_summary"]["exit_code"]
-    assert exit_code == 128 + 15
+    if fail_time is None or fail_time != 0:
+        assert_expected_files(temp_output_dir, exists=False)
+    else:
+        # Cannot retrieve the exit code from the thread, it is written to the file
+        with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
+            info_data = json.loads(info.read())
+
+        exit_code = info_data["execution_summary"]["exit_code"]
+        assert exit_code == 128 + 15
 
 
 def test_duct_as_executable(temp_output_dir: str) -> None:

From 4fcdcc4a6a72f6c6a68eb16aa72c2484293b9e41 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Tue, 3 Dec 2024 16:13:45 -0500
Subject: [PATCH 2/2] [DATALAD RUNCMD] Autoupdate readme

=== Do not change lines below ===
{
 "chain": [
  "a34a0633e999f95ddfe6710e11f123dbd4b1c5bc",
  "26f4f0eec76de27fea98e153460cae502bd25b61"
 ],
 "cmd": "./.update-readme-help.py",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 README.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b32dd4d..952b38a 100644
--- a/README.md
+++ b/README.md
@@ -27,8 +27,8 @@ usage: duct [-h] [--version] [-p OUTPUT_PREFIX]
             [--summary-format SUMMARY_FORMAT] [--colors] [--clobber]
             [-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q]
             [--sample-interval SAMPLE_INTERVAL]
-            [--report-interval REPORT_INTERVAL] [-c {all,none,stdout,stderr}]
-            [-o {all,none,stdout,stderr}]
+            [--report-interval REPORT_INTERVAL] [--fail-time FAIL_TIME]
+            [-c {all,none,stdout,stderr}] [-o {all,none,stdout,stderr}]
             [-t {all,system-summary,processes-samples}]
             command [command_args ...] ...
 
@@ -105,6 +105,12 @@ options:
   --report-interval REPORT_INTERVAL, --r-i REPORT_INTERVAL
                         Interval in seconds at which to report aggregated
                         data. (default: 60.0)
+  --fail-time FAIL_TIME, --f-t FAIL_TIME
+                        If command fails in less than this specified time,
+                        duct would remove logs. Set to 0 if you would like to
+                        keep logs for a failing command regardless of its run
+                        time. Set to negative (e.g. -1) if you would like to
+                        not keep logs for any failing command. (default: 3.0)
   -c {all,none,stdout,stderr}, --capture-outputs {all,none,stdout,stderr}
                         Record stdout, stderr, all, or none to log files. You
                         can also provide value via DUCT_CAPTURE_OUTPUTS env