Merge pull request #227 from con/enh-f-t

Add --fail-time option and by default remove all outputs if command fails fast
con · Dec 3, 2024 · fb99001 · fb99001
2 parents 6b89dea + 4fcdcc4
commit fb99001
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -27,8 +27,8 @@ usage: duct [-h] [--version] [-p OUTPUT_PREFIX]
             [--summary-format SUMMARY_FORMAT] [--colors] [--clobber]
             [-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q]
             [--sample-interval SAMPLE_INTERVAL]
-            [--report-interval REPORT_INTERVAL] [-c {all,none,stdout,stderr}]
-            [-o {all,none,stdout,stderr}]
+            [--report-interval REPORT_INTERVAL] [--fail-time FAIL_TIME]
+            [-c {all,none,stdout,stderr}] [-o {all,none,stdout,stderr}]
             [-t {all,system-summary,processes-samples}]
             command [command_args ...] ...
 
@@ -105,6 +105,12 @@ options:
   --report-interval REPORT_INTERVAL, --r-i REPORT_INTERVAL
                         Interval in seconds at which to report aggregated
                         data. (default: 60.0)
+  --fail-time FAIL_TIME, --f-t FAIL_TIME
+                        If command fails in less than this specified time,
+                        duct would remove logs. Set to 0 if you would like to
+                        keep logs for a failing command regardless of its run
+                        time. Set to negative (e.g. -1) if you would like to
+                        not keep logs for any failing command. (default: 3.0)
   -c {all,none,stdout,stderr}, --capture-outputs {all,none,stdout,stderr}
                         Record stdout, stderr, all, or none to log files. You
                         can also provide value via DUCT_CAPTURE_OUTPUTS env

diff --git a/src/con_duct/__main__.py b/src/con_duct/__main__.py
@@ -672,6 +672,7 @@ class Arguments:
     output_prefix: str
     sample_interval: float
     report_interval: float
+    fail_time: float
     clobber: bool
     capture_outputs: Outputs
     outputs: Outputs
@@ -772,6 +773,16 @@ def from_argv(
             default=float(os.getenv("DUCT_REPORT_INTERVAL", "60.0")),
             help="Interval in seconds at which to report aggregated data.",
         )
+        parser.add_argument(
+            "--fail-time",
+            "--f-t",
+            type=float,
+            default=float(os.getenv("DUCT_FAIL_TIME", "3.0")),
+            help="If command fails in less than this specified time, duct would remove logs. "
+            "Set to 0 if you would like to keep logs for a failing command regardless of its run time. "
+            "Set to negative (e.g. -1) if you would like to not keep logs for any failing command.",
+        )
+
         parser.add_argument(
             "-c",
             "--capture-outputs",
@@ -807,6 +818,7 @@ def from_argv(
             output_prefix=args.output_prefix,
             sample_interval=args.sample_interval,
             report_interval=args.report_interval,
+            fail_time=args.fail_time,
             capture_outputs=args.capture_outputs,
             outputs=args.outputs,
             record_types=args.record_types,
@@ -954,6 +966,14 @@ def safe_close_files(file_list: Iterable[Any]) -> None:
             pass
 
 
+def remove_files(log_paths: LogPaths, assert_empty: bool = False) -> None:  # delete every log file that exists
+    for _, file_path in log_paths:  # first item of each pair is not used here
+        if os.path.exists(file_path):  # paths that are not on disk are skipped
+            if assert_empty:
+                assert os.stat(file_path).st_size == 0  # sanity check only; asserts are skipped under -O
+            os.remove(file_path)
+
+
 def main() -> None:
     logging.basicConfig(
         format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s",
@@ -1013,10 +1033,7 @@ def execute(args: Arguments) -> int:
         # Remove the log and other output files: they are zero-sized
         # leftovers at this point.
         safe_close_files(files_to_close)
-        for _, file_path in log_paths:
-            if os.path.exists(file_path):
-                assert os.stat(file_path).st_size == 0
-                os.remove(file_path)
+        remove_files(log_paths, assert_empty=True)
         # mimicking behavior of bash and zsh.
         print(f"{args.command}: command not found", file=sys.stderr)
         return 127  # seems what zsh and bash return then
@@ -1081,7 +1098,16 @@ def execute(args: Arguments) -> int:
             report.run_time_seconds = f"{report.end_time - report.start_time}"
             system_logs.write(report.dump_json())
     safe_close_files(files_to_close)
-    lgr.info(report.execution_summary_formatted)
+    if process.returncode != 0 and (
+        report.elapsed_time < args.fail_time or args.fail_time < 0
+    ):
+        lgr.info(
+            "Removing log files since command failed%s.",
+            f" in less than {args.fail_time} seconds" if args.fail_time > 0 else "",
+        )
+        remove_files(log_paths)
+    else:
+        lgr.info(report.execution_summary_formatted)
     return report.process.returncode
 
 

diff --git a/test/test_execution.py b/test/test_execution.py
@@ -67,6 +67,7 @@ def test_sanity_red(
     args = Arguments.from_argv(
         ["sh", "-c", f"exit {exit_code}"],
         output_prefix=temp_output_dir,
+        fail_time=0,  # keep log files regardless of exit code
     )
     caplog.set_level("INFO")
     assert execute(args) == exit_code
@@ -202,12 +203,15 @@ def test_execute_unknown_command(
     assert_expected_files(temp_output_dir, exists=False)
 
 
-def test_signal_exit(temp_output_dir: str) -> None:
+@pytest.mark.parametrize("fail_time", [None, 0, 10, -1, -3.14])
+def test_signal_exit(temp_output_dir: str, fail_time: float | None) -> None:
 
     def runner() -> int:
+        kws = {}
+        if fail_time is not None:
+            kws["fail_time"] = fail_time
         args = Arguments.from_argv(
-            ["sleep", "60.74016230000801"],
-            output_prefix=temp_output_dir,
+            ["sleep", "60.74016230000801"], output_prefix=temp_output_dir, **kws
         )
         return execute(args)
 
@@ -231,12 +235,16 @@ def runner() -> int:
         raise RuntimeError("Failed to find sleep process")
 
     thread.join()
-    # Cannot retrieve the exit code from the thread, it is written to the file
-    with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
-        info_data = json.loads(info.read())
 
-    exit_code = info_data["execution_summary"]["exit_code"]
-    assert exit_code == 128 + 15
+    if fail_time is None or fail_time != 0:
+        assert_expected_files(temp_output_dir, exists=False)
+    else:
+        # The exit code cannot be retrieved from the thread, so read it from the info file.
+        with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
+            info_data = json.loads(info.read())
+
+        exit_code = info_data["execution_summary"]["exit_code"]
+        assert exit_code == 128 + 15
 
 
 def test_duct_as_executable(temp_output_dir: str) -> None: