Skip to content

Commit

Permalink
Merge pull request #227 from con/enh-f-t
Browse files Browse the repository at this point in the history
Add --fail-time option and by default remove all outputs if command fails fast
  • Loading branch information
yarikoptic authored Dec 3, 2024
2 parents 6b89dea + 4fcdcc4 commit fb99001
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 15 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ usage: duct [-h] [--version] [-p OUTPUT_PREFIX]
[--summary-format SUMMARY_FORMAT] [--colors] [--clobber]
[-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q]
[--sample-interval SAMPLE_INTERVAL]
[--report-interval REPORT_INTERVAL] [-c {all,none,stdout,stderr}]
[-o {all,none,stdout,stderr}]
[--report-interval REPORT_INTERVAL] [--fail-time FAIL_TIME]
[-c {all,none,stdout,stderr}] [-o {all,none,stdout,stderr}]
[-t {all,system-summary,processes-samples}]
command [command_args ...] ...

Expand Down Expand Up @@ -105,6 +105,12 @@ options:
--report-interval REPORT_INTERVAL, --r-i REPORT_INTERVAL
Interval in seconds at which to report aggregated
data. (default: 60.0)
--fail-time FAIL_TIME, --f-t FAIL_TIME
If command fails in less than this specified time,
duct would remove logs. Set to 0 if you would like to
keep logs for a failing command regardless of its run
time. Set to negative (e.g. -1) if you would like to
not keep logs for any failing command. (default: 3.0)
-c {all,none,stdout,stderr}, --capture-outputs {all,none,stdout,stderr}
Record stdout, stderr, all, or none to log files. You
can also provide value via DUCT_CAPTURE_OUTPUTS env
Expand Down
36 changes: 31 additions & 5 deletions src/con_duct/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ class Arguments:
output_prefix: str
sample_interval: float
report_interval: float
fail_time: float
clobber: bool
capture_outputs: Outputs
outputs: Outputs
Expand Down Expand Up @@ -772,6 +773,16 @@ def from_argv(
default=float(os.getenv("DUCT_REPORT_INTERVAL", "60.0")),
help="Interval in seconds at which to report aggregated data.",
)
parser.add_argument(
"--fail-time",
"--f-t",
type=float,
default=float(os.getenv("DUCT_FAIL_TIME", "3.0")),
help="If command fails in less than this specified time, duct would remove logs. "
"Set to 0 if you would like to keep logs for a failing command regardless of its run time. "
"Set to negative (e.g. -1) if you would like to not keep logs for any failing command.",
)

parser.add_argument(
"-c",
"--capture-outputs",
Expand Down Expand Up @@ -807,6 +818,7 @@ def from_argv(
output_prefix=args.output_prefix,
sample_interval=args.sample_interval,
report_interval=args.report_interval,
fail_time=args.fail_time,
capture_outputs=args.capture_outputs,
outputs=args.outputs,
record_types=args.record_types,
Expand Down Expand Up @@ -954,6 +966,14 @@ def safe_close_files(file_list: Iterable[Any]) -> None:
pass


def remove_files(log_paths: LogPaths, assert_empty: bool = False) -> None:
    """Delete every file listed in *log_paths*, skipping ones that are missing.

    Args:
        log_paths: Iterable yielding ``(name, path)`` pairs; only the path
            component is used.
        assert_empty: When true, refuse to delete a file that has content.

    Raises:
        AssertionError: If ``assert_empty`` is set and a file is not 0-sized.
    """
    for _, file_path in log_paths:
        try:
            if assert_empty and os.stat(file_path).st_size != 0:
                # Raised explicitly rather than through an ``assert`` statement
                # so the guard still protects non-empty files under ``-O``.
                raise AssertionError(
                    f"{file_path} is not empty, refusing to remove it"
                )
            os.remove(file_path)
        except FileNotFoundError:
            # Nothing to clean up for this path.  Handling the error instead
            # of pre-checking with os.path.exists() avoids a check-then-act
            # race if the file disappears in between.
            continue


def main() -> None:
logging.basicConfig(
format="%(asctime)s [%(levelname)-8s] %(name)s: %(message)s",
Expand Down Expand Up @@ -1013,10 +1033,7 @@ def execute(args: Arguments) -> int:
# We should remove log etc files since they are 0-sized
# degenerates etc
safe_close_files(files_to_close)
for _, file_path in log_paths:
if os.path.exists(file_path):
assert os.stat(file_path).st_size == 0
os.remove(file_path)
remove_files(log_paths, assert_empty=True)
# mimicking behavior of bash and zsh.
print(f"{args.command}: command not found", file=sys.stderr)
return 127 # seems what zsh and bash return then
Expand Down Expand Up @@ -1081,7 +1098,16 @@ def execute(args: Arguments) -> int:
report.run_time_seconds = f"{report.end_time - report.start_time}"
system_logs.write(report.dump_json())
safe_close_files(files_to_close)
lgr.info(report.execution_summary_formatted)
if process.returncode != 0 and (
report.elapsed_time < args.fail_time or args.fail_time < 0
):
lgr.info(
"Removing log files since command failed%s.",
f" in less than {args.fail_time} seconds" if args.fail_time > 0 else "",
)
remove_files(log_paths)
else:
lgr.info(report.execution_summary_formatted)
return report.process.returncode


Expand Down
24 changes: 16 additions & 8 deletions test/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def test_sanity_red(
args = Arguments.from_argv(
["sh", "-c", f"exit {exit_code}"],
output_prefix=temp_output_dir,
fail_time=0, # keep log files regardless of exit code
)
caplog.set_level("INFO")
assert execute(args) == exit_code
Expand Down Expand Up @@ -202,12 +203,15 @@ def test_execute_unknown_command(
assert_expected_files(temp_output_dir, exists=False)


def test_signal_exit(temp_output_dir: str) -> None:
@pytest.mark.parametrize("fail_time", [None, 0, 10, -1, -3.14])
def test_signal_exit(temp_output_dir: str, fail_time: float | None) -> None:

def runner() -> int:
kws = {}
if fail_time is not None:
kws["fail_time"] = fail_time
args = Arguments.from_argv(
["sleep", "60.74016230000801"],
output_prefix=temp_output_dir,
["sleep", "60.74016230000801"], output_prefix=temp_output_dir, **kws
)
return execute(args)

Expand All @@ -231,12 +235,16 @@ def runner() -> int:
raise RuntimeError("Failed to find sleep process")

thread.join()
# Cannot retrieve the exit code from the thread, it is written to the file
with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
info_data = json.loads(info.read())

exit_code = info_data["execution_summary"]["exit_code"]
assert exit_code == 128 + 15
if fail_time is None or fail_time != 0:
assert_expected_files(temp_output_dir, exists=False)
else:
# Cannot retrieve the exit code from the thread, it is written to the file
with open(os.path.join(temp_output_dir, SUFFIXES["info"])) as info:
info_data = json.loads(info.read())

exit_code = info_data["execution_summary"]["exit_code"]
assert exit_code == 128 + 15


def test_duct_as_executable(temp_output_dir: str) -> None:
Expand Down

0 comments on commit fb99001

Please sign in to comment.