Skip to content

Commit

Permalink
Change io-watchdog --exact-timeout option to --method=TYPE, where
Browse files Browse the repository at this point in the history
TYPE can be sloppy or exact. Similarly, change exact-timeout keyword
in config file to timeout-method = TYPE.
  • Loading branch information
grondo committed Jul 6, 2007
1 parent 6d426ac commit c21a512
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 44 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
* src/interposer/io-watchdog-interposer.c :
Ensure some stdio functions are not #defined, and undef them if so.

* src/watchdog/io-watchdog.c, src/watchdog/io-watchdog.1.in,
src/conf/conf-parser.l, src/conf/conf-parser.y,
src/conf/io-watchdog.conf.5.in :
Change --exact-timeout option in io-watchdog to --method=exact,
and exact-timeout keyword in io-watchdog.conf to timeout-method
for possible expansion later.


2007-07-05 Mark Grondona <[email protected]>

* io-watchdog.spec : Add BuildRequires.
Expand Down
4 changes: 2 additions & 2 deletions doc/example-config
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
# target = glob Define default target
# actions = action,... Define default list of actions
#
# exact-timeout Enable more precise watchdog timeouts
# (incurs some performance impact)
# timeout-method = TYPE Specify the watchdog timeout method
# (sloppy or exact, default = sloppy)
#
# program pattern Following settings only apply to programs
# that match [pattern] (globbing pattern).
Expand Down
2 changes: 1 addition & 1 deletion src/conf/conf-parser.l
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ actions { return ACTIONS; }
timeout { return TIMEOUT; }
rank { return RANK; }
target { return TARGET; }
exact-timeout { return EXACT_TIMEOUT; }
timeout-method { return METHOD; }
= { return '='; }

[^=;, \t\r\n]+ {
Expand Down
28 changes: 10 additions & 18 deletions src/conf/conf-parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ int yydebug = 0;
static int cf_action (char * name, char *val);
static int cf_program (char *program);
static int cf_timeout (char *timeout);
static int cf_exact (char *exact);
static int cf_method (char *method);
static int cf_rank (char *rank);
static int cf_target (char *target);
static int cf_actions ();
Expand All @@ -41,7 +41,7 @@ static int stringlist_append (char *string);
%token RANK
%token TARGET
%token SEARCH
%token EXACT_TIMEOUT
%token METHOD


%%
Expand All @@ -62,8 +62,7 @@ stmt : ACTION STRING '=' STRING { if (cf_action ($2, $4) < 0) YYABORT; }
| TIMEOUT '=' STRING { if (cf_timeout ($3) < 0) YYABORT; }
| RANK '=' STRING { if (cf_rank ($3) < 0) YYABORT; }
| TARGET '=' STRING { if (cf_target ($3) < 0) YYABORT; }
| EXACT_TIMEOUT { if (cf_exact ("1") < 0) YYABORT; }
| EXACT_TIMEOUT '=' STRING { if (cf_exact ($3) < 0) YYABORT; }
| METHOD '=' STRING { if (cf_method ($3) < 0) YYABORT; }
;

actions : ACTIONS '=' list { if (cf_actions () < 0) YYABORT; }
Expand Down Expand Up @@ -177,30 +176,23 @@ static int cf_timeout (char *timeout)
return (rc);
}

static int cf_exact (char *exact)
static int cf_method (char *method)
{
int val = -1;

if (exact == NULL)
val = 1;
else if ( strcmp (exact, "0") == 0
|| strcasecmp (exact, "false") == 0
|| strcasecmp (exact, "off") == 0
|| strcasecmp (exact, "no") == 0)
if ( strcasecmp (method, "sloppy") == 0
|| strcasecmp (method, "default") == 0)
val = 0;
else if ( strcmp (exact, "1") == 0
|| strcasecmp (exact, "true") == 0
|| strcasecmp (exact, "yes") == 0
|| strcasecmp (exact, "on") == 0)
else if (strcasecmp (method, "exact") == 0)
val = 1;
else
log_err ("%s: %d: Invalid value for exact-timeout \"%s\"\n",
cf_file (), cf_line (), exact);
log_err ("%s: %d: Invalid value for timeout-method: \"%s\"\n",
cf_file (), cf_line (), method);

if (val < 0)
return (-1);

log_debug2 ("%s: %d: exact-timeout\n");
log_debug2 ("%s: %d: timeout-method = %s\n", method);

io_watchdog_conf_set_exact_timeout (conf, val);

Expand Down
11 changes: 7 additions & 4 deletions src/conf/io-watchdog.conf.5.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ it can be 's' for seconds (the default), 'm' for minutes, 'h' for
hours, or 'd' for days. N may be an arbitrary floating point
number.
.TP
\fBexact-timeout\fR [ = \fI0|1\fR]
Use a more precise method for the IO watchdog timeout. See the
documentation for the \fI\-\-exact-timeout\fR option in the
\fBio-watchdog\fR(1) man page for more information.
\fBtimeout-method\fR = \fITYPE\fR
Specify an alternate method for the IO watchdog timeout.
Valid values for \fITYPE\fR are currently \fIsloppy\fR
and \fIexact\fR. The \fIexact\fR method uses a more precise
algorithm for calculating the IO watchdog timeout, but may
cause some performance impact. The default value is
\fIsloppy\fR.
.TP
\fBactions\fR = \fRNAME\fR[,\fINAME\fR,...]
Set the list of IO watchdog actions to \fINAME\fR. Multiple actions
Expand Down
27 changes: 15 additions & 12 deletions src/watchdog/io-watchdog.1.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,21 @@ as it avoids enabling the io watchdog on the \fBtime\fR process.
Only target rank N (default = 0) of a SLURM job.

.TP
.BI "-e, --exact-timeout"
Use a more precise method for the IO watchdog timeout. By default the
IO watchdog uses a simple and very lightweight method for tracking IO
in the interest of impacting the application as little as possible.
The actual time at which the watchdog may timeout will be anywhere
from N to 2*N, where N is the timeout period specified by the user.
With the \fI\-\-exact-timeout\fR option, the IO watchdog will track
the time of the last application write so that the timeout can be
as exact as possible. However, this will affect IO performance, since
a timestamp must be generated on every write call. The magnitude of
the impact is dependent on the rate of calls to write, not necessarily
the amount of data written.
.BI "-m, --method=" TYPE
Specify an alternate method for the IO watchdog timeout. Valid values for
\fITYPE\fR are \fIsloppy\fR or \fIexact\fR. By default the IO watchdog
uses the \fIsloppy\fR method, which is a simple and very lightweight
method for tracking IO, and thus has very little impact on application
performance. With the \fIsloppy\fR method, however, the actual time at
which the watchdog will time out may be anywhere from N to 2*N, where N is
the timeout period selected by the user. When the \fIexact\fR timeout
method is selected, the IO watchdog timestamps application IO which
allows more precise calculation of the next timeout interval. However,
the \fIexact\fR method will have a greater impact on application write
performance, because an extra call to \fIgettimeofday\fR(2) is generated
for every write call. The performance impact of the \fIexact\fR method
is dependent on the rate of calls to write, not necessarily the amount
of data written.

.SH SEE ALSO
\fBio-watchdog.conf\fR(5)
36 changes: 29 additions & 7 deletions src/watchdog/io-watchdog.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ struct option opt_table [] = {
{ "list-actions", 0, NULL, 'l' },
{ "server", 0, NULL, 'S' },
{ "shared-file", 1, NULL, 'F' },
{ "exact-timeout",0, NULL, 'e' },
{ "method", 1, NULL, 'm' },
{ NULL, 0, NULL, 0 }
};

const char * const opt_string = "hvlSea:t:T:r:F:f:";
const char * const opt_string = "hvlSm:a:t:T:r:F:f:";

#define USAGE "\
Usage: %s [OPTIONS] [executable args...]\n\
Expand All @@ -56,8 +56,8 @@ Usage: %s [OPTIONS] [executable args...]\n\
time(1), as it avoids enabling the io-watchdog on \n\
the time process.\n\
-r, --rank=N Only target rank [N] (default = 0) of a SLURM job.\n\
-e, --exact-timeout Use a more precise method for the watchdog timeout.\n\
See the io-watchdog(1) man page for details.\n\
-m, --method=TYPE Specify the method used for the watchdog timeout.\n\
TYPE may be `sloppy' or `exact' (default = sloppy)\n\
\n\
-f, --config=file Specify alternate config file [file]\n\
\n\
Expand All @@ -81,6 +81,7 @@ struct io_watchdog_options {
char * config_file;

double timeout;
char * timeout_method;
unsigned int exact_timeout;
int verbose;
int rank;
Expand Down Expand Up @@ -108,6 +109,7 @@ static void prog_ctx_init (struct prog_ctx *ctx, int ac, char *av []);
static void prog_ctx_fini (struct prog_ctx *ctx);
static void process_env (struct prog_ctx *ctx);
static void parse_cmdline (struct prog_ctx *ctx, int ac, char *av []);
static void set_timeout_method (struct prog_ctx *ctx, const char *method);
static int io_watchdog_server (struct prog_ctx *ctx);
static int check_user_actions (struct prog_ctx *ctx);
static void list_all_actions (struct prog_ctx *ctx);
Expand Down Expand Up @@ -377,8 +379,10 @@ static void apply_config (struct prog_ctx *ctx)
if (!ctx->opts.target)
ctx->opts.target = xstrdup (io_watchdog_conf_target (ctx->conf));

if (!ctx->opts.exact_timeout && io_watchdog_conf_exact_timeout (ctx->conf))
if (!ctx->opts.timeout_method
&& io_watchdog_conf_exact_timeout (ctx->conf)) {
ctx->opts.exact_timeout = 1;
}
}

static void parse_cmdline (struct prog_ctx *ctx, int ac, char *av[])
Expand Down Expand Up @@ -407,8 +411,8 @@ static void parse_cmdline (struct prog_ctx *ctx, int ac, char *av[])
&ctx->opts.timeout_has_suffix) < 0)
log_fatal (1, "Invalid timeout string `%s'\n", optarg);
break;
case 'e':
ctx->opts.exact_timeout = 1;
case 'm':
ctx->opts.timeout_method = strdup (optarg);
break;
case 'a':
ctx->opts.actions =
Expand Down Expand Up @@ -473,6 +477,8 @@ static void parse_cmdline (struct prog_ctx *ctx, int ac, char *av[])
ctx->opts.actions = list_split_append (ctx->opts.actions, ",:",
ctx->opts.env_action_string);

set_timeout_method (ctx, ctx->opts.timeout_method);

apply_config (ctx);

if (ctx->opts.server_only && av [optind])
Expand Down Expand Up @@ -514,6 +520,22 @@ static int check_user_actions (struct prog_ctx *ctx)
return (list_for_each (ctx->opts.actions, (ListForF) check_action, ctx));
}

/*
 *  Apply the timeout method named by [method] to the options in [ctx].
 *
 *  A NULL [method] or the string "sloppy" leaves ctx->opts untouched.
 *  The string "exact" sets ctx->opts.exact_timeout to 1.
 *  Any other string is reported through log_fatal() with code 1.
 *
 *  Names are compared with strcmp(), so matching is case-sensitive.
 */
static void set_timeout_method (struct prog_ctx *ctx, const char *method)
{
    /*  Nothing to do when no method was given, or for "sloppy" */
    if (method == NULL || strcmp (method, "sloppy") == 0)
        return;

    if (strcmp (method, "exact") != 0)
        log_fatal (1, "Invalid timeout method \"%s\" specified.\n", method);
    else
        ctx->opts.exact_timeout = 1;
}


/******************************************************************************
* Forked io_watchdog server
Expand Down

0 comments on commit c21a512

Please sign in to comment.