diff --git a/awx/api/serializers.py b/awx/api/serializers.py index ba2d2c9e600a..c8d60288442b 100644 --- a/awx/api/serializers.py +++ b/awx/api/serializers.py @@ -2648,8 +2648,7 @@ class JobRelaunchSerializer(JobSerializer): required=False, allow_null=True, default='all', choices=[ ('all', _('No change to job limit')), - ('failed', _('All failed and unreachable hosts')), - ('unreachable', _('Unreachable hosts')) + ('failed', _('All failed and unreachable hosts')) ], write_only=True ) diff --git a/docs/retry_by_status.md b/docs/retry_by_status.md index 41b695dcc62b..2156848a4f35 100644 --- a/docs/retry_by_status.md +++ b/docs/retry_by_status.md @@ -1,7 +1,16 @@ # Relaunch on Hosts with Status -This feature allows the user to relaunch a job, targeting only a subset -of hosts that had a particular status in the prior job. +This feature allows the user to relaunch a job, targeting only hosts marked +as failed in the original job. + +### Definition of "failed" + +This feature will relaunch against "failed hosts" in the original job, which +is different from "hosts with failed tasks". Unreachable hosts can have +no failed tasks. This means that the count of "failed hosts" can be different +from the failed count, given in the summary at the end of a playbook. + +This definition corresponds to Ansible .retry files. ### API Design of Relaunch @@ -11,25 +20,17 @@ POST to `/api/v2/jobs/N/relaunch/` without any request data should relaunch the job with the same `limit` value that the original job used, which may be an empty string. +This is implicitly the "all" option below. + #### Relaunch by Status -Providing request data containing `{"hosts": "<status>"}` should change -the `limit` of the relaunched job to target the hosts matching that status -from the previous job (unless the default option of "all" is used).
-The options and meanings of `<status>` include: +Providing request data containing `{"hosts": "failed"}` should change +the `limit` of the relaunched job to target failed hosts from the previous +job. Hosts will be provided as a comma-separated list in the limit. Formally, +these are the options: - all: relaunch without changing the job limit - - ok: relaunch against all hosts with >=1 tasks that returned the "ok" status - - changed: relaunch against all hosts with >=1 tasks had a changed status - - failed: relaunch against all hosts with >=1 tasks failed plus all unreachable hosts - - unreachable: relaunch against all hosts with >=1 task when they were unreachable - -These correspond to the playbook summary states from a playbook run, with -the notable exception of "failed" hosts. Ansible does not count an unreachable -event as a failed task, so unreachable hosts can (and often do) have no -associated failed tasks. The "failed" status here will still target both -status types, because Ansible will mark the _host_ as failed and include it -in the retry file if it was unreachable. + - failed: relaunch against all hosts marked as failed in the original job (including unreachable hosts) ### Relaunch Endpoint @@ -41,16 +42,26 @@ regarding the host summary of the last job. Example response: "passwords_needed_to_start": [], "retry_counts": { "all": 30, - "failed": 18, - "ok": 25, - "changed": 4, - "unreachable": 9 + "failed": 18 } } ``` If the user launches, providing a status for which there were 0 hosts, -then the request will be rejected. +then the request will be rejected. For example, if a GET yielded: + +```json +{ + "passwords_needed_to_start": [], + "retry_counts": { + "all": 30, + "failed": 0 + } +} +``` + +Then a POST of `{"hosts": "failed"}` should return a descriptive response +with a 400-level status code. # Acceptance Criteria