diff --git a/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml new file mode 100644 index 00000000000..54e04d76d2f --- /dev/null +++ b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; an 80ish-character description of the change. +summary: Log the status and status code when an enrollment call to Fleet fails. + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. 
+#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/elastic/elastic-agent/issues/6287 diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 6c3df8e8998..28f5c135794 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -524,21 +524,25 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ } c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) + signal := make(chan struct{}) defer close(signal) backExp := backoff.NewExpBackoff(signal, enrollBackoffInit, enrollBackoffMax) for { retry := false - if errors.Is(err, fleetapi.ErrTooManyRequests) { + switch { + case errors.Is(err, fleetapi.ErrTooManyRequests): c.log.Warn("Too many requests on the remote server, will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrConnRefused) { - c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.") + case errors.Is(err, fleetapi.ErrConnRefused): + c.log.Warn("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrTemporaryServerError) { - c.log.Warn("Remote server failed to handle the request, will retry in a moment.") + case errors.Is(err, fleetapi.ErrTemporaryServerError): + c.log.Warnf("Remote server failed to handle the request(%s), will retry in a moment.", err.Error()) retry = true + case err != nil: + c.log.Warnf("Enrollment failed: %s", err.Error()) } if !retry { break diff --git a/internal/pkg/fleetapi/enroll_cmd.go b/internal/pkg/fleetapi/enroll_cmd.go index 768b76cddfd..49ef33208fe 100644 --- 
a/internal/pkg/fleetapi/enroll_cmd.go +++ b/internal/pkg/fleetapi/enroll_cmd.go @@ -33,10 +33,10 @@ var ErrConnRefused = errors.New("connection refused") var ErrTemporaryServerError = errors.New("temporary server error, please retry later") // temporaryServerErrorCodes defines status codes that allow clients to retry their request. -var temporaryServerErrorCodes = map[int]struct{}{ - http.StatusBadGateway: {}, - http.StatusServiceUnavailable: {}, - http.StatusGatewayTimeout: {}, +var temporaryServerErrorCodes = map[int]string{ + http.StatusBadGateway: "BadGateway", + http.StatusServiceUnavailable: "ServiceUnavailable", + http.StatusGatewayTimeout: "GatewayTimeout", } const ( @@ -223,8 +223,8 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo return nil, ErrTooManyRequests } - if _, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { - return nil, fmt.Errorf("received code %d: %w", resp.StatusCode, ErrTemporaryServerError) + if status, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { + return nil, fmt.Errorf("received status code %d (%s): %w", resp.StatusCode, status, ErrTemporaryServerError) } if resp.StatusCode != http.StatusOK {