From 3a9c7b8d59411d9ce41057b1e1436d27056096b9 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 18:17:54 +0200 Subject: [PATCH] Added loggers for Fleet enrollment call failures (#6477) (#6522) Added logger to print the status and code when enrollment call to fleet failed. --------- Co-authored-by: Michal Pristas (cherry picked from commit 7ee581e6b4f9a509f5dae0aaf1163a138eca94f2) Co-authored-by: Rohit <59366992+Rohit-code14@users.noreply.github.com> --- ...-log-fleet-enroll-failure-status-code.yaml | 32 +++++++++++++++++++ internal/pkg/agent/cmd/enroll_cmd.go | 14 +++++--- internal/pkg/fleetapi/enroll_cmd.go | 12 +++---- 3 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml diff --git a/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml new file mode 100644 index 00000000000..54e04d76d2f --- /dev/null +++ b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Added logger to print the status and code when enrollment call to fleet failed. 
+ +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled by the tooling with the issue linked to the PR number. 
+issue: https://github.com/elastic/elastic-agent/issues/6287 diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 91f652c9ef5..e25b03abb1c 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -524,21 +524,25 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ } c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) + signal := make(chan struct{}) defer close(signal) backExp := backoff.NewExpBackoff(signal, enrollBackoffInit, enrollBackoffMax) for { retry := false - if errors.Is(err, fleetapi.ErrTooManyRequests) { + switch { + case errors.Is(err, fleetapi.ErrTooManyRequests): c.log.Warn("Too many requests on the remote server, will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrConnRefused) { - c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.") + case errors.Is(err, fleetapi.ErrConnRefused): + c.log.Warn("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrTemporaryServerError) { - c.log.Warn("Remote server failed to handle the request, will retry in a moment.") + case errors.Is(err, fleetapi.ErrTemporaryServerError): + c.log.Warnf("Remote server failed to handle the request(%s), will retry in a moment.", err.Error()) retry = true + case err != nil: + c.log.Warnf("Enrollment failed: %s", err.Error()) } if !retry { break diff --git a/internal/pkg/fleetapi/enroll_cmd.go b/internal/pkg/fleetapi/enroll_cmd.go index 768b76cddfd..49ef33208fe 100644 --- a/internal/pkg/fleetapi/enroll_cmd.go +++ b/internal/pkg/fleetapi/enroll_cmd.go @@ -33,10 +33,10 @@ var ErrConnRefused = errors.New("connection refused") var ErrTemporaryServerError = errors.New("temporary server error, please retry 
later") // temporaryServerErrorCodes defines status codes that allow clients to retry their request. -var temporaryServerErrorCodes = map[int]struct{}{ - http.StatusBadGateway: {}, - http.StatusServiceUnavailable: {}, - http.StatusGatewayTimeout: {}, +var temporaryServerErrorCodes = map[int]string{ + http.StatusBadGateway: "BadGateway", + http.StatusServiceUnavailable: "ServiceUnavailable", + http.StatusGatewayTimeout: "GatewayTimeout", } const ( @@ -223,8 +223,8 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo return nil, ErrTooManyRequests } - if _, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { - return nil, fmt.Errorf("received code %d: %w", resp.StatusCode, ErrTemporaryServerError) + if status, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { + return nil, fmt.Errorf("received status code %d (%s): %w", resp.StatusCode, status, ErrTemporaryServerError) } if resp.StatusCode != http.StatusOK {