Skip to content

Commit

Permalink
Added loggers for Fleet enrollment call failures (#6477) (#6522)
Browse files Browse the repository at this point in the history
Added logger to print the status and code when enrollment call to fleet failed.

---------

Co-authored-by: Michal Pristas <[email protected]>
(cherry picked from commit 7ee581e)

Co-authored-by: Rohit <[email protected]>
  • Loading branch information
mergify[bot] and Rohit-code14 authored Jan 15, 2025
1 parent ed7bfa0 commit 3a9c7b8
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; an 80ish-character description of the change.
summary: Added logging of the HTTP status code and status name when an enrollment call to Fleet fails.

# Long description; in case the summary is not enough to describe the change,
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/6287
14 changes: 9 additions & 5 deletions internal/pkg/agent/cmd/enroll_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,21 +524,25 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[
}

c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax)

signal := make(chan struct{})
defer close(signal)
backExp := backoff.NewExpBackoff(signal, enrollBackoffInit, enrollBackoffMax)

for {
retry := false
if errors.Is(err, fleetapi.ErrTooManyRequests) {
switch {
case errors.Is(err, fleetapi.ErrTooManyRequests):
c.log.Warn("Too many requests on the remote server, will retry in a moment.")
retry = true
} else if errors.Is(err, fleetapi.ErrConnRefused) {
c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.")
case errors.Is(err, fleetapi.ErrConnRefused):
c.log.Warn("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.")
retry = true
} else if errors.Is(err, fleetapi.ErrTemporaryServerError) {
c.log.Warn("Remote server failed to handle the request, will retry in a moment.")
case errors.Is(err, fleetapi.ErrTemporaryServerError):
c.log.Warnf("Remote server failed to handle the request(%s), will retry in a moment.", err.Error())
retry = true
case err != nil:
c.log.Warnf("Enrollment failed: %s", err.Error())
}
if !retry {
break
Expand Down
12 changes: 6 additions & 6 deletions internal/pkg/fleetapi/enroll_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ var ErrConnRefused = errors.New("connection refused")
var ErrTemporaryServerError = errors.New("temporary server error, please retry later")

// temporaryServerErrorCodes defines status codes that allow clients to retry their request.
var temporaryServerErrorCodes = map[int]struct{}{
http.StatusBadGateway: {},
http.StatusServiceUnavailable: {},
http.StatusGatewayTimeout: {},
var temporaryServerErrorCodes = map[int]string{
http.StatusBadGateway: "BadGateway",
http.StatusServiceUnavailable: "ServiceUnavailable",
http.StatusGatewayTimeout: "GatewayTimeout",
}

const (
Expand Down Expand Up @@ -223,8 +223,8 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo
return nil, ErrTooManyRequests
}

if _, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary {
return nil, fmt.Errorf("received code %d: %w", resp.StatusCode, ErrTemporaryServerError)
if status, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary {
return nil, fmt.Errorf("received status code %d (%s): %w", resp.StatusCode, status, ErrTemporaryServerError)
}

if resp.StatusCode != http.StatusOK {
Expand Down

0 comments on commit 3a9c7b8

Please sign in to comment.