Skip to content

Commit

Permalink
Add retries on webhook failure (#92)
Browse files Browse the repository at this point in the history
We noticed an edge case where webhooks might fail to deliver correctly due to temporary connectivity issues with the node.

This PR adds support for retries when sending webhooks in case of errors, such as 5xx responses or connectivity problems.

Go isn't my primary language, so I’d appreciate any suggestions or improvements you might have!

Cheers,
Mario
m4rii0 authored Dec 20, 2024
1 parent 53c5037 commit 32b0023
Showing 7 changed files with 106 additions and 31 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -134,6 +134,7 @@ args:
- -webhook.contentType=application/json
- -webhook.method=POST
- -webhook.timeout=30s
- -webhook.retries=3
configMap:
data:
@@ -168,6 +169,7 @@ args:
- -webhook.contentType=text/plain
- -webhook.method=POST
- -webhook.timeout=30s
- -webhook.retries=3
configMap:
data:
@@ -200,6 +202,7 @@ args:
- -webhook.method=POST
- -webhook.timeout=30s
- -webhook.http-proxy=https://someproxy:3128
- -webhook.retries=3
configMap:
data:
@@ -345,4 +348,4 @@ helm upgrade aks-node-termination-handler \
aks-node-termination-handler/aks-node-termination-handler \
--set networkPolicy.enabled=true \
--set networkPolicy.controlPlaneIP=10.11.12.2
```
```
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ toolchain go1.22.2
require (
github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible
github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/maksim-paskal/logrus-hook-sentry v0.1.1
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.19.0
@@ -44,6 +45,7 @@ require (
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
@@ -72,7 +74,7 @@ require (
golang.org/x/net v0.24.0 // indirect
golang.org/x/oauth2 v0.19.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.19.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.19.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.5.0 // indirect
16 changes: 14 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -25,6 +25,8 @@ github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lSh
github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps=
github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=
@@ -66,6 +68,12 @@ github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/
github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -86,6 +94,10 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/maksim-paskal/logrus-hook-sentry v0.1.1 h1:9IQ8kn6XwZJ/yDjkIyTLAce7k78J3WfeZtjIh3jA/MY=
github.com/maksim-paskal/logrus-hook-sentry v0.1.1/go.mod h1:FpJn8dMDsuG8/lt65HQauZuXIiG2LqAYM+vbKV//Ga0=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8=
@@ -177,8 +189,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q=
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
15 changes: 8 additions & 7 deletions internal/internal.go
Original file line number Diff line number Diff line change
@@ -14,8 +14,8 @@ package internal

import (
"context"
"net/http"

"github.com/hashicorp/go-retryablehttp"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/alert"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/api"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/cache"
@@ -44,12 +44,13 @@ func Run(ctx context.Context) error {

log.Debugf("using config: %s", config.Get().String())

webhook.SetHTTPClient(&http.Client{
Transport: metrics.NewInstrumenter("webhook").
WithProxy(*config.Get().WebhookProxy).
WithInsecureSkipVerify(*config.Get().WebhookInsecure).
InstrumentedRoundTripper(),
})
retryClient := retryablehttp.NewClient()
retryClient.HTTPClient.Transport = metrics.NewInstrumenter("webhook").
WithProxy(*config.Get().WebhookProxy).
WithInsecureSkipVerify(*config.Get().WebhookInsecure).
InstrumentedRoundTripper()
retryClient.RetryMax = *config.Get().WebhookRetries
webhook.SetHTTPClient(retryClient)

err = alert.Init()
if err != nil {
2 changes: 2 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
@@ -68,6 +68,7 @@ type Type struct {
WebHookTimeout *time.Duration
WebhookInsecure *bool
WebhookProxy *string
WebhookRetries *int
SentryDSN *string
WebHTTPAddress *string
TaintNode *bool
@@ -100,6 +101,7 @@ var config = Type{
WebHookTemplateFile: flag.String("webhook.template-file", os.Getenv("WEBHOOK_TEMPLATE_FILE"), "path to request body template file"),
WebhookInsecure: flag.Bool("webhook.insecureSkip", true, "skip tls verification for webhook"),
WebhookProxy: flag.String("webhook.http-proxy", os.Getenv("WEBHOOK_HTTP_PROXY"), "use http proxy for webhook"),
WebhookRetries: flag.Int("webhook.retries", 3, "number of retries for webhook"), //nolint:mnd
SentryDSN: flag.String("sentry.dsn", "", "sentry DSN"),
WebHTTPAddress: flag.String("web.address", ":17923", ""),
TaintNode: flag.Bool("taint.node", false, "Taint the node before cordon and draining"),
9 changes: 5 additions & 4 deletions pkg/webhook/webhook.go
Original file line number Diff line number Diff line change
@@ -19,17 +19,18 @@ import (
"net/http"
"os"

"github.com/hashicorp/go-retryablehttp"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
)

var client = &http.Client{}
var client = &retryablehttp.Client{}

var errHTTPNotOK = errors.New("http result not OK")

func SetHTTPClient(c *http.Client) {
func SetHTTPClient(c *retryablehttp.Client) {
client = c
}

@@ -64,9 +65,9 @@ func SendWebHook(ctx context.Context, obj *template.MessageType) error {

requestBody := bytes.NewBufferString(webhookBody + "\n")

req, err := http.NewRequestWithContext(ctx, *config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody)
req, err := retryablehttp.NewRequest(*config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody)
if err != nil {
return errors.Wrap(err, "error in http.NewRequestWithContext")
return errors.Wrap(err, "error in retryablehttp.NewRequest")
}

req.Header.Set("Content-Type", *config.Get().WebHookContentType)
86 changes: 70 additions & 16 deletions pkg/webhook/webhook_test.go
Original file line number Diff line number Diff line change
@@ -23,14 +23,36 @@ import (
"net/http/httptest"
"testing"

"github.com/hashicorp/go-retryablehttp"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
"github.com/maksim-paskal/aks-node-termination-handler/pkg/webhook"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
)

var retryableRequestCount = 0

var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.RequestURI == "/-/400" {
w.WriteHeader(http.StatusBadRequest)

return
}

if r.RequestURI == "/test-retryable" {
retryableRequestCount++

// return 500 for first 2 requests
if retryableRequestCount < 3 {
w.WriteHeader(http.StatusInternalServerError)
} else {
_, _ = w.Write([]byte("OK"))
}

return
}

if err := testWebhookRequest(r); err != nil {
log.WithError(err).Error()
w.WriteHeader(http.StatusInternalServerError)
@@ -39,6 +61,10 @@ var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http
}
}))

func getWebhookRetryableURL() string {
return ts.URL + "/test-retryable"
}

func getWebhookURL() string {
return ts.URL + "/metrics/job/aks-node-termination-handler"
}
@@ -62,30 +88,54 @@ func testWebhookRequest(r *http.Request) error {
func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
t.Parallel()

webhookClient := &http.Client{
Transport: metrics.NewInstrumenter("TestWebHook").
WithProxy("").
WithInsecureSkipVerify(true).
InstrumentedRoundTripper(),
}
retryClient := retryablehttp.NewClient()
retryClient.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHook").
WithProxy("").
WithInsecureSkipVerify(true).
InstrumentedRoundTripper()
retryClient.RetryMax = 0

webhookClientProxy := &http.Client{
Transport: metrics.NewInstrumenter("TestWebHookWithProxy").
WithProxy("http://someproxy").
WithInsecureSkipVerify(true).
InstrumentedRoundTripper(),
}
retryClientProxy := retryablehttp.NewClient()
retryClientProxy.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithProxy").
WithProxy("http://someproxy").
WithInsecureSkipVerify(true).
InstrumentedRoundTripper()
retryClientProxy.RetryMax = 0

// retryable client with default retry settings
retryClientDefault := retryablehttp.NewClient()
retryClientDefault.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithDefaultSettings").
WithProxy("").
WithInsecureSkipVerify(true).
InstrumentedRoundTripper()
retryClientDefault.RetryMax = 3

type Test struct {
Name string
Args map[string]string
Error bool
ErrorMessage string
NodeName string
HTTPClient *http.Client
HTTPClient *retryablehttp.Client
}

tests := []Test{
{
Name: "TestRetryable",
Args: map[string]string{
"webhook.url": getWebhookRetryableURL(),
},
HTTPClient: retryClientDefault,
},
{
Name: "TestRetryableCustomStatusCodes",
Args: map[string]string{
"webhook.url": ts.URL + "/-/400",
},
HTTPClient: retryClientDefault,
Error: true,
ErrorMessage: "http result not OK",
},
{
Name: "ValidHookAndTemplate",
Args: map[string]string{
@@ -122,7 +172,8 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
"webhook.url": ts.URL,
"webhook.template": `{{ .NodeName }}`,
},
Error: true,
Error: true,
ErrorMessage: "giving up after 1 attempt",
},
{
Name: "InvalidMethod",
@@ -163,7 +214,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
Args: map[string]string{
"webhook.url": getWebhookURL(),
},
HTTPClient: webhookClientProxy,
HTTPClient: retryClientProxy,
},
}

@@ -195,7 +246,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
if httpClient := tc.HTTPClient; httpClient != nil {
webhook.SetHTTPClient(httpClient)
} else {
webhook.SetHTTPClient(webhookClient)
webhook.SetHTTPClient(retryClient)
}

err := webhook.SendWebHook(context.TODO(), messageType)
@@ -207,4 +258,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
}
})
}

// Check retryable request counter, 3 requests should be made
require.Equal(t, 3, retryableRequestCount)
}

0 comments on commit 32b0023

Please sign in to comment.