From 32b00236ffbcd49aa0fe24072ca01a77849f1fe7 Mon Sep 17 00:00:00 2001 From: Mario Escobedo Date: Fri, 20 Dec 2024 08:02:40 +0100 Subject: [PATCH] Add retries on webhook failure (#92) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We noticed an edge case where webhooks might fail to deliver correctly due to temporary connectivity issues with the node. This PR adds support for retries when sending webhooks in case of errors, such as 5xx responses or connectivity problems. Go isn't my primary language, so I’d appreciate any suggestions or improvements you might have! Cheers, Mario --- README.md | 5 ++- go.mod | 4 +- go.sum | 16 ++++++- internal/internal.go | 15 ++++--- pkg/config/config.go | 2 + pkg/webhook/webhook.go | 9 ++-- pkg/webhook/webhook_test.go | 86 ++++++++++++++++++++++++++++++------- 7 files changed, 106 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index a8439de..f751701 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ args: - -webhook.contentType=application/json - -webhook.method=POST - -webhook.timeout=30s +- -webhook.retries=3 configMap: data: @@ -168,6 +169,7 @@ args: - -webhook.contentType=text/plain - -webhook.method=POST - -webhook.timeout=30s +- -webhook.retries=3 configMap: data: @@ -200,6 +202,7 @@ args: - -webhook.method=POST - -webhook.timeout=30s - -webhook.http-proxy=https://someproxy:3128 +- -webhook.retries=3 configMap: data: @@ -345,4 +348,4 @@ helm upgrade aks-node-termination-handler \ aks-node-termination-handler/aks-node-termination-handler \ --set networkPolicy.enabled=true \ --set networkPolicy.controlPlaneIP=10.11.12.2 -``` \ No newline at end of file +``` diff --git a/go.mod b/go.mod index ee10fbe..5a95650 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ toolchain go1.22.2 require ( github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible github.com/google/uuid v1.6.0 + github.com/hashicorp/go-retryablehttp v0.7.7 github.com/maksim-paskal/logrus-hook-sentry v0.1.1 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.19.0 @@ -44,6 +45,7 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect + github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -72,7 +74,7 @@ require ( golang.org/x/net v0.24.0 // indirect golang.org/x/oauth2 v0.19.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.19.0 // indirect + golang.org/x/sys v0.20.0 // indirect golang.org/x/term v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.5.0 // indirect diff --git a/go.sum b/go.sum index 22cbf05..28483ef 100644 --- a/go.sum +++ b/go.sum @@ -25,6 +25,8 @@ github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lSh github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4= github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= @@ -66,6 +68,12 @@ github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/ github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -86,6 +94,10 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/maksim-paskal/logrus-hook-sentry v0.1.1 h1:9IQ8kn6XwZJ/yDjkIyTLAce7k78J3WfeZtjIh3jA/MY= github.com/maksim-paskal/logrus-hook-sentry v0.1.1/go.mod h1:FpJn8dMDsuG8/lt65HQauZuXIiG2LqAYM+vbKV//Ga0= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= @@ -177,8 +189,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/internal.go b/internal/internal.go index d3dd580..1048612 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -14,8 +14,8 @@ package internal import ( "context" - "net/http" + "github.com/hashicorp/go-retryablehttp" "github.com/maksim-paskal/aks-node-termination-handler/pkg/alert" "github.com/maksim-paskal/aks-node-termination-handler/pkg/api" "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache" @@ -44,12 +44,13 @@ func Run(ctx context.Context) error { log.Debugf("using config: %s", config.Get().String()) - webhook.SetHTTPClient(&http.Client{ - Transport: metrics.NewInstrumenter("webhook"). - WithProxy(*config.Get().WebhookProxy). - WithInsecureSkipVerify(*config.Get().WebhookInsecure). - InstrumentedRoundTripper(), - }) + retryClient := retryablehttp.NewClient() + retryClient.HTTPClient.Transport = metrics.NewInstrumenter("webhook"). + WithProxy(*config.Get().WebhookProxy). + WithInsecureSkipVerify(*config.Get().WebhookInsecure). + InstrumentedRoundTripper() + retryClient.RetryMax = *config.Get().WebhookRetries + webhook.SetHTTPClient(retryClient) err = alert.Init() if err != nil { diff --git a/pkg/config/config.go b/pkg/config/config.go index 49c75c5..b0acfbf 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -68,6 +68,7 @@ type Type struct { WebHookTimeout *time.Duration WebhookInsecure *bool WebhookProxy *string + WebhookRetries *int SentryDSN *string WebHTTPAddress *string TaintNode *bool @@ -100,6 +101,7 @@ var config = Type{ WebHookTemplateFile: flag.String("webhook.template-file", os.Getenv("WEBHOOK_TEMPLATE_FILE"), "path to request body template file"), WebhookInsecure: flag.Bool("webhook.insecureSkip", true, "skip tls verification for webhook"), WebhookProxy: flag.String("webhook.http-proxy", os.Getenv("WEBHOOK_HTTP_PROXY"), "use http proxy for webhook"), + WebhookRetries: flag.Int("webhook.retries", 3, "number of retries for webhook"), //nolint:mnd SentryDSN: flag.String("sentry.dsn", "", "sentry DSN"), WebHTTPAddress: flag.String("web.address", ":17923", ""), TaintNode: flag.Bool("taint.node", false, "Taint the node before cordon and draining"), diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index 493f5b0..9cfa849 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -19,17 +19,18 @@ import ( "net/http" "os" + "github.com/hashicorp/go-retryablehttp" "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" "github.com/pkg/errors" log "github.com/sirupsen/logrus" ) -var client = &http.Client{} +var client = &retryablehttp.Client{} var errHTTPNotOK = errors.New("http result not OK") -func SetHTTPClient(c *http.Client) { +func SetHTTPClient(c *retryablehttp.Client) { client = c } @@ -64,9 +65,9 @@ func SendWebHook(ctx context.Context, obj *template.MessageType) error { requestBody := bytes.NewBufferString(webhookBody + "\n") - req, err := http.NewRequestWithContext(ctx, *config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody) + req, err := retryablehttp.NewRequest(*config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody) if err != nil { - return errors.Wrap(err, "error in http.NewRequestWithContext") + return errors.Wrap(err, "error in retryablehttp.NewRequest") } req.Header.Set("Content-Type", *config.Get().WebHookContentType) diff --git a/pkg/webhook/webhook_test.go b/pkg/webhook/webhook_test.go index d601085..0baf83d 100644 --- a/pkg/webhook/webhook_test.go +++ b/pkg/webhook/webhook_test.go @@ -23,6 +23,7 @@ import ( "net/http/httptest" "testing" + "github.com/hashicorp/go-retryablehttp" "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" "github.com/maksim-paskal/aks-node-termination-handler/pkg/webhook" @@ -30,7 +31,28 @@ import ( "github.com/stretchr/testify/require" ) +var retryableRequestCount = 0 + var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.RequestURI == "/-/400" { + w.WriteHeader(http.StatusBadRequest) + + return + } + + if r.RequestURI == "/test-retryable" { + retryableRequestCount++ + + // return 500 for first 2 requests + if retryableRequestCount < 3 { + w.WriteHeader(http.StatusInternalServerError) + } else { + _, _ = w.Write([]byte("OK")) + } + + return + } + if err := testWebhookRequest(r); err != nil { log.WithError(err).Error() w.WriteHeader(http.StatusInternalServerError) @@ -39,6 +61,10 @@ var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http } })) +func getWebhookRetryableURL() string { + return ts.URL + "/test-retryable" +} + func getWebhookURL() string { return ts.URL + "/metrics/job/aks-node-termination-handler" } @@ -62,19 +88,27 @@ func testWebhookRequest(r *http.Request) error { func TestWebHook(t *testing.T) { //nolint:funlen,tparallel t.Parallel() - webhookClient := &http.Client{ - Transport: metrics.NewInstrumenter("TestWebHook"). - WithProxy(""). - WithInsecureSkipVerify(true). - InstrumentedRoundTripper(), - } + retryClient := retryablehttp.NewClient() + retryClient.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHook"). + WithProxy(""). + WithInsecureSkipVerify(true). + InstrumentedRoundTripper() + retryClient.RetryMax = 0 - webhookClientProxy := &http.Client{ - Transport: metrics.NewInstrumenter("TestWebHookWithProxy"). - WithProxy("http://someproxy"). - WithInsecureSkipVerify(true). - InstrumentedRoundTripper(), - } + retryClientProxy := retryablehttp.NewClient() + retryClientProxy.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithProxy"). + WithProxy("http://someproxy"). + WithInsecureSkipVerify(true). + InstrumentedRoundTripper() + retryClientProxy.RetryMax = 0 + + // retryable client with default retry settings + retryClientDefault := retryablehttp.NewClient() + retryClientDefault.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithDefaultSettings"). + WithProxy(""). + WithInsecureSkipVerify(true). + InstrumentedRoundTripper() + retryClientDefault.RetryMax = 3 type Test struct { Name string @@ -82,10 +116,26 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel Error bool ErrorMessage string NodeName string - HTTPClient *http.Client + HTTPClient *retryablehttp.Client } tests := []Test{ + { + Name: "TestRetryable", + Args: map[string]string{ + "webhook.url": getWebhookRetryableURL(), + }, + HTTPClient: retryClientDefault, + }, + { + Name: "TestRetryableCustomStatusCodes", + Args: map[string]string{ + "webhook.url": ts.URL + "/-/400", + }, + HTTPClient: retryClientDefault, + Error: true, + ErrorMessage: "http result not OK", + }, { Name: "ValidHookAndTemplate", Args: map[string]string{ @@ -122,7 +172,8 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel "webhook.url": ts.URL, "webhook.template": `{{ .NodeName }}`, }, - Error: true, + Error: true, + ErrorMessage: "giving up after 1 attempt", }, { Name: "InvalidMethod", @@ -163,7 +214,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel Args: map[string]string{ "webhook.url": getWebhookURL(), }, - HTTPClient: webhookClientProxy, + HTTPClient: retryClientProxy, }, } @@ -195,7 +246,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel if httpClient := tc.HTTPClient; httpClient != nil { webhook.SetHTTPClient(httpClient) } else { - webhook.SetHTTPClient(webhookClient) + webhook.SetHTTPClient(retryClient) } err := webhook.SendWebHook(context.TODO(), messageType) @@ -207,4 +258,7 @@ func TestWebHook(t *testing.T) { //nolint:funlen,tparallel } }) } + + // Check retryable request counter, 3 requests should be made + require.Equal(t, 3, retryableRequestCount) }