From 986ac09611339c701fdd5345f509e0bbedde0fbe Mon Sep 17 00:00:00 2001 From: Sven Walter Date: Fri, 10 Jun 2022 10:41:41 +0200 Subject: [PATCH 1/3] improve shutdown behaviour --- examples/full/cmd/server.go | 20 ++++++++------ pkg/cmdutil/context.go | 52 +++++++++++++++++++++++++++++++++++++ pkg/webutil/admin.go | 31 ++++++++++++++++------ pkg/webutil/server.go | 4 +-- 4 files changed, 89 insertions(+), 18 deletions(-) diff --git a/examples/full/cmd/server.go b/examples/full/cmd/server.go index 1076af7..af169ad 100644 --- a/examples/full/cmd/server.go +++ b/examples/full/cmd/server.go @@ -11,6 +11,7 @@ import ( "github.com/go-chi/chi/v5/middleware" "github.com/go-redis/redis/v8" "github.com/pkg/errors" + "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/cmdutil" "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/logutil" "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/redisutil" "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/webutil" @@ -25,7 +26,7 @@ type Server struct { TemplateFS fs.FS } -func (s *Server) Run(ctxRoot context.Context) error { +func (s *Server) Run(ctx context.Context) error { // Creating a new context, so we can have two stages for the graceful // shutdown. First is to make pod unready (within the admin api) and the // seconds is all the rest. @@ -38,13 +39,12 @@ func (s *Server) Run(ctxRoot context.Context) error { // running routines and should used once on program startup. group, ctx := errgroup.WithContext(ctx) - // Set up the admin API and use the root context to make sure it gets - // terminated first. We have to use ctxRoot, because this is what should - // canceled first, if any error occours. Afterwards it uses cancel() to - // cancel ctx context. - webutil.AdminAPIListenAndServe(ctxRoot, group, cancel) + // Set up the admin API and defer the function that it returns. The admin + // API lifecycle differs from the context, so it actually is the last thing + // that gets shut down. + defer webutil.AdminAPIListenAndServe(ctx)() - // Other background processes use the main context. + // Other background processes. s.setupHTTPServer(ctx, group) return errors.WithStack(group.Wait()) @@ -55,6 +55,10 @@ func (s *Server) setupHTTPServer(ctx context.Context, group *errgroup.Group) { // process, so we can see what triggered a specific log message later. ctx = logutil.Start(ctx, "http-server") + // Delay the context cancel by 5s to give Kubernetes some time to redirect + // traffic to another pod. + ctx = cmdutil.ContextWithDelay(ctx, 5*time.Second) + // Prepare some interfaces to later use. vh := webutil.NewViewHandler(s.TemplateFS, webutil.SimpleTemplateFuncMap("prettyTime", PrettyTimeTemplateFunction), @@ -71,7 +75,7 @@ func (s *Server) setupHTTPServer(ctx context.Context, group *errgroup.Group) { group.Go(func() error { logutil.Get(ctx).Info("http server listening on port 8080") - return errors.WithStack(webutil.ListenAndServerWithContext( + return errors.WithStack(webutil.ListenAndServeWithContext( ctx, "0.0.0.0:8080", router)) }) } diff --git a/pkg/cmdutil/context.go b/pkg/cmdutil/context.go index 4325328..ec1a7f6 100644 --- a/pkg/cmdutil/context.go +++ b/pkg/cmdutil/context.go @@ -5,6 +5,7 @@ import ( "os" "os/signal" "syscall" + "time" "github.com/sirupsen/logrus" "github.com/spf13/cobra" @@ -47,3 +48,54 @@ func wrapRootConext(run RunFuncWithContext) RunFunc { } } + +// ContextWithDelay delays the context cancel by the given delay. In the +// background it creates a new context with ContextWithValuesFrom and cancels +// it after the original one got canceled. +func ContextWithDelay(in context.Context, delay time.Duration) context.Context { + out := ContextWithValuesFrom(in) + out, cancel := context.WithCancel(out) + + go func() { + defer cancel() + <-in.Done() + time.Sleep(delay) + }() + return out +} + +type compositeContext struct { + deadline context.Context + done context.Context + err context.Context + value context.Context +} + +func (c compositeContext) Deadline() (deadline time.Time, ok bool) { + return c.deadline.Deadline() +} +func (c compositeContext) Done() <-chan struct{} { + return c.done.Done() +} + +func (c compositeContext) Err() error { + return c.err.Err() +} + +func (c compositeContext) Value(key any) any { + return c.value.Value(key) +} + +// ContextWithValuesFrom creates a new context, but still references the values +// from the given context. This is helpful if a background context is needed +// that needs to have the values of an exiting context. +func ContextWithValuesFrom(value context.Context) context.Context { + bg := context.Background() + + return &compositeContext{ + deadline: bg, + done: bg, + err: bg, + value: value, + } +} diff --git a/pkg/webutil/admin.go b/pkg/webutil/admin.go index b6eb98f..79c13c9 100644 --- a/pkg/webutil/admin.go +++ b/pkg/webutil/admin.go @@ -6,13 +6,11 @@ import ( "net/http" "net/http/pprof" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/logutil" - "golang.org/x/sync/errgroup" ) -func AdminAPIListenAndServe(ctx context.Context, group *errgroup.Group, fnDone func()) { +func AdminAPIListenAndServe(ctx context.Context, healthy ...func() error) func() { ctx = logutil.Start(ctx, "admin-api") mux := http.NewServeMux() @@ -24,6 +22,15 @@ func AdminAPIListenAndServe(ctx context.Context, group *errgroup.Group, fnDone f return } + for _, h := range healthy { + err := h() + if err != nil { + w.WriteHeader(http.StatusServiceUnavailable) + fmt.Fprintln(w, err.Error()) + return + } + } + w.WriteHeader(http.StatusOK) fmt.Fprintln(w, "OK") }) @@ -36,12 +43,20 @@ func AdminAPIListenAndServe(ctx context.Context, group *errgroup.Group, fnDone f mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) mux.HandleFunc("/debug/pprof/trace", pprof.Trace) - group.Go(func() error { - defer fnDone() + // The admin api gets a its own context, because we want to delay the + // server shutdown as long as possible. The reason for this is that Istio + // starts to block all outgoing connections as soon as there is no + // listening server anymore. + bg, cancel := context.WithCancel(context.Background()) + go func() { logutil.Get(ctx).Debugf("admin api listening on port 8090") - return errors.WithStack(ListenAndServerWithContext( - ctx, "0.0.0.0:8090", mux)) - }) + err := ListenAndServeWithContext(bg, "0.0.0.0:8090", mux) + if err != nil { + logutil.Get(ctx).Error(err.Error()) + } + }() + + return cancel } diff --git a/pkg/webutil/server.go b/pkg/webutil/server.go index 8d4f466..3967d40 100644 --- a/pkg/webutil/server.go +++ b/pkg/webutil/server.go @@ -11,11 +11,11 @@ import ( "golang.org/x/sync/errgroup" ) -// ListenAndServerWithContext does the same as http.ListenAndServe with the +// ListenAndServeWithContext does the same as http.ListenAndServe with the // difference that is properly utilises the context. This means it does a // graceful shutdown when the context is done and a context cancellation gets // propagated down to the actual request context. -func ListenAndServerWithContext(ctx context.Context, addr string, handler http.Handler) error { +func ListenAndServeWithContext(ctx context.Context, addr string, handler http.Handler) error { server := &http.Server{ Addr: addr, Handler: handler, From 2150395e7ad59e042316094f80bd9c56b28339e4 Mon Sep 17 00:00:00 2001 From: Sven Walter Date: Fri, 10 Jun 2022 11:35:02 +0200 Subject: [PATCH 2/3] remove extra context --- examples/full/cmd/server.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/full/cmd/server.go b/examples/full/cmd/server.go index af169ad..87ecf09 100644 --- a/examples/full/cmd/server.go +++ b/examples/full/cmd/server.go @@ -27,12 +27,6 @@ type Server struct { } func (s *Server) Run(ctx context.Context) error { - // Creating a new context, so we can have two stages for the graceful - // shutdown. First is to make pod unready (within the admin api) and the - // seconds is all the rest. - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - ctx = InstInit(ctx) // Using a errors group is a good practice to manage multiple parallel From 093f872ab87e2d763475869e58a3dc8fb4e178fb Mon Sep 17 00:00:00 2001 From: Sven Walter Date: Wed, 15 Jun 2022 10:06:25 +0200 Subject: [PATCH 3/3] do not cancel admin api --- examples/full/cmd/server.go | 7 +++---- pkg/webutil/admin.go | 9 ++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/examples/full/cmd/server.go b/examples/full/cmd/server.go index 87ecf09..989e544 100644 --- a/examples/full/cmd/server.go +++ b/examples/full/cmd/server.go @@ -33,10 +33,9 @@ func (s *Server) Run(ctx context.Context) error { // running routines and should used once on program startup. group, ctx := errgroup.WithContext(ctx) - // Set up the admin API and defer the function that it returns. The admin - // API lifecycle differs from the context, so it actually is the last thing - // that gets shut down. - defer webutil.AdminAPIListenAndServe(ctx)() + // Set up the admin API. The admin API lifecycle differs from the context, + // so it actually is the last thing that gets shut down. + webutil.AdminAPIListenAndServe(ctx) // Other background processes. s.setupHTTPServer(ctx, group) diff --git a/pkg/webutil/admin.go b/pkg/webutil/admin.go index 79c13c9..07191e7 100644 --- a/pkg/webutil/admin.go +++ b/pkg/webutil/admin.go @@ -10,7 +10,7 @@ import ( "github.com/rebuy-de/rebuy-go-sdk/v4/pkg/logutil" ) -func AdminAPIListenAndServe(ctx context.Context, healthy ...func() error) func() { +func AdminAPIListenAndServe(ctx context.Context, healthy ...func() error) { ctx = logutil.Start(ctx, "admin-api") mux := http.NewServeMux() @@ -46,8 +46,9 @@ func AdminAPIListenAndServe(ctx context.Context, healthy ...func() error) func() // The admin api gets a its own context, because we want to delay the // server shutdown as long as possible. The reason for this is that Istio // starts to block all outgoing connections as soon as there is no - // listening server anymore. - bg, cancel := context.WithCancel(context.Background()) + // listening server anymore. Also a graceful shutdown is not needed for the + // admin API, so it is also not necessary to cancel the context. + bg := context.Background() go func() { logutil.Get(ctx).Debugf("admin api listening on port 8090") @@ -57,6 +58,4 @@ func AdminAPIListenAndServe(ctx context.Context, healthy ...func() error) func() logutil.Get(ctx).Error(err.Error()) } }() - - return cancel }