Skip to content

Commit

Permalink
enhance: improve reliability of start healthz
Browse files Browse the repository at this point in the history
Signed-off-by: Donnie Adams <[email protected]>
  • Loading branch information
thedadams committed Jan 28, 2025
1 parent ed22001 commit 1d4b332
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 24 deletions.
12 changes: 10 additions & 2 deletions pkg/router/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,22 @@ func NewHandlerSet(name string, scheme *runtime.Scheme, backend backend.Backend)
}

// Start records ctx on the HandlerSet, passes every GVK reported by m.handlers.GVKs() to
// WatchGVK, and then calls backend.Start with ctx. It returns the error from WatchGVK if that
// call fails, otherwise the result of backend.Start.
//
// NOTE(review): this listing is a commit diff rendered without +/- markers. The unconditional
// "m.ctx = ctx" is the line the commit removes; the nil-guarded assignment right after it is
// its replacement, so after the commit an already-stored context is kept.
func (m *HandlerSet) Start(ctx context.Context) error {
m.ctx = ctx
if m.ctx == nil {
m.ctx = ctx
}
// WatchGVK is called before the backend is started; a failure here means the backend is
// never started.
if err := m.WatchGVK(m.handlers.GVKs()...); err != nil {
return err
}
return m.backend.Start(ctx)
}

// Preload stores ctx on the HandlerSet when no context has been stored yet, passes every GVK
// reported by m.handlers.GVKs() to WatchGVK, and then calls backend.Preload with ctx. It returns
// the error from WatchGVK if that call fails, otherwise the result of backend.Preload.
func (m *HandlerSet) Preload(ctx context.Context) error {
// An existing m.ctx is left untouched; only a nil m.ctx is replaced.
if m.ctx == nil {
m.ctx = ctx
}
// WatchGVK runs before the backend preload; a failure here means backend.Preload is not called.
if err := m.WatchGVK(m.handlers.GVKs()...); err != nil {
return err
}
return m.backend.Preload(ctx)
}

Expand Down Expand Up @@ -253,7 +261,7 @@ func (m *HandlerSet) onChange(gvk schema.GroupVersionKind, key string, runtimeOb
}

if !fromReplay && !fromTrigger {
// Process delay have key has be reassigned from the TriggerPrefix
// Process delay have key has been reassigned from the TriggerPrefix
if !m.checkDelay(gvk, key) {
return runtimeObject, nil
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/router/healthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ func setHealthy(name string, healthy bool) {
healthz.healths[name] = healthy
}

func getHealthy() bool {
func GetHealthy() bool {
healthz.lock.RLock()
defer healthz.lock.RUnlock()
for _, healthy := range healthz.healths {
if !healthy {
return false
}
}
return true
return len(healthz.healths) > 0
}

// startHealthz starts a healthz server on the healthzPort. If the server is already running, then this is a no-op.
Expand All @@ -65,7 +65,7 @@ func startHealthz(ctx context.Context) {

mux := http.NewServeMux()
mux.HandleFunc("/healthz", func(w http.ResponseWriter, req *http.Request) {
if getHealthy() {
if GetHealthy() {
w.WriteHeader(http.StatusOK)
return
}
Expand Down
36 changes: 17 additions & 19 deletions pkg/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"runtime"
"sync"

"github.com/obot-platform/nah/pkg/backend"
"github.com/obot-platform/nah/pkg/leader"
Expand All @@ -21,10 +22,9 @@ type Router struct {
OnErrorHandler ErrorHandler
handlers *HandlerSet
electionConfig *leader.ElectionConfig
hasHealthz bool
startLock sync.Mutex
postStarts []func(context.Context, kclient.Client)
signalStopped chan struct{}
cancel func()
}

// New returns a new *Router with given HandlerSet and ElectionConfig. Passing a nil ElectionConfig is valid and results
Expand All @@ -41,7 +41,6 @@ func New(handlerSet *HandlerSet, electionConfig *leader.ElectionConfig, healthzP

if healthzPort > 0 {
setPort(healthzPort)
r.hasHealthz = true
}

r.RouteBuilder.router = r
Expand Down Expand Up @@ -187,44 +186,43 @@ func (r RouteBuilder) Handler(h Handler) {
}

// Start runs the router. It uses the OS hostname as this instance's election id, starts the
// healthz server, copies OnErrorHandler onto the handler set, and then calls electionConfig.Run,
// whose result it returns. Run is given r.startHandlers, a callback that receives a leader id,
// and r.signalStopped. An error from os.Hostname is returned before anything is started.
//
// NOTE(review): this listing is a commit diff rendered without +/- markers, so removed and added
// lines appear side by side. The same commit drops the cancel and hasHealthz fields from Router,
// which means the "router already started" guard, both "if r.hasHealthz" blocks and the
// context.WithCancel assignment below are the removed lines.
func (r *Router) Start(ctx context.Context) error {
if r.cancel != nil {
return fmt.Errorf("router already started")
}

// The hostname doubles as the identity compared against the leader id in the callback below.
id, err := os.Hostname()
if err != nil {
return err
}

if r.hasHealthz {
startHealthz(ctx)
}
startHealthz(ctx)

r.handlers.onError = r.OnErrorHandler

ctx, r.cancel = context.WithCancel(ctx)

// It's OK to start the electionConfig even if it's nil.
return r.electionConfig.Run(ctx, id, r.startHandlers, func(leader string) {
// When this instance is the reported leader there is nothing to preload here;
// r.startHandlers is the function handed to Run for the leader case.
if id == leader {
return
}

// startHandlers takes the same lock, so this callback and startHandlers do not overlap.
r.startLock.Lock()
defer r.startLock.Unlock()

// Mark this router unhealthy for the duration of the preload; the deferred call marks it
// healthy again when the callback returns.
setHealthy(r.name, false)
defer setHealthy(r.name, true)
// I am not the leader, so I am healthy when my cache is ready.
if err := r.handlers.Preload(ctx); err != nil {
// Failed to preload caches, panic
log.Fatalf("failed to preload caches: %v", err)
}
if r.hasHealthz {
setHealthy(r.name, id != leader)
}
}, r.signalStopped)
}

// startHandlers gets called when we become the leader or if there is no leader election.
func (r *Router) startHandlers(ctx context.Context) error {
r.startLock.Lock()
defer r.startLock.Unlock()

var err error
// This is the leader now, so not ready until the controller is started and caches are ready.
if r.hasHealthz {
setHealthy(r.name, false)
defer setHealthy(r.name, err == nil)
}
setHealthy(r.name, false)
defer setHealthy(r.name, err == nil)

if err = r.handlers.Start(ctx); err != nil {
return err
Expand Down

0 comments on commit 1d4b332

Please sign in to comment.