From db7583bcc0e05166fa354437399c55c94b586bbf Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Mon, 6 Jan 2025 18:48:01 -0700
Subject: [PATCH] kv: conditionally handleReady after tick

Related to #133885.
Related to #133216.

This commit adjusts `Replica.tick` to check `RawNode.HasReady` when
deciding whether a `Ready` struct should be processed
(`Replica.handleRaftReady`) from the ticked replica. Previously, we
would unconditionally pass through `Replica.handleRaftReady` after a
tick, even if doing so was unnecessary and we knew ahead of time that
we would hit this early return:
https://github.com/cockroachdb/cockroach/blob/57aab736c34ce5dc7988bd53e0604fde48cef441/pkg/kv/kvserver/replica_raft.go#L1025.

This was mostly harmless in the past with quiescence and a cheap
short-circuit in `handleRaftReady`, but has become more problematic
recently for the following three reasons:

1. Leader leases don't permit quiescence. We have some work planned to
   allow follower replicas to quiesce under a leader lease (see
   #133885), but even after that work, we likely won't allow quiescence
   for the leaseholder. This means that leaseholder replicas will keep
   ticking indefinitely.

2. `Replica.handleRaftReady` has been getting more expensive, even in
   the no-op case. This is because of work done in
   `Replica.updateProposalQuotaRaftMuLocked` and work done for replica
   admission control v2 (RACv2).

3. At some point in the future, we would like to revert 5e6698 and drop
   the raft tick interval back down to something like 200ms. This will
   allow us to address #133576 and further reduce failover times.

Release note: None
---
 pkg/kv/kvserver/replica_raft.go | 8 +++++---
 pkg/kv/kvserver/store_raft.go   | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go
index 9b23f98e582d..68023bc5fc70 100644
--- a/pkg/kv/kvserver/replica_raft.go
+++ b/pkg/kv/kvserver/replica_raft.go
@@ -1423,11 +1423,11 @@ func maybeFatalOnRaftReadyErr(ctx context.Context, err error) (removed bool) {
 // be queued for Ready processing; false otherwise.
 func (r *Replica) tick(
 	ctx context.Context, livenessMap livenesspb.IsLiveMap, ioThresholdMap *ioThresholdMap,
-) (exists bool, err error) {
+) (processReady bool, err error) {
 	r.raftMu.Lock()
 	defer r.raftMu.Unlock()
 	defer func() {
-		if exists && err == nil {
+		if processReady && err == nil {
 			// NB: since we are returning true, there will be a Ready handling
 			// immediately after this call, so any pings stashed in raft will be sent.
 			// NB: Replica.mu must not be held here.
@@ -1530,7 +1530,9 @@ func (r *Replica) tick(
 		// have been pending for 1 to 2 reproposal timeouts.
 		r.refreshProposalsLocked(ctx, refreshAtDelta, reasonTicks)
 	}
-	return true, nil
+
+	processReady = r.mu.internalRaftGroup.HasReady()
+	return processReady, nil
 }
 
 func (r *Replica) processRACv2PiggybackedAdmitted(ctx context.Context) {
diff --git a/pkg/kv/kvserver/store_raft.go b/pkg/kv/kvserver/store_raft.go
index 9ed83fe322ca..c792c7f1d1a3 100644
--- a/pkg/kv/kvserver/store_raft.go
+++ b/pkg/kv/kvserver/store_raft.go
@@ -762,12 +762,12 @@ func (s *Store) processTick(_ context.Context, rangeID roachpb.RangeID) bool {
 	start := timeutil.Now()
 	ctx := r.raftCtx
 
-	exists, err := r.tick(ctx, livenessMap, ioThresholds)
+	processReady, err := r.tick(ctx, livenessMap, ioThresholds)
 	if err != nil {
 		log.Errorf(ctx, "%v", err)
 	}
 	s.metrics.RaftTickingDurationNanos.Inc(timeutil.Since(start).Nanoseconds())
-	return exists // ready
+	return processReady
 }
 
 func (s *Store) processRACv2PiggybackedAdmitted(ctx context.Context, rangeID roachpb.RangeID) {