Skip to content

Commit

Permalink
fix startup after BN exited between head and finalized blocks updates (
Browse files Browse the repository at this point in the history
…#5617)

When the BN (beacon node) exits after writing the new `head` to the
database, but before completing the `updateFinalizedBlocks` call, the
database is left slightly inconsistent due to the partial write. We
currently fail to start up after that. Fix that by catching up on partial
`updateFinalizedBlocks` tasks on startup, and add a test for this edge case.
  • Loading branch information
etan-status authored Nov 22, 2023
1 parent d5fbbd9 commit 8cea8af
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 11 deletions.
5 changes: 3 additions & 2 deletions AllTests-mainnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,9 @@ OK: 3/3 Fail: 0/3 Skip: 0/3
+ init with gaps [Preset: mainnet] OK
+ orphaned epoch block [Preset: mainnet] OK
+ prune heads on finalization [Preset: mainnet] OK
+ shutdown during finalization [Preset: mainnet] OK
```
OK: 3/3 Fail: 0/3 Skip: 0/3
OK: 4/4 Fail: 0/4 Skip: 0/4
## createValidatorFiles()
```diff
+ Add keystore files [LOCAL] OK
Expand Down Expand Up @@ -720,4 +721,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9

---TOTAL---
OK: 409/414 Fail: 0/414 Skip: 5/414
OK: 410/415 Fail: 0/415 Skip: 5/415
26 changes: 17 additions & 9 deletions beacon_chain/consensus_object_pools/blockchain_dag.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,6 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
# state - the tail is implicitly finalized, and if we have a finalized block
# table, that provides another hint
finalizedSlot = db.finalizedBlocks.high.get(tail.slot)
newFinalized: seq[BlockId]
cache: StateCache
foundHeadState = false
headBlocks: seq[BlockRef]
Expand Down Expand Up @@ -1128,28 +1127,37 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,

doAssert dag.finalizedHead.blck != nil,
"The finalized head should exist at the slot"
doAssert dag.finalizedHead.blck.parent == nil,
"...but that's the last BlockRef with a parent"

block: # Top up finalized blocks
if db.finalizedBlocks.high.isNone or
db.finalizedBlocks.high.get() < dag.finalizedHead.blck.slot:
# Versions prior to 1.7.0 did not store finalized blocks in the
# database, and / or the application might have crashed between the head
# and finalized blocks updates.
info "Loading finalized blocks",
finHigh = db.finalizedBlocks.high,
finalizedHead = shortLog(dag.finalizedHead)

for blck in db.getAncestorSummaries(dag.finalizedHead.blck.root):
var
newFinalized: seq[BlockId]
tmp = dag.finalizedHead.blck
while tmp.parent != nil:
newFinalized.add(tmp.bid)
let p = tmp.parent
tmp.parent = nil
tmp = p

for blck in db.getAncestorSummaries(tmp.root):
if db.finalizedBlocks.high.isSome and
blck.summary.slot <= db.finalizedBlocks.high.get:
break

# Versions prior to 1.7.0 did not store finalized blocks in the
# database, and / or the application might have crashed between the head
# and finalized blocks updates.
newFinalized.add(BlockId(slot: blck.summary.slot, root: blck.root))

let finalizedBlocksTick = Moment.now()
db.updateFinalizedBlocks(newFinalized)
db.updateFinalizedBlocks(newFinalized)

doAssert dag.finalizedHead.blck.parent == nil,
"The finalized head is the last BlockRef with a parent"

block:
let finalized = db.finalizedBlocks.get(db.finalizedBlocks.high.get()).expect(
Expand Down
37 changes: 37 additions & 0 deletions tests/test_blockchain_dag.nim
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,43 @@ suite "chain DAG finalization tests" & preset():
dag2.finalizedHead.slot == dag.finalizedHead.slot
getStateRoot(dag2.headState) == getStateRoot(dag.headState)

# Regression test: checks that a fresh ChainDAGRef can be initialized from the
# database in the intermediate state where the new head has been written but
# the finalized blocks table has not yet been brought up to date.
test "shutdown during finalization" & preset():
# Flipped to true by the callback below once the second DAG has been
# initialized; verified by the final `check`.
var testPassed: bool

# Configure a hook that is called during finalization while the
# database has been partially written, to test behaviour if the
# beacon node is exited while the database is inconsistent.
proc onHeadChanged(data: HeadChangeInfoObject) =
if data.epoch_transition:
# Check test assumption: Head block was written before this callback
let headBlock = dag.db.getHeadBlock().expect("Valid DB")
doAssert headBlock == data.block_root, "Head was written before CB"

# Check test assumption: New finalized blocks were not written yet
# (the finalized checkpoint slot in the head state must be strictly ahead
# of the highest slot recorded in the database's finalized blocks table).
let
stateFinalizedSlot =
dag.headState.getStateField(finalized_checkpoint).epoch.start_slot
dbFinalizedSlot =
dag.db.finalizedBlocks.high.expect("Valid DB")
doAssert stateFinalizedSlot > dbFinalizedSlot, "Finalized not written"

# If the beacon node were to exit _now_, this is what the DB looks like.
# Validate that we can initialize a new DAG from this database.
# The resulting DAG is discarded; only successful completion of `init`
# matters here.
let validatorMonitor2 = newClone(ValidatorMonitor.init())
discard ChainDAGRef.init(
defaultRuntimeConfig, db, validatorMonitor2, {})
testPassed = true
# Install the hook on the DAG under test.
# NOTE(review): presumably invoked from `updateHead` below - confirm.
dag.setHeadCb(onHeadChanged)

# Produce `SLOTS_PER_EPOCH * 4` attested test blocks, making each one the new
# head and pruning afterwards.
# NOTE(review): presumably enough epochs for finalization to advance and the
# hook's `epoch_transition` branch to run - confirm.
for blck in makeTestBlocks(
dag.headState, cache, int(SLOTS_PER_EPOCH * 4), attested = true):
let added = dag.addHeadBlock(verifier, blck.phase0Data, nilPhase0Callback)
check: added.isOk
dag.updateHead(added[], quarantine, [])
dag.pruneAtFinalization()

# Fails unless the callback ran through to `testPassed = true`.
check testPassed

suite "Old database versions" & preset():
setup:
let
Expand Down

0 comments on commit 8cea8af

Please sign in to comment.