Skip to content

Commit

Permalink
Merge pull request ethereum-optimism#4930 from ethereum-optimism/fix-…
Browse files Browse the repository at this point in the history
…sequencer-err-handling

op-node: handle sequencer errors with derivation-levels, reset when L1 origins are inconsistent
  • Loading branch information
mslipper authored Feb 24, 2023
2 parents aa766f2 + 36dca77 commit 24edceb
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 38 deletions.
47 changes: 43 additions & 4 deletions op-e2e/actions/l2_sequencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package actions

import (
"context"
"errors"

"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"

"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/rollup/driver"
Expand Down Expand Up @@ -46,14 +48,18 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
}
return &L2Sequencer{
L2Verifier: *ver,
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector),
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector, metrics.NoopMetrics),
mockL1OriginSelector: l1OriginSelector,
failL2GossipUnsafeBlock: nil,
}
}

// ActL2StartBlock starts building of a new L2 block on top of the head
func (s *L2Sequencer) ActL2StartBlock(t Testing) {
s.ActL2StartBlockCheckErr(t, nil)
}

func (s *L2Sequencer) ActL2StartBlockCheckErr(t Testing, checkErr error) {
if !s.l2PipelineIdle {
t.InvalidAction("cannot start L2 build when derivation is not idle")
return
Expand All @@ -64,9 +70,19 @@ func (s *L2Sequencer) ActL2StartBlock(t Testing) {
}

err := s.sequencer.StartBuildingBlock(t.Ctx())
require.NoError(t, err, "failed to start block building")
if checkErr == nil {
require.NoError(t, err, "failed to start block building")
} else {
require.ErrorIs(t, err, checkErr, "expected typed error")
}

s.l2Building = true
if errors.Is(err, derive.ErrReset) {
s.derivation.Reset()
}

if err == nil {
s.l2Building = true
}
}

// ActL2EndBlock completes a new L2 block and applies it to the L2 chain as new canonical unsafe head
Expand Down Expand Up @@ -103,7 +119,16 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
}
}

// ActBuildToL1HeadExcl builds empty blocks until (excl.) the L1 head becomes the L2 origin
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
}

// ActBuildToL1HeadExcl builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
for {
s.ActL2PipelineFull(t)
Expand All @@ -116,3 +141,17 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
s.ActL2EndBlock(t)
}
}

// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number {
break
}
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
}
66 changes: 66 additions & 0 deletions op-e2e/actions/l2_sequencer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"math/big"
"testing"

"github.com/ethereum-optimism/optimism/op-node/rollup/derive"

"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/params"

Expand Down Expand Up @@ -100,3 +102,67 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
sequencer.ActL2StartBlock(t)
require.True(t, engine.l2ForceEmpty, "engine should not be allowed to include anything after sequencer drift is surpassed")
}

// TestL2Sequencer_SequencerOnlyReorg regression-tests a Goerli halt where the sequencer
// would build an unsafe L2 block with a L1 origin that then gets reorged out,
// while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt)
dp := e2eutils.MakeDeployParams(t, defaultRollupTestParams)
sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log)

// Sequencer at first only recognizes the genesis as safe.
// The rest of the L1 chain will be incorporated as L1 origins into unsafe L2 blocks.
sequencer.ActL2PipelineFull(t)

// build L1 block with coinbase A
miner.ActL1SetFeeRecipient(common.Address{'A'})
miner.ActEmptyBlock(t)

// sequencer builds L2 blocks, until (incl.) it creates a L2 block with a L1 origin that has A as coinbase address
sequencer.ActL1HeadSignal(t)
sequencer.ActBuildToL1HeadUnsafe(t)

status := sequencer.SyncStatus()
require.Zero(t, status.SafeL2.L1Origin.Number, "no safe head progress")
require.Equal(t, status.HeadL1.Hash, status.UnsafeL2.L1Origin.Hash, "have head L1 origin")
// reorg out block with coinbase A, and make a block with coinbase B
miner.ActL1RewindToParent(t)
miner.ActL1SetFeeRecipient(common.Address{'B'})
miner.ActEmptyBlock(t)

// and a second block, for derivation to pick up on the new L1 chain
// (height is used as heuristic to not flip-flop between chains too frequently)
miner.ActEmptyBlock(t)

// Make the sequencer aware of the new head, and try to sync it.
// Since the safe chain never incorporated the now reorged L1 block with coinbase A,
// it will sync the new L1 chain fine.
// No batches are submitted yet however,
// so it'll keep the L2 block with the old L1 origin, since no conflict is detected.
sequencer.ActL1HeadSignal(t)
sequencer.ActL2PipelineFull(t)
// TODO: CLI-3405 we can detect the inconsistency of the L1 origin of the unsafe L2 head:
// as verifier, there is no need to wait for sequencer to recognize it.
newStatus := sequencer.SyncStatus()
require.Equal(t, status.HeadL1.Hash, newStatus.UnsafeL2.L1Origin.Hash, "still have old bad L1 origin")
require.NotEqual(t, status.HeadL1.Hash, newStatus.HeadL1.Hash, "did see the new L1 head change")
require.Equal(t, newStatus.HeadL1.Hash, newStatus.CurrentL1.Hash, "did sync the new L1 head as verifier")

// the block N+1 cannot build on the old N which still refers to the now orphaned L1 origin
require.Equal(t, status.UnsafeL2.L1Origin.Number, newStatus.HeadL1.Number-1, "seeing N+1 to attempt to build on N")
require.NotEqual(t, status.UnsafeL2.L1Origin.Hash, newStatus.HeadL1.ParentHash, "but N+1 cannot fit on N")
sequencer.ActL1HeadSignal(t)
// sequence more L2 blocks, until we actually need the next L1 origin
sequencer.ActBuildToL1HeadExclUnsafe(t)
// We expect block building to fail when the next L1 block is not consistent with the existing L1 origin
sequencer.ActL2StartBlockCheckErr(t, derive.ErrReset)
// After hitting a reset error, it reset derivation, and drops the old L1 chain
sequencer.ActL2PipelineFull(t)
require.Zero(t, sequencer.SyncStatus().UnsafeL2.L1Origin.Number, "back to genesis block with good L1 origin, drop old unsafe L2 chain with bad L1 origins")
// Can build new L2 blocks with good L1 origin
sequencer.ActBuildToL1HeadUnsafe(t)
require.Equal(t, newStatus.HeadL1.Hash, sequencer.SyncStatus().UnsafeL2.L1Origin.Hash, "build L2 chain with new correct L1 origins")
}
24 changes: 24 additions & 0 deletions op-node/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ type Metricer interface {
RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID)
CountSequencedTxs(count int)
RecordL1ReorgDepth(d uint64)
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
RecordGossipEvent(evType int32)
IncPeerCount()
DecPeerCount()
Expand Down Expand Up @@ -86,6 +88,9 @@ type Metrics struct {
SequencingErrors *EventMetrics
PublishingErrors *EventMetrics

SequencerInconsistentL1Origin *EventMetrics
SequencerResets *EventMetrics

SequencerBuildingDiffDurationSeconds prometheus.Histogram
SequencerBuildingDiffTotal prometheus.Counter

Expand Down Expand Up @@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics {
SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"),
PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"),

SequencerInconsistentL1Origin: NewEventMetrics(factory, ns, "sequencer_inconsistent_l1_origin", "events when the sequencer selects an inconsistent L1 origin"),
SequencerResets: NewEventMetrics(factory, ns, "sequencer_resets", "sequencer resets"),

UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns,
Name: "unsafe_payloads_buffer_len",
Expand Down Expand Up @@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) {
m.L1ReorgDepth.Observe(float64(d))
}

func (m *Metrics) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
m.SequencerInconsistentL1Origin.RecordEvent()
m.recordRef("l1_origin", "inconsistent_from", from.Number, 0, from.Hash)
m.recordRef("l1_origin", "inconsistent_to", to.Number, 0, to.Hash)
}

func (m *Metrics) RecordSequencerReset() {
m.SequencerResets.RecordEvent()
}

func (m *Metrics) RecordGossipEvent(evType int32) {
m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc()
}
Expand Down Expand Up @@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) {
func (n *noopMetricer) RecordL1ReorgDepth(d uint64) {
}

func (n *noopMetricer) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
}

func (n *noopMetricer) RecordSequencerReset() {
}

func (n *noopMetricer) RecordGossipEvent(evType int32) {
}

Expand Down
3 changes: 3 additions & 0 deletions op-node/rollup/derive/engine_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,9 @@ func (eq *EngineQueue) ConfirmPayload(ctx context.Context) (out *eth.ExecutionPa
}

func (eq *EngineQueue) CancelPayload(ctx context.Context, force bool) error {
if eq.buildingID == (eth.PayloadID{}) { // only cancel if there is something to cancel.
return nil
}
// the building job gets wrapped up as soon as the payload is retrieved, there's no explicit cancel in the Engine API
eq.log.Error("cancelling old block sealing job", "payload", eq.buildingID)
_, err := eq.engine.GetPayload(ctx, eq.buildingID)
Expand Down
8 changes: 8 additions & 0 deletions op-node/rollup/derive/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ type L1Fetcher interface {
L1TransactionFetcher
}

// ResettableEngineControl wraps EngineControl with reset-functionality,
// which handles reorgs like the derivation pipeline:
// by determining the last valid block references to continue from.
type ResettableEngineControl interface {
EngineControl
Reset()
}

type ResetableStage interface {
// Reset resets a pull stage. `base` refers to the L1 Block Reference to reset to, with corresponding configuration.
Reset(ctx context.Context, base eth.L1BlockRef, baseCfg eth.SystemConfig) error
Expand Down
6 changes: 3 additions & 3 deletions op-node/rollup/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Metrics interface {
RecordL1ReorgDepth(d uint64)

EngineMetrics
SequencerMetrics
}

type L1Chain interface {
Expand Down Expand Up @@ -69,7 +70,7 @@ type SequencerIface interface {
StartBuildingBlock(ctx context.Context) error
CompleteBuildingBlock(ctx context.Context) (*eth.ExecutionPayload, error)
PlanNextSequencerAction() time.Duration
RunNextSequencerAction(ctx context.Context) *eth.ExecutionPayload
RunNextSequencerAction(ctx context.Context) (*eth.ExecutionPayload, error)
BuildingOnto() eth.L2BlockRef
}

Expand All @@ -88,12 +89,11 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2)
engine := derivationPipeline
meteredEngine := NewMeteredEngine(cfg, engine, metrics, log)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin, metrics)

return &Driver{
l1State: l1State,
derivation: derivationPipeline,
idleDerivation: false,
stateReq: make(chan chan struct{}),
forceReset: make(chan chan struct{}, 10),
startSequencer: make(chan hashAndErrorChannel, 10),
Expand Down
12 changes: 8 additions & 4 deletions op-node/rollup/driver/metered_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type EngineMetrics interface {

// MeteredEngine wraps an EngineControl and adds metrics such as block building time diff and sealing time
type MeteredEngine struct {
inner derive.EngineControl
inner derive.ResettableEngineControl

cfg *rollup.Config
metrics EngineMetrics
Expand All @@ -30,10 +30,10 @@ type MeteredEngine struct {
buildingStartTime time.Time
}

// MeteredEngine implements derive.EngineControl
var _ derive.EngineControl = (*MeteredEngine)(nil)
// MeteredEngine implements derive.ResettableEngineControl
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)

func NewMeteredEngine(cfg *rollup.Config, inner derive.EngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
return &MeteredEngine{
inner: inner,
cfg: cfg,
Expand Down Expand Up @@ -93,3 +93,7 @@ func (m *MeteredEngine) CancelPayload(ctx context.Context, force bool) error {
func (m *MeteredEngine) BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool) {
return m.inner.BuildingPayload()
}

func (m *MeteredEngine) Reset() {
m.inner.Reset()
}
Loading

0 comments on commit 24edceb

Please sign in to comment.