Skip to content

Commit

Permalink
op-node: fix-sequencer-err-handling PR 4930 suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
protolambda committed Feb 24, 2023
1 parent 61c0b14 commit 91516a6
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 12 deletions.
6 changes: 4 additions & 2 deletions op-e2e/actions/l2_sequencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/rollup/driver"
Expand Down Expand Up @@ -47,7 +48,7 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
}
return &L2Sequencer{
L2Verifier: *ver,
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector),
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector, metrics.NoopMetrics),
mockL1OriginSelector: l1OriginSelector,
failL2GossipUnsafeBlock: nil,
}
Expand Down Expand Up @@ -121,7 +122,7 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
Expand All @@ -144,6 +145,7 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number {
Expand Down
7 changes: 1 addition & 6 deletions op-e2e/actions/l2_sequencer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,7 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
// while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt)
p := &e2eutils.TestParams{
MaxSequencerDrift: 20, // larger than L1 block time we simulate in this test (12)
SequencerWindowSize: 24,
ChannelTimeout: 20,
}
dp := e2eutils.MakeDeployParams(t, p)
dp := e2eutils.MakeDeployParams(t, defaultRollupTestParams)
sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log)
Expand Down
24 changes: 24 additions & 0 deletions op-node/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ type Metricer interface {
RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID)
CountSequencedTxs(count int)
RecordL1ReorgDepth(d uint64)
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
RecordGossipEvent(evType int32)
IncPeerCount()
DecPeerCount()
Expand Down Expand Up @@ -86,6 +88,9 @@ type Metrics struct {
SequencingErrors *EventMetrics
PublishingErrors *EventMetrics

SequencerInconsistentL1Origin *EventMetrics
SequencerResets *EventMetrics

SequencerBuildingDiffDurationSeconds prometheus.Histogram
SequencerBuildingDiffTotal prometheus.Counter

Expand Down Expand Up @@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics {
SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"),
PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"),

SequencerInconsistentL1Origin: NewEventMetrics(factory, ns, "sequencer_inconsistent_l1_origin", "events when the sequencer selects an inconsistent L1 origin"),
SequencerResets: NewEventMetrics(factory, ns, "sequencer_resets", "sequencer resets"),

UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns,
Name: "unsafe_payloads_buffer_len",
Expand Down Expand Up @@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) {
m.L1ReorgDepth.Observe(float64(d))
}

func (m *Metrics) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
m.SequencerInconsistentL1Origin.RecordEvent()
m.recordRef("l1_origin", "inconsistent_from", from.Number, 0, from.Hash)
m.recordRef("l1_origin", "inconsistent_to", to.Number, 0, to.Hash)
}

func (m *Metrics) RecordSequencerReset() {
m.SequencerResets.RecordEvent()
}

func (m *Metrics) RecordGossipEvent(evType int32) {
m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc()
}
Expand Down Expand Up @@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) {
func (n *noopMetricer) RecordL1ReorgDepth(d uint64) {
}

func (n *noopMetricer) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
}

func (n *noopMetricer) RecordSequencerReset() {
}

func (n *noopMetricer) RecordGossipEvent(evType int32) {
}

Expand Down
3 changes: 3 additions & 0 deletions op-node/rollup/derive/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ type L1Fetcher interface {
L1TransactionFetcher
}

// ResettableEngineControl wraps EngineControl with reset-functionality,
// which handles reorgs like the derivation pipeline:
// by determining the last valid block references to continue from.
type ResettableEngineControl interface {
EngineControl
Reset()
Expand Down
3 changes: 2 additions & 1 deletion op-node/rollup/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Metrics interface {
RecordL1ReorgDepth(d uint64)

EngineMetrics
SequencerMetrics
}

type L1Chain interface {
Expand Down Expand Up @@ -88,7 +89,7 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2)
engine := derivationPipeline
meteredEngine := NewMeteredEngine(cfg, engine, metrics, log)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin, metrics)

return &Driver{
l1State: l1State,
Expand Down
2 changes: 1 addition & 1 deletion op-node/rollup/driver/metered_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type MeteredEngine struct {
buildingStartTime time.Time
}

// MeteredEngine implements derive.EngineControl
// MeteredEngine implements derive.ResettableEngineControl
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)

func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
Expand Down
14 changes: 13 additions & 1 deletion op-node/rollup/driver/sequencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ type L1OriginSelectorIface interface {
FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error)
}

type SequencerMetrics interface {
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
}

// Sequencer implements the sequencing interface of the driver: it starts and completes block building jobs.
type Sequencer struct {
log log.Logger
Expand All @@ -34,20 +39,23 @@ type Sequencer struct {
attrBuilder derive.AttributesBuilder
l1OriginSelector L1OriginSelectorIface

metrics SequencerMetrics

// timeNow enables sequencer testing to mock the time
timeNow func() time.Time

nextAction time.Time
}

func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer {
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface, metrics SequencerMetrics) *Sequencer {
return &Sequencer{
log: log,
config: cfg,
engine: engine,
timeNow: time.Now,
attrBuilder: attributesBuilder,
l1OriginSelector: l1OriginSelector,
metrics: metrics,
}
}

Expand All @@ -63,6 +71,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error {
}

if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) {
d.metrics.RecordSequencerInconsistentL1Origin(l2Head.L1Origin, l1Origin.ID())
return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin))
}

Expand Down Expand Up @@ -169,6 +178,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err // bubble up critical errors.
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
if buildingID != (eth.PayloadID{}) { // cancel what we were doing
d.CancelBuildingBlock(ctx)
Expand All @@ -180,6 +190,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
// Any unfinished block building work eventually times out, and will be cleaned up that way.
} else {
d.log.Error("sequencer failed to seal block with unclassified error", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them
d.CancelBuildingBlock(ctx)
}
Expand All @@ -196,6 +207,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
d.engine.Reset()
} else if errors.Is(err, derive.ErrTemporary) {
Expand Down
3 changes: 2 additions & 1 deletion op-node/rollup/driver/sequencer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/testlog"
Expand Down Expand Up @@ -301,7 +302,7 @@ func TestSequencerChaosMonkey(t *testing.T) {
}
})

seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector)
seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector, metrics.NoopMetrics)
seq.timeNow = clockFn

// try to build 1000 blocks, with 5x as many planning attempts, to handle errors and clock problems
Expand Down

0 comments on commit 91516a6

Please sign in to comment.