diff --git a/op-chain-ops/cmd/celo-migrate/README.md b/op-chain-ops/cmd/celo-migrate/README.md index 807a4f0b1606..72b9f437c1bd 100644 --- a/op-chain-ops/cmd/celo-migrate/README.md +++ b/op-chain-ops/cmd/celo-migrate/README.md @@ -129,7 +129,7 @@ If node operators would like to practice a `full migration` they can do so and r > [!IMPORTANT] > The pre-migration should be run using a chaindata snapshot, rather than a db that is being used by a node. To avoid network downtime, we recommend that node operators do not stop any nodes in order to perform the pre-migration. -Node operators should inspect their migration logs after the dry-run to ensure the migration completed succesfully and direct any questions to the Celo developer community on Discord before the actual migration. +Node operators should inspect their migration logs after the dry-run to ensure the migration completed successfully and direct any questions to the Celo developer community on Discord before the actual migration. ##### Final migration diff --git a/op-chain-ops/cmd/celo-migrate/ancients.go b/op-chain-ops/cmd/celo-migrate/ancients.go index 3dfc34763ba6..6f7549c10c2f 100644 --- a/op-chain-ops/cmd/celo-migrate/ancients.go +++ b/op-chain-ops/cmd/celo-migrate/ancients.go @@ -13,16 +13,6 @@ import ( "golang.org/x/sync/errgroup" ) -// RLPBlockRange is a range of blocks in RLP format -type RLPBlockRange struct { - start uint64 - hashes [][]byte - headers [][]byte - bodies [][]byte - receipts [][]byte - tds [][]byte -} - // NewChainFreezer is a small utility method around NewFreezer that sets the // default parameters for the chain storage. 
func NewChainFreezer(datadir string, namespace string, readonly bool) (*rawdb.Freezer, error) { @@ -104,46 +94,19 @@ func migrateAncientsDb(ctx context.Context, oldDBPath, newDBPath string, batchSi func readAncientBlocks(ctx context.Context, freezer *rawdb.Freezer, startBlock, endBlock, batchSize uint64, out chan<- RLPBlockRange) error { defer close(out) - for i := startBlock; i < endBlock; i += batchSize { - count := min(batchSize, endBlock-i+1) + count := min(batchSize, endBlock-i) start := i - blockRange := RLPBlockRange{ - start: start, - hashes: make([][]byte, count), - headers: make([][]byte, count), - bodies: make([][]byte, count), - receipts: make([][]byte, count), - tds: make([][]byte, count), - } - var err error - - blockRange.hashes, err = freezer.AncientRange(rawdb.ChainFreezerHashTable, start, count, 0) + blockRange, err := loadAncientRange(freezer, start, count) if err != nil { - return fmt.Errorf("failed to read hashes from old freezer: %w", err) - } - blockRange.headers, err = freezer.AncientRange(rawdb.ChainFreezerHeaderTable, start, count, 0) - if err != nil { - return fmt.Errorf("failed to read headers from old freezer: %w", err) - } - blockRange.bodies, err = freezer.AncientRange(rawdb.ChainFreezerBodiesTable, start, count, 0) - if err != nil { - return fmt.Errorf("failed to read bodies from old freezer: %w", err) - } - blockRange.receipts, err = freezer.AncientRange(rawdb.ChainFreezerReceiptTable, start, count, 0) - if err != nil { - return fmt.Errorf("failed to read receipts from old freezer: %w", err) - } - blockRange.tds, err = freezer.AncientRange(rawdb.ChainFreezerDifficultyTable, start, count, 0) - if err != nil { - return fmt.Errorf("failed to read tds from old freezer: %w", err) + return fmt.Errorf("failed to load ancient block range: %w", err) } select { case <-ctx.Done(): return ctx.Err() + case out <- *blockRange: } log.Info("Read ancient blocks", "start", start, "end", start+count-1, "count", count) @@ 
-151,9 +114,47 @@ func readAncientBlocks(ctx context.Context, freezer *rawdb.Freezer, startBlock, return nil } +func loadAncientRange(freezer *rawdb.Freezer, start, count uint64) (*RLPBlockRange, error) { + log.Info("Loading ancient block range", "start", start, "end", start+count-1, "count", count) + + blockRange := &RLPBlockRange{ + start: start, + hashes: make([][]byte, count), + headers: make([][]byte, count), + bodies: make([][]byte, count), + receipts: make([][]byte, count), + tds: make([][]byte, count), + } + + var err error + blockRange.hashes, err = freezer.AncientRange(rawdb.ChainFreezerHashTable, start, count, 0) + if err != nil { + return nil, fmt.Errorf("failed to read hashes from freezer: %w", err) + } + blockRange.headers, err = freezer.AncientRange(rawdb.ChainFreezerHeaderTable, start, count, 0) + if err != nil { + return nil, fmt.Errorf("failed to read headers from freezer: %w", err) + } + blockRange.bodies, err = freezer.AncientRange(rawdb.ChainFreezerBodiesTable, start, count, 0) + if err != nil { + return nil, fmt.Errorf("failed to read bodies from freezer: %w", err) + } + blockRange.receipts, err = freezer.AncientRange(rawdb.ChainFreezerReceiptTable, start, count, 0) + if err != nil { + return nil, fmt.Errorf("failed to read receipts from freezer: %w", err) + } + blockRange.tds, err = freezer.AncientRange(rawdb.ChainFreezerDifficultyTable, start, count, 0) + if err != nil { + return nil, fmt.Errorf("failed to read tds from freezer: %w", err) + } + + return blockRange, nil +} + func transformBlocks(ctx context.Context, in <-chan RLPBlockRange, out chan<- RLPBlockRange) error { // Transform blocks from the in channel and send them to the out channel defer close(out) + for blockRange := range in { for i := range blockRange.hashes { blockNumber := blockRange.start + uint64(i) @@ -217,7 +218,7 @@ func writeAncientBlocks(ctx context.Context, freezer *rawdb.Freezer, in <-chan R return fmt.Errorf("failed to write block range: %w", err) } blockRangeEnd 
:= blockRange.start + uint64(len(blockRange.hashes)) - 1 - log.Info("Wrote ancient blocks", "start", blockRange.start, "end", blockRangeEnd, "count", len(blockRange.hashes), "remaining", totalAncientBlocks-blockRangeEnd) + log.Info("Wrote ancient blocks", "start", blockRange.start, "end", blockRangeEnd, "count", len(blockRange.hashes), "remaining", totalAncientBlocks-(blockRangeEnd+1)) } } return nil @@ -242,3 +243,16 @@ func getStrayAncientBlocks(dbPath string) (blocks []*rawdb.NumberHash, err error return rawdb.ReadAllHashesInRange(db, 1, numAncients-1), nil } + +// Get the last ancient block data so we can check for continuity between ancients and non-ancients +func loadLastAncient(freezer *rawdb.Freezer) (*RLPBlockElement, error) { + numAncients, err := freezer.Ancients() + if err != nil { + return nil, fmt.Errorf("failed to get number of ancients in freezer: %w", err) + } + blockRange, err := loadAncientRange(freezer, numAncients-1, 1) + if err != nil { + return nil, err + } + return blockRange.Element(0) +} diff --git a/op-chain-ops/cmd/celo-migrate/continuity.go b/op-chain-ops/cmd/celo-migrate/continuity.go new file mode 100644 index 000000000000..9b41c60d3e7d --- /dev/null +++ b/op-chain-ops/cmd/celo-migrate/continuity.go @@ -0,0 +1,131 @@ +package main + +import ( + "errors" + "fmt" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" +) + +// RLPBlockRange is a range of blocks in RLP format +type RLPBlockRange struct { + start uint64 + hashes [][]byte + headers [][]byte + bodies [][]byte + receipts [][]byte + tds [][]byte +} + +// RLPBlockElement contains all relevant block data in RLP format +type RLPBlockElement struct { + decodedHeader *types.Header + hash []byte + header []byte + body []byte + receipts []byte + td []byte +} + +// Follows checks if the current block has a number one greater than the previous block +// and if the parent 
hash of the current block matches the hash of the previous block. +func (e *RLPBlockElement) Follows(prev *RLPBlockElement) (err error) { + if e.Number() != prev.Number()+1 { + err = errors.Join(err, fmt.Errorf("header number mismatch indicating a gap in block numbers: expected %d, actual %d", prev.Number()+1, e.Number())) + } + // We compare the parent hash with the stored hash of the previous block because + // at this point the header object will not calculate the correct hash since it + // first needs to be transformed. + if e.Header().ParentHash != common.BytesToHash(prev.hash) { + err = errors.Join(err, fmt.Errorf("parent hash mismatch between blocks %d and %d", e.Number(), prev.Number())) + } + return err +} + +func (e *RLPBlockElement) Header() *types.Header { + return e.decodedHeader +} + +func (e *RLPBlockElement) Number() uint64 { + return e.Header().Number.Uint64() +} + +func (r *RLPBlockRange) Element(i uint64) (*RLPBlockElement, error) { + header := types.Header{} + err := rlp.DecodeBytes(r.headers[i], &header) + if err != nil { + return nil, fmt.Errorf("can't decode header: %w", err) + } + return &RLPBlockElement{ + decodedHeader: &header, + hash: r.hashes[i], + header: r.headers[i], + body: r.bodies[i], + receipts: r.receipts[i], + td: r.tds[i], + }, nil +} + +// CheckContinuity checks if the block data in the range is continuous +// by comparing the header number and parent hash of each block with the previous block, +// and by checking if the number of elements retrieved from each table is the same. +// It takes in the last element in the preceding range, and returns the last element +// in the current range so that continuity can be checked across ranges. 
+func (r *RLPBlockRange) CheckContinuity(prevElement *RLPBlockElement, expectedLength uint64) (*RLPBlockElement, error) { + log.Info("Checking data continuity for block range", + "start", r.start, + "end", r.start+expectedLength-1, + "count", expectedLength, + "prevElement", func() interface{} { + if prevElement != nil { + return prevElement.Number() + } + return "nil" + }(), + ) + + if err := r.CheckLengths(expectedLength); err != nil { + return nil, err + } + for i := range r.hashes { + currElement, err := r.Element(uint64(i)) + if err != nil { + return nil, err + } + if currElement.Number() != r.start+uint64(i) { + return nil, fmt.Errorf("decoded header number mismatch indicating a gap in block numbers: expected %d, actual %d", r.start+uint64(i), currElement.Number()) + } + if prevElement != nil { + log.Debug("Checking continuity", "block", currElement.Number(), "prev", prevElement.Number()) + if err := currElement.Follows(prevElement); err != nil { + return nil, err + } + } + prevElement = currElement + } + return prevElement, nil +} + +// CheckLengths makes sure the number of elements retrieved from each table is the same +func (r *RLPBlockRange) CheckLengths(expectedLength uint64) error { + var err error + if uint64(len(r.hashes)) != expectedLength { + err = fmt.Errorf("Unexpected number of hashes for block range: expected %d, actual %d", expectedLength, len(r.hashes)) + } + if uint64(len(r.bodies)) != expectedLength { + err = errors.Join(err, fmt.Errorf("Unexpected number of bodies for block range: expected %d, actual %d", expectedLength, len(r.bodies))) + } + if uint64(len(r.headers)) != expectedLength { + err = errors.Join(err, fmt.Errorf("Unexpected number of headers for block range: expected %d, actual %d", expectedLength, len(r.headers))) + } + if uint64(len(r.receipts)) != expectedLength { + err = errors.Join(err, fmt.Errorf("Unexpected number of receipts for block range: expected %d, actual %d", expectedLength, len(r.receipts))) + } + if 
uint64(len(r.tds)) != expectedLength { + err = errors.Join(err, fmt.Errorf("Unexpected number of total difficulties for block range: expected %d, actual %d", expectedLength, len(r.tds))) + } + return err +} diff --git a/op-chain-ops/cmd/celo-migrate/continuity_test.go b/op-chain-ops/cmd/celo-migrate/continuity_test.go new file mode 100644 index 000000000000..caa31fef7bf6 --- /dev/null +++ b/op-chain-ops/cmd/celo-migrate/continuity_test.go @@ -0,0 +1,145 @@ +package main + +import ( + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rlp" + "github.com/stretchr/testify/require" +) + +func makeRange(start int, bodies, receipts, tds, hashes, encodedHeaders [][]byte) *RLPBlockRange { + return &RLPBlockRange{ + start: uint64(start), + hashes: hashes, + headers: encodedHeaders, + bodies: bodies, + receipts: receipts, + tds: tds, + } +} + +func TestCheckContinuity(t *testing.T) { + hashes := [][]byte{[]byte("hash0"), []byte("hash1"), []byte("hash2"), []byte("hash3")} + bodies := [][]byte{[]byte("body0"), []byte("body1"), []byte("body2"), []byte("body3")} + receipts := [][]byte{[]byte("receipt0"), []byte("receipt1"), []byte("receipt2"), []byte("receipt3")} + tds := [][]byte{[]byte("td0"), []byte("td1"), []byte("td2"), []byte("td3")} + decodedHeaders := []*types.Header{ + {Number: big.NewInt(0), ParentHash: common.Hash{}}, + {Number: big.NewInt(1), ParentHash: common.BytesToHash(hashes[0])}, + {Number: big.NewInt(2), ParentHash: common.BytesToHash(hashes[1])}, + {Number: big.NewInt(3), ParentHash: common.BytesToHash(hashes[2])}, + } + headers := make([][]byte, len(decodedHeaders)) + for i, header := range decodedHeaders { + encodedHeader, err := rlp.EncodeToBytes(header) + if err != nil { + t.Fatalf("Failed to encode header: %v", err) + } + headers[i] = encodedHeader + } + + tests := []struct { + name string + blockRange *RLPBlockRange + prevElement *RLPBlockElement + 
expectedLength uint64 + expectErrorMsg string + }{ + // Valid continuity tests + { + name: "Valid continuity w/ nil prevElement", + blockRange: makeRange(0, bodies, receipts, tds, hashes, headers), + prevElement: nil, + expectedLength: 4, + expectErrorMsg: "", + }, + { + name: "Valid continuity w/ prevElement", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "", + }, + // Length mismatch tests + { + name: "Length mismatch from expected", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 4, + expectErrorMsg: "Unexpected number of hashes for block range: expected 4, actual 3\nUnexpected number of bodies for block range: expected 4, actual 3\nUnexpected number of headers for block range: expected 4, actual 3\nUnexpected number of receipts for block range: expected 4, actual 3\nUnexpected number of total difficulties for block range: expected 4, actual 3", + }, + { + name: "Length mismatch in hashes", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[2:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "Unexpected number of hashes for block range: expected 3, actual 2", + }, + { + name: "Length mismatch in headers", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "Unexpected number of headers for block range: expected 3, actual 4", + }, + { + name: "Length mismatch in bodies", + blockRange: makeRange(1, bodies[2:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: 
decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "Unexpected number of bodies for block range: expected 3, actual 2", + }, + { + name: "Length mismatch in receipts", + blockRange: makeRange(1, bodies[1:], receipts[2:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "Unexpected number of receipts for block range: expected 3, actual 2", + }, + { + name: "Length mismatch in tds", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[2:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "Unexpected number of total difficulties for block range: expected 3, actual 2", + }, + // Number mismatch tests + { + name: "Header number mismatch from range index", + blockRange: makeRange(2, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: hashes[0]}, + expectedLength: 3, + expectErrorMsg: "decoded header number mismatch indicating a gap in block numbers: expected 2, actual 1", + }, + { + name: "Header number mismatch from prevElement number", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[1], hash: hashes[1]}, + expectedLength: 3, + expectErrorMsg: "header number mismatch indicating a gap in block numbers: expected 2, actual 1\nparent hash mismatch between blocks 1 and 1", + }, + // Parent hash mismatch tests + { + name: "Parent hash mismatch", + blockRange: makeRange(1, bodies[1:], receipts[1:], tds[1:], hashes[1:], headers[1:]), + prevElement: &RLPBlockElement{decodedHeader: decodedHeaders[0], hash: []byte("wrong-hash")}, + expectedLength: 3, + expectErrorMsg: "parent hash mismatch between blocks 1 and 0", + }, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + _, err := tt.blockRange.CheckContinuity(tt.prevElement, tt.expectedLength) + if tt.expectErrorMsg == "" { + require.NoError(t, err, "CheckContinuity() unexpected error") + } else { + require.Error(t, err, "CheckContinuity() expected error") + require.EqualError(t, err, tt.expectErrorMsg, "CheckContinuity() error message") + } + }) + } +} diff --git a/op-chain-ops/cmd/celo-migrate/db.go b/op-chain-ops/cmd/celo-migrate/db.go index 1449c417bf95..9fa68ed98310 100644 --- a/op-chain-ops/cmd/celo-migrate/db.go +++ b/op-chain-ops/cmd/celo-migrate/db.go @@ -21,7 +21,13 @@ const ( ) var ( - headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header + headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header + headerTDSuffix = []byte("t") // headerPrefix + num (uint64 big endian) + hash + headerTDSuffix -> td + headerHashSuffix = []byte("n") // headerPrefix + num (uint64 big endian) + headerHashSuffix -> hash + headerNumberPrefix = []byte("H") // headerNumberPrefix + hash -> num (uint64 big endian) + + blockBodyPrefix = []byte("b") // blockBodyPrefix + num (uint64 big endian) + hash -> block body + blockReceiptsPrefix = []byte("r") // blockReceiptsPrefix + num (uint64 big endian) + hash -> block receipts ) // encodeBlockNumber encodes a block number as big endian uint64 @@ -36,6 +42,31 @@ func headerKey(number uint64, hash common.Hash) []byte { return append(append(headerPrefix, encodeBlockNumber(number)...), hash.Bytes()...) } +// headerTDKey = headerPrefix + num (uint64 big endian) + hash + headerTDSuffix +func headerTDKey(number uint64, hash common.Hash) []byte { + return append(headerKey(number, hash), headerTDSuffix...) +} + +// headerHashKey = headerPrefix + num (uint64 big endian) + headerHashSuffix +func headerHashKey(number uint64) []byte { + return append(append(headerPrefix, encodeBlockNumber(number)...), headerHashSuffix...) 
+} + +// headerNumberKey = headerNumberPrefix + hash +func headerNumberKey(hash common.Hash) []byte { + return append(headerNumberPrefix, hash.Bytes()...) +} + +// blockBodyKey = blockBodyPrefix + num (uint64 big endian) + hash +func blockBodyKey(number uint64, hash common.Hash) []byte { + return append(append(blockBodyPrefix, encodeBlockNumber(number)...), hash.Bytes()...) +} + +// blockReceiptsKey = blockReceiptsPrefix + num (uint64 big endian) + hash +func blockReceiptsKey(number uint64, hash common.Hash) []byte { + return append(append(blockReceiptsPrefix, encodeBlockNumber(number)...), hash.Bytes()...) +} + // Opens a database with access to AncientsDb func openDB(chaindataPath string, readOnly bool) (ethdb.Database, error) { // Will throw an error if the chaindataPath does not exist @@ -61,7 +92,7 @@ func openDB(chaindataPath string, readOnly bool) (ethdb.Database, error) { // Opens a database without access to AncientsDb func openDBWithoutFreezer(chaindataPath string, readOnly bool) (ethdb.Database, error) { - if _, err := os.Stat(chaindataPath); errors.Is(err, os.ErrNotExist) { + if _, err := os.Stat(chaindataPath); err != nil { return nil, err } diff --git a/op-chain-ops/cmd/celo-migrate/main.go b/op-chain-ops/cmd/celo-migrate/main.go index 84f746fd22e0..cc743dc8762d 100644 --- a/op-chain-ops/cmd/celo-migrate/main.go +++ b/op-chain-ops/cmd/celo-migrate/main.go @@ -7,7 +7,9 @@ import ( "math/big" "os" "os/exec" + "path/filepath" "runtime/debug" + "sync" "time" "log/slog" @@ -17,6 +19,7 @@ import ( "github.com/ethereum-optimism/optimism/op-service/ioutil" "github.com/ethereum-optimism/optimism/op-service/jsonutil" oplog "github.com/ethereum-optimism/optimism/op-service/log" + "github.com/hashicorp/go-multierror" "github.com/mattn/go-isatty" "github.com/urfave/cli/v2" @@ -101,6 +104,16 @@ var ( Usage: "Delete everything in the destination directory aside from /ancients. 
This is useful if you need to re-run the full migration but do not want to repeat the lengthy ancients migration. If you'd like to reset the entire destination directory, you can delete it manually.", Value: false, } + dbCheckPathFlag = &cli.PathFlag{ + Name: "db-path", + Usage: "Path to the db to perform a continuity check on", + Required: true, + } + dbCheckFailFastFlag = &cli.BoolFlag{ + Name: "fail-fast", + Usage: "Fail fast on the first error encountered. If set, the db check will stop on the first error encountered, otherwise it will continue to check all blocks and print out all errors at the end.", + Value: false, + } preMigrationFlags = []cli.Flag{ oldDBPathFlag, @@ -121,6 +134,11 @@ var ( migrationBlockTimeFlag, migrationBlockNumberFlag, ) + dbCheckFlags = []cli.Flag{ + dbCheckPathFlag, + batchSizeFlag, + dbCheckFailFastFlag, + } ) type preMigrationOptions struct { @@ -148,6 +166,12 @@ type fullMigrationOptions struct { migrationBlockNumber uint64 } +type dbCheckOptions struct { + dbPath string + batchSize uint64 + failFast bool +} + func parsePreMigrationOptions(ctx *cli.Context) preMigrationOptions { return preMigrationOptions{ oldDBPath: ctx.String(oldDBPathFlag.Name), @@ -179,6 +203,14 @@ func parseFullMigrationOptions(ctx *cli.Context) fullMigrationOptions { } } +func parseDBCheckOptions(ctx *cli.Context) dbCheckOptions { + return dbCheckOptions{ + dbPath: ctx.String(dbCheckPathFlag.Name), + batchSize: ctx.Uint64(batchSizeFlag.Name), + failFast: ctx.Bool(dbCheckFailFastFlag.Name), + } +} + func main() { color := isatty.IsTerminal(os.Stderr.Fd()) @@ -213,18 +245,32 @@ func main() { return nil }, }, + { + Name: "check-db", + Usage: "Perform a continuity check on the db, ensuring that all blocks are present and in order", + Flags: dbCheckFlags, + Action: func(ctx *cli.Context) error { + if err := runDBCheck(parseDBCheckOptions(ctx)); err != nil { + return fmt.Errorf("DB continuity check failed: %w", err) + } + log.Info("Finished db continuity check 
successfully!") + return nil + }, + }, }, OnUsageError: func(ctx *cli.Context, err error, isSubcommand bool) error { if isSubcommand { return err } - _ = cli.ShowAppHelp(ctx) + if err := cli.ShowAppHelp(ctx); err != nil { + log.Error("failed to show cli help", "err", err) + } return fmt.Errorf("please provide a valid command") }, } if err := app.Run(os.Args); err != nil { - log.Crit("error in migration", "err", err) + log.Crit("error in celo-migrate", "err", err) } } @@ -449,6 +495,139 @@ func runStateMigration(newDBPath string, opts stateMigrationOptions) error { return nil } +func runDBCheck(opts dbCheckOptions) (err error) { + defer timer("db continuity check")() + + log.Info("DB Continuity Check Started", "dbPath", opts.dbPath) + + ancientDB, err := NewChainFreezer(filepath.Join(opts.dbPath, "ancient"), "", true) + if err != nil { + return fmt.Errorf("failed to open ancient db: %w", err) + } + defer func() { + err = errors.Join(err, ancientDB.Close()) + }() + nonAncientDB, err := openDBWithoutFreezer(opts.dbPath, true) + if err != nil { + return fmt.Errorf("failed to open non-ancient db: %w", err) + } + defer func() { + err = errors.Join(err, nonAncientDB.Close()) + }() + + lastAncient, err := loadLastAncient(ancientDB) + if err != nil { + return fmt.Errorf("failed to load last ancient block: %w", err) + } + lastAncientNumber := lastAncient.Number() + lastBlockNumber := *rawdb.ReadHeaderNumber(nonAncientDB, rawdb.ReadHeadHeaderHash(nonAncientDB)) + + var errResult *multierror.Error + + // First, check continuity between ancients and non-ancients. + // Gaps in data will often halt the freezing process, so attempting to load the first non-ancient block + // will most likely fail if there is a gap. 
+ firstNonAncientRange, err := loadNonAncientRange(nonAncientDB, lastAncientNumber+1, 1) + if err != nil { + if opts.failFast { + return fmt.Errorf("failed to load first non-ancient block: %w", err) + } + // We don't need to add the error to errResult here because it will be added below when we call checkContinuity on non-ancients + } else { + if _, err := firstNonAncientRange.CheckContinuity(lastAncient, 1); err != nil { + err = fmt.Errorf("failed continuity check between ancients and non-ancients: %w", err) + if opts.failFast { + return err + } + errResult = multierror.Append(errResult, err) + } + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + g, ctx := errgroup.WithContext(ctx) + // Use double the CPUs to account for the fact that each routine will block while reading from the db. + g.SetLimit(runtime.NumCPU() * 2) + + var mu sync.Mutex + + checkRange := func(start, count uint64, loadRangeFunc func(uint64, uint64) (*RLPBlockRange, error)) { + // If we are not at genesis or the first non-ancient block, include the last block of + // the previous range so we can check for continuity between ranges. 
+ if start != 0 && start != lastAncientNumber+1 { + start = start - 1 + count = count + 1 + } + g.Go(func() error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + blockRange, err := loadRangeFunc(start, count) + if err != nil { + err = fmt.Errorf("failed to load block range: %w", err) + if opts.failFast { + return err + } + log.Error(err.Error()) + mu.Lock() + errResult = multierror.Append(errResult, err) + mu.Unlock() + return nil + } + if _, err := blockRange.CheckContinuity(nil, count); err != nil { + err = fmt.Errorf("failed continuity check: %w", err) + if opts.failFast { + return err + } + log.Error(err.Error()) + mu.Lock() + errResult = multierror.Append(errResult, err) + mu.Unlock() + return nil + } + log.Info("Successfully checked block range continuity", "start", start, "end", start+count-1, "count", count) + return nil + } + }) + } + checkContinuity := func(start, end uint64, loadRangeFunc func(uint64, uint64) (*RLPBlockRange, error)) error { + if (start <= lastAncientNumber && end > lastAncientNumber) || (end > lastBlockNumber) || (end < start) { + return fmt.Errorf("invalid range for continuity check: start=%d, end=%d, lastAncientNumber=%d, lastBlockNumber=%d", start, end, lastAncientNumber, lastBlockNumber) + } + for i := start; i <= end; i += opts.batchSize { + count := min(opts.batchSize, end-i+1) + checkRange(i, count, loadRangeFunc) + } + return nil + } + + log.Info("Checking continuity of ancient blocks", "start", 0, "end", lastAncientNumber, "count", lastAncientNumber+1) + if err := checkContinuity(0, lastAncientNumber, func(start, count uint64) (*RLPBlockRange, error) { + return loadAncientRange(ancientDB, start, count) + }); err != nil { + return err + } + log.Info("Checking continuity of non-ancient blocks", "start", lastAncientNumber+1, "end", lastBlockNumber, "count", lastBlockNumber-lastAncientNumber) + if err := checkContinuity(lastAncientNumber+1, lastBlockNumber, func(start, count uint64) (*RLPBlockRange, error) { + 
return loadNonAncientRange(nonAncientDB, start, count) + }); err != nil { + return err + } + + if err := g.Wait(); err != nil { + return err + } + + if errResult.ErrorOrNil() != nil { + return errResult + } + + log.Info("DB Continuity Check Finished", "dbPath", opts.dbPath) + + return nil +} + func timer(name string) func() { start := time.Now() return func() { diff --git a/op-chain-ops/cmd/celo-migrate/non-ancients.go b/op-chain-ops/cmd/celo-migrate/non-ancients.go index 44843d2080cd..9473edbe5a3a 100644 --- a/op-chain-ops/cmd/celo-migrate/non-ancients.go +++ b/op-chain-ops/cmd/celo-migrate/non-ancients.go @@ -1,6 +1,8 @@ package main import ( + "bytes" + "errors" "fmt" "os" "os/exec" @@ -125,3 +127,70 @@ func migrateNonAncientBlock(number uint64, hash common.Hash, newDB ethdb.Databas return nil } + +func loadNonAncientRange(db ethdb.Database, start, count uint64) (*RLPBlockRange, error) { + blockRange := &RLPBlockRange{ + start: start, + hashes: make([][]byte, count), + headers: make([][]byte, count), + bodies: make([][]byte, count), + receipts: make([][]byte, count), + tds: make([][]byte, count), + } + end := start + count - 1 + log.Info("Loading non-ancient block range", "start", start, "end", end, "count", count) + numberHashes := rawdb.ReadAllHashesInRange(db, start, end) + err := checkNumberHashes(db, numberHashes) + if err != nil { + return nil, err + } + + var combinedErr error + + for i, numberHash := range numberHashes { + number := numberHash.Number + hash := numberHash.Hash + + blockRange.hashes[i] = hash[:] + blockRange.headers[i], err = db.Get(headerKey(number, hash)) + if err != nil { + combinedErr = errors.Join(combinedErr, fmt.Errorf("failed to find header in db for non-ancient block %d - %x: %w", number, hash, err)) + } + blockRange.bodies[i], err = db.Get(blockBodyKey(number, hash)) + if err != nil { + combinedErr = errors.Join(combinedErr, fmt.Errorf("failed to find body in db for non-ancient block %d - %x: %w", number, hash, err)) + } + 
blockRange.receipts[i], err = db.Get(blockReceiptsKey(number, hash)) + if err != nil { + combinedErr = errors.Join(combinedErr, fmt.Errorf("failed to find receipts in db for non-ancient block %d - %x: %w", number, hash, err)) + } + blockRange.tds[i], err = db.Get(headerTDKey(number, hash)) + if err != nil { + combinedErr = errors.Join(combinedErr, fmt.Errorf("failed to find total difficulty in db for non-ancient block %d - %x: %w", number, hash, err)) + } + } + + return blockRange, combinedErr +} + +// checkNumberHashes checks that the contents of a NumberHash slice match the contents in the headerNumber and headerHash db tables. +// We do this to account for any differences in the way NumberHashes are read from the db, and to ensure the slice only contains canonical data. +func checkNumberHashes(db ethdb.Database, numberHashes []*rawdb.NumberHash) error { + for _, numberHash := range numberHashes { + numberRLP, err := db.Get(headerNumberKey(numberHash.Hash)) + if err != nil { + return fmt.Errorf("failed to find number for hash in db for non-ancient block %d - %x: %w", numberHash.Number, numberHash.Hash, err) + } + hashRLP, err := db.Get(headerHashKey(numberHash.Number)) + if err != nil { + return fmt.Errorf("failed to find canonical hash in db for non-ancient block %d - %x: %w", numberHash.Number, numberHash.Hash, err) + } + if !bytes.Equal(hashRLP, numberHash.Hash[:]) { + return fmt.Errorf("canonical hash mismatch in db for non-ancient block %d - %x", numberHash.Number, numberHash.Hash) + } + if !bytes.Equal(numberRLP, encodeBlockNumber(numberHash.Number)) { + return fmt.Errorf("number for hash mismatch in db for non-ancient block %d - %x", numberHash.Number, numberHash.Hash) + } + } + return nil +}