diff --git a/node/cmd/node/main.go b/node/cmd/node/main.go index 2a71f2a28..2ef1bdbed 100644 --- a/node/cmd/node/main.go +++ b/node/cmd/node/main.go @@ -24,7 +24,6 @@ import ( "morph-l2/node/sequencer/mock" "morph-l2/node/sync" "morph-l2/node/types" - "morph-l2/node/validator" ) var keyConverterCmd = cli.Command{ @@ -94,10 +93,6 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return fmt.Errorf("failed to create syncer, error: %v", err) } - validatorCfg := validator.NewConfig() - if err := validatorCfg.SetCliContext(ctx); err != nil { - return fmt.Errorf("validator set cli context error: %v", err) - } l1Client, err := ethclient.Dial(derivationCfg.L1.Addr) if err != nil { return fmt.Errorf("dial l1 node error:%v", err) @@ -106,12 +101,8 @@ func L2NodeMain(ctx *cli.Context) error { if err != nil { return fmt.Errorf("NewRollup error:%v", err) } - vt, err := validator.NewValidator(validatorCfg, rollup, nodeConfig.Logger) - if err != nil { - return fmt.Errorf("new validator client error: %v", err) - } - dvNode, err = derivation.NewDerivationClient(context.Background(), derivationCfg, syncer, store, vt, rollup, nodeConfig.Logger) + dvNode, err = derivation.NewDerivationClient(context.Background(), derivationCfg, syncer, store, rollup, nodeConfig.Logger) if err != nil { return fmt.Errorf("new derivation client error: %v", err) } diff --git a/node/db/keys.go b/node/db/keys.go index b0d50ddcd..2101a8814 100644 --- a/node/db/keys.go +++ b/node/db/keys.go @@ -7,7 +7,8 @@ var ( L1MessagePrefix = []byte("l1") BatchBlockNumberPrefix = []byte("batch") - derivationL1HeightKey = []byte("LastDerivationL1Height") + derivationL1HeightKey = []byte("LastDerivationL1Height") + derivationL1BlockPrefix = []byte("derivL1Block") ) // encodeBlockNumber encodes an L1 enqueue index as big endian uint64 @@ -26,3 +27,8 @@ func L1MessageKey(enqueueIndex uint64) []byte { func BatchBlockNumberKey(batchIndex uint64) []byte { return append(BatchBlockNumberPrefix, 
encodeEnqueueIndex(batchIndex)...) } + +// DerivationL1BlockKey = derivationL1BlockPrefix + l1Height (uint64 big endian) +func DerivationL1BlockKey(l1Height uint64) []byte { + return append(derivationL1BlockPrefix, encodeEnqueueIndex(l1Height)...) +} diff --git a/node/db/store.go b/node/db/store.go index 1a87a227c..16bd912bf 100644 --- a/node/db/store.go +++ b/node/db/store.go @@ -156,6 +156,65 @@ func (s *Store) WriteSyncedL1Messages(messages []types.L1Message, latestSynced u return batch.Write() } +// DerivationL1Block stores L1 block info for reorg detection. +type DerivationL1Block struct { + Number uint64 + Hash [32]byte +} + +func (s *Store) WriteDerivationL1Block(block *DerivationL1Block) { + data, err := rlp.EncodeToBytes(block) + if err != nil { + panic(fmt.Sprintf("failed to RLP encode DerivationL1Block, err: %v", err)) + } + if err := s.db.Put(DerivationL1BlockKey(block.Number), data); err != nil { + panic(fmt.Sprintf("failed to write DerivationL1Block, err: %v", err)) + } +} + +func (s *Store) ReadDerivationL1Block(l1Height uint64) *DerivationL1Block { + data, err := s.db.Get(DerivationL1BlockKey(l1Height)) + if err != nil && !isNotFoundErr(err) { + panic(fmt.Sprintf("failed to read DerivationL1Block, err: %v", err)) + } + if len(data) == 0 { + return nil + } + var block DerivationL1Block + if err := rlp.DecodeBytes(data, &block); err != nil { + panic(fmt.Sprintf("invalid DerivationL1Block RLP, err: %v", err)) + } + return &block +} + +func (s *Store) ReadDerivationL1BlockRange(from, to uint64) []*DerivationL1Block { + var blocks []*DerivationL1Block + for h := from; h <= to; h++ { + b := s.ReadDerivationL1Block(h) + if b != nil { + blocks = append(blocks, b) + } + } + return blocks +} + +func (s *Store) DeleteDerivationL1BlocksFrom(height uint64) { + batch := s.db.NewBatch() + for h := height; ; h++ { + key := DerivationL1BlockKey(h) + has, err := s.db.Has(key) + if err != nil || !has { + break + } + if err := batch.Delete(key); err != nil { + 
panic(fmt.Sprintf("failed to delete DerivationL1Block at %d, err: %v", h, err)) + } + } + if err := batch.Write(); err != nil { + panic(fmt.Sprintf("failed to write batch delete for DerivationL1Blocks, err: %v", err)) + } +} + func isNotFoundErr(err error) bool { return err.Error() == leveldb.ErrNotFound.Error() || err.Error() == types.ErrMemoryDBNotFound.Error() } diff --git a/node/derivation/DERIVATION_REFACTOR.md b/node/derivation/DERIVATION_REFACTOR.md new file mode 100644 index 000000000..9f94940a1 --- /dev/null +++ b/node/derivation/DERIVATION_REFACTOR.md @@ -0,0 +1,163 @@ +# Derivation Refactor: Batch Verification & L1 Reorg Detection + +## Background + +The derivation module is the core component that syncs L2 state from L1 batch data. Previously it only ran on validator nodes and used a challenge mechanism when state mismatches were detected. This refactor makes two fundamental changes: + +1. **L1 batch data is the source of truth** — when local L2 blocks don't match L1 batch data, roll back and re-derive from L1 instead of issuing a challenge. +2. **Support `latest` mode** for fetching L1 batches (instead of only `finalized`), with L1 reorg detection to handle the reduced confirmation window. + +## Design Principles + +- **L2 rollback is only triggered by batch data mismatch**, never by L1 reorg alone. + - L1 reorg → clean up DB → re-derive from reorg point → batch comparison decides if L2 needs rollback. + - Most L1 reorgs just re-include the same batch tx in a different block — L2 stays valid. +- **Derivation can run as a verification thread** — when blocks already exist locally (e.g. produced by sequencer), derivation compares them against L1 batch data instead of skipping. 
+ +## What Changed + +### Removed + +| Item | Reason | +|------|--------| +| `validator` field in `Derivation` struct | Challenge mechanism removed | +| `validator.Validator` parameter in `NewDerivationClient()` | No longer needed | +| `ChallengeState` / `ChallengeEnable` logic in `derivationBlock()` | Replaced by rollback + re-derive | +| `validator` import in `node/cmd/node/main.go` | No longer referenced | + +### Added — L1 Reorg Detection + +When `confirmations` is not `finalized` (i.e. using `latest` or `safe`), each derivation loop checks recent L1 blocks for hash changes before processing new batches. + +**New DB layer** (`node/db/`): + +- `DerivationL1Block` struct — stores `{Number, Hash}` per L1 block +- `WriteDerivationL1Block` / `ReadDerivationL1Block` / `ReadDerivationL1BlockRange` / `DeleteDerivationL1BlocksFrom` +- DB key prefix: `derivL1Block` + uint64 big-endian height + +**New config** (`node/derivation/config.go`): + +- `ReorgCheckDepth uint64` — how many recent L1 blocks to verify each loop (default: 64) +- CLI flag: `--derivation.reorgCheckDepth` / env `MORPH_NODE_DERIVATION_REORG_CHECK_DEPTH` + +**New methods** (`node/derivation/derivation.go`): + +| Method | Purpose | +|--------|---------| +| `detectReorg(ctx)` | Iterates recent L1 block hashes from DB, compares against current L1 chain. Returns the height where a mismatch is found, or nil. | +| `handleL1Reorg(height)` | Cleans DB records from the reorg point and resets `latestDerivationL1Height`. Does NOT rollback L2 — the next derivation loop re-fetches batches and the normal comparison logic decides. | +| `recordL1Blocks(ctx, from, to)` | After each derivation round, records L1 block hashes for the processed range. 
| + +**Flow**: + +```text +derivationBlock() loop start +│ +├─ [if not finalized] detectReorg() +│ ├─ no reorg → continue +│ └─ reorg at height X → handleL1Reorg(X) +│ ├─ DeleteDerivationL1BlocksFrom(X) +│ ├─ WriteLatestDerivationL1Height(X-1) +│ └─ return (next loop re-processes from X) +│ +├─ fetch CommitBatch logs from L1 +├─ process each batch → derive() + verifyBatchRoots() +├─ recordL1Blocks(start, end) +└─ WriteLatestDerivationL1Height(end) +``` + +### Added — Batch Data Verification + +When `derive()` encounters an L2 block that already exists locally, it now **compares** the block against the L1 batch data instead of blindly skipping it. + +**New methods**: + +| Method | Purpose | +|--------|---------| +| `verifyBlockContext(localHeader, blockData)` | Compares timestamp, gasLimit, baseFee between local L2 block header and batch block context. | +| `verifyBatchRoots(batchInfo, lastHeader)` | Compares stateRoot and withdrawalRoot between L1 batch and last derived L2 block. Extracted from the old inline logic. | +| `rollbackLocalChain(targetBlockNumber)` | **TODO stub** — will call geth `SetHead` API to rewind L2 chain. | + +**`derive()` new flow for each block in batch**: + +```text +block.Number <= latestBlockNumber? 
+├─ YES (block exists) +│ ├─ verifyBlockContext() passes → skip, continue +│ └─ verifyBlockContext() fails +│ ├─ IncBlockMismatchCount() +│ ├─ rollbackLocalChain(block.Number - 1) +│ └─ fall through to NewSafeL2Block (re-execute) +│ +└─ NO (new block) + └─ NewSafeL2Block (execute normally) +``` + +**`derivationBlock()` batch-level verification**: + +```text +After derive(batchInfo) completes: +│ +├─ verifyBatchRoots() passes → normal +└─ verifyBatchRoots() fails + ├─ IncRollbackCount() + ├─ rollbackLocalChain(firstBlockNumber - 1) + ├─ re-derive(batchInfo) + ├─ verifyBatchRoots() again + │ ├─ passes → recovered + │ └─ fails → CRITICAL error, stop (manual intervention needed) +``` + +### Added — Metrics + +| Metric | Type | Description | +|--------|------|-------------| +| `morphnode_derivation_l1_reorg_detected_total` | Counter | L1 reorg detection count | +| `morphnode_derivation_l2_rollback_total` | Counter | L2 rollbacks triggered by batch mismatch | +| `morphnode_derivation_block_mismatch_total` | Counter | Block-level context mismatches | +| `morphnode_derivation_halted` | Gauge | Set to 1 when derivation halts due to unrecoverable batch mismatch (alert on this) | + +## Modified Files + +| File | Changes | +|------|---------| +| `node/derivation/derivation.go` | Core refactor: removed validator/challenge, added reorg detection, batch verification, rollback flow | +| `node/derivation/database.go` | Extended `Reader`/`Writer` interfaces for L1 block hash tracking | +| `node/derivation/config.go` | Added `ReorgCheckDepth` config field | +| `node/derivation/metrics.go` | Added 3 new counter metrics | +| `node/db/keys.go` | Added `derivationL1BlockPrefix` and `DerivationL1BlockKey()` | +| `node/db/store.go` | Added `DerivationL1Block` struct and 4 CRUD methods | +| `node/flags/flags.go` | Added `DerivationReorgCheckDepth` CLI flag | +| `node/cmd/node/main.go` | Removed `validator` dependency from `NewDerivationClient` call | + +## TODO (follow-up work) + +### 
`rollbackLocalChain()` — geth SetHead integration + +Currently a stub that returns an error. Any batch mismatch will be detected and logged, but the +actual L2 chain rollback cannot proceed until this is implemented: + +1. Expose `SetL2Head(number uint64)` in `go-ethereum/eth/catalyst/l2_api.go` +2. Add `SetHead` method to `go-ethereum/ethclient/authclient` +3. Add `SetHead` method to `node/types/retryable_client.go` +4. Call `d.l2Client.SetHead(d.ctx, targetBlockNumber)` in `rollbackLocalChain()` + +Note: geth already has `BlockChain.SetHead(head uint64) error` — we just need to expose it through the engine API chain. + +### Transaction-level verification + +`verifyBlockContext` currently checks timestamp, gasLimit, baseFee, and batch-internal tx count +consistency. Full transaction hash comparison against local blocks requires `BlockByNumber` RPC +on `RetryableClient`, which is not yet exposed. State root verification in `verifyBatchRoots` +covers transaction execution correctness as an indirect check. + +### Concurrency safety + +When running as a verification thread alongside a sequencer, concurrent access between block production and rollback needs locking. This will be handled separately. + +## How to Test + +1. **Existing behavior preserved**: Set `--derivation.confirmations` to finalized (default) — reorg detection and L1 block hash recording are both skipped, batch verification still runs. +2. **Latest mode**: Set `--derivation.confirmations` to `-2` (latest) — reorg detection activates, L1 block hashes are tracked. +3. **Reorg detection**: Simulate by modifying a saved L1 block hash in DB — next loop should detect and clean up. +4. **Batch verification**: When an existing L2 block matches L1 batch data, it logs "block verified" and skips. When mismatched, it logs the error and returns (rollback stub returns error, preventing silent continuation). 
diff --git a/node/derivation/config.go b/node/derivation/config.go index efb1ceb31..81767d520 100644 --- a/node/derivation/config.go +++ b/node/derivation/config.go @@ -29,6 +29,9 @@ const ( // DefaultLogProgressInterval is the frequency at which we log progress. DefaultLogProgressInterval = time.Second * 10 + + // DefaultReorgCheckDepth is the number of recent L1 blocks to check for reorgs. + DefaultReorgCheckDepth = uint64(64) ) type Config struct { @@ -42,6 +45,7 @@ type Config struct { PollInterval time.Duration `json:"poll_interval"` LogProgressInterval time.Duration `json:"log_progress_interval"` FetchBlockRange uint64 `json:"fetch_block_range"` + ReorgCheckDepth uint64 `json:"reorg_check_depth"` MetricsPort uint64 `json:"metrics_port"` MetricsHostname string `json:"metrics_hostname"` MetricsServerEnable bool `json:"metrics_server_enable"` @@ -55,6 +59,7 @@ func DefaultConfig() *Config { PollInterval: DefaultPollInterval, LogProgressInterval: DefaultLogProgressInterval, FetchBlockRange: DefaultFetchBlockRange, + ReorgCheckDepth: DefaultReorgCheckDepth, L2: new(types.L2Config), L2Next: nil, // optional, only for upgrade switch } @@ -111,6 +116,9 @@ func (c *Config) SetCliContext(ctx *cli.Context) error { return errors.New("invalid fetchBlockRange") } } + if ctx.GlobalIsSet(flags.DerivationReorgCheckDepth.Name) { + c.ReorgCheckDepth = ctx.GlobalUint64(flags.DerivationReorgCheckDepth.Name) + } l2EthAddr := ctx.GlobalString(flags.L2EthAddr.Name) l2EngineAddr := ctx.GlobalString(flags.L2EngineAddr.Name) diff --git a/node/derivation/database.go b/node/derivation/database.go index a63f4eba1..369b135e1 100644 --- a/node/derivation/database.go +++ b/node/derivation/database.go @@ -1,6 +1,7 @@ package derivation import ( + "morph-l2/node/db" "morph-l2/node/sync" ) @@ -12,8 +13,12 @@ type Database interface { type Reader interface { ReadLatestDerivationL1Height() *uint64 + ReadDerivationL1Block(l1Height uint64) *db.DerivationL1Block + ReadDerivationL1BlockRange(from, to 
uint64) []*db.DerivationL1Block } type Writer interface { WriteLatestDerivationL1Height(latest uint64) + WriteDerivationL1Block(block *db.DerivationL1Block) + DeleteDerivationL1BlocksFrom(height uint64) } diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 8fb311b0e..73a17b741 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -27,7 +27,6 @@ import ( nodecommon "morph-l2/node/common" "morph-l2/node/sync" "morph-l2/node/types" - "morph-l2/node/validator" ) var ( @@ -42,7 +41,6 @@ type Derivation struct { RollupContractAddress common.Address confirmations rpc.BlockNumber l2Client *types.RetryableClient - validator *validator.Validator logger tmlog.Logger rollup *bindings.Rollup metrics *Metrics @@ -60,9 +58,11 @@ type Derivation struct { startHeight uint64 baseHeight uint64 fetchBlockRange uint64 + reorgCheckDepth uint64 pollInterval time.Duration logProgressInterval time.Duration stop chan struct{} + halted bool // set when an unrecoverable mismatch is detected but rollback is not yet implemented // geth upgrade config (fetched once at startup) switchTime uint64 @@ -76,7 +76,7 @@ type DeployContractBackend interface { ethereum.TransactionReader } -func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db Database, validator *validator.Validator, rollup *bindings.Rollup, logger tmlog.Logger) (*Derivation, error) { +func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db Database, rollup *bindings.Rollup, logger tmlog.Logger) (*Derivation, error) { l1Client, err := ethclient.Dial(cfg.L1.Addr) if err != nil { return nil, err @@ -152,7 +152,6 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, db: db, l1Client: l1Client, syncer: syncer, - validator: validator, rollup: rollup, rollupABI: rollupAbi, legacyRollupABI: legacyRollupAbi, @@ -166,6 +165,7 @@ func NewDerivationClient(ctx context.Context, cfg *Config, syncer *sync.Syncer, 
startHeight: cfg.StartHeight, baseHeight: cfg.BaseHeight, fetchBlockRange: cfg.FetchBlockRange, + reorgCheckDepth: cfg.ReorgCheckDepth, pollInterval: cfg.PollInterval, logProgressInterval: cfg.LogProgressInterval, metrics: metrics, @@ -214,6 +214,33 @@ func (d *Derivation) Stop() { } func (d *Derivation) derivationBlock(ctx context.Context) { + if d.halted { + d.logger.Error("derivation halted due to unrecoverable batch mismatch, manual intervention required") + return + } + + // Step 1: Check for L1 reorg (only meaningful when not using finalized) + if d.confirmations != rpc.FinalizedBlockNumber { + reorgAt, err := d.detectReorg(ctx) + if err != nil { + d.logger.Error("reorg detection failed", "err", err) + return + } + if reorgAt != nil { + d.logger.Info("L1 reorg detected, invoking reorg handler", "reorgAtL1Height", *reorgAt) + d.metrics.IncReorgCount() + if err := d.handleL1Reorg(*reorgAt); err != nil { + d.logger.Error("handle L1 reorg failed", "err", err) + } + // Always return after reorg detection — don't continue processing in + // the same loop. Let the next poll interval re-fetch from the reset + // height. This avoids recording potentially unstable L1 block hashes + // if the chain is still reorging. 
+ return + } + } + + // Step 2: Determine L1 scan range latestDerivation := d.db.ReadLatestDerivationL1Height() latest, err := d.getLatestConfirmedBlockNumber(d.ctx) if err != nil { @@ -233,7 +260,9 @@ func (d *Derivation) derivationBlock(ctx context.Context) { } else if latest-start >= d.fetchBlockRange { end = start + d.fetchBlockRange } - d.logger.Info("derivation start pull rollupData form l1", "startBlock", start, "end", end) + d.logger.Info("derivation start pull rollupData from l1", "startBlock", start, "end", end) + + // Step 3: Fetch CommitBatch logs logs, err := d.fetchRollupLog(ctx, start, end) if err != nil { d.logger.Error("eth_getLogs failed", "err", err) @@ -247,6 +276,7 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.metrics.SetLatestBatchIndex(latestBatchIndex.Uint64()) d.logger.Info("fetched rollup tx", "txNum", len(logs), "latestBatchIndex", latestBatchIndex) + // Step 4: Process each batch for _, lg := range logs { batchInfo, err := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) if err != nil { @@ -259,75 +289,81 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.logger.Info("fetch rollup transaction success", "txNonce", batchInfo.nonce, "txHash", batchInfo.txHash, "l1BlockNumber", batchInfo.l1BlockNumber, "firstL2BlockNumber", batchInfo.firstBlockNumber, "lastL2BlockNumber", batchInfo.lastBlockNumber) - // derivation + // Derive or verify blocks lastHeader, err := d.derive(batchInfo) if err != nil { d.logger.Error("derive blocks interrupt", "error", err) return } - // only last block of batch + if lastHeader == nil { + d.logger.Error("derive returned nil header, skipping empty batch", "batchIndex", batchInfo.batchIndex) + continue + } + d.logger.Info("batch derivation complete", "batch_index", batchInfo.batchIndex, "currentBatchEndBlock", lastHeader.Number.Uint64()) d.metrics.SetL2DeriveHeight(lastHeader.Number.Uint64()) d.metrics.SetSyncedBatchIndex(batchInfo.batchIndex) + if lastHeader.Number.Uint64() <= 
d.baseHeight { continue } - withdrawalRoot, err := d.L2ToL1MessagePasser.MessageRoot(&bind.CallOpts{ - BlockNumber: lastHeader.Number, - }) - if err != nil { - d.logger.Error("get withdrawal root failed", "error", err) - return - } - rootMismatch := !bytes.Equal(lastHeader.Root.Bytes(), batchInfo.root.Bytes()) - withdrawalMismatch := !bytes.Equal(withdrawalRoot[:], batchInfo.withdrawalRoot.Bytes()) - - if rootMismatch || withdrawalMismatch { - // Check if should skip validation during upgrade transition - // Skip if: (before switch && MPT geth) or (after switch && ZK geth) - skipValidation := false - if d.switchTime > 0 { - beforeSwitch := lastHeader.Time < d.switchTime - if (beforeSwitch && !d.useZktrie) || (!beforeSwitch && d.useZktrie) { - skipValidation = true - d.logger.Info("Root validation skipped during upgrade transition", - "originStateRootHash", batchInfo.root, - "deriveStateRootHash", lastHeader.Root.Hex(), - "blockTimestamp", lastHeader.Time, - "switchTime", d.switchTime, - "useZktrie", d.useZktrie, - ) - } + // Verify state root and withdrawal root against L1 batch data + if err := d.verifyBatchRoots(batchInfo, lastHeader); err != nil { + d.logger.Error("batch root verification failed, attempting rollback and re-derive", + "batchIndex", batchInfo.batchIndex, "error", err) + d.metrics.SetBatchStatus(stateException) + d.metrics.IncRollbackCount() + + rollbackTarget := batchInfo.firstBlockNumber - 1 + if err := d.rollbackLocalChain(rollbackTarget); err != nil { + d.logger.Error("rollback failed, halting derivation to prevent infinite retry", + "target", rollbackTarget, "batchIndex", batchInfo.batchIndex, "error", err) + d.halted = true + d.metrics.SetHalted() + return } - if !skipValidation { - d.metrics.SetBatchStatus(stateException) - // TODO The challenge switch is currently on and will be turned on in the future - if d.validator != nil && d.validator.ChallengeEnable() { - if err := d.validator.ChallengeState(batchInfo.batchIndex); err != nil { - 
d.logger.Error("challenge state failed") - return - } - } - d.logger.Error("root hash or withdrawal hash is not equal", - "originStateRootHash", batchInfo.root, - "deriveStateRootHash", lastHeader.Root.Hex(), - "batchWithdrawalRoot", batchInfo.withdrawalRoot.Hex(), - "deriveWithdrawalRoot", common.BytesToHash(withdrawalRoot[:]).Hex(), - ) + // Re-derive the batch using L1 batch data as source of truth + lastHeader, err = d.derive(batchInfo) + if err != nil { + d.logger.Error("re-derive after rollback failed", "error", err) return } + if lastHeader == nil { + d.logger.Error("re-derive returned nil header after rollback", "batchIndex", batchInfo.batchIndex) + return + } + + // Verify again after re-derive + if err := d.verifyBatchRoots(batchInfo, lastHeader); err != nil { + d.logger.Error("CRITICAL: batch roots still mismatch after rollback and re-derive, halting derivation", + "batchIndex", batchInfo.batchIndex, "error", err) + d.halted = true + d.metrics.SetHalted() + return + } + d.logger.Info("rollback and re-derive succeeded", "batchIndex", batchInfo.batchIndex) } + d.metrics.SetBatchStatus(stateNormal) d.metrics.SetL1SyncHeight(lg.BlockNumber) } + // Step 5: Record L1 block hashes for reorg detection (only needed for non-finalized modes) + if d.confirmations != rpc.FinalizedBlockNumber { + if err := d.recordL1Blocks(ctx, start, end); err != nil { + d.logger.Error("recordL1Blocks failed, will retry next loop", "err", err) + return + } + } + d.db.WriteLatestDerivationL1Height(end) d.metrics.SetL1SyncHeight(end) d.logger.Info("write latest derivation l1 height success", "l1BlockNumber", end) } + func (d *Derivation) fetchRollupLog(ctx context.Context, from, to uint64) ([]eth.Log, error) { query := ethereum.FilterQuery{ FromBlock: big.NewInt(0).SetUint64(from), @@ -606,13 +642,32 @@ func (d *Derivation) derive(rollupData *BatchInfo) (*eth.Header, error) { return nil, fmt.Errorf("get derivation geth block number error:%v", err) } if blockData.SafeL2Data.Number <= 
latestBlockNumber { - d.logger.Info("new L2 Data block number less than latestBlockNumber", "safeL2DataNumber", blockData.SafeL2Data.Number, "latestBlockNumber", latestBlockNumber) - lastHeader, err = d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(blockData.SafeL2Data.Number))) + // Block already exists locally - verify it matches the batch data + localHeader, err := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(blockData.SafeL2Data.Number))) if err != nil { return nil, fmt.Errorf("query header by number error:%v", err) } - continue + + if err := d.verifyBlockContext(localHeader, blockData); err != nil { + d.logger.Error("block context mismatch with L1 batch data, rollback required", + "blockNumber", blockData.Number, "error", err) + d.metrics.IncBlockMismatchCount() + + rollbackTarget := blockData.SafeL2Data.Number - 1 + if err := d.rollbackLocalChain(rollbackTarget); err != nil { + d.halted = true + d.metrics.SetHalted() + return nil, fmt.Errorf("rollback to %d failed (derivation halted): %v", rollbackTarget, err) + } + } else { + d.logger.Info("block verified against L1 batch data", + "blockNumber", blockData.Number) + lastHeader = localHeader + continue + } } + + // Execute the block (either new block or re-execution after rollback) err = func() error { ctx, cancel := context.WithTimeout(context.Background(), time.Duration(60)*time.Second) defer cancel() diff --git a/node/derivation/metrics.go b/node/derivation/metrics.go index da5e8937d..a0ae4817a 100644 --- a/node/derivation/metrics.go +++ b/node/derivation/metrics.go @@ -18,12 +18,16 @@ const ( ) type Metrics struct { - L1SyncHeight metrics.Gauge - RollupL2Height metrics.Gauge - DeriveL2Height metrics.Gauge - BatchStatus metrics.Gauge - LatestBatchIndex metrics.Gauge - SyncedBatchIndex metrics.Gauge + L1SyncHeight metrics.Gauge + RollupL2Height metrics.Gauge + DeriveL2Height metrics.Gauge + BatchStatus metrics.Gauge + LatestBatchIndex metrics.Gauge + SyncedBatchIndex metrics.Gauge + ReorgCount 
metrics.Counter + RollbackCount metrics.Counter + BlockMismatchCount metrics.Counter + Halted metrics.Gauge } func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { @@ -68,6 +72,30 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "synced_batch_index", Help: "", }, labels).With(labelsAndValues...), + ReorgCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "l1_reorg_detected_total", + Help: "Total number of L1 reorgs detected", + }, labels).With(labelsAndValues...), + RollbackCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "l2_rollback_total", + Help: "Total number of L2 chain rollbacks triggered by batch mismatch", + }, labels).With(labelsAndValues...), + BlockMismatchCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "block_mismatch_total", + Help: "Total number of block context mismatches detected during verification", + }, labels).With(labelsAndValues...), + Halted: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: metricsSubsystem, + Name: "halted", + Help: "Set to 1 when derivation is halted due to unrecoverable batch mismatch requiring manual intervention", + }, labels).With(labelsAndValues...), } } @@ -95,6 +123,22 @@ func (m *Metrics) SetSyncedBatchIndex(batchIndex uint64) { m.SyncedBatchIndex.Set(float64(batchIndex)) } +func (m *Metrics) IncReorgCount() { + m.ReorgCount.Add(1) +} + +func (m *Metrics) IncRollbackCount() { + m.RollbackCount.Add(1) +} + +func (m *Metrics) IncBlockMismatchCount() { + m.BlockMismatchCount.Add(1) +} + +func (m *Metrics) SetHalted() { + m.Halted.Set(1) +} + func (m *Metrics) Serve(hostname string, port uint64) (*http.Server, error) { mux := http.NewServeMux() mux.Handle("/metrics", promhttp.Handler()) diff --git 
a/node/derivation/reorg.go b/node/derivation/reorg.go new file mode 100644 index 000000000..8773e3117 --- /dev/null +++ b/node/derivation/reorg.go @@ -0,0 +1,114 @@ +package derivation + +import ( + "context" + "fmt" + "math/big" + + "github.com/morph-l2/go-ethereum/common" + + "morph-l2/node/db" +) + +// detectReorg checks recent L1 blocks for hash mismatches indicating a reorg. +// Returns the L1 height where reorg was first detected, or nil if no reorg. +// +// Optimization: checks the newest saved block first. If it matches, there is +// no reorg (1 RPC call in the common case). Only when the newest block +// mismatches does it do a full oldest-to-newest scan to find the earliest +// divergence point. +func (d *Derivation) detectReorg(ctx context.Context) (*uint64, error) { + latestDerivation := d.db.ReadLatestDerivationL1Height() + if latestDerivation == nil { + return nil, nil + } + + checkFrom := d.startHeight + if *latestDerivation > d.reorgCheckDepth && (*latestDerivation-d.reorgCheckDepth) > checkFrom { + checkFrom = *latestDerivation - d.reorgCheckDepth + } + + savedBlocks := d.db.ReadDerivationL1BlockRange(checkFrom, *latestDerivation) + if len(savedBlocks) == 0 { + return nil, nil + } + + // Fast path: check the newest block first. If it matches, no reorg occurred. + newest := savedBlocks[len(savedBlocks)-1] + newestHeader, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(newest.Number))) + if err != nil { + return nil, fmt.Errorf("failed to get L1 header at %d: %w", newest.Number, err) + } + if newestHeader.Hash() == common.BytesToHash(newest.Hash[:]) { + return nil, nil + } + + // Slow path: reorg detected. Scan oldest-to-newest to find the earliest divergence. 
+ for i := 0; i < len(savedBlocks); i++ { + block := savedBlocks[i] + header, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(block.Number))) + if err != nil { + return nil, fmt.Errorf("failed to get L1 header at %d: %w", block.Number, err) + } + savedHash := common.BytesToHash(block.Hash[:]) + if header.Hash() != savedHash { + d.logger.Info("L1 block hash mismatch detected", + "height", block.Number, + "savedHash", savedHash.Hex(), + "currentHash", header.Hash().Hex(), + ) + return &block.Number, nil + } + } + return nil, nil +} + +// handleL1Reorg handles an L1 reorg detected at the given L1 height. +// It only cleans up derivation DB state and resets the derivation L1 height +// so the next derivation loop re-processes from the reorg point. +// +// L1 reorg does NOT directly trigger an L2 rollback — in most cases the same +// batch tx gets re-included in a new L1 block with identical content, so L2 +// blocks remain valid. The normal derivation loop will re-fetch batches and +// run verifyBlockContext / verifyBatchRoots; only if those comparisons fail +// will an L2 rollback be triggered through rollbackLocalChain. +func (d *Derivation) handleL1Reorg(reorgAtL1Height uint64) error { + d.logger.Info("L1 reorg detected, cleaning DB records and restarting derivation from reorg point", + "reorgAtL1Height", reorgAtL1Height) + + d.db.DeleteDerivationL1BlocksFrom(reorgAtL1Height) + + if reorgAtL1Height > d.startHeight { + d.db.WriteLatestDerivationL1Height(reorgAtL1Height - 1) + } else { + // Reorg at or before startHeight — reset so next loop starts from startHeight. + if d.startHeight > 0 { + d.db.WriteLatestDerivationL1Height(d.startHeight - 1) + } else { + d.db.WriteLatestDerivationL1Height(0) + } + } + + return nil +} + +// recordL1Blocks saves L1 block hashes for reorg detection. +// Returns an error if any header fetch fails — the caller must not advance +// derivation height to avoid permanent gaps in L1 block hash tracking. 
+func (d *Derivation) recordL1Blocks(ctx context.Context, from, to uint64) error { + for h := from; h <= to; h++ { + header, err := d.l1Client.HeaderByNumber(ctx, big.NewInt(int64(h))) + if err != nil { + return fmt.Errorf("failed to get L1 header at %d: %w", h, err) + } + + var hashBytes [32]byte + copy(hashBytes[:], header.Hash().Bytes()) + + d.db.WriteDerivationL1Block(&db.DerivationL1Block{ + Number: h, + Hash: hashBytes, + }) + } + return nil +} diff --git a/node/derivation/verify.go b/node/derivation/verify.go new file mode 100644 index 000000000..271b4a3a5 --- /dev/null +++ b/node/derivation/verify.go @@ -0,0 +1,98 @@ +package derivation + +import ( + "bytes" + "fmt" + + "github.com/morph-l2/go-ethereum/accounts/abi/bind" + "github.com/morph-l2/go-ethereum/common" + eth "github.com/morph-l2/go-ethereum/core/types" +) + +// rollbackLocalChain rolls back the local L2 chain to the specified block number. +// This is only triggered when batch data comparison fails — i.e. the local L2 block +// does not match the L1 batch data (block context mismatch or state root mismatch). +// After rollback, the caller re-derives blocks using L1 batch data as source of truth. +func (d *Derivation) rollbackLocalChain(targetBlockNumber uint64) error { + d.logger.Error("L2 chain rollback not yet implemented", + "targetBlockNumber", targetBlockNumber) + + // TODO: Implement actual rollback via geth SetHead engine API: + // 1. Expose SetL2Head(number uint64) in go-ethereum/eth/catalyst/l2_api.go + // 2. Add SetHead method to go-ethereum/ethclient/authclient + // 3. Add SetHead method to node/types/retryable_client.go + // 4. Call d.l2Client.SetHead(d.ctx, targetBlockNumber) + return fmt.Errorf("rollback not implemented yet, target=%d", targetBlockNumber) +} + +// verifyBatchRoots verifies that the local state root and withdrawal root match the L1 batch data. 
+// The withdrawal root is read from the L2ToL1MessagePasser contract pinned at
+// lastHeader.Number so it corresponds to the same L2 state as lastHeader.Root.
+// Returns nil on match (or when the mismatch falls inside the zktrie upgrade
+// transition window); otherwise a descriptive error for the caller to act on.
+func (d *Derivation) verifyBatchRoots(batchInfo *BatchInfo, lastHeader *eth.Header) error {
+	withdrawalRoot, err := d.L2ToL1MessagePasser.MessageRoot(&bind.CallOpts{
+		BlockNumber: lastHeader.Number,
+	})
+	if err != nil {
+		return fmt.Errorf("get withdrawal root failed: %w", err)
+	}
+
+	rootMismatch := !bytes.Equal(lastHeader.Root.Bytes(), batchInfo.root.Bytes())
+	withdrawalMismatch := !bytes.Equal(withdrawalRoot[:], batchInfo.withdrawalRoot.Bytes())
+
+	if rootMismatch || withdrawalMismatch {
+		// Check whether validation should be skipped during the trie-switch
+		// upgrade transition (switchTime == 0 means no transition configured).
+		if d.switchTime > 0 {
+			beforeSwitch := lastHeader.Time < d.switchTime
+			// NOTE(review): skip applies when the block's trie era disagrees with
+			// this node's useZktrie setting — presumably roots are incomparable
+			// across trie implementations during the migration; confirm against
+			// the upgrade design doc.
+			if (beforeSwitch && !d.useZktrie) || (!beforeSwitch && d.useZktrie) {
+				d.logger.Info("Root validation skipped during upgrade transition",
+					"originStateRootHash", batchInfo.root,
+					"deriveStateRootHash", lastHeader.Root.Hex(),
+					"blockTimestamp", lastHeader.Time,
+					"switchTime", d.switchTime,
+					"useZktrie", d.useZktrie,
+				)
+				return nil
+			}
+		}
+		return fmt.Errorf("root mismatch: stateRoot(l1=%s, local=%s) withdrawalRoot(l1=%s, local=%s)",
+			batchInfo.root.Hex(), lastHeader.Root.Hex(),
+			batchInfo.withdrawalRoot.Hex(), common.BytesToHash(withdrawalRoot[:]).Hex())
+	}
+	return nil
+}
+
+// verifyBlockContext compares a local L2 block header against the batch block context from L1.
+// Checks timestamp, gas limit, and base fee field-by-field, plus a batch-internal
+// tx-count consistency check. Returns nil when everything agrees; otherwise an
+// error naming the first mismatching field. Assumes the caller has already
+// paired localHeader with the block context of the same height — the block
+// number itself is not cross-checked here.
+func (d *Derivation) verifyBlockContext(localHeader *eth.Header, blockData *BlockContext) error {
+	if localHeader.Time != blockData.Timestamp {
+		return fmt.Errorf("timestamp mismatch at block %d: local=%d, batch=%d",
+			blockData.Number, localHeader.Time, blockData.Timestamp)
+	}
+	if localHeader.GasLimit != blockData.GasLimit {
+		return fmt.Errorf("gasLimit mismatch at block %d: local=%d, batch=%d",
+			blockData.Number, localHeader.GasLimit, blockData.GasLimit)
+	}
+	// BaseFee is a *big.Int on both sides, so nil-ness must be compared before value.
+	switch {
+	case blockData.BaseFee != nil && localHeader.BaseFee != nil:
+		if localHeader.BaseFee.Cmp(blockData.BaseFee) != 0 {
+			return fmt.Errorf("baseFee mismatch at block %d: local=%s, batch=%s",
+				blockData.Number, localHeader.BaseFee.String(), blockData.BaseFee.String())
+		}
+	case blockData.BaseFee == nil && localHeader.BaseFee == nil:
+		// Both nil — pre-EIP-1559 or legacy batch format, OK.
+	default:
+		// One side has BaseFee, the other doesn't — structural inconsistency.
+		return fmt.Errorf("baseFee nil mismatch at block %d: local=%v, batch=%v",
+			blockData.Number, localHeader.BaseFee, blockData.BaseFee)
+	}
+	// Batch internal consistency check: txsNum in the block context should match the
+	// actual number of transactions assembled in SafeL2Data (L1 messages + L2 txs).
+	// This catches batch parsing/corruption issues, not local-vs-L1 divergence.
+	// Local-vs-L1 transaction divergence is covered by state root verification
+	// in verifyBatchRoots (different txs → different state root).
+	if blockData.SafeL2Data != nil {
+		batchTxCount := len(blockData.SafeL2Data.Transactions)
+		if batchTxCount != int(blockData.txsNum) {
+			return fmt.Errorf("batch internal tx count inconsistency at block %d: blockContext.txsNum=%d, safeL2Data.Transactions=%d",
+				blockData.Number, blockData.txsNum, batchTxCount)
+		}
+	}
+	return nil
+}
diff --git a/node/flags/flags.go b/node/flags/flags.go
index 2c00f4a87..8b6201077 100644
--- a/node/flags/flags.go
+++ b/node/flags/flags.go
@@ -256,6 +256,13 @@ var (
 		Usage:  "The number of confirmations needed on L1 for finalization. If not set, the default value is l1.confirmations",
 		EnvVar: prefixEnvVar("DERIVATION_CONFIRMATIONS"),
 	}
+
+	// DerivationReorgCheckDepth bounds how far back the derivation loop re-checks
+	// recorded L1 block hashes when scanning for reorgs.
+	DerivationReorgCheckDepth = cli.Uint64Flag{
+		Name:   "derivation.reorgCheckDepth",
+		Usage:  "Number of recent L1 blocks to check for reorgs (default: 64)",
+		EnvVar: prefixEnvVar("DERIVATION_REORG_CHECK_DEPTH"),
+		Value:  64,
+	}
 	// Logger
 	LogLevel = &cli.StringFlag{
 		Name:   "log.level",
@@ -364,6 +371,7 @@ var Flags = []cli.Flag{
 	DerivationLogProgressInterval,
 	DerivationFetchBlockRange,
 	DerivationConfirmations,
+	DerivationReorgCheckDepth,
 	L1BeaconAddr,
 	// blocktag options