From bdc42cd5952ce963c760f25312399f25157d4f38 Mon Sep 17 00:00:00 2001 From: Anna Shaleva Date: Sun, 20 Nov 2022 20:55:48 +0300 Subject: [PATCH] core: reset blocks, txs and AERs in several stages Sometimes it can be hard to persist all changes at once: the process can take almost all RAM and a lot of time. Here's an example of a reset for mainnet from 2.4M to 1: ``` anna@kiwi:~/Documents/GitProjects/nspcc-dev/neo-go$ ./bin/neo-go db reset -m --height 1 2022-11-20T17:16:48.236+0300 INFO MaxBlockSize is not set or wrong, setting default value {"MaxBlockSize": 262144} 2022-11-20T17:16:48.236+0300 INFO MaxBlockSystemFee is not set or wrong, setting default value {"MaxBlockSystemFee": 900000000000} 2022-11-20T17:16:48.237+0300 INFO MaxTransactionsPerBlock is not set or wrong, using default value {"MaxTransactionsPerBlock": 512} 2022-11-20T17:16:48.237+0300 INFO MaxValidUntilBlockIncrement is not set or wrong, using default value {"MaxValidUntilBlockIncrement": 5760} 2022-11-20T17:16:48.240+0300 INFO restoring blockchain {"version": "0.2.6"} 2022-11-20T17:16:48.297+0300 INFO initialize state reset {"target height": 1} 2022-11-20T17:16:48.300+0300 INFO trying to reset blocks, transactions and AERs 2022-11-20T17:19:29.313+0300 INFO blocks, transactions ans AERs are reset {"took": "2m41.015126493s", "keys": 3958420} ... ``` To avoid the OOM killer, split the block reset into multiple stages. 
It increases operation time due to intermediate DB persists, but makes things cleaner, the result for almost the same DB height with the new approach: ``` anna@kiwi:~/Documents/GitProjects/nspcc-dev/neo-go$ ./bin/neo-go db reset -m --height 1 2022-11-20T17:39:42.023+0300 INFO MaxBlockSize is not set or wrong, setting default value {"MaxBlockSize": 262144} 2022-11-20T17:39:42.023+0300 INFO MaxBlockSystemFee is not set or wrong, setting default value {"MaxBlockSystemFee": 900000000000} 2022-11-20T17:39:42.023+0300 INFO MaxTransactionsPerBlock is not set or wrong, using default value {"MaxTransactionsPerBlock": 512} 2022-11-20T17:39:42.023+0300 INFO MaxValidUntilBlockIncrement is not set or wrong, using default value {"MaxValidUntilBlockIncrement": 5760} 2022-11-20T17:39:42.026+0300 INFO restoring blockchain {"version": "0.2.6"} 2022-11-20T17:39:42.071+0300 INFO initialize state reset {"target height": 1} 2022-11-20T17:39:42.073+0300 INFO trying to reset blocks, transactions and AERs 2022-11-20T17:40:11.735+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 1, "took": "29.66363737s", "keys": 210973} 2022-11-20T17:40:33.574+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 2, "took": "21.839208683s", "keys": 241203} 2022-11-20T17:41:29.325+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 3, "took": "55.750698386s", "keys": 250593} 2022-11-20T17:42:12.532+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 4, "took": "43.205892757s", "keys": 321896} 2022-11-20T17:43:07.978+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 5, "took": "55.445398156s", "keys": 334822} 2022-11-20T17:43:35.603+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 6, "took": 
"27.625292032s", "keys": 317131} 2022-11-20T17:43:51.747+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 7, "took": "16.144359017s", "keys": 355832} 2022-11-20T17:44:05.176+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 8, "took": "13.428733899s", "keys": 357690} 2022-11-20T17:44:32.895+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 9, "took": "27.718548783s", "keys": 393356} 2022-11-20T17:44:51.814+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 10, "took": "18.917954658s", "keys": 366492} 2022-11-20T17:45:07.208+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 11, "took": "15.392642196s", "keys": 326030} 2022-11-20T17:45:18.776+0300 INFO intermediate batch of removed blocks, transactions and AERs is persisted {"batches persisted": 12, "took": "11.568255716s", "keys": 299884} 2022-11-20T17:45:25.862+0300 INFO last batch of removed blocks, transactions and AERs is persisted {"batches persisted": 13, "took": "7.086079594s", "keys": 190399} 2022-11-20T17:45:25.862+0300 INFO blocks, transactions ans AERs are reset {"took": "5m43.791214084s", "overall persisted keys": 3966301} ... ``` --- pkg/core/blockchain.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pkg/core/blockchain.go b/pkg/core/blockchain.go index 0f277b504..aa8957ad9 100644 --- a/pkg/core/blockchain.go +++ b/pkg/core/blockchain.go @@ -724,20 +724,42 @@ func (bc *Blockchain) resetStateInternal(height uint32, stage stateChangeStage) case stateJumpStarted: bc.log.Info("trying to reset blocks, transactions and AERs") // Remove blocks/transactions/aers from currHeight down to height (not including height itself). - // Keep headers for now, they'll be removed later. 
+ // Keep headers for now, they'll be removed later. It's hard to handle the whole set of changes in + // one stage, so persist periodically. + const persistBatchSize = 100 * headerBatchCount // count blocks only, should be enough to avoid OOM killer even for large blocks + var ( + pBlocksStart = p + blocksCnt, batchCnt, keysCnt int + ) for i := height + 1; i <= currHeight; i++ { err := cache.DeleteBlock(bc.GetHeaderHash(int(i))) if err != nil { return fmt.Errorf("error while removing block %d: %w", i, err) } + blocksCnt++ + if blocksCnt == persistBatchSize { + keys, err = cache.Persist() + if err != nil { + return fmt.Errorf("failed to persist intermediate batch removed blocks, transactions and AERs: %w", err) + } + blocksCnt = 0 + batchCnt++ + keysCnt += keys + bc.log.Info("intermediate batch of removed blocks, transactions and AERs is persisted", zap.Int("batches persisted", batchCnt), zap.Duration("took", time.Since(p)), zap.Int("keys", keys)) + p = time.Now() + } } cache.Store.Put(resetStageKey, []byte{stateResetBit | byte(staleBlocksRemoved)}) keys, err = cache.Persist() if err != nil { - return fmt.Errorf("failed to persist blocks, transactions ans AERs changes to the DB: %w", err) + return fmt.Errorf("failed to persist last batch of removed blocks, transactions ans AERs: %w", err) } + batchCnt++ + keysCnt += keys + bc.log.Info("last batch of removed blocks, transactions and AERs is persisted", zap.Int("batches persisted", batchCnt), zap.Duration("took", time.Since(p)), zap.Int("keys", keys)) - bc.log.Info("blocks, transactions ans AERs are reset", zap.Duration("took", time.Since(p)), zap.Int("keys", keys)) + bc.log.Info("blocks, transactions ans AERs are reset", zap.Duration("took", time.Since(pBlocksStart)), + zap.Int("overall persisted keys", keysCnt)) p = time.Now() fallthrough case staleBlocksRemoved: