core: reset blocks, txs and AERs in several stages

Sometimes it can be hard to persist all changes at once: the process
can take almost all RAM and a lot of time. Here's an example of a reset
for mainnet from 2.4M to 1:
```
anna@kiwi:~/Documents/GitProjects/nspcc-dev/neo-go$ ./bin/neo-go db reset -m --height 1
2022-11-20T17:16:48.236+0300	INFO	MaxBlockSize is not set or wrong, setting default value	{"MaxBlockSize": 262144}
2022-11-20T17:16:48.236+0300	INFO	MaxBlockSystemFee is not set or wrong, setting default value	{"MaxBlockSystemFee": 900000000000}
2022-11-20T17:16:48.237+0300	INFO	MaxTransactionsPerBlock is not set or wrong, using default value	{"MaxTransactionsPerBlock": 512}
2022-11-20T17:16:48.237+0300	INFO	MaxValidUntilBlockIncrement is not set or wrong, using default value	{"MaxValidUntilBlockIncrement": 5760}
2022-11-20T17:16:48.240+0300	INFO	restoring blockchain	{"version": "0.2.6"}
2022-11-20T17:16:48.297+0300	INFO	initialize state reset	{"target height": 1}
2022-11-20T17:16:48.300+0300	INFO	trying to reset blocks, transactions and AERs
2022-11-20T17:19:29.313+0300	INFO	blocks, transactions ans AERs are reset	{"took": "2m41.015126493s", "keys": 3958420}
...
```
To avoid the OOM killer, split the blocks reset into multiple stages. It increases
the operation time due to intermediate DB persists, but makes things cleaner. Here's the
result for almost the same DB height with the new approach:
```
anna@kiwi:~/Documents/GitProjects/nspcc-dev/neo-go$ ./bin/neo-go db reset -m --height 1
2022-11-20T17:39:42.023+0300	INFO	MaxBlockSize is not set or wrong, setting default value	{"MaxBlockSize": 262144}
2022-11-20T17:39:42.023+0300	INFO	MaxBlockSystemFee is not set or wrong, setting default value	{"MaxBlockSystemFee": 900000000000}
2022-11-20T17:39:42.023+0300	INFO	MaxTransactionsPerBlock is not set or wrong, using default value	{"MaxTransactionsPerBlock": 512}
2022-11-20T17:39:42.023+0300	INFO	MaxValidUntilBlockIncrement is not set or wrong, using default value	{"MaxValidUntilBlockIncrement": 5760}
2022-11-20T17:39:42.026+0300	INFO	restoring blockchain	{"version": "0.2.6"}
2022-11-20T17:39:42.071+0300	INFO	initialize state reset	{"target height": 1}
2022-11-20T17:39:42.073+0300	INFO	trying to reset blocks, transactions and AERs
2022-11-20T17:40:11.735+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 1, "took": "29.66363737s", "keys": 210973}
2022-11-20T17:40:33.574+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 2, "took": "21.839208683s", "keys": 241203}
2022-11-20T17:41:29.325+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 3, "took": "55.750698386s", "keys": 250593}
2022-11-20T17:42:12.532+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 4, "took": "43.205892757s", "keys": 321896}
2022-11-20T17:43:07.978+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 5, "took": "55.445398156s", "keys": 334822}
2022-11-20T17:43:35.603+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 6, "took": "27.625292032s", "keys": 317131}
2022-11-20T17:43:51.747+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 7, "took": "16.144359017s", "keys": 355832}
2022-11-20T17:44:05.176+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 8, "took": "13.428733899s", "keys": 357690}
2022-11-20T17:44:32.895+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 9, "took": "27.718548783s", "keys": 393356}
2022-11-20T17:44:51.814+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 10, "took": "18.917954658s", "keys": 366492}
2022-11-20T17:45:07.208+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 11, "took": "15.392642196s", "keys": 326030}
2022-11-20T17:45:18.776+0300	INFO	intermediate batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 12, "took": "11.568255716s", "keys": 299884}
2022-11-20T17:45:25.862+0300	INFO	last batch of removed blocks, transactions and AERs is persisted	{"batches persisted": 13, "took": "7.086079594s", "keys": 190399}
2022-11-20T17:45:25.862+0300	INFO	blocks, transactions ans AERs are reset	{"took": "5m43.791214084s", "overall persisted keys": 3966301}
...
```
This commit is contained in:
Anna Shaleva 2022-11-20 20:55:48 +03:00
parent d67f0df516
commit bdc42cd595

View file

@ -724,20 +724,42 @@ func (bc *Blockchain) resetStateInternal(height uint32, stage stateChangeStage)
case stateJumpStarted:
bc.log.Info("trying to reset blocks, transactions and AERs")
// Remove blocks/transactions/aers from currHeight down to height (not including height itself).
// Keep headers for now, they'll be removed later.
// Keep headers for now, they'll be removed later. It's hard to handle the whole set of changes in
// one stage, so persist periodically.
const persistBatchSize = 100 * headerBatchCount // count blocks only, should be enough to avoid OOM killer even for large blocks
var (
pBlocksStart = p
blocksCnt, batchCnt, keysCnt int
)
for i := height + 1; i <= currHeight; i++ {
err := cache.DeleteBlock(bc.GetHeaderHash(int(i)))
if err != nil {
return fmt.Errorf("error while removing block %d: %w", i, err)
}
blocksCnt++
if blocksCnt == persistBatchSize {
keys, err = cache.Persist()
if err != nil {
return fmt.Errorf("failed to persist intermediate batch removed blocks, transactions and AERs: %w", err)
}
blocksCnt = 0
batchCnt++
keysCnt += keys
bc.log.Info("intermediate batch of removed blocks, transactions and AERs is persisted", zap.Int("batches persisted", batchCnt), zap.Duration("took", time.Since(p)), zap.Int("keys", keys))
p = time.Now()
}
}
cache.Store.Put(resetStageKey, []byte{stateResetBit | byte(staleBlocksRemoved)})
keys, err = cache.Persist()
if err != nil {
return fmt.Errorf("failed to persist blocks, transactions ans AERs changes to the DB: %w", err)
return fmt.Errorf("failed to persist last batch of removed blocks, transactions ans AERs: %w", err)
}
batchCnt++
keysCnt += keys
bc.log.Info("last batch of removed blocks, transactions and AERs is persisted", zap.Int("batches persisted", batchCnt), zap.Duration("took", time.Since(p)), zap.Int("keys", keys))
bc.log.Info("blocks, transactions ans AERs are reset", zap.Duration("took", time.Since(p)), zap.Int("keys", keys))
bc.log.Info("blocks, transactions ans AERs are reset", zap.Duration("took", time.Since(pBlocksStart)),
zap.Int("overall persisted keys", keysCnt))
p = time.Now()
fallthrough
case staleBlocksRemoved: