core: implement basic GC for value-based storage scheme
The key idea here is that even though we can't ensure MPT code won't make the node active again, we can order the changes made to the persistent store in such a way that it practically doesn't matter. What happens is:

 * after persist, if it's time to collect our garbage, we do it synchronously right in the same thread, working the underlying persistent store directly
 * all the other node code doesn't see much of it, it works with bc.dao or layers above it
 * if MPT doesn't find some stale deactivated node in the storage, it's OK, it'll recreate it in bc.dao
 * if MPT finds it and activates it, it's OK too, bc.dao will store it
 * while GC is being performed nothing else changes the persistent store
 * all subsequent bc.dao persists only happen after the GC is completed, which means that any changes to the (potentially) deleted nodes have priority; it's OK for GC to delete something that'll be recreated with the next persist cycle

Otherwise it's a simple scheme with node status/last active height stored in the value. Preliminary tests show that it works ~18% worse than the simple KeepOnlyLatest scheme, but this seems to be the best result so far.

Fixes #2095.
parent c4ee310e85
commit 423c7883b8
5 changed files with 131 additions and 13 deletions
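The ordering argument from the commit message is easier to see as a sketch. Below is a minimal, self-contained Go illustration, not the actual node code: persist, collectGarbage, gcPeriod and keep are all made-up names. The only point is that GC runs synchronously in the same goroutine that persists, so the next persist (which may re-activate or recreate nodes) can only reach the store after GC has finished.

```go
// Sketch of the persist/GC ordering described in the commit message.
// All names and numbers here are hypothetical, for illustration only.
package main

import "fmt"

const (
	gcPeriod = 4 // hypothetical: collect every 4 persisted heights
	keep     = 2 // hypothetical: number of recent heights to keep
)

// persist stands in for flushing bc.dao changes to the persistent store.
func persist(height uint32) {
	fmt.Printf("persist changes for height %d\n", height)
}

// collectGarbage stands in for the synchronous GC pass; nothing else
// writes to the store while it runs, because it is called from the
// same goroutine that does the persisting.
func collectGarbage(upTo uint32) {
	fmt.Printf("collect nodes deactivated at or below height %d\n", upTo)
}

func main() {
	for h := uint32(1); h <= 8; h++ {
		persist(h)
		if h%gcPeriod == 0 && h > keep {
			collectGarbage(h - keep) // synchronous: blocks the next persist
		}
	}
}
```

With this ordering it is fine for GC to delete a node that the very next persist recreates: the recreation simply lands in the store after the deletion.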
@@ -5,6 +5,7 @@ import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/nspcc-dev/neo-go/pkg/config"
	"github.com/nspcc-dev/neo-go/pkg/config/netmode"
@@ -57,6 +58,9 @@ func NewModule(cfg config.ProtocolConfiguration, verif VerifierFunc, log *zap.Lo
	if cfg.KeepOnlyLatestState {
		mode |= mpt.ModeLatest
	}
	if cfg.RemoveUntraceableBlocks {
		mode |= mpt.ModeGC
	}
	return &Module{
		network:  cfg.Magic,
		srInHead: cfg.StateRootInHeader,
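For readers unfamiliar with the mode flags used above, here is a small self-contained sketch of the same bit-flag pattern. The TrieMode type and constant values are invented and only mirror the shape of mpt.ModeLatest / mpt.ModeGC; they are not the real mpt definitions.

```go
// Sketch of composing trie mode flags from configuration options.
// TrieMode and its constants are hypothetical, not the real mpt types.
package main

import "fmt"

type TrieMode byte

const (
	ModeAll    TrieMode = 0      // keep everything, no GC
	ModeLatest TrieMode = 1 << 0 // keep only the latest state
	ModeGC     TrieMode = 1 << 1 // node values carry activity info for GC
)

// GC reports whether garbage collection is enabled in this mode.
func (m TrieMode) GC() bool { return m&ModeGC != 0 }

func main() {
	// Sample configuration values standing in for ProtocolConfiguration fields.
	keepOnlyLatestState, removeUntraceableBlocks := false, true

	mode := ModeAll
	if keepOnlyLatestState {
		mode |= ModeLatest
	}
	if removeUntraceableBlocks {
		mode |= ModeGC
	}
	fmt.Printf("mode=%04b GC=%v\n", mode, mode.GC())
}
```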
@@ -184,6 +188,44 @@ func (s *Module) JumpToState(sr *state.MPTRoot) error {
	return nil
}

// GC performs garbage collection.
func (s *Module) GC(index uint32, store storage.Store) time.Duration {
	if !s.mode.GC() {
		panic("stateroot: GC invoked, but not enabled")
	}
	var removed int
	var stored int64
	s.log.Info("starting MPT garbage collection", zap.Uint32("index", index))
	start := time.Now()
	b := store.Batch()
	store.Seek(storage.SeekRange{
		Prefix: []byte{byte(storage.DataMPT)},
	}, func(k, v []byte) bool {
		stored++
		if !mpt.IsActiveValue(v) {
			h := binary.LittleEndian.Uint32(v[len(v)-4:])
			if h > index {
				return true
			}
			b.Delete(k)
			removed++
			stored--
		}
		return true
	})
	err := store.PutBatch(b)
	dur := time.Since(start)
	if err != nil {
		s.log.Error("failed to flush MPT GC changeset", zap.Duration("time", dur), zap.Error(err))
	} else {
		s.log.Info("finished MPT garbage collection",
			zap.Int("removed", removed),
			zap.Int64("stored", stored),
			zap.Duration("time", dur))
	}
	return dur
}

// AddMPTBatch updates using provided batch.
func (s *Module) AddMPTBatch(index uint32, b mpt.Batch, cache *storage.MemCachedStore) (*mpt.Trie, *state.MPTRoot, error) {
	mpt := *s.mpt
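The per-value decision the GC callback makes can be pulled out into a tiny standalone sketch. isActive below is a hypothetical stand-in for mpt.IsActiveValue (the actual activity encoding isn't visible in this diff), while the last-active height is read from the trailing 4 little-endian bytes exactly as the callback does: a node is collectible only if it is inactive and its last-active height is not above the requested index.

```go
// Sketch of the GC per-value check. The activity marker byte used here
// is an assumption for illustration; only the trailing-height layout
// matches what the GC code above actually reads.
package main

import (
	"encoding/binary"
	"fmt"
)

// isActive is a stand-in for mpt.IsActiveValue; here a leading 0x01 byte
// marks an active node (an assumed encoding, not necessarily the real one).
func isActive(v []byte) bool { return len(v) > 0 && v[0] == 0x01 }

// collectible mirrors the GC callback: only inactive nodes whose
// last-active height is at or below the requested index get deleted.
func collectible(v []byte, index uint32) bool {
	if isActive(v) {
		return false
	}
	h := binary.LittleEndian.Uint32(v[len(v)-4:])
	return h <= index
}

func main() {
	// Inactive node payload with last-active height 90 appended.
	stale := []byte{0x00, 0xaa, 0xbb}
	stale = binary.LittleEndian.AppendUint32(stale, 90)

	fmt.Println(collectible(stale, 100)) // true: deactivated at 90 <= 100
	fmt.Println(collectible(stale, 80))  // false: still within the kept range
}
```

The `h > index` branch in the real callback is what keeps recently deactivated nodes alive: the caller passes the height up to which collection is allowed, so anything deactivated after that height survives until a later GC pass.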