core: collapse completed parts of Billet

Some kind of marker is needed to check whether node has been collapsed
or not. So introduce (HashNode).Collapsed
This commit is contained in:
Anna Shaleva 2021-08-11 16:09:50 +03:00
parent 74f1848d19
commit 6a04880b49
3 changed files with 279 additions and 15 deletions

View file

@ -23,7 +23,10 @@ var (
// Billet is based on the following assumptions: // Billet is based on the following assumptions:
// 1. Refcount can only be incremented (we don't change MPT structure during restore, // 1. Refcount can only be incremented (we don't change MPT structure during restore,
// thus don't need to decrease refcount). // thus don't need to decrease refcount).
// 2. TODO: Each time the part of Billet is completely restored, it is collapsed into HashNode. // 2. Each time the part of Billet is completely restored, it is collapsed into
// HashNode.
// 3. Pair (node, path) must be restored only once. It's a duty of MPT pool to manage
// MPT paths in order to provide this assumption.
type Billet struct { type Billet struct {
Store *storage.MemCachedStore Store *storage.MemCachedStore
@ -44,8 +47,7 @@ func NewBillet(rootHash util.Uint256, enableRefCount bool, store *storage.MemCac
} }
// RestoreHashNode replaces HashNode located at the provided path by the specified Node // RestoreHashNode replaces HashNode located at the provided path by the specified Node
// and stores it. // and stores it. It also maintains MPT as small as possible by collapsing those parts
// TODO: It also maintains MPT as small as possible by collapsing those parts
// of MPT that have been completely restored. // of MPT that have been completely restored.
func (b *Billet) RestoreHashNode(path []byte, node Node) error { func (b *Billet) RestoreHashNode(path []byte, node Node) error {
if _, ok := node.(*HashNode); ok { if _, ok := node.(*HashNode); ok {
@ -94,14 +96,16 @@ func (b *Billet) putIntoLeaf(curr *LeafNode, path []byte, val Node) (Node, error
if curr.Hash() != val.Hash() { if curr.Hash() != val.Hash() {
return nil, fmt.Errorf("%w: bad Leaf node hash: expected %s, got %s", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE()) return nil, fmt.Errorf("%w: bad Leaf node hash: expected %s, got %s", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE())
} }
// this node has already been restored, no refcount changes required // Once Leaf node is restored, it will be collapsed into HashNode forever, so
return curr, nil // there shouldn't be such situation when we try to restore Leaf node.
panic("bug: can't restore LeafNode")
} }
func (b *Billet) putIntoBranch(curr *BranchNode, path []byte, val Node) (Node, error) { func (b *Billet) putIntoBranch(curr *BranchNode, path []byte, val Node) (Node, error) {
if len(path) == 0 && curr.Hash().Equals(val.Hash()) { if len(path) == 0 && curr.Hash().Equals(val.Hash()) {
// this node has already been restored, no refcount changes required // This node has already been restored, so it's an MPT pool duty to avoid
return curr, nil // duplicating restore requests.
panic("bug: can't perform restoring of BranchNode twice")
} }
i, path := splitPath(path) i, path := splitPath(path)
r, err := b.putIntoNode(curr.Children[i], path, val) r, err := b.putIntoNode(curr.Children[i], path, val)
@ -109,7 +113,7 @@ func (b *Billet) putIntoBranch(curr *BranchNode, path []byte, val Node) (Node, e
return nil, err return nil, err
} }
curr.Children[i] = r curr.Children[i] = r
return curr, nil return b.tryCollapseBranch(curr), nil
} }
func (b *Billet) putIntoExtension(curr *ExtensionNode, path []byte, val Node) (Node, error) { func (b *Billet) putIntoExtension(curr *ExtensionNode, path []byte, val Node) (Node, error) {
@ -117,8 +121,9 @@ func (b *Billet) putIntoExtension(curr *ExtensionNode, path []byte, val Node) (N
if curr.Hash() != val.Hash() { if curr.Hash() != val.Hash() {
return nil, fmt.Errorf("%w: bad Extension node hash: expected %s, got %s", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE()) return nil, fmt.Errorf("%w: bad Extension node hash: expected %s, got %s", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE())
} }
// this node has already been restored, no refcount changes required // This node has already been restored, so it's an MPT pool duty to avoid
return curr, nil // duplicating restore requests.
panic("bug: can't perform restoring of ExtensionNode twice")
} }
if !bytes.HasPrefix(path, curr.key) { if !bytes.HasPrefix(path, curr.key) {
return nil, fmt.Errorf("%w: can't modify ExtensionNode during restore", ErrRestoreFailed) return nil, fmt.Errorf("%w: can't modify ExtensionNode during restore", ErrRestoreFailed)
@ -129,11 +134,11 @@ func (b *Billet) putIntoExtension(curr *ExtensionNode, path []byte, val Node) (N
return nil, err return nil, err
} }
curr.next = r curr.next = r
return curr, nil return b.tryCollapseExtension(curr), nil
} }
func (b *Billet) putIntoHash(curr *HashNode, path []byte, val Node) (Node, error) { func (b *Billet) putIntoHash(curr *HashNode, path []byte, val Node) (Node, error) {
// Once the part of MPT Billet is completely restored, it will be collapsed forever, so // Once a part of MPT Billet is completely restored, it will be collapsed forever, so
// it's an MPT pool duty to avoid duplicating restore requests. // it's an MPT pool duty to avoid duplicating restore requests.
if len(path) != 0 { if len(path) != 0 {
return nil, fmt.Errorf("%w: node has already been collapsed", ErrRestoreFailed) return nil, fmt.Errorf("%w: node has already been collapsed", ErrRestoreFailed)
@ -148,10 +153,21 @@ func (b *Billet) putIntoHash(curr *HashNode, path []byte, val Node) (Node, error
if val.Hash() != curr.Hash() { if val.Hash() != curr.Hash() {
return nil, fmt.Errorf("%w: can't restore HashNode: expected and actual hashes mismatch (%s vs %s)", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE()) return nil, fmt.Errorf("%w: can't restore HashNode: expected and actual hashes mismatch (%s vs %s)", ErrRestoreFailed, curr.Hash().StringBE(), val.Hash().StringBE())
} }
if curr.Collapsed {
// This node has already been restored and collapsed, so it's an MPT pool duty to avoid
// duplicating restore requests.
panic("bug: can't perform restoring of collapsed node")
}
// We also need to increment refcount in both cases. That's the only place where refcount // We also need to increment refcount in both cases. That's the only place where refcount
// is changed during restore process. Also flush right now, because sync process can be // is changed during restore process. Also flush right now, because sync process can be
// interrupted at any time. // interrupted at any time.
b.incrementRefAndStore(val.Hash(), val.Bytes()) b.incrementRefAndStore(val.Hash(), val.Bytes())
if val.Type() == LeafT {
return b.tryCollapseLeaf(val.(*LeafNode)), nil
}
return val, nil return val, nil
} }
@ -214,7 +230,7 @@ func (b *Billet) traverse(curr Node, process func(node Node, nodeBytes []byte) b
} }
switch n := curr.(type) { switch n := curr.(type) {
case *LeafNode: case *LeafNode:
return n, nil return b.tryCollapseLeaf(n), nil
case *BranchNode: case *BranchNode:
for i := range n.Children { for i := range n.Children {
r, err := b.traverse(n.Children[i], process, ignoreStorageErr) r, err := b.traverse(n.Children[i], process, ignoreStorageErr)
@ -227,19 +243,55 @@ func (b *Billet) traverse(curr Node, process func(node Node, nodeBytes []byte) b
} }
n.Children[i] = r n.Children[i] = r
} }
return n, nil return b.tryCollapseBranch(n), nil
case *ExtensionNode: case *ExtensionNode:
r, err := b.traverse(n.next, process, ignoreStorageErr) r, err := b.traverse(n.next, process, ignoreStorageErr)
if err != nil && !errors.Is(err, errStop) { if err != nil && !errors.Is(err, errStop) {
return nil, err return nil, err
} }
n.next = r n.next = r
return n, err return b.tryCollapseExtension(n), err
default: default:
return nil, ErrNotFound return nil, ErrNotFound
} }
} }
func (b *Billet) tryCollapseLeaf(curr *LeafNode) Node {
// Leaf can always be collapsed.
res := NewHashNode(curr.Hash())
res.Collapsed = true
return res
}
func (b *Billet) tryCollapseExtension(curr *ExtensionNode) Node {
if !(curr.next.Type() == HashT && curr.next.(*HashNode).Collapsed) {
return curr
}
res := NewHashNode(curr.Hash())
res.Collapsed = true
return res
}
func (b *Billet) tryCollapseBranch(curr *BranchNode) Node {
canCollapse := true
for i := 0; i < childrenCount; i++ {
if curr.Children[i].Type() == EmptyT {
continue
}
if curr.Children[i].Type() == HashT && curr.Children[i].(*HashNode).Collapsed {
continue
}
canCollapse = false
break
}
if !canCollapse {
return curr
}
res := NewHashNode(curr.Hash())
res.Collapsed = true
return res
}
func (b *Billet) getFromStore(h util.Uint256) (Node, error) { func (b *Billet) getFromStore(h util.Uint256) (Node, error) {
data, err := b.Store.Get(makeStorageKey(h.BytesBE())) data, err := b.Store.Get(makeStorageKey(h.BytesBE()))
if err != nil { if err != nil {

211
pkg/core/mpt/billet_test.go Normal file
View file

@ -0,0 +1,211 @@
package mpt
import (
"encoding/binary"
"errors"
"testing"
"github.com/nspcc-dev/neo-go/pkg/core/storage"
"github.com/nspcc-dev/neo-go/pkg/io"
"github.com/nspcc-dev/neo-go/pkg/util"
"github.com/stretchr/testify/require"
)
func TestBillet_RestoreHashNode(t *testing.T) {
check := func(t *testing.T, tr *Billet, expectedRoot Node, expectedNode Node, expectedRefCount uint32) {
_ = expectedRoot.Hash()
_ = tr.root.Hash()
require.Equal(t, expectedRoot, tr.root)
expectedBytes, err := tr.Store.Get(makeStorageKey(expectedNode.Hash().BytesBE()))
if expectedRefCount != 0 {
require.NoError(t, err)
require.Equal(t, expectedRefCount, binary.LittleEndian.Uint32(expectedBytes[len(expectedBytes)-4:]))
} else {
require.True(t, errors.Is(err, storage.ErrKeyNotFound))
}
}
t.Run("parent is Extension", func(t *testing.T) {
t.Run("restore Branch", func(t *testing.T) {
b := NewBranchNode()
b.Children[0] = NewExtensionNode([]byte{0x01}, NewLeafNode([]byte{0xAB, 0xCD}))
b.Children[5] = NewExtensionNode([]byte{0x01}, NewLeafNode([]byte{0xAB, 0xDE}))
path := toNibbles([]byte{0xAC})
e := NewExtensionNode(path, NewHashNode(b.Hash()))
tr := NewBillet(e.Hash(), true, newTestStore())
tr.root = e
// OK
n := new(NodeObject)
n.DecodeBinary(io.NewBinReaderFromBuf(b.Bytes()))
require.NoError(t, tr.RestoreHashNode(path, n.Node))
expected := NewExtensionNode(path, n.Node)
check(t, tr, expected, n.Node, 1)
// One more time (already restored) => panic expected, no refcount changes
require.Panics(t, func() {
_ = tr.RestoreHashNode(path, n.Node)
})
check(t, tr, expected, n.Node, 1)
// Same path, but wrong hash => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(path, NewBranchNode()), ErrRestoreFailed))
check(t, tr, expected, n.Node, 1)
// New path (changes in the MPT structure are not allowed) => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(toNibbles([]byte{0xAB}), n.Node), ErrRestoreFailed))
check(t, tr, expected, n.Node, 1)
})
t.Run("restore Leaf", func(t *testing.T) {
l := NewLeafNode([]byte{0xAB, 0xCD})
path := toNibbles([]byte{0xAC})
e := NewExtensionNode(path, NewHashNode(l.Hash()))
tr := NewBillet(e.Hash(), true, newTestStore())
tr.root = e
// OK
require.NoError(t, tr.RestoreHashNode(path, l))
expected := NewHashNode(e.Hash()) // leaf should be collapsed immediately => extension should also be collapsed
expected.Collapsed = true
check(t, tr, expected, l, 1)
// One more time (already restored and collapsed) => error expected, no refcount changes
require.Error(t, tr.RestoreHashNode(path, l))
check(t, tr, expected, l, 1)
// Same path, but wrong hash => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(path, NewLeafNode([]byte{0xAB, 0xEF})), ErrRestoreFailed))
check(t, tr, expected, l, 1)
// New path (changes in the MPT structure are not allowed) => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(toNibbles([]byte{0xAB}), l), ErrRestoreFailed))
check(t, tr, expected, l, 1)
})
t.Run("restore Hash", func(t *testing.T) {
h := NewHashNode(util.Uint256{1, 2, 3})
path := toNibbles([]byte{0xAC})
e := NewExtensionNode(path, h)
tr := NewBillet(e.Hash(), true, newTestStore())
tr.root = e
// no-op
require.True(t, errors.Is(tr.RestoreHashNode(path, h), ErrRestoreFailed))
check(t, tr, e, h, 0)
})
})
t.Run("parent is Leaf", func(t *testing.T) {
l := NewLeafNode([]byte{0xAB, 0xCD})
path := []byte{}
tr := NewBillet(l.Hash(), true, newTestStore())
tr.root = l
// Already restored => panic expected
require.Panics(t, func() {
_ = tr.RestoreHashNode(path, l)
})
// Same path, but wrong hash => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(path, NewLeafNode([]byte{0xAB, 0xEF})), ErrRestoreFailed))
// Non-nil path, but MPT structure can't be changed => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(toNibbles([]byte{0xAC}), NewLeafNode([]byte{0xAB, 0xEF})), ErrRestoreFailed))
})
t.Run("parent is Branch", func(t *testing.T) {
t.Run("middle child", func(t *testing.T) {
l1 := NewLeafNode([]byte{0xAB, 0xCD})
l2 := NewLeafNode([]byte{0xAB, 0xDE})
b := NewBranchNode()
b.Children[5] = NewHashNode(l1.Hash())
b.Children[lastChild] = NewHashNode(l2.Hash())
tr := NewBillet(b.Hash(), true, newTestStore())
tr.root = b
// OK
path := []byte{0x05}
require.NoError(t, tr.RestoreHashNode(path, l1))
check(t, tr, b, l1, 1)
// One more time (already restored) => panic expected.
// It's an MPT pool duty to avoid such situations during real restore process.
require.Panics(t, func() {
_ = tr.RestoreHashNode(path, l1)
})
// No refcount changes expected.
check(t, tr, b, l1, 1)
// Same path, but wrong hash => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(path, NewLeafNode([]byte{0xAD})), ErrRestoreFailed))
check(t, tr, b, l1, 1)
// New path pointing to the empty HashNode (changes in the MPT structure are not allowed) => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode([]byte{0x01}, l1), ErrRestoreFailed))
check(t, tr, b, l1, 1)
})
t.Run("last child", func(t *testing.T) {
l1 := NewLeafNode([]byte{0xAB, 0xCD})
l2 := NewLeafNode([]byte{0xAB, 0xDE})
b := NewBranchNode()
b.Children[5] = NewHashNode(l1.Hash())
b.Children[lastChild] = NewHashNode(l2.Hash())
tr := NewBillet(b.Hash(), true, newTestStore())
tr.root = b
// OK
path := []byte{}
require.NoError(t, tr.RestoreHashNode(path, l2))
check(t, tr, b, l2, 1)
// One more time (already restored) => panic expected.
// It's an MPT pool duty to avoid such situations during real restore process.
require.Panics(t, func() {
_ = tr.RestoreHashNode(path, l2)
})
// No refcount changes expected.
check(t, tr, b, l2, 1)
// Same path, but wrong hash => error expected, no refcount changes
require.True(t, errors.Is(tr.RestoreHashNode(path, NewLeafNode([]byte{0xAD})), ErrRestoreFailed))
check(t, tr, b, l2, 1)
})
t.Run("two children with same hash", func(t *testing.T) {
l := NewLeafNode([]byte{0xAB, 0xCD})
b := NewBranchNode()
// two same hashnodes => leaf's refcount expected to be 2 in the end.
b.Children[3] = NewHashNode(l.Hash())
b.Children[4] = NewHashNode(l.Hash())
tr := NewBillet(b.Hash(), true, newTestStore())
tr.root = b
// OK
require.NoError(t, tr.RestoreHashNode([]byte{0x03}, l))
expected := b
expected.Children[3].(*HashNode).Collapsed = true
check(t, tr, b, l, 1)
// Restore another node with the same hash => no error expected, refcount should be incremented.
// Branch node should be collapsed.
require.NoError(t, tr.RestoreHashNode([]byte{0x04}, l))
res := NewHashNode(b.Hash())
res.Collapsed = true
check(t, tr, res, l, 2)
})
})
t.Run("parent is Hash", func(t *testing.T) {
l := NewLeafNode([]byte{0xAB, 0xCD})
b := NewBranchNode()
b.Children[3] = NewHashNode(l.Hash())
b.Children[4] = NewHashNode(l.Hash())
tr := NewBillet(b.Hash(), true, newTestStore())
// Should fail, because if it's a hash node with non-empty path, then the node
// has already been collapsed.
require.Error(t, tr.RestoreHashNode([]byte{0x03}, l))
})
}

View file

@ -10,6 +10,7 @@ import (
// HashNode represents MPT's hash node. // HashNode represents MPT's hash node.
type HashNode struct { type HashNode struct {
BaseNode BaseNode
Collapsed bool
} }
var _ Node = (*HashNode)(nil) var _ Node = (*HashNode)(nil)