forked from TrueCloudLab/frostfs-node
[#1642] tree: Fix sorted getSubtree for multiversion filenames
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
This commit is contained in:
parent
f339b28321
commit
68e879eec0
9 changed files with 90 additions and 44 deletions
|
@ -238,7 +238,7 @@ func (e *StorageEngine) TreeGetChildren(ctx context.Context, cid cidSDK.ID, tree
|
|||
}
|
||||
|
||||
// TreeSortedByFilename implements the pilorama.Forest interface.
|
||||
func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) {
|
||||
func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.MultiNode, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.MultiNode, error) {
|
||||
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.TreeSortedByFilename",
|
||||
trace.WithAttributes(
|
||||
attribute.String("container_id", cid.EncodeToString()),
|
||||
|
@ -250,8 +250,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID,
|
|||
var err error
|
||||
var nodes []pilorama.MultiNodeInfo
|
||||
var cursor *string
|
||||
var cursorNode *pilorama.MultiNode
|
||||
for _, sh := range e.sortShards(cid) {
|
||||
nodes, cursor, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count)
|
||||
nodes, cursor, cursorNode, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count)
|
||||
if err != nil {
|
||||
if err == shard.ErrPiloramaDisabled {
|
||||
break
|
||||
|
@ -264,9 +265,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID,
|
|||
}
|
||||
continue
|
||||
}
|
||||
return nodes, cursor, nil
|
||||
return nodes, cursor, cursorNode, nil
|
||||
}
|
||||
return nil, lastFilename, err
|
||||
return nil, lastFilename, lastNode, err
|
||||
}
|
||||
|
||||
// TreeGetOpLog implements the pilorama.Forest interface.
|
||||
|
|
|
@ -1083,7 +1083,7 @@ func (t *boltForest) hasFewChildren(b *bbolt.Bucket, nodeIDs MultiNode, threshol
|
|||
}
|
||||
|
||||
// TreeSortedByFilename implements the Forest interface.
|
||||
func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error) {
|
||||
func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, lastNode *MultiNode, count int) ([]MultiNodeInfo, *string, *MultiNode, error) {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
|
@ -1104,13 +1104,14 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
|
|||
defer t.modeMtx.RUnlock()
|
||||
|
||||
if t.mode.NoMetabase() {
|
||||
return nil, lastFilename, ErrDegradedMode
|
||||
return nil, lastFilename, lastNode, ErrDegradedMode
|
||||
}
|
||||
if len(nodeIDs) == 0 {
|
||||
return nil, lastFilename, errors.New("empty node list")
|
||||
return nil, lastFilename, lastNode, errors.New("empty node list")
|
||||
}
|
||||
|
||||
h := newHeap(lastFilename, count)
|
||||
forcePush := lastNode != nil
|
||||
h := newHeap(lastFilename, forcePush, count)
|
||||
key := make([]byte, 9)
|
||||
|
||||
var result []NodeInfo
|
||||
|
@ -1149,13 +1150,51 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
|
|||
|
||||
success = err == nil
|
||||
if err != nil {
|
||||
return nil, lastFilename, metaerr.Wrap(err)
|
||||
return nil, lastFilename, lastNode, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
if fewChildren {
|
||||
result = sortAndCut(result, lastFilename)
|
||||
}
|
||||
res := mergeNodeInfos(result)
|
||||
|
||||
if lastNode != nil {
|
||||
var start int
|
||||
for start = range res {
|
||||
if slices.Equal(res[start].Children, *lastNode) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// These first 'start' nodes have been already visited.
|
||||
res = res[start:]
|
||||
}
|
||||
|
||||
// Case: A parent has children so that len(children) > batchSize
|
||||
// ◯ "dir"
|
||||
// ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓ ↓
|
||||
// ◯ ◯ ◯ ◯ ◯ ◯ ◯ ◯ . . . ◯ ◯ ◯ ◯ ◯
|
||||
// "c" "d" "a" "c" "a" "e" "b" "c" "b" "b" "c" "f" "f"
|
||||
//
|
||||
// After pushing nodes into the heap
|
||||
// ◯ ◯ ◯ ◯ ◯ ◯ ◯ ◯ ◯ . . .
|
||||
// "a" "a" "b" "b" "b" "c" "c" "c" "d" . . .
|
||||
// |←------batch size------→| |←cut→|
|
||||
//
|
||||
// So the cut-off node with filename "c" (the third one) must still be traversed and returned
|
||||
// rather than skipped. For this, `lastNode` is returned so that the next call can seek to it.
|
||||
if len(res) > count {
|
||||
lhs := findAttr(res[count-1].Meta, AttributeFilename)
|
||||
rhs := findAttr(res[count].Meta, AttributeFilename)
|
||||
|
||||
if bytes.Equal(lhs, rhs) {
|
||||
lastNode = &res[count].Children
|
||||
}
|
||||
res = res[:count]
|
||||
} else {
|
||||
lastNode = nil
|
||||
}
|
||||
|
||||
if len(res) > count {
|
||||
res = res[:count]
|
||||
}
|
||||
|
@ -1163,7 +1202,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
|
|||
s := string(findAttr(res[len(res)-1].Meta, AttributeFilename))
|
||||
lastFilename = &s
|
||||
}
|
||||
return res, lastFilename, metaerr.Wrap(err)
|
||||
return res, lastFilename, lastNode, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func sortByFilename(nodes []NodeInfo) {
|
||||
|
|
|
@ -164,14 +164,14 @@ func (f *memoryForest) TreeGetMeta(_ context.Context, cid cid.ID, treeID string,
|
|||
}
|
||||
|
||||
// TreeSortedByFilename implements the Forest interface.
|
||||
func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, count int) ([]MultiNodeInfo, *string, error) {
|
||||
func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, startNode *MultiNode, count int) ([]MultiNodeInfo, *string, *MultiNode, error) {
|
||||
fullID := cid.String() + "/" + treeID
|
||||
s, ok := f.treeMap[fullID]
|
||||
if !ok {
|
||||
return nil, start, ErrTreeNotFound
|
||||
return nil, start, startNode, ErrTreeNotFound
|
||||
}
|
||||
if count == 0 {
|
||||
return nil, start, nil
|
||||
return nil, start, startNode, nil
|
||||
}
|
||||
|
||||
var res []NodeInfo
|
||||
|
@ -197,7 +197,7 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI
|
|||
}
|
||||
}
|
||||
if len(res) == 0 {
|
||||
return nil, start, nil
|
||||
return nil, start, startNode, nil
|
||||
}
|
||||
|
||||
sortByFilename(res)
|
||||
|
@ -210,11 +210,11 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI
|
|||
finish = len(res)
|
||||
}
|
||||
last := string(findAttr(r[finish-1].Meta, AttributeFilename))
|
||||
return r[i:finish], &last, nil
|
||||
return r[i:finish], &last, startNode, nil
|
||||
}
|
||||
}
|
||||
last := string(res[len(res)-1].Meta.GetAttr(AttributeFilename))
|
||||
return nil, &last, nil
|
||||
return nil, &last, startNode, nil
|
||||
}
|
||||
|
||||
// TreeGetChildren implements the Forest interface.
|
||||
|
|
|
@ -217,7 +217,7 @@ func BenchmarkForestSortedIteration(b *testing.B) {
|
|||
|
||||
b.Run(providers[i].name+",root", func(b *testing.B) {
|
||||
for range b.N {
|
||||
res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, 100)
|
||||
res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, nil, 100)
|
||||
if err != nil || len(res) != 100 {
|
||||
b.Fatalf("err %v, count %d", err, len(res))
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ func BenchmarkForestSortedIteration(b *testing.B) {
|
|||
})
|
||||
b.Run(providers[i].name+",leaf", func(b *testing.B) {
|
||||
for range b.N {
|
||||
res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, 100)
|
||||
res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, nil, 100)
|
||||
if err != nil || len(res) != 0 {
|
||||
b.FailNow()
|
||||
}
|
||||
|
@ -274,7 +274,7 @@ func testForestTreeSortedIterationBugWithSkip(t *testing.T, s ForestStorage) {
|
|||
|
||||
var result []MultiNodeInfo
|
||||
treeAppend := func(t *testing.T, last *string, count int) *string {
|
||||
res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count)
|
||||
res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count)
|
||||
require.NoError(t, err)
|
||||
result = append(result, res...)
|
||||
return cursor
|
||||
|
@ -329,7 +329,7 @@ func testForestTreeSortedIteration(t *testing.T, s ForestStorage) {
|
|||
|
||||
var result []MultiNodeInfo
|
||||
treeAppend := func(t *testing.T, last *string, count int) *string {
|
||||
res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count)
|
||||
res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count)
|
||||
require.NoError(t, err)
|
||||
result = append(result, res...)
|
||||
return cursor
|
||||
|
@ -407,7 +407,7 @@ func testForestTreeSortedByFilename(t *testing.T, s ForestStorage) {
|
|||
}
|
||||
|
||||
getChildren := func(t *testing.T, id MultiNode) []MultiNodeInfo {
|
||||
res, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, len(items))
|
||||
res, _, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, nil, len(items))
|
||||
require.NoError(t, err)
|
||||
return res
|
||||
}
|
||||
|
|
|
@ -30,27 +30,30 @@ func (h *filenameHeap) Pop() any {
|
|||
|
||||
// fixedHeap maintains a fixed number of the smallest elements, starting from some point.
|
||||
type fixedHeap struct {
|
||||
start *string
|
||||
sorted bool
|
||||
count int
|
||||
h *filenameHeap
|
||||
start *string
|
||||
forcePush bool
|
||||
sorted bool
|
||||
count int
|
||||
h *filenameHeap
|
||||
}
|
||||
|
||||
func newHeap(start *string, count int) *fixedHeap {
|
||||
func newHeap(start *string, forcePush bool, count int) *fixedHeap {
|
||||
h := new(filenameHeap)
|
||||
heap.Init(h)
|
||||
|
||||
return &fixedHeap{
|
||||
start: start,
|
||||
count: count,
|
||||
h: h,
|
||||
start: start,
|
||||
forcePush: forcePush,
|
||||
count: count,
|
||||
h: h,
|
||||
}
|
||||
}
|
||||
|
||||
const amortizationMultiplier = 5
|
||||
|
||||
func (h *fixedHeap) push(id MultiNode, filename string) bool {
|
||||
if h.start != nil && filename <= *h.start {
|
||||
// With forcePush set, a filename equal to the start is still pushed into the heap, because several nodes may share the same filename.
|
||||
if h.start != nil && (filename < *h.start || filename == *h.start && !h.forcePush) {
|
||||
return false
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ type Forest interface {
|
|||
TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID string, nodeID Node) ([]NodeInfo, error)
|
||||
// TreeSortedByFilename returns children of the node with the specified ID. The nodes are sorted by the filename attribute.
|
||||
// Should return ErrTreeNotFound if the tree is not found, and empty result if the node is not in the tree.
|
||||
TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error)
|
||||
TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, lastNode *MultiNode, count int) ([]MultiNodeInfo, *string, *MultiNode, error)
|
||||
// TreeGetOpLog returns first log operation stored at or above the height.
|
||||
// In case no such operation is found, empty Move and nil error should be returned.
|
||||
TreeGetOpLog(ctx context.Context, cid cidSDK.ID, treeID string, height uint64) (Move, error)
|
||||
|
|
|
@ -97,7 +97,7 @@ func testDuplicateDirectory(t *testing.T, f Forest) {
|
|||
require.Equal(t, []byte{10}, testGetByPath(t, "value0"))
|
||||
|
||||
testSortedByFilename := func(t *testing.T, root MultiNode, last *string, batchSize int) ([]MultiNodeInfo, *string) {
|
||||
res, last, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, batchSize)
|
||||
res, last, _, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, nil, batchSize)
|
||||
require.NoError(t, err)
|
||||
return res, last
|
||||
}
|
||||
|
|
|
@ -211,7 +211,7 @@ func (s *Shard) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID strin
|
|||
}
|
||||
|
||||
// TreeSortedByFilename implements the pilorama.Forest interface.
|
||||
func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) {
|
||||
func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.MultiNode, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.MultiNode, error) {
|
||||
ctx, span := tracing.StartSpanFromContext(ctx, "Shard.TreeSortedByFilename",
|
||||
trace.WithAttributes(
|
||||
attribute.String("shard_id", s.ID().String()),
|
||||
|
@ -222,16 +222,16 @@ func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID
|
|||
defer span.End()
|
||||
|
||||
if s.pilorama == nil {
|
||||
return nil, lastFilename, ErrPiloramaDisabled
|
||||
return nil, lastFilename, lastNode, ErrPiloramaDisabled
|
||||
}
|
||||
|
||||
s.m.RLock()
|
||||
defer s.m.RUnlock()
|
||||
|
||||
if s.info.Mode.NoMetabase() {
|
||||
return nil, lastFilename, ErrDegradedMode
|
||||
return nil, lastFilename, lastNode, ErrDegradedMode
|
||||
}
|
||||
return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count)
|
||||
return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count)
|
||||
}
|
||||
|
||||
// TreeGetOpLog implements the pilorama.Forest interface.
|
||||
|
|
|
@ -404,9 +404,10 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeS
|
|||
}
|
||||
|
||||
type stackItem struct {
|
||||
values []pilorama.MultiNodeInfo
|
||||
parent pilorama.MultiNode
|
||||
last *string
|
||||
values []pilorama.MultiNodeInfo
|
||||
parent pilorama.MultiNode
|
||||
last *string
|
||||
lastNode *pilorama.MultiNode
|
||||
}
|
||||
|
||||
func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
|
||||
|
@ -457,12 +458,13 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
|
|||
break
|
||||
}
|
||||
|
||||
nodes, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, batchSize)
|
||||
nodes, last, lastNode, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, item.lastNode, batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
item.values = nodes
|
||||
item.last = last
|
||||
item.lastNode = lastNode
|
||||
|
||||
if len(nodes) == 0 {
|
||||
stack = stack[:len(stack)-1]
|
||||
|
@ -476,15 +478,16 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
|
|||
}
|
||||
|
||||
if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() {
|
||||
children, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, batchSize)
|
||||
children, last, lastNode, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, nil, batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(children) != 0 {
|
||||
stack = append(stack, stackItem{
|
||||
values: children,
|
||||
parent: node.Children,
|
||||
last: last,
|
||||
values: children,
|
||||
parent: node.Children,
|
||||
last: last,
|
||||
lastNode: lastNode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue