[#1642] tree: Fix sorted getSubtree for multiversion filenames

Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
This commit is contained in:
Airat Arifullin 2025-02-20 12:22:12 +03:00
parent f339b28321
commit 91c73058e7
9 changed files with 70 additions and 49 deletions

View file

@ -238,7 +238,7 @@ func (e *StorageEngine) TreeGetChildren(ctx context.Context, cid cidSDK.ID, tree
}
// TreeSortedByFilename implements the pilorama.Forest interface.
func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) {
func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.Node, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.Node, error) {
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.TreeSortedByFilename",
trace.WithAttributes(
attribute.String("container_id", cid.EncodeToString()),
@ -250,8 +250,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID,
var err error
var nodes []pilorama.MultiNodeInfo
var cursor *string
var cursorNode *pilorama.Node
for _, sh := range e.sortShards(cid) {
nodes, cursor, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count)
nodes, cursor, cursorNode, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count)
if err != nil {
if err == shard.ErrPiloramaDisabled {
break
@ -264,9 +265,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID,
}
continue
}
return nodes, cursor, nil
return nodes, cursor, cursorNode, nil
}
return nil, lastFilename, err
return nil, lastFilename, lastNode, err
}
// TreeGetOpLog implements the pilorama.Forest interface.

View file

@ -1083,7 +1083,7 @@ func (t *boltForest) hasFewChildren(b *bbolt.Bucket, nodeIDs MultiNode, threshol
}
// TreeSortedByFilename implements the Forest interface.
func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error) {
func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, lastNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error) {
var (
startedAt = time.Now()
success = false
@ -1104,13 +1104,13 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
defer t.modeMtx.RUnlock()
if t.mode.NoMetabase() {
return nil, lastFilename, ErrDegradedMode
return nil, lastFilename, lastNode, ErrDegradedMode
}
if len(nodeIDs) == 0 {
return nil, lastFilename, errors.New("empty node list")
return nil, lastFilename, lastNode, errors.New("empty node list")
}
h := newHeap(lastFilename, count)
h := newHeap(lastFilename, lastNode, count)
key := make([]byte, 9)
var result []NodeInfo
@ -1149,7 +1149,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
success = err == nil
if err != nil {
return nil, lastFilename, metaerr.Wrap(err)
return nil, lastFilename, lastNode, metaerr.Wrap(err)
}
if fewChildren {
@ -1161,9 +1161,9 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
}
if len(res) != 0 {
s := string(findAttr(res[len(res)-1].Meta, AttributeFilename))
lastFilename = &s
lastFilename, lastNode = &s, &res[len(res)-1].Children[len(res[len(res)-1].Children)-1]
}
return res, lastFilename, metaerr.Wrap(err)
return res, lastFilename, lastNode, metaerr.Wrap(err)
}
func sortByFilename(nodes []NodeInfo) {

View file

@ -164,14 +164,14 @@ func (f *memoryForest) TreeGetMeta(_ context.Context, cid cid.ID, treeID string,
}
// TreeSortedByFilename implements the Forest interface.
func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, count int) ([]MultiNodeInfo, *string, error) {
func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, startNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error) {
fullID := cid.String() + "/" + treeID
s, ok := f.treeMap[fullID]
if !ok {
return nil, start, ErrTreeNotFound
return nil, start, startNode, ErrTreeNotFound
}
if count == 0 {
return nil, start, nil
return nil, start, startNode, nil
}
var res []NodeInfo
@ -197,7 +197,7 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI
}
}
if len(res) == 0 {
return nil, start, nil
return nil, start, startNode, nil
}
sortByFilename(res)
@ -210,11 +210,11 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI
finish = len(res)
}
last := string(findAttr(r[finish-1].Meta, AttributeFilename))
return r[i:finish], &last, nil
return r[i:finish], &last, startNode, nil
}
}
last := string(res[len(res)-1].Meta.GetAttr(AttributeFilename))
return nil, &last, nil
return nil, &last, startNode, nil
}
// TreeGetChildren implements the Forest interface.

View file

@ -217,7 +217,7 @@ func BenchmarkForestSortedIteration(b *testing.B) {
b.Run(providers[i].name+",root", func(b *testing.B) {
for range b.N {
res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, 100)
res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, nil, 100)
if err != nil || len(res) != 100 {
b.Fatalf("err %v, count %d", err, len(res))
}
@ -225,7 +225,7 @@ func BenchmarkForestSortedIteration(b *testing.B) {
})
b.Run(providers[i].name+",leaf", func(b *testing.B) {
for range b.N {
res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, 100)
res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, nil, 100)
if err != nil || len(res) != 0 {
b.FailNow()
}
@ -274,7 +274,7 @@ func testForestTreeSortedIterationBugWithSkip(t *testing.T, s ForestStorage) {
var result []MultiNodeInfo
treeAppend := func(t *testing.T, last *string, count int) *string {
res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count)
res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count)
require.NoError(t, err)
result = append(result, res...)
return cursor
@ -329,7 +329,7 @@ func testForestTreeSortedIteration(t *testing.T, s ForestStorage) {
var result []MultiNodeInfo
treeAppend := func(t *testing.T, last *string, count int) *string {
res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count)
res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count)
require.NoError(t, err)
result = append(result, res...)
return cursor
@ -407,7 +407,7 @@ func testForestTreeSortedByFilename(t *testing.T, s ForestStorage) {
}
getChildren := func(t *testing.T, id MultiNode) []MultiNodeInfo {
res, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, len(items))
res, _, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, nil, len(items))
require.NoError(t, err)
return res
}

View file

@ -30,28 +30,47 @@ func (h *filenameHeap) Pop() any {
// fixedHeap maintains a fixed number of smallest elements started at some point.
type fixedHeap struct {
start *string
sorted bool
count int
h *filenameHeap
start *string
filenameVersion *Node
sorted bool
count int
h *filenameHeap
}
func newHeap(start *string, count int) *fixedHeap {
func newHeap(start *string, filenameVersion *Node, count int) *fixedHeap {
h := new(filenameHeap)
heap.Init(h)
return &fixedHeap{
start: start,
count: count,
h: h,
start: start,
filenameVersion: filenameVersion,
count: count,
h: h,
}
}
const amortizationMultiplier = 5
func (h *fixedHeap) push(id MultiNode, filename string) bool {
if h.start != nil && filename <= *h.start {
return false
slices.Sort(id)
if h.start != nil {
if filename < *h.start {
return false
} else if filename == *h.start {
if h.filenameVersion == nil {
return false
}
// A tree may contain many nodes with the same filename but different versions, so that
// len(nodes) > batch_size. The nodes that were cut off from the previous batch should be
// pushed into the result on a repeated call with the same filename.
pos, found := slices.BinarySearch(id, *h.filenameVersion)
if !found || pos+1 >= len(id) {
return false
}
id = id[pos+1:]
}
}
*h.h = append(*h.h, heapInfo{id: id, filename: filename})

View file

@ -37,7 +37,7 @@ type Forest interface {
TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID string, nodeID Node) ([]NodeInfo, error)
// TreeSortedByFilename returns children of the node with the specified ID. The nodes are sorted by the filename attribute.
// Should return ErrTreeNotFound if the tree is not found, and empty result if the node is not in the tree.
TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error)
TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, lastNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error)
// TreeGetOpLog returns first log operation stored at or above the height.
// In case no such operation is found, empty Move and nil error should be returned.
TreeGetOpLog(ctx context.Context, cid cidSDK.ID, treeID string, height uint64) (Move, error)

View file

@ -97,7 +97,7 @@ func testDuplicateDirectory(t *testing.T, f Forest) {
require.Equal(t, []byte{10}, testGetByPath(t, "value0"))
testSortedByFilename := func(t *testing.T, root MultiNode, last *string, batchSize int) ([]MultiNodeInfo, *string) {
res, last, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, batchSize)
res, last, _, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, nil, batchSize)
require.NoError(t, err)
return res, last
}

View file

@ -211,7 +211,7 @@ func (s *Shard) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID strin
}
// TreeSortedByFilename implements the pilorama.Forest interface.
func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) {
func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.Node, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.Node, error) {
ctx, span := tracing.StartSpanFromContext(ctx, "Shard.TreeSortedByFilename",
trace.WithAttributes(
attribute.String("shard_id", s.ID().String()),
@ -222,16 +222,16 @@ func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID
defer span.End()
if s.pilorama == nil {
return nil, lastFilename, ErrPiloramaDisabled
return nil, lastFilename, lastNode, ErrPiloramaDisabled
}
s.m.RLock()
defer s.m.RUnlock()
if s.info.Mode.NoMetabase() {
return nil, lastFilename, ErrDegradedMode
return nil, lastFilename, lastNode, ErrDegradedMode
}
return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count)
return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count)
}
// TreeGetOpLog implements the pilorama.Forest interface.

View file

@ -404,9 +404,10 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeS
}
type stackItem struct {
values []pilorama.MultiNodeInfo
parent pilorama.MultiNode
last *string
values []pilorama.MultiNodeInfo
parent pilorama.MultiNode
last *string
lastNode *pilorama.Node
}
func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
@ -457,14 +458,13 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
break
}
nodes, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, batchSize)
var err error
item.values, item.last, item.lastNode, err = forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, item.lastNode, batchSize)
if err != nil {
return err
}
item.values = nodes
item.last = last
if len(nodes) == 0 {
if len(item.values) == 0 {
stack = stack[:len(stack)-1]
continue
}
@ -476,15 +476,16 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
}
if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() {
children, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, batchSize)
children, last, lastNode, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, nil, batchSize)
if err != nil {
return err
}
if len(children) != 0 {
stack = append(stack, stackItem{
values: children,
parent: node.Children,
last: last,
values: children,
parent: node.Children,
last: last,
lastNode: lastNode,
})
}
}