diff --git a/pkg/local_object_storage/engine/tree.go b/pkg/local_object_storage/engine/tree.go index df8eb1706..97aba964d 100644 --- a/pkg/local_object_storage/engine/tree.go +++ b/pkg/local_object_storage/engine/tree.go @@ -238,7 +238,7 @@ func (e *StorageEngine) TreeGetChildren(ctx context.Context, cid cidSDK.ID, tree } // TreeSortedByFilename implements the pilorama.Forest interface. -func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) { +func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.Node, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.Node, error) { ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.TreeSortedByFilename", trace.WithAttributes( attribute.String("container_id", cid.EncodeToString()), @@ -250,8 +250,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, var err error var nodes []pilorama.MultiNodeInfo var cursor *string + var cursorNode *pilorama.Node for _, sh := range e.sortShards(cid) { - nodes, cursor, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count) + nodes, cursor, cursorNode, err = sh.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count) if err != nil { if err == shard.ErrPiloramaDisabled { break @@ -264,9 +265,9 @@ func (e *StorageEngine) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, } continue } - return nodes, cursor, nil + return nodes, cursor, cursorNode, nil } - return nil, lastFilename, err + return nil, lastFilename, lastNode, err } // TreeGetOpLog implements the pilorama.Forest interface. diff --git a/pkg/local_object_storage/pilorama/boltdb.go b/pkg/local_object_storage/pilorama/boltdb.go index a4fa74e35..3086dbbfd 100644 --- a/pkg/local_object_storage/pilorama/boltdb.go +++ b/pkg/local_object_storage/pilorama/boltdb.go @@ -1083,7 +1083,7 @@ func (t *boltForest) hasFewChildren(b *bbolt.Bucket, nodeIDs MultiNode, threshol } // TreeSortedByFilename implements the Forest interface. -func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error) { +func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeIDs MultiNode, lastFilename *string, lastNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error) { var ( startedAt = time.Now() success = false @@ -1104,13 +1104,13 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr defer t.modeMtx.RUnlock() if t.mode.NoMetabase() { - return nil, lastFilename, ErrDegradedMode + return nil, lastFilename, lastNode, ErrDegradedMode } if len(nodeIDs) == 0 { - return nil, lastFilename, errors.New("empty node list") + return nil, lastFilename, lastNode, errors.New("empty node list") } - h := newHeap(lastFilename, count) + h := newHeap(lastFilename, lastNode, count) key := make([]byte, 9) var result []NodeInfo @@ -1149,7 +1149,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr success = err == nil if err != nil { - return nil, lastFilename, metaerr.Wrap(err) + return nil, lastFilename, lastNode, metaerr.Wrap(err) } if fewChildren { @@ -1161,9 +1161,9 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr } if len(res) != 0 { s := string(findAttr(res[len(res)-1].Meta, AttributeFilename)) - lastFilename = &s + lastFilename, lastNode = &s, &res[len(res)-1].Children[len(res[len(res)-1].Children)-1] } - return res, lastFilename, metaerr.Wrap(err) + return res, lastFilename, lastNode, metaerr.Wrap(err) } func sortByFilename(nodes []NodeInfo) { diff --git a/pkg/local_object_storage/pilorama/forest.go b/pkg/local_object_storage/pilorama/forest.go index f31504e2b..ed3dab0c8 100644 --- a/pkg/local_object_storage/pilorama/forest.go +++ b/pkg/local_object_storage/pilorama/forest.go @@ -164,14 +164,14 @@ func (f *memoryForest) TreeGetMeta(_ context.Context, cid cid.ID, treeID string, } // TreeSortedByFilename implements the Forest interface. -func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, count int) ([]MultiNodeInfo, *string, error) { +func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeID string, nodeIDs MultiNode, start *string, startNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error) { fullID := cid.String() + "/" + treeID s, ok := f.treeMap[fullID] if !ok { - return nil, start, ErrTreeNotFound + return nil, start, startNode, ErrTreeNotFound } if count == 0 { - return nil, start, nil + return nil, start, startNode, nil } var res []NodeInfo @@ -197,7 +197,7 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI } } if len(res) == 0 { - return nil, start, nil + return nil, start, startNode, nil } sortByFilename(res) @@ -210,11 +210,11 @@ func (f *memoryForest) TreeSortedByFilename(_ context.Context, cid cid.ID, treeI finish = len(res) } last := string(findAttr(r[finish-1].Meta, AttributeFilename)) - return r[i:finish], &last, nil + return r[i:finish], &last, startNode, nil } } last := string(res[len(res)-1].Meta.GetAttr(AttributeFilename)) - return nil, &last, nil + return nil, &last, startNode, nil } // TreeGetChildren implements the Forest interface. diff --git a/pkg/local_object_storage/pilorama/forest_test.go b/pkg/local_object_storage/pilorama/forest_test.go index de56fc82b..662cbc25b 100644 --- a/pkg/local_object_storage/pilorama/forest_test.go +++ b/pkg/local_object_storage/pilorama/forest_test.go @@ -217,7 +217,7 @@ func BenchmarkForestSortedIteration(b *testing.B) { b.Run(providers[i].name+",root", func(b *testing.B) { for range b.N { - res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, 100) + res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{RootID}, nil, nil, 100) if err != nil || len(res) != 100 { b.Fatalf("err %v, count %d", err, len(res)) } @@ -225,7 +225,7 @@ func BenchmarkForestSortedIteration(b *testing.B) { }) b.Run(providers[i].name+",leaf", func(b *testing.B) { for range b.N { - res, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, 100) + res, _, _, err := f.TreeSortedByFilename(context.Background(), cnr, treeID, MultiNode{1}, nil, nil, 100) if err != nil || len(res) != 0 { b.FailNow() } @@ -274,7 +274,7 @@ func testForestTreeSortedIterationBugWithSkip(t *testing.T, s ForestStorage) { var result []MultiNodeInfo treeAppend := func(t *testing.T, last *string, count int) *string { - res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count) + res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count) require.NoError(t, err) result = append(result, res...) return cursor @@ -329,7 +329,7 @@ func testForestTreeSortedIteration(t *testing.T, s ForestStorage) { var result []MultiNodeInfo treeAppend := func(t *testing.T, last *string, count int) *string { - res, cursor, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, count) + res, cursor, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, MultiNode{RootID}, last, nil, count) require.NoError(t, err) result = append(result, res...) return cursor @@ -407,7 +407,7 @@ func testForestTreeSortedByFilename(t *testing.T, s ForestStorage) { } getChildren := func(t *testing.T, id MultiNode) []MultiNodeInfo { - res, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, len(items)) + res, _, _, err := s.TreeSortedByFilename(context.Background(), d.CID, treeID, id, nil, nil, len(items)) require.NoError(t, err) return res } diff --git a/pkg/local_object_storage/pilorama/heap.go b/pkg/local_object_storage/pilorama/heap.go index 5a00bcf7a..eb29ea630 100644 --- a/pkg/local_object_storage/pilorama/heap.go +++ b/pkg/local_object_storage/pilorama/heap.go @@ -30,28 +30,47 @@ func (h *filenameHeap) Pop() any { // fixedHeap maintains a fixed number of smallest elements started at some point. type fixedHeap struct { - start *string - sorted bool - count int - h *filenameHeap + start *string + filenameVersion *Node + sorted bool + count int + h *filenameHeap } -func newHeap(start *string, count int) *fixedHeap { +func newHeap(start *string, filenameVersion *Node, count int) *fixedHeap { h := new(filenameHeap) heap.Init(h) return &fixedHeap{ - start: start, - count: count, - h: h, + start: start, + filenameVersion: filenameVersion, + count: count, + h: h, } } const amortizationMultiplier = 5 func (h *fixedHeap) push(id MultiNode, filename string) bool { - if h.start != nil && filename <= *h.start { - return false + slices.Sort(id) + + if h.start != nil { + if filename < *h.start { + return false + } else if filename == *h.start { + if h.filenameVersion == nil { + return false + } + + // A tree may have a lot of nodes with the same filename but different versions so that + // len(nodes) > batch_size. The cut nodes should be pushed into the result on repeated call + // with the same filename. + pos, found := slices.BinarySearch(id, *h.filenameVersion) + if !found { + return false + } + id = id[pos+1:] + } } *h.h = append(*h.h, heapInfo{id: id, filename: filename}) diff --git a/pkg/local_object_storage/pilorama/interface.go b/pkg/local_object_storage/pilorama/interface.go index 5a59eb847..f2fff5d84 100644 --- a/pkg/local_object_storage/pilorama/interface.go +++ b/pkg/local_object_storage/pilorama/interface.go @@ -37,7 +37,7 @@ type Forest interface { TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID string, nodeID Node) ([]NodeInfo, error) // TreeSortedByFilename returns children of the node with the specified ID. The nodes are sorted by the filename attribute.. // Should return ErrTreeNotFound if the tree is not found, and empty result if the node is not in the tree. - TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, count int) ([]MultiNodeInfo, *string, error) + TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID MultiNode, lastFilename *string, lastNode *Node, count int) ([]MultiNodeInfo, *string, *Node, error) // TreeGetOpLog returns first log operation stored at or above the height. // In case no such operation is found, empty Move and nil error should be returned. TreeGetOpLog(ctx context.Context, cid cidSDK.ID, treeID string, height uint64) (Move, error) diff --git a/pkg/local_object_storage/pilorama/split_test.go b/pkg/local_object_storage/pilorama/split_test.go index 54c2b90a6..97fd78f34 100644 --- a/pkg/local_object_storage/pilorama/split_test.go +++ b/pkg/local_object_storage/pilorama/split_test.go @@ -97,7 +97,7 @@ func testDuplicateDirectory(t *testing.T, f Forest) { require.Equal(t, []byte{10}, testGetByPath(t, "value0")) testSortedByFilename := func(t *testing.T, root MultiNode, last *string, batchSize int) ([]MultiNodeInfo, *string) { - res, last, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, batchSize) + res, last, _, err := f.TreeSortedByFilename(context.Background(), d.CID, treeID, root, last, nil, batchSize) require.NoError(t, err) return res, last } diff --git a/pkg/local_object_storage/shard/tree.go b/pkg/local_object_storage/shard/tree.go index 59448aa6a..c66817047 100644 --- a/pkg/local_object_storage/shard/tree.go +++ b/pkg/local_object_storage/shard/tree.go @@ -211,7 +211,7 @@ func (s *Shard) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID strin } // TreeSortedByFilename implements the pilorama.Forest interface. -func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, count int) ([]pilorama.MultiNodeInfo, *string, error) { +func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.MultiNode, lastFilename *string, lastNode *pilorama.Node, count int) ([]pilorama.MultiNodeInfo, *string, *pilorama.Node, error) { ctx, span := tracing.StartSpanFromContext(ctx, "Shard.TreeSortedByFilename", trace.WithAttributes( attribute.String("shard_id", s.ID().String()), @@ -222,16 +222,16 @@ func (s *Shard) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, treeID defer span.End() if s.pilorama == nil { - return nil, lastFilename, ErrPiloramaDisabled + return nil, lastFilename, lastNode, ErrPiloramaDisabled } s.m.RLock() defer s.m.RUnlock() if s.info.Mode.NoMetabase() { - return nil, lastFilename, ErrDegradedMode + return nil, lastFilename, lastNode, ErrDegradedMode } - return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, count) + return s.pilorama.TreeSortedByFilename(ctx, cid, treeID, nodeID, lastFilename, lastNode, count) } // TreeGetOpLog implements the pilorama.Forest interface. diff --git a/pkg/services/tree/service.go b/pkg/services/tree/service.go index 2df3c08e6..e856d7ca1 100644 --- a/pkg/services/tree/service.go +++ b/pkg/services/tree/service.go @@ -404,9 +404,10 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeS } type stackItem struct { - values []pilorama.MultiNodeInfo - parent pilorama.MultiNode - last *string + values []pilorama.MultiNodeInfo + parent pilorama.MultiNode + last *string + lastNode *pilorama.Node } func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error { @@ -457,14 +458,13 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid break } - nodes, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, batchSize) + var err error + item.values, item.last, item.lastNode, err = forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), item.parent, item.last, item.lastNode, batchSize) if err != nil { return err } - item.values = nodes - item.last = last - if len(nodes) == 0 { + if len(item.values) == 0 { stack = stack[:len(stack)-1] continue } @@ -476,15 +476,16 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid } if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() { - children, last, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, batchSize) + children, last, lastNode, err := forest.TreeSortedByFilename(ctx, cid, b.GetTreeId(), node.Children, nil, nil, batchSize) if err != nil { return err } if len(children) != 0 { stack = append(stack, stackItem{ - values: children, - parent: node.Children, - last: last, + values: children, + parent: node.Children, + last: last, + lastNode: lastNode, }) } }