diff --git a/pkg/local_object_storage/pilorama/boltdb.go b/pkg/local_object_storage/pilorama/boltdb.go
index 901bfd58e..044c3fc6d 100644
--- a/pkg/local_object_storage/pilorama/boltdb.go
+++ b/pkg/local_object_storage/pilorama/boltdb.go
@@ -1128,7 +1128,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
 		}
 
 		t.fillSortedChildren(b, nodeIDs, h)
-
+		h.mergeByFilename()
 		for info, ok := h.pop(); ok; info, ok = h.pop() {
 			for _, id := range info.id {
 				childInfo, err := t.getChildInfo(b, key, id)
@@ -1156,6 +1156,9 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
 	if len(res) != 0 {
 		s := string(findAttr(res[len(res)-1].Meta, AttributeFilename))
 		cursor = &Cursor{Filename: s}
+		if children := res[len(res)-1].Children; len(children) != 0 {
+			cursor.Node = &children[len(children)-1]
+		}
 	}
 	return res, cursor, metaerr.Wrap(err)
 }
diff --git a/pkg/local_object_storage/pilorama/heap.go b/pkg/local_object_storage/pilorama/heap.go
index c59a02b09..3ba5265b7 100644
--- a/pkg/local_object_storage/pilorama/heap.go
+++ b/pkg/local_object_storage/pilorama/heap.go
@@ -53,6 +53,29 @@ func (h *fixedHeap) push(id MultiNode, filename string) bool {
-	if h.start != nil && filename <= (*h.start).Filename {
-		return false
-	}
+	// The cursor lookup below uses binary search, so the IDs must be ordered.
+	slices.Sort(id)
+
+	if h.start != nil {
+		switch {
+		case filename < h.start.Filename:
+			return false
+		case filename == h.start.Filename:
+			if h.start.Node == nil {
+				return false
+			}
+			// A tree may have a lot of nodes with the same filename but different versions, so that
+			// len(nodes) > batch_size. The nodes cut off from the previous batch must be returned by
+			// the repeated call with the same filename, so keep only the IDs after the cursor node.
+			// The cursor node itself may be absent from this push: IDs of one filename can be split
+			// across several pushes (see mergeByFilename).
+			pos, found := slices.BinarySearch(id, *h.start.Node)
+			if found {
+				pos++
+			}
+			if pos >= len(id) {
+				return false
+			}
+			id = id[pos:]
+		}
+	}
 	*h.h = append(*h.h, heapInfo{id: id, filename: filename})
 	h.sorted = false
 
@@ -66,6 +89,38 @@ func (h *fixedHeap) push(id MultiNode, filename string) bool {
 	return true
 }
 
+// mergeByFilename merges heap entries with identical filenames by combining their IDs.
+// Identical filenames may appear in the heap after several pushes, for example those made by
+// the `fillSortedChildren` method:
+//
+//	heap entry 1: {filename: "somefilenameB", id: [33, 45]}
+//	heap entry 2: {filename: "somefilenameA", id: [13, 15, 40]}
+//	heap entry 3: {filename: "somefilenameB", id: [1, 14, 29]}
+//
+// After the merge the heap holds one entry per filename with its IDs sorted:
+//
+//	{filename: "somefilenameB", id: [1, 14, 29, 33, 45]}
+//	{filename: "somefilenameA", id: [13, 15, 40]}
+//
+// The order of the merged entries is unspecified, so the heap is marked as unsorted
+// and gets re-sorted by the next pop.
+func (h *fixedHeap) mergeByFilename() {
+	merged := make(map[string]MultiNode, len(*h.h))
+	for _, info := range *h.h {
+		merged[info.filename] = append(merged[info.filename], info.id...)
+	}
+
+	newHeap := make([]heapInfo, 0, len(merged))
+	for filename, ids := range merged {
+		slices.Sort(ids)
+		newHeap = append(newHeap, heapInfo{filename: filename, id: ids})
+	}
+
+	*h.h = newHeap
+	// Map iteration order is random, so any previously established order is lost.
+	h.sorted = false
+}
+
 func (h *fixedHeap) pop() (heapInfo, bool) {
 	if !h.sorted {
 		slices.SortFunc(*h.h, func(a, b heapInfo) int {
diff --git a/pkg/local_object_storage/pilorama/interface.go b/pkg/local_object_storage/pilorama/interface.go
index b7694ab7c..641ab6ef3 100644
--- a/pkg/local_object_storage/pilorama/interface.go
+++ b/pkg/local_object_storage/pilorama/interface.go
@@ -85,6 +85,10 @@ const (
 type Cursor struct {
 	// Last traversed filename.
 	Filename string
+
+	// Last traversed node with the filename above. It allows a repeated call to continue
+	// inside a group of nodes sharing this filename. If nil, the whole group is skipped.
+	Node *Node
 }
 
 // CIDDescriptor contains container ID and information about the node position
diff --git a/pkg/services/tree/service.go b/pkg/services/tree/service.go
index ef0a38bc1..19c577f8a 100644
--- a/pkg/services/tree/service.go
+++ b/pkg/services/tree/service.go
@@ -467,7 +467,7 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
 			item.values = nodes
 			item.cursor = cursor
 
-			if len(nodes) == 0 {
+			if len(item.values) == 0 {
 				stack = stack[:len(stack)-1]
 				continue
 			}