[#1642] tree: Fix sorted getSubtree for multiversion filenames

Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
This commit is contained in:
Airat Arifullin 2025-02-20 12:22:12 +03:00
parent 669571f367
commit 41a913eca7
4 changed files with 57 additions and 3 deletions

View file

@ -1128,7 +1128,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
}
t.fillSortedChildren(b, nodeIDs, h)
h.mergeByFilename()
for info, ok := h.pop(); ok; info, ok = h.pop() {
for _, id := range info.id {
childInfo, err := t.getChildInfo(b, key, id)
@ -1155,7 +1155,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
}
if len(res) != 0 {
s := string(findAttr(res[len(res)-1].Meta, AttributeFilename))
cursor = &Cursor{Filename: s}
cursor = &Cursor{Filename: s, Node: &res[len(res)-1].Children[len(res[len(res)-1].Children)-1]}
}
return res, cursor, metaerr.Wrap(err)
}

View file

@ -53,6 +53,25 @@ func (h *fixedHeap) push(id MultiNode, filename string) bool {
if h.start != nil && filename <= (*h.start).Filename {
return false
}
slices.Sort(id)
if h.start != nil {
if filename < (*h.start).Filename {
return false
} else if filename == (*h.start).Filename {
if (*h.start).Node == nil {
return false
}
// A tree may hold many nodes that share a filename but differ in version, so that
// len(nodes) > batch_size. The nodes cut off from one batch must be pushed into the
// result on a repeated call with the same filename.
pos, found := slices.BinarySearch(id, *(*h.start).Node)
if !found || pos+1 >= len(id) {
return false
}
id = id[pos+1:]
}
}
*h.h = append(*h.h, heapInfo{id: id, filename: filename})
h.sorted = false
@ -66,6 +85,38 @@ func (h *fixedHeap) push(id MultiNode, filename string) bool {
return true
}
// mergeByFilename merges heap entries with identical filenames by combining their IDs.
// Identical filenames may appear in the heap after several pushes, for example
// those made by the `fillSortedChildren` method:
//
//	heap entry 1: {filename: "somefilenameB", id: [33, 45]}
//	heap entry 2: {filename: "somefilenameA", id: [13, 15, 40]}
//	heap entry 3: {filename: "somefilenameB", id: [1, 14, 29]}
//
// After merge we get:
//
//	heap entry 1: {filename: "somefilenameB", id: [1, 14, 29, 33, 45]}
//	heap entry 2: {filename: "somefilenameA", id: [13, 15, 40]}.
//
// A merged entry keeps the position of the first entry that carried its
// filename, so the result does not depend on map iteration order and a heap
// that was already ordered by filename stays ordered. The IDs of every
// resulting entry are sorted in ascending order and live in a freshly
// allocated slice, so the slices held by the original entries are not modified.
func (h *fixedHeap) mergeByFilename() {
	// position maps a filename to the index of its entry in merged.
	position := make(map[string]int, len(*h.h))
	merged := make([]heapInfo, 0, len(*h.h))
	for _, entry := range *h.h {
		idx, seen := position[entry.filename]
		if !seen {
			position[entry.filename] = len(merged)
			merged = append(merged, heapInfo{
				filename: entry.filename,
				// Copy the IDs: appending to the original slice could write
				// into a backing array shared with the caller.
				id: append(MultiNode(nil), entry.id...),
			})
			continue
		}
		merged[idx].id = append(merged[idx].id, entry.id...)
	}
	for i := range merged {
		slices.Sort(merged[i].id)
	}
	*h.h = merged
}
func (h *fixedHeap) pop() (heapInfo, bool) {
if !h.sorted {
slices.SortFunc(*h.h, func(a, b heapInfo) int {

View file

@ -85,6 +85,9 @@ const (
type Cursor struct {
// Filename is the filename of the last traversed entry; a sorted listing
// resumes from this position.
Filename string
// Node is the last traversed node among the nodes that share Filename.
// It may be nil.
// NOTE(review): fixedHeap.push appears to skip every entry whose filename
// equals Filename when Node is nil, and otherwise to keep only the IDs that
// sort after Node — confirm against the full push implementation.
Node *Node
}
// CIDDescriptor contains container ID and information about the node position

View file

@ -467,7 +467,7 @@ func getSortedSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid
item.values = nodes
item.cursor = cursor
if len(nodes) == 0 {
if len(item.values) == 0 {
stack = stack[:len(stack)-1]
continue
}