diff --git a/pkg/local_object_storage/pilorama/boltdb.go b/pkg/local_object_storage/pilorama/boltdb.go index 48363ceac..29941be83 100644 --- a/pkg/local_object_storage/pilorama/boltdb.go +++ b/pkg/local_object_storage/pilorama/boltdb.go @@ -1161,6 +1161,7 @@ func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *f lastFilename = nil nodes = nil length = actualLength + 1 + count = 0 c.Seek(append(prefix, byte(length), byte(length>>8))) c.Prev() // c.Next() will be performed by for loop } diff --git a/pkg/local_object_storage/pilorama/forest_test.go b/pkg/local_object_storage/pilorama/forest_test.go index c6c6e8c8b..ecca9842f 100644 --- a/pkg/local_object_storage/pilorama/forest_test.go +++ b/pkg/local_object_storage/pilorama/forest_test.go @@ -237,9 +237,8 @@ func BenchmarkForestSortedIteration(b *testing.B) { // The issue which we call "BugWithSkip" is easiest to understand when filenames are // monotonically increasing numbers. We want the list of sorted filenames to have different length interleaved. // The bug happens when we switch between length during listing. -// Thus this test contains numbers from 1 to 1000 and batch size of size 100. +// Thus this test contains numbers from 1 to 2000 and batch size of size 10. func TestForest_TreeSortedIterationBugWithSkip(t *testing.T) { - t.Skip() for i := range providers { t.Run(providers[i].name, func(t *testing.T) { testForestTreeSortedIterationBugWithSkip(t, providers[i].construct(t)) diff --git a/pkg/local_object_storage/pilorama/heap.go b/pkg/local_object_storage/pilorama/heap.go index ec57b9e1f..5a00bcf7a 100644 --- a/pkg/local_object_storage/pilorama/heap.go +++ b/pkg/local_object_storage/pilorama/heap.go @@ -2,6 +2,8 @@ package pilorama import ( "container/heap" + "slices" + "strings" ) type heapInfo struct { @@ -28,9 +30,10 @@ func (h *filenameHeap) Pop() any { // fixedHeap maintains a fixed number of smallest elements started at some point. type fixedHeap struct { - start *string - count int - h *filenameHeap + start *string + sorted bool + count int + h *filenameHeap } func newHeap(start *string, count int) *fixedHeap { @@ -44,20 +47,39 @@ func newHeap(start *string, count int) *fixedHeap { } } +const amortizationMultiplier = 5 + func (h *fixedHeap) push(id MultiNode, filename string) bool { if h.start != nil && filename <= *h.start { return false } - heap.Push(h.h, heapInfo{id: id, filename: filename}) - if h.h.Len() > h.count { - heap.Remove(h.h, h.h.Len()-1) + + *h.h = append(*h.h, heapInfo{id: id, filename: filename}) + h.sorted = false + + if h.h.Len() > h.count*amortizationMultiplier { + slices.SortFunc(*h.h, func(a, b heapInfo) int { + return strings.Compare(a.filename, b.filename) + }) + *h.h = (*h.h)[:h.count] } return true } func (h *fixedHeap) pop() (heapInfo, bool) { - if h.h.Len() != 0 { - return heap.Pop(h.h).(heapInfo), true + if !h.sorted { + slices.SortFunc(*h.h, func(a, b heapInfo) int { + return strings.Compare(a.filename, b.filename) + }) + if len(*h.h) > h.count { + *h.h = (*h.h)[:h.count] + } + h.sorted = true + } + if len(*h.h) != 0 { + info := (*h.h)[0] + *h.h = (*h.h)[1:] + return info, true } return heapInfo{}, false }