[#1328] pilorama: Do not skip items in SortedByFilename

Benchmark results: ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ old │ new │ │ sec/op │ sec/op vs base │ ForestSortedIteration/bbolt,root-8 207.2µ ± 6% 173.6µ ± 6% -16.23% (p=0.000 n=10) ForestSortedIteration/bbolt,leaf-8 3.910µ ± 5% 3.928µ ± 7% ~ (p=0.529 n=10) geomean 28.46µ 26.11µ -8.27% ``` They are not representative, as the worst case is when we have multiple items of different lengths. However, `FileName` is usually less than 100 in practice, so the asymptotics is the same. Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2024-08-23 10:58:33 +03:00 · 2024-08-23 10:58:33 +03:00 · 806ea37101
commit 806ea37101
parent 80099d9a2f
3 changed files with 32 additions and 10 deletions
--- a/pkg/local_object_storage/pilorama/boltdb.go
+++ b/pkg/local_object_storage/pilorama/boltdb.go
@ -1161,6 +1161,7 @@ func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *f
 					lastFilename = nil
 					nodes = nil
 					length = actualLength + 1
+					count = 0
 					c.Seek(append(prefix, byte(length), byte(length>>8)))
 					c.Prev() // c.Next() will be performed by for loop
 				}
--- a/pkg/local_object_storage/pilorama/forest_test.go
+++ b/pkg/local_object_storage/pilorama/forest_test.go
@ -237,9 +237,8 @@ func BenchmarkForestSortedIteration(b *testing.B) {
 // The issue which we call "BugWithSkip" is easiest to understand when filenames are
 // monotonically increasing numbers. We want the list of sorted filenames to have different length interleaved.
 // The bug happens when we switch between length during listing.
-// Thus this test contains numbers from 1 to 1000 and batch size of size 100.
+// Thus this test contains numbers from 1 to 2000 and batch size of size 10.
 func TestForest_TreeSortedIterationBugWithSkip(t *testing.T) {
-	t.Skip()
 	for i := range providers {
 		t.Run(providers[i].name, func(t *testing.T) {
 			testForestTreeSortedIterationBugWithSkip(t, providers[i].construct(t))
--- a/pkg/local_object_storage/pilorama/heap.go
+++ b/pkg/local_object_storage/pilorama/heap.go
@ -2,6 +2,8 @@ package pilorama

 import (
 	"container/heap"
+	"slices"
+	"strings"
 )

 type heapInfo struct {
@ -29,6 +31,7 @@ func (h *filenameHeap) Pop() any {
 // fixedHeap maintains a fixed number of smallest elements started at some point.
 type fixedHeap struct {
 	start  *string
+	sorted bool
 	count  int
 	h      *filenameHeap
 }
@ -44,20 +47,39 @@ func newHeap(start *string, count int) *fixedHeap {
 	}
 }

+const amortizationMultiplier = 5
+
 func (h *fixedHeap) push(id MultiNode, filename string) bool {
 	if h.start != nil && filename <= *h.start {
 		return false
 	}
-	heap.Push(h.h, heapInfo{id: id, filename: filename})
-	if h.h.Len() > h.count {
-		heap.Remove(h.h, h.h.Len()-1)
+
+	*h.h = append(*h.h, heapInfo{id: id, filename: filename})
+	h.sorted = false
+
+	if h.h.Len() > h.count*amortizationMultiplier {
+		slices.SortFunc(*h.h, func(a, b heapInfo) int {
+			return strings.Compare(a.filename, b.filename)
+		})
+		*h.h = (*h.h)[:h.count]
 	}
 	return true
 }

 func (h *fixedHeap) pop() (heapInfo, bool) {
-	if h.h.Len() != 0 {
-		return heap.Pop(h.h).(heapInfo), true
+	if !h.sorted {
+		slices.SortFunc(*h.h, func(a, b heapInfo) int {
+			return strings.Compare(a.filename, b.filename)
+		})
+		if len(*h.h) > h.count {
+			*h.h = (*h.h)[:h.count]
+		}
+		h.sorted = true
+	}
+	if len(*h.h) != 0 {
+		info := (*h.h)[0]
+		*h.h = (*h.h)[1:]
+		return info, true
 	}
 	return heapInfo{}, false
 }