From 806ea37101fc35cedca3e10681e43d7a91968d67 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 23 Aug 2024 10:58:33 +0300 Subject: [PATCH] [#1328] pilorama: Do not skip items in SortedByFilename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark results: ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ old │ new │ │ sec/op │ sec/op vs base │ ForestSortedIteration/bbolt,root-8 207.2µ ± 6% 173.6µ ± 6% -16.23% (p=0.000 n=10) ForestSortedIteration/bbolt,leaf-8 3.910µ ± 5% 3.928µ ± 7% ~ (p=0.529 n=10) geomean 28.46µ 26.11µ -8.27% ``` They are not representative, as the worst case is when we have multiple items of different lengths. However, `FileName` is usually less than 100 in practice, so the asymptotics is the same. Signed-off-by: Evgenii Stratonikov --- pkg/local_object_storage/pilorama/boltdb.go | 1 + .../pilorama/forest_test.go | 3 +- pkg/local_object_storage/pilorama/heap.go | 38 +++++++++++++++---- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pkg/local_object_storage/pilorama/boltdb.go b/pkg/local_object_storage/pilorama/boltdb.go index 48363ceac..29941be83 100644 --- a/pkg/local_object_storage/pilorama/boltdb.go +++ b/pkg/local_object_storage/pilorama/boltdb.go @@ -1161,6 +1161,7 @@ func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *f lastFilename = nil nodes = nil length = actualLength + 1 + count = 0 c.Seek(append(prefix, byte(length), byte(length>>8))) c.Prev() // c.Next() will be performed by for loop } diff --git a/pkg/local_object_storage/pilorama/forest_test.go b/pkg/local_object_storage/pilorama/forest_test.go index c6c6e8c8b..ecca9842f 100644 --- a/pkg/local_object_storage/pilorama/forest_test.go +++ b/pkg/local_object_storage/pilorama/forest_test.go @@ -237,9 +237,8 @@ func BenchmarkForestSortedIteration(b *testing.B) { // The issue which we call "BugWithSkip" is easiest to understand when filenames are // monotonically increasing numbers. We want the list of sorted filenames to have different length interleaved. // The bug happens when we switch between length during listing. -// Thus this test contains numbers from 1 to 1000 and batch size of size 100. +// Thus this test contains numbers from 1 to 2000 and batch size of size 10. func TestForest_TreeSortedIterationBugWithSkip(t *testing.T) { - t.Skip() for i := range providers { t.Run(providers[i].name, func(t *testing.T) { testForestTreeSortedIterationBugWithSkip(t, providers[i].construct(t)) diff --git a/pkg/local_object_storage/pilorama/heap.go b/pkg/local_object_storage/pilorama/heap.go index ec57b9e1f..5a00bcf7a 100644 --- a/pkg/local_object_storage/pilorama/heap.go +++ b/pkg/local_object_storage/pilorama/heap.go @@ -2,6 +2,8 @@ package pilorama import ( "container/heap" + "slices" + "strings" ) type heapInfo struct { @@ -28,9 +30,10 @@ func (h *filenameHeap) Pop() any { // fixedHeap maintains a fixed number of smallest elements started at some point. type fixedHeap struct { - start *string - count int - h *filenameHeap + start *string + sorted bool + count int + h *filenameHeap } func newHeap(start *string, count int) *fixedHeap { @@ -44,20 +47,39 @@ func newHeap(start *string, count int) *fixedHeap { } } +const amortizationMultiplier = 5 + func (h *fixedHeap) push(id MultiNode, filename string) bool { if h.start != nil && filename <= *h.start { return false } - heap.Push(h.h, heapInfo{id: id, filename: filename}) - if h.h.Len() > h.count { - heap.Remove(h.h, h.h.Len()-1) + + *h.h = append(*h.h, heapInfo{id: id, filename: filename}) + h.sorted = false + + if h.h.Len() > h.count*amortizationMultiplier { + slices.SortFunc(*h.h, func(a, b heapInfo) int { + return strings.Compare(a.filename, b.filename) + }) + *h.h = (*h.h)[:h.count] } return true } func (h *fixedHeap) pop() (heapInfo, bool) { - if h.h.Len() != 0 { - return heap.Pop(h.h).(heapInfo), true + if !h.sorted { + slices.SortFunc(*h.h, func(a, b heapInfo) int { + return strings.Compare(a.filename, b.filename) + }) + if len(*h.h) > h.count { + *h.h = (*h.h)[:h.count] + } + h.sorted = true + } + if len(*h.h) != 0 { + info := (*h.h)[0] + *h.h = (*h.h)[1:] + return info, true } return heapInfo{}, false }