frostfs-node/pkg/local_object_storage/pilorama/heap.go
Evgenii Stratonikov 01e18eda43
All checks were successful
DCO action / DCO (pull_request) Successful in 1m36s
Vulncheck / Vulncheck (pull_request) Successful in 2m22s
Build / Build Components (1.21) (pull_request) Successful in 2m50s
Pre-commit hooks / Pre-commit (pull_request) Successful in 3m8s
Build / Build Components (1.22) (pull_request) Successful in 3m14s
Tests and linters / Tests (1.22) (pull_request) Successful in 3m27s
Tests and linters / Tests (1.21) (pull_request) Successful in 3m28s
Tests and linters / Staticcheck (pull_request) Successful in 3m23s
Tests and linters / Tests with -race (pull_request) Successful in 3m34s
Tests and linters / Lint (pull_request) Successful in 3m47s
Tests and linters / gopls check (pull_request) Successful in 4m5s
[#1328] pilorama: Do not skip items in SortedByFilename
Benchmark results:
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                   │     old     │                 new                 │
                                   │   sec/op    │   sec/op     vs base                │
ForestSortedIteration/bbolt,root-8   207.2µ ± 6%   173.6µ ± 6%  -16.23% (p=0.000 n=10)
ForestSortedIteration/bbolt,leaf-8   3.910µ ± 5%   3.928µ ± 7%        ~ (p=0.529 n=10)
geomean                              28.46µ        26.11µ        -8.27%
```

They are not representative, as the worst case is when we have multiple
items of different lengths. However, `FileName` is usually less than 100
in practice, so the asymptotics is the same.

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2024-08-23 13:41:01 +03:00

85 lines
1.6 KiB
Go

package pilorama
import (
"container/heap"
"slices"
"strings"
)
type heapInfo struct {
id MultiNode
filename string
}
type filenameHeap []heapInfo
func (h filenameHeap) Len() int { return len(h) }
func (h filenameHeap) Less(i, j int) bool { return h[i].filename < h[j].filename }
func (h filenameHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *filenameHeap) Push(x any) {
*h = append(*h, x.(heapInfo))
}
func (h *filenameHeap) Pop() any {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
// fixedHeap maintains a fixed number of smallest elements started at some point.
type fixedHeap struct {
start *string
sorted bool
count int
h *filenameHeap
}
func newHeap(start *string, count int) *fixedHeap {
h := new(filenameHeap)
heap.Init(h)
return &fixedHeap{
start: start,
count: count,
h: h,
}
}
const amortizationMultiplier = 5
func (h *fixedHeap) push(id MultiNode, filename string) bool {
if h.start != nil && filename <= *h.start {
return false
}
*h.h = append(*h.h, heapInfo{id: id, filename: filename})
h.sorted = false
if h.h.Len() > h.count*amortizationMultiplier {
slices.SortFunc(*h.h, func(a, b heapInfo) int {
return strings.Compare(a.filename, b.filename)
})
*h.h = (*h.h)[:h.count]
}
return true
}
func (h *fixedHeap) pop() (heapInfo, bool) {
if !h.sorted {
slices.SortFunc(*h.h, func(a, b heapInfo) int {
return strings.Compare(a.filename, b.filename)
})
if len(*h.h) > h.count {
*h.h = (*h.h)[:h.count]
}
h.sorted = true
}
if len(*h.h) != 0 {
info := (*h.h)[0]
*h.h = (*h.h)[1:]
return info, true
}
return heapInfo{}, false
}