[#957] treesvc: Implement pairing heap

With a large number of objects in a flat tree, sorting the nodes
dominates the latency of the operation. This scales nonlinearly
as O(n log n). A pairing heap has O(1) insert and amortized O(log n)
extractMin, which allows us to have O(n + log n) = O(n) latency on the
first operation, albeit with a slight increase in total running time.
On a real cluster with 2M objects, the latency decreased from 25s to
15s.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                        │     cache     │               noparent               │
                        │    sec/op     │    sec/op     vs base                │
GetSubTree/latency-8      2349.9µ ± 19%   572.1µ ± 17%  -75.65% (p=0.000 n=10)
GetSubTree/total_time-8    70.62m ±  8%   86.02m ±  3%  +21.81% (p=0.000 n=10)
geomean                    12.88m         7.015m        -45.54%

                        │    cache     │               noparent               │
                        │     B/op     │     B/op      vs base                │
GetSubTree/latency-8      43.87Mi ± 0%   32.81Mi ± 0%  -25.22% (p=0.000 n=10)
GetSubTree/total_time-8   43.87Mi ± 0%   32.81Mi ± 0%  -25.22% (p=0.000 n=10)
geomean                   43.87Mi        32.81Mi       -25.22%

                        │    cache    │              noparent               │
                        │  allocs/op  │  allocs/op   vs base                │
GetSubTree/latency-8      400.0k ± 0%   500.0k ± 0%  +24.99% (p=0.000 n=10)
GetSubTree/total_time-8   400.0k ± 0%   500.0k ± 0%  +24.99% (p=0.000 n=10)
geomean                   400.0k        500.0k       +24.99%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
Evgenii Stratonikov 2024-02-02 23:42:23 +03:00
parent 77bdadde14
commit d18eaf060d
2 changed files with 69 additions and 1 deletions

View file

@ -0,0 +1,68 @@
package heap
import (
"bytes"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)
// MinPairingHeap is a min-heap of pilorama.NodeInfo values implemented as a
// pairing heap. Elements are ordered by the key stored in each phNode.
type MinPairingHeap struct {
// head is the root of the heap tree; it is nil when the heap is empty.
head *phNode
// size is incremented by Insert and decremented by ExtractMin.
size int
}
// phNode is a single node of the pairing heap tree.
type phNode struct {
// val points to the element stored in this node.
val *pilorama.NodeInfo
// key is the ordering key; meld compares keys with bytes.Compare.
key []byte
// child is the first node in this node's list of children.
child *phNode
// sibling is the next node in the list of children this node belongs to.
sibling *phNode
}
// NewPairing returns a new, empty MinPairingHeap.
func NewPairing() *MinPairingHeap {
	return new(MinPairingHeap)
}
// Insert adds every element of infos to the heap, keyed by the value of its
// pilorama.AttributeFilename meta attribute.
//
// Each heap node stores a pointer to the corresponding element of infos rather
// than a copy, so the heap aliases the slice the caller passed in.
func (m *MinPairingHeap) Insert(infos ...pilorama.NodeInfo) {
	for idx := range infos {
		info := &infos[idx]
		node := &phNode{
			val: info,
			key: info.Meta.GetAttr(pilorama.AttributeFilename),
		}
		// Melding a single-node heap with the current root is the O(1) insert.
		m.head = meld(node, m.head)
		m.size++
	}
}
// IsEmpty reports whether the heap contains no elements.
func (m *MinPairingHeap) IsEmpty() bool {
return m.head == nil
}
// ExtractMin removes the element with the smallest key from the heap and
// returns a copy of it.
//
// The heap must not be empty: calling ExtractMin on an empty heap panics with
// a nil pointer dereference. Check IsEmpty first.
func (m *MinPairingHeap) ExtractMin() pilorama.NodeInfo {
	root := m.head
	// The children of the removed root are merged into the new heap tree.
	m.head = mergePairs(root.child)
	m.size--
	return *root.val
}
// meld links two heap trees into one and returns the resulting root.
//
// If either argument is nil the other one is returned unchanged. Otherwise the
// root with the strictly smaller key (per bytes.Compare) becomes the parent;
// when the keys are equal, m2 becomes the parent. The losing root is prepended
// to the winner's list of children.
func meld(m1, m2 *phNode) *phNode {
	switch {
	case m1 == nil:
		return m2
	case m2 == nil:
		return m1
	}

	winner, loser := m2, m1
	if bytes.Compare(m1.key, m2.key) < 0 {
		winner, loser = m1, m2
	}

	// Push loser to the front of winner's child list.
	loser.sibling = winner.child
	winner.child = loser
	return winner
}
// mergePairs combines the sibling list starting at n into a single heap tree
// and returns its root (nil for an empty list). It is the two-pass step of a
// pairing heap: adjacent siblings are melded pairwise from left to right, then
// the resulting trees are melded together from right to left.
//
// The implementation is iterative. A root can have a number of children that
// is linear in the heap size (e.g. right after many inserts), and a recursive
// formulation would need a call stack proportional to that number.
func mergePairs(n *phNode) *phNode {
	// Pass 1: meld pairs left to right. The melded trees are pushed onto a
	// stack threaded through their sibling pointers, so no extra memory is
	// allocated and the second pass sees them in right-to-left order.
	var stack *phNode
	for n != nil {
		first, second := n, n.sibling
		if second == nil {
			// Odd node out: it forms a tree on its own.
			first.sibling = stack
			stack = first
			break
		}
		// meld overwrites sibling links, so remember where to continue.
		n = second.sibling

		pair := meld(first, second)
		pair.sibling = stack
		stack = pair
	}

	// Pass 2: fold the stack into one tree, rightmost pair first.
	var root *phNode
	for stack != nil {
		next := stack.sibling
		// Drop the stack link so the final root does not keep a stale sibling.
		stack.sibling = nil
		root = meld(stack, root)
		stack = next
	}
	return root
}

View file

@ -448,7 +448,7 @@ type Heap interface {
// makeHeap returns the Heap implementation for the requested ordering: an
// ordered heap when ordered is true, heap.NewUnorderedSlice otherwise.
func makeHeap(ordered bool) Heap {
if ordered {
// NOTE(review): this hunk is rendered without +/- markers; the two returns
// below appear to be the removed (NewOrderedSlice) and the added (NewPairing)
// line of the diff, so only NewPairing should remain after the change.
return heap.NewOrderedSlice()
return heap.NewPairing()
}
return heap.NewUnorderedSlice()
}