tree: Use pairing heap for listing

When we have N items, sorting then iterating provides `O(n log n)` latency
in the worst case scenario (flat bucket), because we must return items
from a level in the sorted order. Some heap implementations allow O(1)
insertion and O(log n) dequeue, which means that we can decrease the
latency of the first received item to O(log n), albeit with a
slight increase in the total time.
Pairing heap was chosen as one of the simplest implementations.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                        │     old      │                 new                  │
                        │    sec/op    │    sec/op     vs base                │
GetSubTree/latency-8      5.034m ± 23%   1.110m ± 22%  -77.95% (p=0.000 n=10)
GetSubTree/total_time-8   81.03m ±  1%   95.02m ± 14%  +17.26% (p=0.000 n=10)
geomean                   20.20m         10.27m        -49.15%

                        │     old      │                 new                  │
                        │     B/op     │     B/op      vs base                │
GetSubTree/latency-8      32.14Mi ± 0%   37.49Mi ± 0%  +16.63% (p=0.000 n=10)
GetSubTree/total_time-8   32.14Mi ± 0%   37.49Mi ± 0%  +16.63% (p=0.000 n=10)
geomean                   32.14Mi        37.49Mi       +16.63%

                        │     old     │                new                 │
                        │  allocs/op  │  allocs/op   vs base               │
GetSubTree/latency-8      400.0k ± 0%   400.0k ± 0%  +0.00% (p=0.000 n=10)
GetSubTree/total_time-8   400.0k ± 0%   400.0k ± 0%  +0.00% (p=0.000 n=10)
geomean                   400.0k        400.0k       +0.00%
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
Evgenii Stratonikov 2024-01-30 11:18:30 +03:00
parent 2bfaa65455
commit c0135f8a65
2 changed files with 160 additions and 38 deletions

View file

@ -0,0 +1,82 @@
package tree
import (
"bytes"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)
// MinPairingHeap is a min-ordered pairing heap of tree nodes keyed by
// byte-slice keys. The zero value is an empty heap.
type MinPairingHeap struct {
	// head is the root of the heap, i.e. the node with the smallest key,
	// or nil when the heap is empty.
	head *node

	// contains is not initialised by makePairing and is not used by any
	// heap operation in this file.
	contains map[any]*node

	// size is the number of nodes currently stored in the heap.
	size int
}
// node is a single element of MinPairingHeap.
type node struct {
	// val is the payload returned to the caller when the node is extracted.
	val *pilorama.NodeInfo
	// key is the ordering key; nodes are compared by it bytewise.
	key []byte

	// child is the first (most recently attached) child of the node.
	child *node
	// sibling is the next node in the parent's child list.
	sibling *node
	// parent is the node this one was last attached to.
	parent *node
}
// makePairing returns a new empty heap.
// The contains map is left nil: its initialisation is disabled.
func makePairing() *MinPairingHeap {
	return new(MinPairingHeap)
}
// peekMin returns the value and the key of the minimal element without
// removing it. Both results are nil when the heap is empty.
func (m *MinPairingHeap) peekMin() (*pilorama.NodeInfo, []byte) {
	if root := m.head; root != nil {
		return root.val, root.key
	}
	return nil, nil
}
// mergeNodes links two heap roots together and returns the resulting root.
//
// If either argument is nil, the other one is returned unchanged.
// Otherwise the root with the strictly smaller key (bytewise comparison)
// becomes the parent, and the other root is pushed to the front of its
// child list. When the keys are equal, m2 becomes the parent.
//
// Keys may be empty or nil: an empty key sorts before any non-empty key.
func mergeNodes(m1, m2 *node) *node {
	if m1 == nil {
		return m2
	}
	if m2 == nil {
		return m1
	}

	// bytes.Compare is used on the whole key instead of peeking at key[0]
	// first: indexing the first byte panics for nodes with an empty key.
	if bytes.Compare(m1.key, m2.key) < 0 {
		m2.sibling = m1.child
		m2.parent = m1
		m1.child = m2
		return m1
	}

	m1.sibling = m2.child
	m1.parent = m2
	m2.child = m1
	return m2
}
// insert adds tmp to the heap by merging it with the current root
// and increments the element counter.
func (m *MinPairingHeap) insert(tmp *node) {
	m.size++
	m.head = mergeNodes(tmp, m.head)
}
// extractMin removes the minimal element from the heap and returns its
// value and key. Both results are nil when the heap is empty.
func (m *MinPairingHeap) extractMin() (*pilorama.NodeInfo, []byte) {
	root := m.head
	if root == nil {
		return nil, nil
	}

	// The children of the removed root are combined into the new heap.
	next := mergePairs(root.child)
	if next != nil {
		next.parent = nil
	}
	m.head = next
	m.size--

	return root.val, root.key
}
// mergePairs combines a sibling list starting at n into a single heap and
// returns its root. The first two siblings are merged with each other, the
// remainder of the list is processed recursively, and the two results are
// merged together. A nil or single-element list is returned as is.
func mergePairs(n *node) *node {
	if n == nil || n.sibling == nil {
		return n
	}

	second := n.sibling
	// Remember the tail before merging: mergeNodes rewrites sibling links.
	rest := second.sibling

	pair := mergeNodes(n, second)
	return mergeNodes(pair, mergePairs(rest))
}

View file

@ -5,7 +5,6 @@ import (
"context"
"errors"
"fmt"
"sort"
"sync"
"sync/atomic"
@ -445,29 +444,47 @@ func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSD
if err != nil {
return err
}
stack := [][]pilorama.NodeInfo{{{
stack := []*MinPairingHeap{makePairing()}
stack[0].insert(&node{val: &pilorama.NodeInfo{
ID: b.GetRootId(),
Meta: m,
ParentID: p,
}}}
}, key: m.GetAttr(pilorama.AttributeFilename)})
//stack := []*ThinHeap{makeHeap()}
// stack[0].insert(&HeapNode{
// key: m.GetAttr(pilorama.AttributeFilename),
// v: &pilorama.NodeInfo{
// ID: b.GetRootId(),
// Meta: m,
// ParentID: p,
// },
// })
// stack := [][]pilorama.NodeInfo{{{
// ID: b.GetRootId(),
// Meta: m,
// ParentID: p,
// }}}
for {
if len(stack) == 0 {
break
} else if len(stack[len(stack)-1]) == 0 {
//} else if len(stack[len(stack)-1]) == 0 {
} else if stack[len(stack)-1].head == nil {
stack = stack[:len(stack)-1]
continue
}
node := stack[len(stack)-1][0]
stack[len(stack)-1] = stack[len(stack)-1][1:]
// minNode := stack[len(stack)-1][0]
// stack[len(stack)-1] = stack[len(stack)-1][1:]
minNode, _ := stack[len(stack)-1].extractMin()
meta := metaToProto(minNode.Meta.Items)
err = srv.Send(&GetSubTreeResponse{
Body: &GetSubTreeResponse_Body{
NodeId: node.ID,
ParentId: node.ParentID,
Timestamp: node.Meta.Time,
Meta: metaToProto(node.Meta.Items),
NodeId: minNode.ID,
ParentId: minNode.ParentID,
Timestamp: minNode.Meta.Time,
Meta: meta,
},
})
if err != nil {
@ -475,39 +492,62 @@ func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSD
}
if b.GetDepth() == 0 || uint32(len(stack)) < b.GetDepth() {
children, err := forest.TreeGetChildren(ctx, cid, b.GetTreeId(), node.ID)
if err != nil {
return err
}
children, err = sortByFilename(children, b.GetOrderBy().GetDirection())
children, err := forest.TreeGetChildren(ctx, cid, b.GetTreeId(), minNode.ID)
if err != nil {
return err
}
if len(children) != 0 {
stack = append(stack, children)
cc := makePairing()
nn := make([]node, len(children))
for i := range children {
nn[i].val = &children[i]
nn[i].key = children[i].Meta.GetAttr(pilorama.AttributeFilename)
cc.insert(&nn[i])
}
stack = append(stack, cc)
}
}
}
return nil
}
// sortByFilename orders nodes according to the requested direction.
//
// With GetSubTreeRequest_Body_Order_None the slice is returned untouched.
// With GetSubTreeRequest_Body_Order_Asc the slice is sorted in place by the
// bytewise value of the filename attribute and then returned.
// Any other direction results in an error.
func sortByFilename(nodes []pilorama.NodeInfo, d GetSubTreeRequest_Body_Order_Direction) ([]pilorama.NodeInfo, error) {
	if d == GetSubTreeRequest_Body_Order_None {
		return nodes, nil
	}
	if d != GetSubTreeRequest_Body_Order_Asc {
		return nil, fmt.Errorf("unsupported order direction: %s", d.String())
	}

	if len(nodes) > 0 {
		sort.Slice(nodes, func(a, b int) bool {
			left := nodes[a].Meta.GetAttr(pilorama.AttributeFilename)
			right := nodes[b].Meta.GetAttr(pilorama.AttributeFilename)
			return bytes.Compare(left, right) < 0
		})
	}
	return nodes, nil
}
// func sortByFilename(nodes []pilorama.NodeInfo, d GetSubTreeRequest_Body_Order_Direction) ([]pilorama.NodeInfo, error) {
// switch d {
// case GetSubTreeRequest_Body_Order_None:
// return nodes, nil
// case GetSubTreeRequest_Body_Order_Asc:
// if len(nodes) == 0 {
// return nodes, nil
// }
// sort.Slice(nodes, func(i, j int) bool {
// return bytes.Compare(
// nodes[i].Meta.GetAttr(pilorama.AttributeFilename),
// nodes[j].Meta.GetAttr(pilorama.AttributeFilename)) < 0
// })
// return nodes, nil
// type pair struct {
// fname []byte
// info pilorama.NodeInfo
// }
// ns := make([]pair, len(nodes))
// for i := range nodes {
// ns[i] = pair{
// fname: nodes[i].Meta.GetAttr(pilorama.AttributeFilename),
// info: nodes[i],
// }
// }
// slices.SortFunc(ns, func(a, b pair) int {
// return bytes.Compare(a.fname, b.fname)
// })
// for i := range ns {
// nodes[i] = ns[i].info
// }
// return nodes, nil
// default:
// return nil, fmt.Errorf("unsupported order direction: %s", d.String())
// }
// }
// Apply locally applies operation from the remote node to the tree.
func (s *Service) Apply(_ context.Context, req *ApplyRequest) (*ApplyResponse, error) {
@ -679,11 +719,11 @@ func protoToMeta(arr []*KeyValue) []pilorama.KeyValue {
// metaToProto converts pilorama key-value pairs into a slice of pointers to
// KeyValue messages, preserving the order of arr.
//
// All messages live in one backing slice, so the conversion needs two
// allocations regardless of len(arr) instead of one allocation per element.
func metaToProto(arr []pilorama.KeyValue) []*KeyValue {
	mm := make([]KeyValue, len(arr))
	meta := make([]*KeyValue, len(arr))
	for i := range arr {
		mm[i].Key = arr[i].Key
		mm[i].Value = arr[i].Value
		meta[i] = &mm[i]
	}
	return meta
}