Optimize tree service GetSubTree latency #957
5 changed files with 310 additions and 22 deletions
pkg/services/tree/bench_test.go (new file, 121 lines)

```go
package tree

import (
	"context"
	"testing"
	"time"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
	cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
	cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
	"github.com/google/uuid"
	"google.golang.org/grpc"
)

func BenchmarkGetSubTree(b *testing.B) {
	const count = simpleHeapFallbackThreshold

	d := pilorama.CIDDescriptor{CID: cidtest.ID(), Size: 1}
	treeID := "sometree"

	body := &GetSubTreeRequest_Body{
		TreeId: treeID,
		RootId: pilorama.RootID,
		Depth:  2,
		OrderBy: &GetSubTreeRequest_Body_Order{
			Direction: GetSubTreeRequest_Body_Order_Asc,
		},
	}
	p := newBenchPilorama(count)

	b.Run("latency", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			acc := benchServer{start: time.Now(), errIndex: count + 1}
			err := getSubTree(context.Background(), &acc, d.CID, body, p)
			if err != nil {
				b.Fatalf("Error: %v, expected: %d, got %d", err, count, acc.seen)
			}
			b.ReportMetric(float64(uint64(acc.first)/uint64(b.N)), "ns/op")
		}
	})
	b.Run("total time", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			acc := benchServer{start: time.Now(), errIndex: count + 1}
			err := getSubTree(context.Background(), &acc, d.CID, body, p)
			if err != nil {
				b.Fatalf("Error: %v, expected: %d, got %d", err, count, acc.seen)
			}
		}
	})
}

// benchPilorama represents a flat pilorama, i.e. a tree of height 2 with many child nodes.
type benchPilorama struct {
	pilorama.Forest // Satisfy interface.
	nodes           []pilorama.NodeInfo
}

func newBenchPilorama(size int) *benchPilorama {
	nodes := make([]pilorama.NodeInfo, 0, size)

	for i := 1; i <= size; i++ { // Start with 1 to avoid intersecting with RootID = 0.
		nodes = append(nodes, pilorama.NodeInfo{
			ParentID: pilorama.RootID,
			ID:       pilorama.Node(i),
			Meta: pilorama.Meta{
				Items: []pilorama.KeyValue{{
					Key:   pilorama.AttributeFilename,
					Value: []byte(uuid.New().String()),
				}},
			},
		})
	}
	return &benchPilorama{nodes: nodes}
}

func (p *benchPilorama) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.Node) ([]pilorama.NodeInfo, error) {
	switch nodeID {
	case pilorama.RootID:
		result := make([]pilorama.NodeInfo, len(p.nodes))
		copy(result, p.nodes)
		return result, nil
	default:
		panic("unexpected")
	}
}

func (p *benchPilorama) TreeGetMeta(ctx context.Context, cid cidSDK.ID, treeID string, root pilorama.Node) (pilorama.Meta, pilorama.Node, error) {
	if root == pilorama.RootID {
		return pilorama.Meta{}, pilorama.RootID, nil
	}
	return p.nodes[root-1].Meta, p.nodes[root-1].ParentID, nil
}

type benchServer struct {
	grpc.ServerStream // to satisfy the interface
	start             time.Time
	first             time.Duration
	seen              int
	errIndex          int
}

var _ TreeService_GetSubTreeServer = (*benchServer)(nil)

func (s *benchServer) Send(r *GetSubTreeResponse) error {
	if s.seen == 1 {
		s.first = time.Since(s.start)
	}
	s.seen++

	if s.errIndex >= 0 {
		if s.seen == s.errIndex+1 {
			return errSubTreeSend
		}
		if s.errIndex >= 0 && s.seen > s.errIndex {
			return errSubTreeSendAfterError
		}
	}
	return nil
}
```
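For reference, this benchmark should be runnable with the standard Go tooling, e.g. `go test -run '^$' -bench BenchmarkGetSubTree ./pkg/services/tree/`: the `latency` sub-benchmark reports roughly how long it takes before the stream starts yielding nodes, while `total time` measures a complete traversal of `simpleHeapFallbackThreshold` children.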
pkg/services/tree/heap/ordered_slice.go (new file, 56 lines)

```go
package heap

import (
	"bytes"
	"sort"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)

type OrderedSlice struct {
	nodes  []node
	sorted bool
}

type node struct {
	filename []byte
	info     *pilorama.NodeInfo
}

func NewOrderedSlice() *OrderedSlice {
	return new(OrderedSlice)
}

func (s *OrderedSlice) Insert(infos ...pilorama.NodeInfo) {
	s.sorted = false
	for i := range infos {
		s.nodes = append(s.nodes, node{
			filename: infos[i].Meta.GetAttr(pilorama.AttributeFilename),
			info:     &infos[i],
		})
	}
}

func (s *OrderedSlice) IsEmpty() bool {
	return len(s.nodes) == 0
}

func (s *OrderedSlice) ExtractMin() pilorama.NodeInfo {
	if !s.sorted {
		sortByFilename(s.nodes)
		s.sorted = true
	}

	node := s.nodes[0]
	s.nodes = s.nodes[1:]
	return *node.info
}

func sortByFilename(nodes []node) {
	if len(nodes) == 0 {
		return
	}
	less := func(i, j int) bool {
		return bytes.Compare(nodes[i].filename, nodes[j].filename) < 0
	}
	sort.Slice(nodes, less)
}
```
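For orientation, here is a minimal sketch (not part of the change) of how `OrderedSlice` is meant to behave: sorting is deferred until the first `ExtractMin`, after which nodes come out in ascending filename order. The `makeNode` helper is hypothetical and exists only to build a `pilorama.NodeInfo` with a given filename attribute.

```go
package heap

import (
	"fmt"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)

// makeNode is a hypothetical test helper: it builds a NodeInfo whose only
// meta item is the filename attribute, which is what the heaps order by.
func makeNode(id pilorama.Node, filename string) pilorama.NodeInfo {
	return pilorama.NodeInfo{
		ID:       id,
		ParentID: pilorama.RootID,
		Meta: pilorama.Meta{
			Items: []pilorama.KeyValue{{
				Key:   pilorama.AttributeFilename,
				Value: []byte(filename),
			}},
		},
	}
}

func ExampleOrderedSlice() {
	s := NewOrderedSlice()
	s.Insert(makeNode(3, "c"), makeNode(1, "a"), makeNode(2, "b"))

	// Nothing is sorted yet; the first ExtractMin call sorts the slice once,
	// then elements are popped in ascending filename order.
	for !s.IsEmpty() {
		fmt.Println(string(s.ExtractMin().Meta.GetAttr(pilorama.AttributeFilename)))
	}
	// Output:
	// a
	// b
	// c
}
```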
pkg/services/tree/heap/pairing.go (new file, 68 lines)

```go
package heap

import (
	"bytes"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)

type MinPairingHeap struct {
	head *phNode
	size int
}

type phNode struct {
	val     *pilorama.NodeInfo
	key     []byte
	child   *phNode
	sibling *phNode
}

func NewPairing() *MinPairingHeap {
	return &MinPairingHeap{}
}

func (m *MinPairingHeap) Insert(infos ...pilorama.NodeInfo) {
	for i := range infos {
		tmp := &phNode{key: infos[i].Meta.GetAttr(pilorama.AttributeFilename), val: &infos[i]}
		m.head = meld(tmp, m.head)
		m.size++
	}
}

func (m *MinPairingHeap) IsEmpty() bool {
	return m.head == nil
}

func (m *MinPairingHeap) ExtractMin() pilorama.NodeInfo {
	val := m.head.val
	m.head = mergePairs(m.head.child)
	m.size--
	return *val
}

func meld(m1, m2 *phNode) *phNode {
	if m1 == nil {
		return m2
	}
	if m2 == nil {
		return m1
	}
	if bytes.Compare(m1.key, m2.key) == -1 {
		m1.child, m2.sibling = m2, m1.child
		return m1
	}
	m2.child, m1.sibling = m1, m2.child
	return m2
}

func mergePairs(n *phNode) *phNode {
	if n == nil {
		return nil
	}
	if n.sibling == nil {
		return n
	}
	tmp := n.sibling.sibling
	return meld(meld(n, n.sibling), mergePairs(tmp))
}
```

Review thread on `MinPairingHeap.Insert`, marked as resolved by aarifullin.

aarifullin commented:

[Optionally] Sorry, it seems I have gone far away 😄 Would you consider a generic solution?

```go
package heap

type MinPairingHeap[ValT, KeyT any] struct {
	head   *phNode[ValT, KeyT]
	getKey func(*ValT) KeyT
	cmp    func(KeyT, KeyT) bool
}

type phNode[ValT, KeyT any] struct {
	val     *ValT
	key     func(*ValT) KeyT
	child   *phNode[ValT, KeyT]
	sibling *phNode[ValT, KeyT]
}

func (p *phNode[ValT, KeyT]) Key() KeyT {
	return p.key(p.val)
}

func NewPairing[ValT, KeyT any](key func(*ValT) KeyT, cmp func(KeyT, KeyT) bool) *MinPairingHeap[ValT, KeyT] {
	return &MinPairingHeap[ValT, KeyT]{
		getKey: key,
		cmp:    cmp,
	}
}

func (m *MinPairingHeap[ValT, KeyT]) Insert(infos ...ValT) {
	for i := range infos {
		tmp := &phNode[ValT, KeyT]{val: &infos[i], key: m.getKey}
		m.head = meld(tmp, m.head, m.cmp)
	}
}

func meld[ValT, KeyT any](m1, m2 *phNode[ValT, KeyT], cmp func(KeyT, KeyT) bool) *phNode[ValT, KeyT] {
	if m1 == nil {
		return m2
	}
	if m2 == nil {
		return m1
	}
	if cmp(m1.Key(), m2.Key()) {
		m1.child, m2.sibling = m2, m1.child
		return m1
	}
	m2.child, m1.sibling = m1, m2.child
	return m2
}
```

For a check:

```go
func TestMin(t *testing.T) {
	nodes := make([]pilorama.NodeInfo, 0, 10)
	for i := 1; i <= 10; i++ { // Start with 1 to avoid intersecting with RootID = 0.
		nodes = append(nodes, pilorama.NodeInfo{
			ParentID: pilorama.RootID,
			ID:       pilorama.Node(i),
			Meta: pilorama.Meta{
				Items: []pilorama.KeyValue{{
					Key:   pilorama.AttributeFilename,
					Value: []byte(uuid.New().String()),
				}},
			},
		})
	}
	pairing := NewPairing(func(vt *pilorama.NodeInfo) []byte {
		return vt.Meta.GetAttr(pilorama.AttributeFilename)
	}, func(lhs []byte, rhs []byte) bool {
		return bytes.Compare(lhs, rhs) == -1
	})
	pairing.Insert(nodes...)
}
```

fyrchik commented:

I have rejected it because it looks complicated (hello, callbacks) and I don't see any other use cases. Not to mention that even a slight performance degradation due to genericness can become crucial when listing 4M elements.
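A similar sketch for the pairing heap, again hypothetical and reusing the `makeNode` helper from the `OrderedSlice` sketch above: both implementations should yield nodes in non-decreasing filename order, which is the property `getSubTree` relies on when `OrderBy` is `Asc`.

```go
package heap

import (
	"bytes"
	"testing"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)

// TestAscendingExtraction is a sketch of a property check: whatever the
// insertion order, ExtractMin must return filenames in non-decreasing order.
func TestAscendingExtraction(t *testing.T) {
	heaps := []interface {
		Insert(...pilorama.NodeInfo)
		IsEmpty() bool
		ExtractMin() pilorama.NodeInfo
	}{
		NewPairing(),
		NewOrderedSlice(),
	}

	for _, h := range heaps {
		h.Insert(makeNode(1, "b"), makeNode(2, "a"), makeNode(3, "c"))

		var prev []byte
		for !h.IsEmpty() {
			cur := h.ExtractMin().Meta.GetAttr(pilorama.AttributeFilename)
			if prev != nil && bytes.Compare(prev, cur) > 0 {
				t.Fatalf("out of order: %q before %q", prev, cur)
			}
			prev = cur
		}
	}
}
```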
pkg/services/tree/heap/unordered_slice.go (new file, 25 lines)

```go
package heap

import (
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
)

type UnorderedSlice []pilorama.NodeInfo

func NewUnorderedSlice() *UnorderedSlice {
	return &UnorderedSlice{}
}

func (s *UnorderedSlice) Insert(infos ...pilorama.NodeInfo) {
	*s = append(*s, infos...)
}

func (s *UnorderedSlice) IsEmpty() bool {
	return len(*s) == 0
}

func (s *UnorderedSlice) ExtractMin() pilorama.NodeInfo {
	node := (*s)[0]
	*s = (*s)[1:]
	return node
}
```
Modified file (tree service implementation):

```diff
@@ -5,11 +5,11 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"sort"
 	"sync"
 	"sync/atomic"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
+	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree/heap"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
 	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/acl"
 	cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
@@ -440,29 +440,56 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeServer) error {
 	return getSubTree(srv.Context(), srv, cid, b, s.forest)
 }
 
+type Heap interface {
+	Insert(...pilorama.NodeInfo)
+	IsEmpty() bool
+	ExtractMin() pilorama.NodeInfo
+}
+
+// simpleHeapFallbackThreshold is the least number of nodes on a single level,
+// for which pairing heap implementation is used.
+const simpleHeapFallbackThreshold = 100_000
+
+func makeHeap(ordered bool, count int) Heap {
+	switch {
+	case ordered && simpleHeapFallbackThreshold <= count:
+		return heap.NewPairing()
+	case ordered:
+		return heap.NewOrderedSlice()
+	default:
+		return heap.NewUnorderedSlice()
+	}
+}
+
 func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
+	ordered, err := needOrder(b.GetOrderBy().GetDirection())
+	if err != nil {
+		return err
+	}
+
 	// Traverse the tree in a DFS manner. Because we need to support arbitrary depth,
 	// recursive implementation is not suitable here, so we maintain explicit stack.
 	m, p, err := forest.TreeGetMeta(ctx, cid, b.GetTreeId(), b.GetRootId())
 	if err != nil {
 		return err
 	}
-	stack := [][]pilorama.NodeInfo{{{
+
+	stack := []Heap{makeHeap(ordered, 1)}
+	stack[0].Insert(pilorama.NodeInfo{
 		ID:       b.GetRootId(),
 		Meta:     m,
 		ParentID: p,
-	}}}
+	})
 
 	for {
 		if len(stack) == 0 {
 			break
-		} else if len(stack[len(stack)-1]) == 0 {
+		} else if stack[len(stack)-1].IsEmpty() {
 			stack = stack[:len(stack)-1]
 			continue
 		}
 
-		node := stack[len(stack)-1][0]
-		stack[len(stack)-1] = stack[len(stack)-1][1:]
+		node := stack[len(stack)-1].ExtractMin()
 
 		err = srv.Send(&GetSubTreeResponse{
 			Body: &GetSubTreeResponse_Body{
@@ -481,33 +508,24 @@ func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
 			if err != nil {
 				return err
 			}
-			children, err = sortByFilename(children, b.GetOrderBy().GetDirection())
-			if err != nil {
-				return err
-			}
 			if len(children) != 0 {
-				stack = append(stack, children)
+				h := makeHeap(ordered, len(children))
+				h.Insert(children...)
+				stack = append(stack, h)
 			}
 		}
 	}
 	return nil
 }
 
-func sortByFilename(nodes []pilorama.NodeInfo, d GetSubTreeRequest_Body_Order_Direction) ([]pilorama.NodeInfo, error) {
+func needOrder(d GetSubTreeRequest_Body_Order_Direction) (bool, error) {
 	switch d {
 	case GetSubTreeRequest_Body_Order_None:
-		return nodes, nil
+		return false, nil
 	case GetSubTreeRequest_Body_Order_Asc:
-		if len(nodes) == 0 {
-			return nodes, nil
-		}
-		less := func(i, j int) bool {
-			return bytes.Compare(nodes[i].Meta.GetAttr(pilorama.AttributeFilename), nodes[j].Meta.GetAttr(pilorama.AttributeFilename)) < 0
-		}
-		sort.Slice(nodes, less)
-		return nodes, nil
+		return true, nil
 	default:
-		return nil, fmt.Errorf("unsupported order direction: %s", d.String())
+		return false, fmt.Errorf("unsupported order direction: %s", d.String())
 	}
 }
```
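A note on the intent, as far as it can be read from the change: previously every level was fully sorted by `sortByFilename` before any of its nodes could be sent, so for very large flat trees the first child was delayed by an O(n log n) sort. The `Heap` abstraction keeps that behaviour for small ordered levels (`OrderedSlice` still sorts once on the first `ExtractMin`), skips sorting entirely when no order is requested (`UnorderedSlice`), and switches to a pairing heap above `simpleHeapFallbackThreshold`, where O(1) inserts and cheap melds let the stream start producing ordered nodes without sorting the whole level up front.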
Review comment: Looks like `size` is redundant.