Optimize tree service GetSubTree latency #957
5 changed files with 310 additions and 22 deletions
121
pkg/services/tree/bench_test.go
Normal file
121
pkg/services/tree/bench_test.go
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
package tree
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
||||||
|
cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||||
|
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"google.golang.org/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkGetSubTree(b *testing.B) {
|
||||||
|
const count = simpleHeapFallbackThreshold
|
||||||
|
|
||||||
|
d := pilorama.CIDDescriptor{CID: cidtest.ID(), Size: 1}
|
||||||
|
treeID := "sometree"
|
||||||
|
|
||||||
|
body := &GetSubTreeRequest_Body{
|
||||||
|
TreeId: treeID,
|
||||||
|
RootId: pilorama.RootID,
|
||||||
|
Depth: 2,
|
||||||
|
OrderBy: &GetSubTreeRequest_Body_Order{
|
||||||
|
Direction: GetSubTreeRequest_Body_Order_Asc,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
p := newBenchPilorama(count)
|
||||||
|
|
||||||
|
b.Run("latency", func(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
acc := benchServer{start: time.Now(), errIndex: count + 1}
|
||||||
|
err := getSubTree(context.Background(), &acc, d.CID, body, p)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("Error: %v, expected: %d, got %d", err, count, acc.seen)
|
||||||
|
}
|
||||||
|
b.ReportMetric(float64(uint64(acc.first)/uint64(b.N)), "ns/op")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
b.Run("total time", func(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
acc := benchServer{start: time.Now(), errIndex: count + 1}
|
||||||
|
err := getSubTree(context.Background(), &acc, d.CID, body, p)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("Error: %v, expected: %d, got %d", err, count, acc.seen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// benchPilorama represents flat pilorama, i.e. tree of height 2 with many child nodes.
type benchPilorama struct {
	pilorama.Forest // Satisfy interface.
	// nodes holds the direct children of RootID in insertion order;
	// node with ID i is stored at index i-1 (see TreeGetMeta).
	nodes []pilorama.NodeInfo
}
|
||||||
|
|
||||||
|
func newBenchPilorama(size int) *benchPilorama {
|
||||||
|
nodes := make([]pilorama.NodeInfo, 0, size)
|
||||||
|
|
||||||
|
for i := 1; i <= size; i++ { // Start with 1 to avoid intersecting with RootID = 0.
|
||||||
|
nodes = append(nodes, pilorama.NodeInfo{
|
||||||
|
ParentID: pilorama.RootID,
|
||||||
|
ID: pilorama.Node(i),
|
||||||
|
Meta: pilorama.Meta{
|
||||||
|
Items: []pilorama.KeyValue{{
|
||||||
|
Key: pilorama.AttributeFilename,
|
||||||
|
Value: []byte(uuid.New().String()),
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return &benchPilorama{nodes: nodes}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *benchPilorama) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID string, nodeID pilorama.Node) ([]pilorama.NodeInfo, error) {
|
||||||
|
switch nodeID {
|
||||||
|
case pilorama.RootID:
|
||||||
|
result := make([]pilorama.NodeInfo, len(p.nodes))
|
||||||
|
copy(result, p.nodes)
|
||||||
|
return result, nil
|
||||||
|
default:
|
||||||
|
panic("unexpected")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *benchPilorama) TreeGetMeta(ctx context.Context, cid cidSDK.ID, treeID string, root pilorama.Node) (pilorama.Meta, pilorama.Node, error) {
|
||||||
|
if root == pilorama.RootID {
|
||||||
|
return pilorama.Meta{}, pilorama.RootID, nil
|
||||||
|
}
|
||||||
|
return p.nodes[root-1].Meta, p.nodes[root-1].ParentID, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// benchServer is a TreeService_GetSubTreeServer stub that records the latency
// of the second streamed response (the first one after the root) and can
// inject Send errors starting at a configurable call index.
type benchServer struct {
	grpc.ServerStream // to satisfy the interface
	start    time.Time     // iteration start, set by the caller
	first    time.Duration // time until the Send call with seen == 1 (see Send)
	seen     int           // number of Send calls observed so far
	errIndex int           // Send call errIndex+1 fails with errSubTreeSend; negative disables injection
}

var _ TreeService_GetSubTreeServer = (*benchServer)(nil)
|
||||||
|
|
||||||
|
func (s *benchServer) Send(r *GetSubTreeResponse) error {
|
||||||
|
if s.seen == 1 {
|
||||||
|
s.first = time.Since(s.start)
|
||||||
|
}
|
||||||
|
s.seen++
|
||||||
|
|
||||||
|
if s.errIndex >= 0 {
|
||||||
|
if s.seen == s.errIndex+1 {
|
||||||
|
return errSubTreeSend
|
||||||
|
}
|
||||||
|
if s.errIndex >= 0 && s.seen > s.errIndex {
|
||||||
|
return errSubTreeSendAfterError
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
56
pkg/services/tree/heap/ordered_slice.go
Normal file
56
pkg/services/tree/heap/ordered_slice.go
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
package heap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OrderedSlice is a Heap implementation that keeps inserted nodes in a slice
// and sorts them lazily by filename on the first ExtractMin after an Insert.
type OrderedSlice struct {
	// nodes holds the remaining elements; ascending by filename once sorted.
	nodes  []node
	// sorted reports whether nodes is currently sorted; Insert resets it.
	sorted bool
}

// node caches the filename attribute next to the node info pointer so the
// sort comparator does not re-extract the attribute on every comparison.
type node struct {
	filename []byte
	info     *pilorama.NodeInfo
}
|
||||||
|
|
||||||
|
func NewOrderedSlice() *OrderedSlice {
|
||||||
|
return new(OrderedSlice)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *OrderedSlice) Insert(infos ...pilorama.NodeInfo) {
|
||||||
|
s.sorted = false
|
||||||
|
for i := range infos {
|
||||||
|
s.nodes = append(s.nodes, node{
|
||||||
|
filename: infos[i].Meta.GetAttr(pilorama.AttributeFilename),
|
||||||
|
info: &infos[i],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *OrderedSlice) IsEmpty() bool {
|
||||||
|
return len(s.nodes) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *OrderedSlice) ExtractMin() pilorama.NodeInfo {
|
||||||
|
if !s.sorted {
|
||||||
|
sortByFilename(s.nodes)
|
||||||
|
s.sorted = true
|
||||||
|
}
|
||||||
|
|
||||||
|
node := s.nodes[0]
|
||||||
|
s.nodes = s.nodes[1:]
|
||||||
|
return *node.info
|
||||||
|
}
|
||||||
|
|
||||||
|
func sortByFilename(nodes []node) {
|
||||||
|
if len(nodes) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
less := func(i, j int) bool {
|
||||||
|
return bytes.Compare(nodes[i].filename, nodes[j].filename) < 0
|
||||||
|
}
|
||||||
|
sort.Slice(nodes, less)
|
||||||
|
}
|
68
pkg/services/tree/heap/pairing.go
Normal file
68
pkg/services/tree/heap/pairing.go
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
package heap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MinPairingHeap is a Heap implementation backed by a pairing heap, ordered
// ascending by the filename attribute of the stored nodes.
type MinPairingHeap struct {
	head *phNode // current minimum; nil means the heap is empty
	// size tracks the element count. NOTE(review): it is only ever written
	// (Insert/ExtractMin), never read — candidate for removal.
	size int
}

// phNode is a pairing-heap node: children form a list linked via sibling.
type phNode struct {
	val     *pilorama.NodeInfo
	key     []byte // cached filename attribute used for ordering
	child   *phNode
	sibling *phNode
}
|
||||||
|
|
||||||
|
func NewPairing() *MinPairingHeap {
|
||||||
|
return &MinPairingHeap{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MinPairingHeap) Insert(infos ...pilorama.NodeInfo) {
|
||||||
aarifullin marked this conversation as resolved
aarifullin
commented
[Optionally] Sorry, it seems I have gone far away 😄 Wouldn't you like to consider generic solution?
For check
[Optionally]
Sorry, it seems I have gone far away 😄
Wouldn't you like to consider generic solution?
```go
package heap
type MinPairingHeap[ValT, KeyT any] struct {
head *phNode[ValT, KeyT]
getKey func(*ValT) KeyT
cmp func(KeyT, KeyT) bool
}
type phNode[ValT, KeyT any] struct {
val *ValT
key func(*ValT) KeyT
child *phNode[ValT, KeyT]
sibling *phNode[ValT, KeyT]
}
func (p *phNode[ValT, KeyT]) Key() KeyT {
return p.key(p.val)
}
func NewPairing[ValT, KeyT any](key func(*ValT) KeyT, cmp func(KeyT, KeyT) bool) *MinPairingHeap[ValT, KeyT] {
return &MinPairingHeap[ValT, KeyT]{
getKey: key,
cmp: cmp,
}
}
func (m *MinPairingHeap[ValT, KeyT]) Insert(infos ...ValT) {
for i := range infos {
tmp := &phNode[ValT, KeyT]{val: &infos[i], key: m.getKey}
m.head = meld(tmp, m.head, m.cmp)
}
}
func meld[ValT, KeyT any](m1, m2 *phNode[ValT, KeyT], cmp func(KeyT, KeyT) bool) *phNode[ValT, KeyT] {
if m1 == nil {
return m2
}
if m2 == nil {
return m1
}
if cmp(m1.Key(), m2.Key()) {
m1.child, m2.sibling = m2, m1.child
return m1
}
m2.child, m1.sibling = m1, m2.child
return m2
}
```
For check
```go
func TestMin(t *testing.T) {
nodes := make([]pilorama.NodeInfo, 0, 10)
for i := 1; i <= 10; i++ { // Start with 1 to avoid intersecting with RootID = 0.
nodes = append(nodes, pilorama.NodeInfo{
ParentID: pilorama.RootID,
ID: pilorama.Node(i),
Meta: pilorama.Meta{
Items: []pilorama.KeyValue{{
Key: pilorama.AttributeFilename,
Value: []byte(uuid.New().String()),
}},
},
})
}
pairing := NewPairing(func(vt *pilorama.NodeInfo) []byte {
return vt.Meta.GetAttr(pilorama.AttributeFilename)
}, func(lhs []byte, rhs []byte) bool {
return bytes.Compare(lhs, rhs) == -1
})
pairing.Insert(nodes...)
}
```
fyrchik
commented
I have rejected it, because it looks complicated (hello, callbacks) and I don't see any other usecases. I have rejected it, because it looks complicated (hello, callbacks) and I don't see any other usecases.
Not to mention that even slight performance degradation because of genericness can become crucial for 4M elements listing.
|
|||||||
|
for i := range infos {
|
||||||
|
tmp := &phNode{key: infos[i].Meta.GetAttr(pilorama.AttributeFilename), val: &infos[i]}
|
||||||
|
m.head = meld(tmp, m.head)
|
||||||
|
m.size++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MinPairingHeap) IsEmpty() bool {
|
||||||
|
return m.head == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractMin removes and returns the element with the smallest key.
// The heap must not be empty (callers check IsEmpty first); calling this on
// an empty heap dereferences a nil head and panics.
func (m *MinPairingHeap) ExtractMin() pilorama.NodeInfo {
	val := m.head.val
	// Melding the extracted root's children restores the heap property.
	m.head = mergePairs(m.head.child)
	m.size--
	return *val
}
|
||||||
|
|
||||||
|
func meld(m1, m2 *phNode) *phNode {
|
||||||
|
if m1 == nil {
|
||||||
|
return m2
|
||||||
|
}
|
||||||
|
if m2 == nil {
|
||||||
|
return m1
|
||||||
|
}
|
||||||
|
if bytes.Compare(m1.key, m2.key) == -1 {
|
||||||
|
m1.child, m2.sibling = m2, m1.child
|
||||||
|
return m1
|
||||||
|
}
|
||||||
|
m2.child, m1.sibling = m1, m2.child
|
||||||
|
return m2
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergePairs(n *phNode) *phNode {
|
||||||
|
if n == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if n.sibling == nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
tmp := n.sibling.sibling
|
||||||
|
return meld(meld(n, n.sibling), mergePairs(tmp))
|
||||||
|
}
|
25
pkg/services/tree/heap/unordered_slice.go
Normal file
25
pkg/services/tree/heap/unordered_slice.go
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
package heap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UnorderedSlice is a Heap implementation for traversal without ordering:
// it preserves insertion order and ExtractMin pops the first element.
type UnorderedSlice []pilorama.NodeInfo
|
||||||
|
|
||||||
|
func NewUnorderedSlice() *UnorderedSlice {
|
||||||
|
return &UnorderedSlice{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *UnorderedSlice) Insert(infos ...pilorama.NodeInfo) {
|
||||||
|
*s = append(*s, infos...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *UnorderedSlice) IsEmpty() bool {
|
||||||
|
return len(*s) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractMin removes and returns the first element in insertion order; no
// actual ordering is applied, which is the intended behavior for unordered
// (Order_None) traversal. The slice must not be empty (callers check IsEmpty
// first). Note the re-slicing keeps the full backing array reachable until
// the slice is drained.
func (s *UnorderedSlice) ExtractMin() pilorama.NodeInfo {
	node := (*s)[0]
	*s = (*s)[1:]
	return node
}
|
|
@ -5,11 +5,11 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree/heap"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/acl"
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/acl"
|
||||||
cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
cidSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||||
|
@ -440,29 +440,56 @@ func (s *Service) GetSubTree(req *GetSubTreeRequest, srv TreeService_GetSubTreeS
|
||||||
return getSubTree(srv.Context(), srv, cid, b, s.forest)
|
return getSubTree(srv.Context(), srv, cid, b, s.forest)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Heap abstracts the per-level extraction order used by getSubTree:
// implementations either keep nodes unordered or yield them ascending by
// the filename attribute.
type Heap interface {
	// Insert adds nodes to the heap.
	Insert(...pilorama.NodeInfo)
	// IsEmpty reports whether no nodes remain.
	IsEmpty() bool
	// ExtractMin removes and returns the next node; it must not be called
	// on an empty heap.
	ExtractMin() pilorama.NodeInfo
}
|
||||||
|
|
||||||
|
// simpleHeapFallbackThreshold is the least number of nodes on a single level
// for which the pairing heap implementation is used; smaller ordered levels
// use the simpler sort-a-slice implementation (see makeHeap).
const simpleHeapFallbackThreshold = 100_000
|
||||||
|
|
||||||
|
func makeHeap(ordered bool, count int) Heap {
|
||||||
|
switch {
|
||||||
|
case ordered && simpleHeapFallbackThreshold <= count:
|
||||||
|
return heap.NewPairing()
|
||||||
|
case ordered:
|
||||||
|
return heap.NewOrderedSlice()
|
||||||
|
default:
|
||||||
|
return heap.NewUnorderedSlice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
|
func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSDK.ID, b *GetSubTreeRequest_Body, forest pilorama.Forest) error {
|
||||||
|
ordered, err := needOrder(b.GetOrderBy().GetDirection())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Traverse the tree in a DFS manner. Because we need to support arbitrary depth,
|
// Traverse the tree in a DFS manner. Because we need to support arbitrary depth,
|
||||||
// recursive implementation is not suitable here, so we maintain explicit stack.
|
// recursive implementation is not suitable here, so we maintain explicit stack.
|
||||||
m, p, err := forest.TreeGetMeta(ctx, cid, b.GetTreeId(), b.GetRootId())
|
m, p, err := forest.TreeGetMeta(ctx, cid, b.GetTreeId(), b.GetRootId())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
stack := [][]pilorama.NodeInfo{{{
|
|
||||||
|
stack := []Heap{makeHeap(ordered, 1)}
|
||||||
|
stack[0].Insert(pilorama.NodeInfo{
|
||||||
ID: b.GetRootId(),
|
ID: b.GetRootId(),
|
||||||
Meta: m,
|
Meta: m,
|
||||||
ParentID: p,
|
ParentID: p,
|
||||||
}}}
|
})
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if len(stack) == 0 {
|
if len(stack) == 0 {
|
||||||
break
|
break
|
||||||
} else if len(stack[len(stack)-1]) == 0 {
|
} else if stack[len(stack)-1].IsEmpty() {
|
||||||
stack = stack[:len(stack)-1]
|
stack = stack[:len(stack)-1]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
node := stack[len(stack)-1][0]
|
node := stack[len(stack)-1].ExtractMin()
|
||||||
stack[len(stack)-1] = stack[len(stack)-1][1:]
|
|
||||||
|
|
||||||
err = srv.Send(&GetSubTreeResponse{
|
err = srv.Send(&GetSubTreeResponse{
|
||||||
Body: &GetSubTreeResponse_Body{
|
Body: &GetSubTreeResponse_Body{
|
||||||
|
@ -481,33 +508,24 @@ func getSubTree(ctx context.Context, srv TreeService_GetSubTreeServer, cid cidSD
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
children, err = sortByFilename(children, b.GetOrderBy().GetDirection())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if len(children) != 0 {
|
if len(children) != 0 {
|
||||||
stack = append(stack, children)
|
h := makeHeap(ordered, len(children))
|
||||||
|
h.Insert(children...)
|
||||||
|
stack = append(stack, h)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func sortByFilename(nodes []pilorama.NodeInfo, d GetSubTreeRequest_Body_Order_Direction) ([]pilorama.NodeInfo, error) {
|
func needOrder(d GetSubTreeRequest_Body_Order_Direction) (bool, error) {
|
||||||
switch d {
|
switch d {
|
||||||
case GetSubTreeRequest_Body_Order_None:
|
case GetSubTreeRequest_Body_Order_None:
|
||||||
return nodes, nil
|
return false, nil
|
||||||
case GetSubTreeRequest_Body_Order_Asc:
|
case GetSubTreeRequest_Body_Order_Asc:
|
||||||
if len(nodes) == 0 {
|
return true, nil
|
||||||
return nodes, nil
|
|
||||||
}
|
|
||||||
less := func(i, j int) bool {
|
|
||||||
return bytes.Compare(nodes[i].Meta.GetAttr(pilorama.AttributeFilename), nodes[j].Meta.GetAttr(pilorama.AttributeFilename)) < 0
|
|
||||||
}
|
|
||||||
sort.Slice(nodes, less)
|
|
||||||
return nodes, nil
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported order direction: %s", d.String())
|
return false, fmt.Errorf("unsupported order direction: %s", d.String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue
Looks like size is redundant.