pilorama: optimize create op #447

Merged
fyrchik merged 3 commits from fyrchik/frostfs-node:pilorama-clever-create into master 2023-06-26 07:42:07 +00:00
3 changed files with 125 additions and 8 deletions

@@ -1,6 +1,7 @@
package pilorama

import (
    "encoding/binary"
    "sort"
    "sync"
    "time"
@@ -49,10 +50,66 @@ func (b *batch) run() {
        sort.Slice(b.operations, func(i, j int) bool {
            return b.operations[i].Time < b.operations[j].Time
        })
        b.operations = removeDuplicatesInPlace(b.operations)
        var lm Move
        return b.forest.applyOperation(bLog, bTree, b.operations, &lm)

        // Our main use-case is addition of new items. In this case,
        // we do not need to perform undo()/redo(), just do().
        // https://github.com/trvedata/move-op/blob/6c23447c12a7862ff31b7fc2205f6c90fbdb9dc0/proof/Move_Create.thy#L259
        //
        // For this optimization to work we need to ensure three things:
        // 1. The node itself is not yet in tree.
        // 2. The node is not a parent. This case is not mentioned in the article, because
        // they consider a "static order" (perform all CREATE operations before MOVE).
        // We need this because if node _is_ a parent, we could violate (3) for some late operation.
        // See TestForest_ApplySameOperation for details.
        // 3. Parent of each operation is already in tree.
        var parents map[uint64]struct{}
        var cKey [maxKeySize]byte
        var slow bool
        for i := range b.operations {
            _, _, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], b.operations[i].Child))
            if inTree {
                slow = true
                break
            }

            key := childrenKey(cKey[:], b.operations[i].Child, 0)
            k, _ := bTree.Cursor().Seek(key)
            if len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == b.operations[i].Child {
                slow = true
                break
            }

            if b.operations[i].Parent == RootID {
                continue
            } else if parents == nil {
                // Attaching key only to root is done frequently,
                // no allocations are performed unless necessary.
                parents = make(map[uint64]struct{})
            } else if _, ok := parents[b.operations[i].Parent]; ok {
                continue
            }

            p := b.operations[i].Parent
            _, ts, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], p))
            if !inTree || b.operations[0].Time < ts {
                slow = true
                break
            }
            parents[b.operations[i].Parent] = struct{}{}
        }

        if slow {
            var lm Move
            return b.forest.applyOperation(bLog, bTree, b.operations, &lm)
        }

        for i := range b.operations {
            if err := b.forest.do(bLog, bTree, cKey[:], b.operations[i]); err != nil {
                return err
            }
        }
        return nil
    })
    for i := range b.results {
        b.results[i] <- err
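
The fast path above hinges on three batch-wide conditions. The following sketch restates the same check against a toy in-memory forest; toyForest and canFastApply are illustrative helpers that are not part of this change, and Node, Timestamp, Move and RootID are simplified stand-ins for the pilorama declarations:

package sketch

// Simplified stand-ins for the pilorama declarations.
type (
    Node      = uint64
    Timestamp = uint64
)

// Move carries only the fields the check below looks at.
type Move struct {
    Parent Node
    Child  Node
    Time   Timestamp
}

// RootID stands in for the pilorama root constant; the root is always present.
const RootID Node = 0

// toyForest is an in-memory stand-in for the tree bucket: state records the
// timestamp of the operation that attached a node, kids counts its children.
type toyForest struct {
    state map[Node]Timestamp
    kids  map[Node]int
}

// canFastApply mirrors the eligibility check in batch.run(); ops must be sorted by Time.
// When it returns true, plain do() calls applied in order give the same tree as the
// full undo()/redo() replay performed by applyOperation.
func (f *toyForest) canFastApply(ops []Move) bool {
    for _, op := range ops {
        if _, inTree := f.state[op.Child]; inTree {
            return false // (1) the node is already in the tree
        }
        if f.kids[op.Child] > 0 {
            return false // (2) the node is already a parent of some other node
        }
        if op.Parent == RootID {
            continue // attaching directly to the root needs no further checks
        }
        ts, inTree := f.state[op.Parent]
        if !inTree || ops[0].Time < ts {
            return false // (3) the parent must exist and predate the whole batch
        }
    }
    return true
}

When the check passes, every operation in the batch attaches a previously unseen node to a parent that was settled before the earliest operation, so replaying the log with undo()/redo() cannot change the outcome and plain do() calls suffice; otherwise the batch falls back to applyOperation.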

@@ -0,0 +1,55 @@
package pilorama

import (
    "context"
    "os"
    "path/filepath"
    "runtime"
    "sync/atomic"
    "testing"

    cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
    "github.com/stretchr/testify/require"
)

func getTimestamp(reorder int, ts Timestamp) Timestamp {
    base := ts / Timestamp(reorder)
    rem := ts % Timestamp(reorder)
    return base*Timestamp(reorder) + Timestamp(reorder) - rem
}

func BenchmarkCreate(b *testing.B) {
    // Use `os.TempDir` because we construct multiple times in the same test.
    tmpDir, err := os.MkdirTemp(os.TempDir(), "*")
    require.NoError(b, err)

    f := NewBoltForest(
        WithPath(filepath.Join(tmpDir, "test.db")),
        WithMaxBatchSize(runtime.GOMAXPROCS(0)))
    require.NoError(b, f.Open(false))
    require.NoError(b, f.Init())
    b.Cleanup(func() {
        require.NoError(b, f.Close())
        require.NoError(b, os.RemoveAll(tmpDir))
    })

    cid := cidtest.ID()
    treeID := "tree"
    ctx := context.Background()
    var index atomic.Int32
    index.Store(-1)

    b.SetParallelism(2)
    b.RunParallel(func(pb *testing.PB) {
        for pb.Next() {
            i := index.Add(1)
            op := &Move{
                Meta: Meta{Time: getTimestamp(runtime.GOMAXPROCS(0)*2, Timestamp(i+1))},
                Child: Node(i + 1),
                Parent: RootID,
            }
            if err := f.TreeApply(ctx, cid, treeID, op, true); err != nil {
                b.FailNow()
            }
        }
    })
}
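
getTimestamp hands out timestamps in roughly reverse order inside every window of reorder consecutive inputs: with reorder = 4, inputs 1, 2, 3 yield 3, 2, 1 and inputs 5, 6, 7 yield 7, 6, 5. With reorder set to runtime.GOMAXPROCS(0)*2 and parallelism 2, concurrent TreeApply calls therefore see mildly out-of-order timestamps rather than a strictly increasing sequence, which presumably keeps the benchmark closer to a realistic create workload.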

@@ -37,6 +37,11 @@ type boltForest struct {
    cfg
}

const (
    childrenKeySize = 17
    maxKeySize = childrenKeySize
)

var (
    dataBucket = []byte{0}
    logBucket = []byte{1}
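
The literal 17 that used to be sprinkled around is the length of a children-index key: a one-byte 'c' prefix plus two little-endian uint64 fields (parent and child), 1 + 8 + 8 = 17 bytes, as built by childrenKey further down in this diff. maxKeySize is the same value under a name usable for the reusable [maxKeySize]byte buffers that replace the former [17]byte arrays.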
@@ -185,7 +190,7 @@ func (t *boltForest) TreeMove(ctx context.Context, d CIDDescriptor, treeID strin
        if lm.Child == RootID {
            lm.Child = t.findSpareID(bTree)
        }
        return t.do(bLog, bTree, make([]byte, 17), &lm)
        return t.do(bLog, bTree, make([]byte, maxKeySize), &lm)
    }))
}
@@ -340,7 +345,7 @@ func (t *boltForest) TreeAddByPath(ctx context.Context, d CIDDescriptor, treeID
    }

    var lm []Move
    var key [17]byte
    var key [maxKeySize]byte

    fullID := bucketName(d.CID, treeID)
    err := t.db.Batch(func(tx *bbolt.Tx) error {
@@ -542,7 +547,7 @@ func (t *boltForest) getTreeBuckets(tx *bbolt.Tx, treeRoot []byte) (*bbolt.Bucke
// applyOperations applies log operations. Assumes lm are sorted by timestamp.
func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*Move, lm *Move) error {
    var tmp Move
    var cKey [17]byte
    var cKey [maxKeySize]byte

    c := logBucket.Cursor()
@@ -864,7 +869,7 @@ func (t *boltForest) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID
        b := treeRoot.Bucket(dataBucket)
        c := b.Cursor()
        for k, _ := c.Seek(key); len(k) == 17 && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() {
        for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() {
            children = append(children, binary.LittleEndian.Uint64(k[9:]))
        }
        return nil
@@ -1093,7 +1098,7 @@ func childrenKey(key []byte, child, parent Node) []byte {
    key[0] = 'c'
    binary.LittleEndian.PutUint64(key[1:], parent)
    binary.LittleEndian.PutUint64(key[9:], child)
    return key[:17]
    return key[:childrenKeySize]
}

// 'i' + attribute name (string) + attribute value (string) + parent (id) + node (id) -> 0/1.
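
For reference, the children keys scanned by TreeGetChildren follow the layout 'c' | parent (8 bytes, little-endian) | child (8 bytes, little-endian). A hypothetical helper (not part of this change, assumed to sit next to childrenKey in the pilorama package and to reuse its encoding/binary import) that inverts childrenKey would look like this:

// parseChildrenKey is a hypothetical inverse of childrenKey, shown only to make
// the key layout explicit: 'c' | parent (8 bytes, LE) | child (8 bytes, LE).
func parseChildrenKey(k []byte) (parent, child Node, ok bool) {
    if len(k) != childrenKeySize || k[0] != 'c' {
        return 0, 0, false
    }
    parent = Node(binary.LittleEndian.Uint64(k[1:9]))
    child = Node(binary.LittleEndian.Uint64(k[9:17]))
    return parent, child, true
}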