pilorama: optimize create op #447
3 changed files with 125 additions and 8 deletions
|
@ -1,6 +1,7 @@
|
||||||
package pilorama
|
package pilorama
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/binary"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -49,10 +50,66 @@ func (b *batch) run() {
|
||||||
sort.Slice(b.operations, func(i, j int) bool {
|
sort.Slice(b.operations, func(i, j int) bool {
|
||||||
return b.operations[i].Time < b.operations[j].Time
|
return b.operations[i].Time < b.operations[j].Time
|
||||||
})
|
})
|
||||||
|
|
||||||
b.operations = removeDuplicatesInPlace(b.operations)
|
b.operations = removeDuplicatesInPlace(b.operations)
|
||||||
var lm Move
|
|
||||||
return b.forest.applyOperation(bLog, bTree, b.operations, &lm)
|
// Our main use-case is addition of new items. In this case,
|
||||||
|
// we do not need to perform undo()/redo(), just do().
|
||||||
|
// https://github.com/trvedata/move-op/blob/6c23447c12a7862ff31b7fc2205f6c90fbdb9dc0/proof/Move_Create.thy#L259
|
||||||
|
//
|
||||||
|
// For this optimization to work we need to ensure three things:
|
||||||
|
// 1. The node itself is not yet in tree.
|
||||||
|
// 2. The node is not a parent. This case is not mentioned in the article, because
|
||||||
|
// they consider a "static order" (perform all CREATE operations before MOVE).
|
||||||
|
// We need this because if node _is_ a parent, we could violate (3) for some late operation.
|
||||||
|
// See TestForest_ApplySameOperation for details.
|
||||||
|
// 3. Parent of each operation is already in tree.
|
||||||
|
var parents map[uint64]struct{}
|
||||||
|
var cKey [maxKeySize]byte
|
||||||
|
var slow bool
|
||||||
|
for i := range b.operations {
|
||||||
|
_, _, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], b.operations[i].Child))
|
||||||
|
if inTree {
|
||||||
|
slow = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
key := childrenKey(cKey[:], b.operations[i].Child, 0)
|
||||||
|
k, _ := bTree.Cursor().Seek(key)
|
||||||
|
if len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == b.operations[i].Child {
|
||||||
|
slow = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.operations[i].Parent == RootID {
|
||||||
|
continue
|
||||||
|
} else if parents == nil {
|
||||||
|
// Attaching key only to root is done frequently,
|
||||||
|
// no allocations are performed unless necessary.
|
||||||
|
parents = make(map[uint64]struct{})
|
||||||
|
} else if _, ok := parents[b.operations[i].Parent]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
p := b.operations[i].Parent
|
||||||
|
_, ts, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], p))
|
||||||
|
if !inTree || b.operations[0].Time < ts {
|
||||||
|
slow = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
parents[b.operations[i].Parent] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
if slow {
|
||||||
|
var lm Move
|
||||||
|
return b.forest.applyOperation(bLog, bTree, b.operations, &lm)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range b.operations {
|
||||||
|
if err := b.forest.do(bLog, bTree, cKey[:], b.operations[i]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
for i := range b.results {
|
for i := range b.results {
|
||||||
b.results[i] <- err
|
b.results[i] <- err
|
||||||
|
|
55
pkg/local_object_storage/pilorama/bench_test.go
Normal file
55
pkg/local_object_storage/pilorama/bench_test.go
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
package pilorama
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func getTimestamp(reorder int, ts Timestamp) Timestamp {
|
||||||
|
base := ts / Timestamp(reorder)
|
||||||
|
rem := ts % Timestamp(reorder)
|
||||||
|
return base*Timestamp(reorder) + Timestamp(reorder) - rem
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCreate(b *testing.B) {
|
||||||
|
// Use `os.TempDir` because we construct multiple times in the same test.
|
||||||
|
tmpDir, err := os.MkdirTemp(os.TempDir(), "*")
|
||||||
|
require.NoError(b, err)
|
||||||
|
|
||||||
|
f := NewBoltForest(
|
||||||
|
WithPath(filepath.Join(tmpDir, "test.db")),
|
||||||
|
WithMaxBatchSize(runtime.GOMAXPROCS(0)))
|
||||||
|
require.NoError(b, f.Open(false))
|
||||||
|
require.NoError(b, f.Init())
|
||||||
|
b.Cleanup(func() {
|
||||||
|
require.NoError(b, f.Close())
|
||||||
|
require.NoError(b, os.RemoveAll(tmpDir))
|
||||||
|
})
|
||||||
|
|
||||||
|
cid := cidtest.ID()
|
||||||
|
treeID := "tree"
|
||||||
|
ctx := context.Background()
|
||||||
|
var index atomic.Int32
|
||||||
|
index.Store(-1)
|
||||||
|
b.SetParallelism(2)
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
for pb.Next() {
|
||||||
|
i := index.Add(1)
|
||||||
|
op := &Move{
|
||||||
|
Meta: Meta{Time: getTimestamp(runtime.GOMAXPROCS(0)*2, Timestamp(i+1))},
|
||||||
|
Child: Node(i + 1),
|
||||||
|
Parent: RootID,
|
||||||
|
}
|
||||||
|
if err := f.TreeApply(ctx, cid, treeID, op, true); err != nil {
|
||||||
|
b.FailNow()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
|
@ -37,6 +37,11 @@ type boltForest struct {
|
||||||
cfg
|
cfg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
	// childrenKeySize is the length of a key produced by childrenKey:
	// a 1-byte 'c' prefix followed by two 8-byte little-endian node IDs.
	childrenKeySize = 1 + 8 + 8
	// maxKeySize is the size of the scratch buffers used for building keys;
	// it currently equals childrenKeySize.
	maxKeySize = childrenKeySize
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
dataBucket = []byte{0}
|
dataBucket = []byte{0}
|
||||||
logBucket = []byte{1}
|
logBucket = []byte{1}
|
||||||
|
@ -185,7 +190,7 @@ func (t *boltForest) TreeMove(ctx context.Context, d CIDDescriptor, treeID strin
|
||||||
if lm.Child == RootID {
|
if lm.Child == RootID {
|
||||||
lm.Child = t.findSpareID(bTree)
|
lm.Child = t.findSpareID(bTree)
|
||||||
}
|
}
|
||||||
return t.do(bLog, bTree, make([]byte, 17), &lm)
|
return t.do(bLog, bTree, make([]byte, maxKeySize), &lm)
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -340,7 +345,7 @@ func (t *boltForest) TreeAddByPath(ctx context.Context, d CIDDescriptor, treeID
|
||||||
}
|
}
|
||||||
|
|
||||||
var lm []Move
|
var lm []Move
|
||||||
var key [17]byte
|
var key [maxKeySize]byte
|
||||||
|
|
||||||
fullID := bucketName(d.CID, treeID)
|
fullID := bucketName(d.CID, treeID)
|
||||||
err := t.db.Batch(func(tx *bbolt.Tx) error {
|
err := t.db.Batch(func(tx *bbolt.Tx) error {
|
||||||
|
@ -542,7 +547,7 @@ func (t *boltForest) getTreeBuckets(tx *bbolt.Tx, treeRoot []byte) (*bbolt.Bucke
|
||||||
// applyOperation applies log operations. Assumes ms are sorted by timestamp.
|
// applyOperation applies log operations. Assumes ms are sorted by timestamp.
|
||||||
func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*Move, lm *Move) error {
|
func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*Move, lm *Move) error {
|
||||||
var tmp Move
|
var tmp Move
|
||||||
var cKey [17]byte
|
var cKey [maxKeySize]byte
|
||||||
|
|
||||||
c := logBucket.Cursor()
|
c := logBucket.Cursor()
|
||||||
|
|
||||||
|
@ -864,7 +869,7 @@ func (t *boltForest) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID
|
||||||
|
|
||||||
b := treeRoot.Bucket(dataBucket)
|
b := treeRoot.Bucket(dataBucket)
|
||||||
c := b.Cursor()
|
c := b.Cursor()
|
||||||
for k, _ := c.Seek(key); len(k) == 17 && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() {
|
for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() {
|
||||||
children = append(children, binary.LittleEndian.Uint64(k[9:]))
|
children = append(children, binary.LittleEndian.Uint64(k[9:]))
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -1093,7 +1098,7 @@ func childrenKey(key []byte, child, parent Node) []byte {
|
||||||
key[0] = 'c'
|
key[0] = 'c'
|
||||||
binary.LittleEndian.PutUint64(key[1:], parent)
|
binary.LittleEndian.PutUint64(key[1:], parent)
|
||||||
binary.LittleEndian.PutUint64(key[9:], child)
|
binary.LittleEndian.PutUint64(key[9:], child)
|
||||||
return key[:17]
|
return key[:childrenKeySize]
|
||||||
}
|
}
|
||||||
|
|
||||||
// 'i' + attribute name (string) + attribute value (string) + parent (id) + node (id) -> 0/1.
|
// 'i' + attribute name (string) + attribute value (string) + parent (id) + node (id) -> 0/1.
|
||||||
|
|
Loading…
Reference in a new issue