[#9999] tree: Use buckets instead of prefixes

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-11-01 16:39:11 +03:00
parent d19ab43500
commit 15e642f0ea
Signed by: dstepanov-yadro
GPG key ID: 237AF1A763293BC0
2 changed files with 157 additions and 118 deletions

View file

@ -37,7 +37,7 @@ func (b *batch) trigger() {
func (b *batch) run() { func (b *batch) run() {
fullID := bucketName(b.cid, b.treeID) fullID := bucketName(b.cid, b.treeID)
err := b.forest.db.Update(func(tx *bbolt.Tx) error { err := b.forest.db.Update(func(tx *bbolt.Tx) error {
bLog, bTree, err := b.forest.getTreeBuckets(tx, fullID) buckets, err := b.forest.getTreeBuckets(tx, fullID)
if err != nil { if err != nil {
return err return err
} }
@ -68,15 +68,15 @@ func (b *batch) run() {
var cKey [maxKeySize]byte var cKey [maxKeySize]byte
var slow bool var slow bool
for i := range b.operations { for i := range b.operations {
_, _, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], b.operations[i].Child)) _, _, _, inTree := b.forest.getState(buckets.State, stateKey(cKey[:], b.operations[i].Child))
if inTree { if inTree {
slow = true slow = true
break break
} }
key := childrenKey(cKey[:], b.operations[i].Child, 0) key := childrenKey(cKey[:], b.operations[i].Child, 0)
k, _ := bTree.Cursor().Seek(key) k, _ := buckets.Child.Cursor().Seek(key)
if len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == b.operations[i].Child { if len(k) == childrenKeySize && binary.LittleEndian.Uint64(k) == b.operations[i].Child {
slow = true slow = true
break break
} }
@ -92,7 +92,7 @@ func (b *batch) run() {
} }
p := b.operations[i].Parent p := b.operations[i].Parent
_, ts, _, inTree := b.forest.getState(bTree, stateKey(cKey[:], p)) _, ts, _, inTree := b.forest.getState(buckets.State, stateKey(cKey[:], p))
if !inTree || b.operations[0].Time < ts { if !inTree || b.operations[0].Time < ts {
slow = true slow = true
break break
@ -102,11 +102,11 @@ func (b *batch) run() {
if slow { if slow {
var lm Move var lm Move
return b.forest.applyOperation(bLog, bTree, b.operations, &lm) return b.forest.applyOperation(buckets, b.operations, &lm)
} }
for i := range b.operations { for i := range b.operations {
if err := b.forest.do(bLog, bTree, cKey[:], b.operations[i]); err != nil { if err := b.forest.do(buckets, cKey[:], b.operations[i]); err != nil {
return err return err
} }
} }

View file

@ -41,7 +41,7 @@ type boltForest struct {
} }
// Sizes of the fixed-length keys used by the per-tree buckets.
const (
	// childrenKeySize is the length of a children-bucket key:
	// an 8-byte parent ID followed by an 8-byte child ID (see childrenKey).
	childrenKeySize = 8 + 8
	// maxKeySize is the size of the scratch buffers that are shared between
	// the fixed-length key builders; the children key is the longest of them.
	maxKeySize = childrenKeySize
)
@ -206,16 +206,16 @@ func (t *boltForest) TreeMove(ctx context.Context, d CIDDescriptor, treeID strin
lm := *m lm := *m
fullID := bucketName(d.CID, treeID) fullID := bucketName(d.CID, treeID)
return &lm, metaerr.Wrap(t.db.Batch(func(tx *bbolt.Tx) error { return &lm, metaerr.Wrap(t.db.Batch(func(tx *bbolt.Tx) error {
bLog, bTree, err := t.getTreeBuckets(tx, fullID) b, err := t.getTreeBuckets(tx, fullID)
if err != nil { if err != nil {
return err return err
} }
lm.Time = t.getLatestTimestamp(bLog, d.Position, d.Size) lm.Time = t.getLatestTimestamp(b, d.Position, d.Size)
if lm.Child == RootID { if lm.Child == RootID {
lm.Child = t.findSpareID(bTree) lm.Child = t.findSpareID(b)
} }
return t.do(bLog, bTree, make([]byte, maxKeySize), &lm) return t.do(b, make([]byte, maxKeySize), &lm)
})) }))
} }
@ -415,17 +415,17 @@ func (t *boltForest) addByPathInternal(d CIDDescriptor, attr string, treeID stri
fullID := bucketName(d.CID, treeID) fullID := bucketName(d.CID, treeID)
err := t.db.Batch(func(tx *bbolt.Tx) error { err := t.db.Batch(func(tx *bbolt.Tx) error {
bLog, bTree, err := t.getTreeBuckets(tx, fullID) b, err := t.getTreeBuckets(tx, fullID)
if err != nil { if err != nil {
return err return err
} }
i, node, err := t.getPathPrefix(bTree, attr, path) i, node, err := t.getPathPrefix(b, attr, path)
if err != nil { if err != nil {
return err return err
} }
ts := t.getLatestTimestamp(bLog, d.Position, d.Size) ts := t.getLatestTimestamp(b, d.Position, d.Size)
lm = make([]Move, len(path)-i+1) lm = make([]Move, len(path)-i+1)
for j := i; j < len(path); j++ { for j := i; j < len(path); j++ {
lm[j-i] = Move{ lm[j-i] = Move{
@ -434,10 +434,10 @@ func (t *boltForest) addByPathInternal(d CIDDescriptor, attr string, treeID stri
Time: ts, Time: ts,
Items: []KeyValue{{Key: attr, Value: []byte(path[j])}}, Items: []KeyValue{{Key: attr, Value: []byte(path[j])}},
}, },
Child: t.findSpareID(bTree), Child: t.findSpareID(b),
} }
err := t.do(bLog, bTree, key[:], &lm[j-i]) err := t.do(b, key[:], &lm[j-i])
if err != nil { if err != nil {
return err return err
} }
@ -452,19 +452,19 @@ func (t *boltForest) addByPathInternal(d CIDDescriptor, attr string, treeID stri
Time: ts, Time: ts,
Items: meta, Items: meta,
}, },
Child: t.findSpareID(bTree), Child: t.findSpareID(b),
} }
return t.do(bLog, bTree, key[:], &lm[len(lm)-1]) return t.do(b, key[:], &lm[len(lm)-1])
}) })
return lm, metaerr.Wrap(err) return lm, metaerr.Wrap(err)
} }
// getLatestTimestamp returns timestamp for a new operation which is guaranteed to be bigger than // getLatestTimestamp returns timestamp for a new operation which is guaranteed to be bigger than
// all timestamps corresponding to already stored operations. // all timestamps corresponding to already stored operations.
func (t *boltForest) getLatestTimestamp(bLog *bbolt.Bucket, pos, size int) uint64 { func (t *boltForest) getLatestTimestamp(b *treeBuckets, pos, size int) uint64 {
var ts uint64 var ts uint64
c := bLog.Cursor() c := b.Log.Cursor()
key, _ := c.Last() key, _ := c.Last()
if len(key) != 0 { if len(key) != 0 {
ts = binary.BigEndian.Uint64(key) ts = binary.BigEndian.Uint64(key)
@ -473,12 +473,12 @@ func (t *boltForest) getLatestTimestamp(bLog *bbolt.Bucket, pos, size int) uint6
} }
// findSpareID returns random unused ID. // findSpareID returns random unused ID.
func (t *boltForest) findSpareID(bTree *bbolt.Bucket) uint64 { func (t *boltForest) findSpareID(b *treeBuckets) uint64 {
id := uint64(rand.Int63()) id := uint64(rand.Int63())
key := make([]byte, 9) key := make([]byte, 8)
for { for {
_, _, _, ok := t.getState(bTree, stateKey(key, id)) _, _, _, ok := t.getState(b.State, stateKey(key, id))
if !ok { if !ok {
return id return id
} }
@ -540,13 +540,13 @@ func (t *boltForest) TreeApply(ctx context.Context, cnr cidSDK.ID, treeID string
if t.db.MaxBatchSize == 1 { if t.db.MaxBatchSize == 1 {
fullID := bucketName(cnr, treeID) fullID := bucketName(cnr, treeID)
err := metaerr.Wrap(t.db.Update(func(tx *bbolt.Tx) error { err := metaerr.Wrap(t.db.Update(func(tx *bbolt.Tx) error {
bLog, bTree, err := t.getTreeBuckets(tx, fullID) b, err := t.getTreeBuckets(tx, fullID)
if err != nil { if err != nil {
return err return err
} }
var lm Move var lm Move
return t.applyOperation(bLog, bTree, []*Move{m}, &lm) return t.applyOperation(b, []*Move{m}, &lm)
})) }))
success = err == nil success = err == nil
return err return err
@ -588,7 +588,7 @@ func (t *boltForest) TreeApplyStream(ctx context.Context, cnr cidSDK.ID, treeID
fullID := bucketName(cnr, treeID) fullID := bucketName(cnr, treeID)
err := metaerr.Wrap(t.db.Update(func(tx *bbolt.Tx) error { err := metaerr.Wrap(t.db.Update(func(tx *bbolt.Tx) error {
bLog, bTree, err := t.getTreeBuckets(tx, fullID) b, err := t.getTreeBuckets(tx, fullID)
if err != nil { if err != nil {
return err return err
} }
@ -601,7 +601,7 @@ func (t *boltForest) TreeApplyStream(ctx context.Context, cnr cidSDK.ID, treeID
return nil return nil
} }
var lm Move var lm Move
if e := t.applyOperation(bLog, bTree, []*Move{m}, &lm); e != nil { if e := t.applyOperation(b, []*Move{m}, &lm); e != nil {
return e return e
} }
} }
@ -656,33 +656,70 @@ func (t *boltForest) addBatch(cnr cidSDK.ID, treeID string, m *Move, ch chan err
t.mtx.Unlock() t.mtx.Unlock()
} }
func (t *boltForest) getTreeBuckets(tx *bbolt.Tx, treeRoot []byte) (*bbolt.Bucket, *bbolt.Bucket, error) { type treeBuckets struct {
Log *bbolt.Bucket
Internal *bbolt.Bucket
State *bbolt.Bucket
Old *bbolt.Bucket
Child *bbolt.Bucket
}
func (t *boltForest) getTreeBuckets(tx *bbolt.Tx, treeRoot []byte) (*treeBuckets, error) {
child := tx.Bucket(treeRoot) child := tx.Bucket(treeRoot)
if child != nil { if child != nil {
return child.Bucket(logBucket), child.Bucket(dataBucket), nil db := child.Bucket(dataBucket)
return &treeBuckets{
Log: child.Bucket(logBucket),
Internal: db.Bucket(internalBucket),
State: db.Bucket(stateBucket),
Old: db.Bucket(oldBucket),
Child: db.Bucket(childBucket),
}, nil
} }
child, err := tx.CreateBucket(treeRoot) child, err := tx.CreateBucket(treeRoot)
if err != nil { if err != nil {
return nil, nil, err return nil, err
} }
bLog, err := child.CreateBucket(logBucket) bLog, err := child.CreateBucket(logBucket)
if err != nil { if err != nil {
return nil, nil, err return nil, err
} }
bData, err := child.CreateBucket(dataBucket) bData, err := child.CreateBucket(dataBucket)
if err != nil { if err != nil {
return nil, nil, err return nil, err
} }
return bLog, bData, nil ib, err := bData.CreateBucket(internalBucket)
if err != nil {
return nil, err
}
sb, err := bData.CreateBucket(stateBucket)
if err != nil {
return nil, err
}
ob, err := bData.CreateBucket(oldBucket)
if err != nil {
return nil, err
}
cb, err := bData.CreateBucket(childBucket)
if err != nil {
return nil, err
}
return &treeBuckets{
Log: bLog,
Internal: ib,
State: sb,
Old: ob,
Child: cb,
}, nil
} }
// applyOperation applies log operations. Assumes ms are sorted by timestamp.
func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*Move, lm *Move) error { func (t *boltForest) applyOperation(tb *treeBuckets, ms []*Move, lm *Move) error {
var tmp Move var tmp Move
var cKey [maxKeySize]byte var cKey [maxKeySize]byte
c := logBucket.Cursor() c := tb.Log.Cursor()
key, value := c.Last() key, value := c.Last()
@ -699,7 +736,7 @@ func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*M
if r.Err != nil { if r.Err != nil {
return r.Err return r.Err
} }
if err := t.undo(&tmp, treeBucket, cKey[:]); err != nil { if err := t.undo(&tmp, tb, cKey[:]); err != nil {
return err return err
} }
key, value = c.Prev() key, value = c.Prev()
@ -710,7 +747,7 @@ func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*M
// 2. Insert the operation. // 2. Insert the operation.
*lm = *ms[i] *lm = *ms[i]
if err := t.do(logBucket, treeBucket, cKey[:], lm); err != nil { if err := t.do(tb, cKey[:], lm); err != nil {
return err return err
} }
@ -724,7 +761,7 @@ func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*M
if err := t.logFromBytes(&tmp, value); err != nil { if err := t.logFromBytes(&tmp, value); err != nil {
return err return err
} }
if err := t.redo(treeBucket, cKey[:], &tmp, value[16:]); err != nil { if err := t.redo(tb, cKey[:], &tmp, value[16:]); err != nil {
return err return err
} }
key, value = c.Next() key, value = c.Next()
@ -734,25 +771,24 @@ func (t *boltForest) applyOperation(logBucket, treeBucket *bbolt.Bucket, ms []*M
return nil return nil
} }
func (t *boltForest) do(lb *bbolt.Bucket, b *bbolt.Bucket, key []byte, op *Move) error { func (t *boltForest) do(b *treeBuckets, key []byte, op *Move) error {
binary.BigEndian.PutUint64(key, op.Time) binary.BigEndian.PutUint64(key, op.Time)
rawLog := t.logToBytes(op) rawLog := t.logToBytes(op)
if err := lb.Put(key[:8], rawLog); err != nil { if err := b.Log.Put(key[:8], rawLog); err != nil {
return err return err
} }
return t.redo(b, key, op, rawLog[16:]) return t.redo(b, key, op, rawLog[16:])
} }
func (t *boltForest) redo(b *bbolt.Bucket, key []byte, op *Move, rawMeta []byte) error { func (t *boltForest) redo(b *treeBuckets, key []byte, op *Move, rawMeta []byte) error {
var err error var err error
parent, ts, currMeta, inTree := t.getState(b.State, stateKey(key, op.Child))
parent, ts, currMeta, inTree := t.getState(b, stateKey(key, op.Child))
if inTree { if inTree {
err = t.putState(b, oldKey(key, op.Time), parent, ts, currMeta) err = t.putState(b.Old, oldKey(key, op.Time), parent, ts, currMeta)
} else { } else {
ts = op.Time ts = op.Time
err = b.Delete(oldKey(key, op.Time)) err = b.Old.Delete(oldKey(key, op.Time))
} }
if err != nil || op.Child == op.Parent || t.isAncestor(b, op.Child, op.Parent) { if err != nil || op.Child == op.Parent || t.isAncestor(b, op.Child, op.Parent) {
@ -760,7 +796,7 @@ func (t *boltForest) redo(b *bbolt.Bucket, key []byte, op *Move, rawMeta []byte)
} }
if inTree { if inTree {
if err := b.Delete(childrenKey(key, op.Child, parent)); err != nil { if err := b.Child.Delete(childrenKey(key, op.Child, parent)); err != nil {
return err return err
} }
@ -771,7 +807,7 @@ func (t *boltForest) redo(b *bbolt.Bucket, key []byte, op *Move, rawMeta []byte)
for i := range meta.Items { for i := range meta.Items {
if isAttributeInternal(meta.Items[i].Key) { if isAttributeInternal(meta.Items[i].Key) {
key = internalKey(key, meta.Items[i].Key, string(meta.Items[i].Value), parent, op.Child) key = internalKey(key, meta.Items[i].Key, string(meta.Items[i].Value), parent, op.Child)
err := b.Delete(key) err := b.Internal.Delete(key)
if err != nil { if err != nil {
return err return err
} }
@ -782,31 +818,31 @@ func (t *boltForest) redo(b *bbolt.Bucket, key []byte, op *Move, rawMeta []byte)
} }
// removeNode removes node keys from the tree except the children key or its parent. // removeNode removes node keys from the tree except the children key or its parent.
func (t *boltForest) removeNode(b *bbolt.Bucket, key []byte, node, parent Node) error { func (t *boltForest) removeNode(b *treeBuckets, key []byte, node, parent Node) error {
k := stateKey(key, node) k := stateKey(key, node)
_, _, rawMeta, _ := t.getState(b, k) _, _, rawMeta, _ := t.getState(b.State, k)
var meta Meta var meta Meta
if err := meta.FromBytes(rawMeta); err == nil { if err := meta.FromBytes(rawMeta); err == nil {
for i := range meta.Items { for i := range meta.Items {
if isAttributeInternal(meta.Items[i].Key) { if isAttributeInternal(meta.Items[i].Key) {
err := b.Delete(internalKey(nil, meta.Items[i].Key, string(meta.Items[i].Value), parent, node)) err := b.Internal.Delete(internalKey(nil, meta.Items[i].Key, string(meta.Items[i].Value), parent, node))
if err != nil { if err != nil {
return err return err
} }
} }
} }
} }
return b.Delete(k) return b.State.Delete(k)
} }
// addNode adds node keys to the tree except the timestamp key. // addNode adds node keys to the tree except the timestamp key.
func (t *boltForest) addNode(b *bbolt.Bucket, key []byte, child, parent Node, time Timestamp, meta Meta, rawMeta []byte) error { func (t *boltForest) addNode(b *treeBuckets, key []byte, child, parent Node, time Timestamp, meta Meta, rawMeta []byte) error {
if err := t.putState(b, stateKey(key, child), parent, time, rawMeta); err != nil { if err := t.putState(b.State, stateKey(key, child), parent, time, rawMeta); err != nil {
return err return err
} }
err := b.Put(childrenKey(key, child, parent), []byte{1}) err := b.Child.Put(childrenKey(key, child, parent), []byte{1})
if err != nil { if err != nil {
return err return err
} }
@ -818,9 +854,9 @@ func (t *boltForest) addNode(b *bbolt.Bucket, key []byte, child, parent Node, ti
key = internalKey(key, meta.Items[i].Key, string(meta.Items[i].Value), parent, child) key = internalKey(key, meta.Items[i].Key, string(meta.Items[i].Value), parent, child)
if len(meta.Items) == 1 { if len(meta.Items) == 1 {
err = b.Put(key, []byte{1}) err = b.Internal.Put(key, []byte{1})
} else { } else {
err = b.Put(key, []byte{0}) err = b.Internal.Put(key, []byte{0})
} }
if err != nil { if err != nil {
return err return err
@ -829,12 +865,12 @@ func (t *boltForest) addNode(b *bbolt.Bucket, key []byte, child, parent Node, ti
return nil return nil
} }
func (t *boltForest) undo(m *Move, b *bbolt.Bucket, key []byte) error { func (t *boltForest) undo(m *Move, b *treeBuckets, key []byte) error {
if err := b.Delete(childrenKey(key, m.Child, m.Parent)); err != nil { if err := b.Child.Delete(childrenKey(key, m.Child, m.Parent)); err != nil {
return err return err
} }
parent, ts, rawMeta, ok := t.getState(b, oldKey(key, m.Time)) parent, ts, rawMeta, ok := t.getState(b.Old, oldKey(key, m.Time))
if !ok { if !ok {
return t.removeNode(b, key, m.Child, m.Parent) return t.removeNode(b, key, m.Child, m.Parent)
} }
@ -846,12 +882,11 @@ func (t *boltForest) undo(m *Move, b *bbolt.Bucket, key []byte) error {
return t.addNode(b, key, m.Child, parent, ts, meta, rawMeta) return t.addNode(b, key, m.Child, parent, ts, meta, rawMeta)
} }
func (t *boltForest) isAncestor(b *bbolt.Bucket, parent, child Node) bool { func (t *boltForest) isAncestor(b *treeBuckets, parent, child Node) bool {
key := make([]byte, 9) key := make([]byte, 8)
key[0] = 's'
for node := child; node != parent; { for node := child; node != parent; {
binary.LittleEndian.PutUint64(key[1:], node) binary.LittleEndian.PutUint64(key, node)
parent, _, _, ok := t.getState(b, key) parent, _, _, ok := t.getState(b.State, key)
if !ok { if !ok {
return false return false
} }
@ -905,7 +940,10 @@ func (t *boltForest) TreeGetByPath(ctx context.Context, cid cidSDK.ID, treeID st
return ErrTreeNotFound return ErrTreeNotFound
} }
b := treeRoot.Bucket(dataBucket) b, err := t.getTreeBuckets(tx, bucketName(cid, treeID))
if err != nil {
return err
}
i, curNodes, err := t.getPathPrefixMultiTraversal(b, attr, path[:len(path)-1]) i, curNodes, err := t.getPathPrefixMultiTraversal(b, attr, path[:len(path)-1])
if err != nil { if err != nil {
@ -916,8 +954,7 @@ func (t *boltForest) TreeGetByPath(ctx context.Context, cid cidSDK.ID, treeID st
} }
var maxTimestamp uint64 var maxTimestamp uint64
c := b.Internal.Cursor()
c := b.Cursor()
for i := range curNodes { for i := range curNodes {
attrKey := internalKey(nil, attr, path[len(path)-1], curNodes[i], 0) attrKey := internalKey(nil, attr, path[len(path)-1], curNodes[i], 0)
@ -926,7 +963,7 @@ func (t *boltForest) TreeGetByPath(ctx context.Context, cid cidSDK.ID, treeID st
for len(childKey) == len(attrKey)+8 && bytes.Equal(attrKey, childKey[:len(childKey)-8]) { for len(childKey) == len(attrKey)+8 && bytes.Equal(attrKey, childKey[:len(childKey)-8]) {
child := binary.LittleEndian.Uint64(childKey[len(childKey)-8:]) child := binary.LittleEndian.Uint64(childKey[len(childKey)-8:])
if latest { if latest {
_, ts, _, _ := t.getState(b, stateKey(make([]byte, 9), child)) _, ts, _, _ := t.getState(b.State, stateKey(make([]byte, 8), child))
if ts >= maxTimestamp { if ts >= maxTimestamp {
nodes = append(nodes[:0], child) nodes = append(nodes[:0], child)
maxTimestamp = ts maxTimestamp = ts
@ -969,7 +1006,7 @@ func (t *boltForest) TreeGetMeta(ctx context.Context, cid cidSDK.ID, treeID stri
return Meta{}, 0, ErrDegradedMode return Meta{}, 0, ErrDegradedMode
} }
key := stateKey(make([]byte, 9), nodeID) key := stateKey(make([]byte, 8), nodeID)
var m Meta var m Meta
var parentID uint64 var parentID uint64
@ -981,26 +1018,25 @@ func (t *boltForest) TreeGetMeta(ctx context.Context, cid cidSDK.ID, treeID stri
} }
b := treeRoot.Bucket(dataBucket) b := treeRoot.Bucket(dataBucket)
if data := b.Get(key); len(data) != 0 { sb := b.Bucket(stateBucket)
if data := sb.Get(key); len(data) != 0 {
parentID = binary.LittleEndian.Uint64(data) parentID = binary.LittleEndian.Uint64(data)
} }
_, _, meta, _ := t.getState(b, stateKey(key, nodeID)) _, _, meta, _ := t.getState(sb, stateKey(key, nodeID))
return m.FromBytes(meta) return m.FromBytes(meta)
}) })
success = err == nil success = err == nil
return m, parentID, metaerr.Wrap(err) return m, parentID, metaerr.Wrap(err)
} }
func (t *boltForest) hasFewChildren(b *bbolt.Bucket, nodeIDs MultiNode, threshold int) bool { func (t *boltForest) hasFewChildren(b *treeBuckets, nodeIDs MultiNode, threshold int) bool {
key := make([]byte, 9) key := make([]byte, 8)
key[0] = 'c'
count := 0 count := 0
for _, nodeID := range nodeIDs { for _, nodeID := range nodeIDs {
binary.LittleEndian.PutUint64(key[1:], nodeID) binary.LittleEndian.PutUint64(key, nodeID)
c := b.Cursor() c := b.Child.Cursor()
for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() { for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k) == nodeID; k, _ = c.Next() {
if count++; count > threshold { if count++; count > threshold {
return false return false
} }
@ -1038,7 +1074,7 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
} }
h := newHeap(last, count) h := newHeap(last, count)
key := make([]byte, 9) key := make([]byte, 8)
var result []NodeInfo var result []NodeInfo
var fewChildren bool var fewChildren bool
@ -1048,9 +1084,10 @@ func (t *boltForest) TreeSortedByFilename(ctx context.Context, cid cidSDK.ID, tr
if treeRoot == nil { if treeRoot == nil {
return ErrTreeNotFound return ErrTreeNotFound
} }
b, err := t.getTreeBuckets(tx, bucketName(cid, treeID))
b := treeRoot.Bucket(dataBucket) if err != nil {
return err
}
// If the node is a leaf, we could scan all filenames in the tree. // If the node is a leaf, we could scan all filenames in the tree.
// To prevent this we first count the number of children: if it is less than // To prevent this we first count the number of children: if it is less than
// the number of nodes we need to return, fallback to TreeGetChildren() implementation. // the number of nodes we need to return, fallback to TreeGetChildren() implementation.
@ -1109,9 +1146,9 @@ func sortAndCut(result []NodeInfo, last *string) []NodeInfo {
return nil return nil
} }
func (t *boltForest) getChildInfo(b *bbolt.Bucket, key []byte, childID Node) (NodeInfo, error) { func (t *boltForest) getChildInfo(b *treeBuckets, key []byte, childID Node) (NodeInfo, error) {
childInfo := NodeInfo{ID: childID} childInfo := NodeInfo{ID: childID}
parentID, _, metaBytes, found := t.getState(b, stateKey(key, childID)) parentID, _, metaBytes, found := t.getState(b.State, stateKey(key, childID))
if found { if found {
childInfo.ParentID = parentID childInfo.ParentID = parentID
if err := childInfo.Meta.FromBytes(metaBytes); err != nil { if err := childInfo.Meta.FromBytes(metaBytes); err != nil {
@ -1121,8 +1158,8 @@ func (t *boltForest) getChildInfo(b *bbolt.Bucket, key []byte, childID Node) (No
return childInfo, nil return childInfo, nil
} }
func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *fixedHeap) { func (t *boltForest) fillSortedChildren(b *treeBuckets, nodeIDs MultiNode, h *fixedHeap) {
c := b.Cursor() c := b.Internal.Cursor()
prefix := internalKeyPrefix(nil, AttributeFilename) prefix := internalKeyPrefix(nil, AttributeFilename)
length := uint16(0) length := uint16(0)
@ -1130,7 +1167,7 @@ func (t *boltForest) fillSortedChildren(b *bbolt.Bucket, nodeIDs MultiNode, h *f
var nodes []uint64 var nodes []uint64
var lastFilename *string var lastFilename *string
for k, _ := c.Seek(prefix); len(k) > 0 && k[0] == 'i'; k, _ = c.Next() { for k, _ := c.Seek(prefix); len(k) > 0; k, _ = c.Next() {
if len(k) < len(prefix)+2+16 { if len(k) < len(prefix)+2+16 {
continue continue
} }
@ -1207,10 +1244,10 @@ func (t *boltForest) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID
if treeRoot == nil { if treeRoot == nil {
return ErrTreeNotFound return ErrTreeNotFound
} }
b, err := t.getTreeBuckets(tx, bucketName(cid, treeID))
b := treeRoot.Bucket(dataBucket) if err != nil {
return err
var err error }
result, err = t.getChildren(b, []Node{nodeID}) result, err = t.getChildren(b, []Node{nodeID})
return err return err
}) })
@ -1218,17 +1255,16 @@ func (t *boltForest) TreeGetChildren(ctx context.Context, cid cidSDK.ID, treeID
return result, metaerr.Wrap(err) return result, metaerr.Wrap(err)
} }
func (t *boltForest) getChildren(b *bbolt.Bucket, nodeIDs MultiNode) ([]NodeInfo, error) { func (t *boltForest) getChildren(b *treeBuckets, nodeIDs MultiNode) ([]NodeInfo, error) {
var result []NodeInfo var result []NodeInfo
key := make([]byte, 9) key := make([]byte, 8)
for _, nodeID := range nodeIDs { for _, nodeID := range nodeIDs {
key[0] = 'c' binary.LittleEndian.PutUint64(key, nodeID)
binary.LittleEndian.PutUint64(key[1:], nodeID)
c := b.Cursor() c := b.Child.Cursor()
for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k[1:]) == nodeID; k, _ = c.Next() { for k, _ := c.Seek(key); len(k) == childrenKeySize && binary.LittleEndian.Uint64(k) == nodeID; k, _ = c.Next() {
childID := binary.LittleEndian.Uint64(k[9:]) childID := binary.LittleEndian.Uint64(k[8:])
childInfo, err := t.getChildInfo(b, key, childID) childInfo, err := t.getChildInfo(b, key, childID)
if err != nil { if err != nil {
return nil, err return nil, err
@ -1448,8 +1484,8 @@ func (t *boltForest) TreeListTrees(ctx context.Context, prm TreeListTreesPrm) (*
return &res, nil return &res, nil
} }
func (t *boltForest) getPathPrefixMultiTraversal(bTree *bbolt.Bucket, attr string, path []string) (int, []Node, error) { func (t *boltForest) getPathPrefixMultiTraversal(buckets *treeBuckets, attr string, path []string) (int, []Node, error) {
c := bTree.Cursor() c := buckets.Internal.Cursor()
var curNodes []Node var curNodes []Node
nextNodes := []Node{RootID} nextNodes := []Node{RootID}
@ -1478,8 +1514,8 @@ func (t *boltForest) getPathPrefixMultiTraversal(bTree *bbolt.Bucket, attr strin
return len(path), nextNodes, nil return len(path), nextNodes, nil
} }
func (t *boltForest) getPathPrefix(bTree *bbolt.Bucket, attr string, path []string) (int, Node, error) { func (t *boltForest) getPathPrefix(b *treeBuckets, attr string, path []string) (int, Node, error) {
c := bTree.Cursor() c := b.Internal.Cursor()
var curNode Node var curNode Node
var attrKey []byte var attrKey []byte
@ -1540,18 +1576,16 @@ func bucketName(cid cidSDK.ID, treeID string) []byte {
return treeRoot return treeRoot
} }
// 'o' + time -> old meta. // time -> old meta.
func oldKey(key []byte, ts Timestamp) []byte { func oldKey(key []byte, ts Timestamp) []byte {
key[0] = 'o' binary.LittleEndian.PutUint64(key, ts)
binary.LittleEndian.PutUint64(key[1:], ts) return key[:8]
return key[:9]
} }
// 's' + child ID -> parent + timestamp of the first appearance + meta. // child ID -> parent + timestamp of the first appearance + meta.
func stateKey(key []byte, child Node) []byte { func stateKey(key []byte, child Node) []byte {
key[0] = 's' binary.LittleEndian.PutUint64(key, child)
binary.LittleEndian.PutUint64(key[1:], child) return key[:8]
return key[:9]
} }
func (t *boltForest) putState(b *bbolt.Bucket, key []byte, parent Node, timestamp Timestamp, meta []byte) error { func (t *boltForest) putState(b *bbolt.Bucket, key []byte, parent Node, timestamp Timestamp, meta []byte) error {
@ -1573,17 +1607,15 @@ func (t *boltForest) getState(b *bbolt.Bucket, key []byte) (Node, Timestamp, []b
return parent, timestamp, data[16:], true return parent, timestamp, data[16:], true
} }
// 'c' + parent (id) + child (id) -> 0/1. // parent (id) + child (id) -> 0/1.
func childrenKey(key []byte, child, parent Node) []byte { func childrenKey(key []byte, child, parent Node) []byte {
key[0] = 'c' binary.LittleEndian.PutUint64(key[0:], parent)
binary.LittleEndian.PutUint64(key[1:], parent) binary.LittleEndian.PutUint64(key[8:], child)
binary.LittleEndian.PutUint64(key[9:], child)
return key[:childrenKeySize] return key[:childrenKeySize]
} }
func internalKeyPrefix(key []byte, k string) []byte { func internalKeyPrefix(key []byte, k string) []byte {
key = key[:0] key = key[:0]
key = append(key, 'i')
l := len(k) l := len(k)
key = append(key, byte(l), byte(l>>8)) key = append(key, byte(l), byte(l>>8))
@ -1591,9 +1623,9 @@ func internalKeyPrefix(key []byte, k string) []byte {
return key return key
} }
// 'i' + attribute name (string) + attribute value (string) + parent (id) + node (id) -> 0/1. // attribute name (string) + attribute value (string) + parent (id) + node (id) -> 0/1.
func internalKey(key []byte, k, v string, parent, node Node) []byte { func internalKey(key []byte, k, v string, parent, node Node) []byte {
size := 1 /* prefix */ + 2*2 /* len */ + 2*8 /* nodes */ + len(k) + len(v) size := 2*2 /* len */ + 2*8 /* nodes */ + len(k) + len(v)
if cap(key) < size { if cap(key) < size {
key = make([]byte, 0, size) key = make([]byte, 0, size)
} }
@ -1612,3 +1644,10 @@ func internalKey(key []byte, k, v string, parent, node Node) []byte {
key = append(key, raw[:]...) key = append(key, raw[:]...)
return key return key
} }
// Names of the buckets nested inside a tree's data bucket.
var (
	// internalBucket names the bucket with the attribute index entries.
	internalBucket = []byte("i")
	// stateBucket names the bucket with the current node records.
	stateBucket = []byte("s")
	// oldBucket names the bucket with the node records saved before an operation.
	oldBucket = []byte("o")
	// childBucket names the bucket with the parent/child pairs.
	childBucket = []byte("c")
)