[#1482] metabase: Encode database keys in binary

Signed-off-by: Evgenii Stratonikov <evgeniy@morphbits.ru>
Evgenii Stratonikov 2022-09-08 14:54:21 +03:00 committed by fyrchik
parent d6fef68a62
commit ae1dab29bc
19 changed files with 392 additions and 276 deletions

View file

@ -84,6 +84,15 @@ This file describes changes between the metabase versions.
- Key: split ID
- Value: list of object IDs
# History
## Version 2
- Container ID is encoded as a 32-byte slice
- Object ID is encoded as a 32-byte slice
- Address is encoded as a 64-byte slice, container ID + object ID
- Bucket naming scheme is changed:
- container ID + suffix -> 1-byte prefix + container ID
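
For illustration, a minimal sketch of the new address encoding, assuming the `Encode` methods of the SDK ID types used throughout this diff (the helper name `addressKeyV2` is hypothetical):

package example

import (
	oid "github.com/nspcc-dev/neofs-sdk-go/object/id"
)

// addressKeyV2 lays out an address the way metabase version 2 stores it:
// a 64-byte slice holding the raw container ID followed by the raw object ID,
// replacing the base58 "CID/OID" strings of version 1.
func addressKeyV2(addr oid.Address) []byte {
	key := make([]byte, 64)
	addr.Container().Encode(key)   // bytes 0..31: container ID
	addr.Object().Encode(key[32:]) // bytes 32..63: object ID
	return key
}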
## Version 1

View file

@ -1,9 +1,7 @@
package meta
import (
"crypto/sha256"
"encoding/binary"
"strings"
cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
"go.etcd.io/bbolt"
@ -30,7 +28,7 @@ func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) {
err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error {
if parseContainerID(&cnr, name, unique) {
result = append(result, cnr)
unique[cnr.EncodeToString()] = struct{}{}
unique[string(name[1:bucketKeySize])] = struct{}{}
}
return nil
@ -55,24 +53,20 @@ func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) {
return 0, err
}
key := make([]byte, sha256.Size)
key := make([]byte, cidSize)
id.Encode(key)
return parseContainerSize(containerVolume.Get(key)), nil
}
func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
strContainerID := strings.Split(string(name), invalidBase58String)[0]
if _, ok := ignore[strContainerID]; ok {
if len(name) != bucketKeySize {
return false
}
if err := dst.DecodeString(strContainerID); err != nil {
if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
return false
}
return true
return dst.Decode(name[1:bucketKeySize]) == nil
}
func parseContainerSize(v []byte) uint64 {
@ -89,7 +83,7 @@ func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) e
return err
}
key := make([]byte, sha256.Size)
key := make([]byte, cidSize)
id.Encode(key)
size := parseContainerSize(containerVolume.Get(key))

View file

@ -125,6 +125,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
graveyardBKT := tx.Bucket(graveyardBucketName)
garbageBKT := tx.Bucket(garbageBucketName)
key := make([]byte, addressKeySize)
err = iteratePhyObjects(tx, func(cnr cid.ID, obj oid.ID) error {
phyCounter++
@ -134,7 +135,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
// check if an object is available: not with GCMark
// and not covered with a tombstone
if inGraveyardWithKey(addressKey(addr), graveyardBKT, garbageBKT) == 0 {
if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 {
logicCounter++
}

View file

@ -11,6 +11,7 @@ import (
"sync"
"time"
"github.com/mr-tron/base58"
v2object "github.com/nspcc-dev/neofs-api-go/v2/object"
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard/mode"
"github.com/nspcc-dev/neofs-node/pkg/util/logger"
@ -111,6 +112,8 @@ func stringifyValue(key string, objVal []byte) string {
switch key {
default:
return string(objVal)
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
return base58.Encode(objVal)
case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
return hex.EncodeToString(objVal)
case v2object.FilterHeaderCreationEpoch, v2object.FilterHeaderPayloadLength:
@ -140,6 +143,9 @@ func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) {
switch key {
default:
return []byte(value), false, true
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
v, err := base58.Decode(value)
return v, false, err == nil
case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
v, err := hex.DecodeString(value)
if err != nil {
@ -254,6 +260,12 @@ func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []
// in boltDB. Useful for getting filter values from unique and list indexes.
func bucketKeyHelper(hdr string, val string) []byte {
switch hdr {
case v2object.FilterHeaderParent:
v, err := base58.Decode(val)
if err != nil {
return nil
}
return v
case v2object.FilterHeaderPayloadHash:
v, err := hex.DecodeString(val)
if err != nil {

View file

@ -139,7 +139,8 @@ func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (uint64, uint64, er
// object was available before the removal (for calculating the logical object
// counter).
func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, error) {
addrKey := addressKey(addr)
key := make([]byte, addressKeySize)
addrKey := addressKey(addr, key)
garbageBKT := tx.Bucket(garbageBucketName)
graveyardBKT := tx.Bucket(graveyardBucketName)
@ -154,7 +155,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
}
// unmarshal object, work only with physically stored (raw == true) objects
obj, err := db.get(tx, addr, false, true, currEpoch)
obj, err := db.get(tx, addr, key, false, true, currEpoch)
if err != nil {
if errors.As(err, new(apistatus.ObjectNotFound)) {
return false, false, nil
@ -166,9 +167,10 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
// if the object is the only link to its parent, then remove the parent
if parent := obj.Parent(); parent != nil {
parAddr := object.AddressOf(parent)
sParAddr := parAddr.EncodeToString()
sParAddr := addressKey(parAddr, key)
k := string(sParAddr)
nRef, ok := refCounter[sParAddr]
nRef, ok := refCounter[k]
if !ok {
nRef = &referenceNumber{
all: parentLength(tx, parAddr),
@ -176,7 +178,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
obj: parent,
}
refCounter[sParAddr] = nRef
refCounter[k] = nRef
}
nRef.cur++
@ -216,12 +218,14 @@ func (db *DB) deleteObject(
// parentLength returns the number of available children stored in the parent ID index.
func parentLength(tx *bbolt.Tx, addr oid.Address) int {
bkt := tx.Bucket(parentBucketName(addr.Container()))
bucketName := make([]byte, bucketKeySize)
bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:]))
if bkt == nil {
return 0
}
lst, err := decodeList(bkt.Get(objectKey(addr.Object())))
lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:])))
if err != nil {
return 0
}
@ -291,23 +295,22 @@ func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error {
addr := object.AddressOf(obj)
objKey := objectKey(addr.Object())
addrKey := addressKey(addr)
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
addrKey := addressKey(addr, make([]byte, addressKeySize))
cnr := addr.Container()
bucketName := make([]byte, bucketKeySize)
// add value to primary unique bucket
if !isParent {
var bucketName []byte
switch obj.Type() {
case objectSDK.TypeRegular:
bucketName = primaryBucketName(cnr)
bucketName = primaryBucketName(cnr, bucketName)
case objectSDK.TypeTombstone:
bucketName = tombstoneBucketName(cnr)
bucketName = tombstoneBucketName(cnr, bucketName)
case objectSDK.TypeStorageGroup:
bucketName = storageGroupBucketName(cnr)
bucketName = storageGroupBucketName(cnr, bucketName)
case objectSDK.TypeLock:
bucketName = bucketNameLockers(cnr)
bucketName = bucketNameLockers(cnr, bucketName)
default:
return ErrUnknownObjectType
}
@ -318,17 +321,17 @@ func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error
})
} else {
delUniqueIndexItem(tx, namedBucketItem{
name: parentBucketName(cnr),
name: parentBucketName(cnr, bucketName),
key: objKey,
})
}
delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index
name: smallBucketName(cnr),
name: smallBucketName(cnr, bucketName),
key: objKey,
})
delUniqueIndexItem(tx, namedBucketItem{ // remove from root index
name: rootBucketName(cnr),
name: rootBucketName(cnr, bucketName),
key: objKey,
})
delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index

View file

@ -71,17 +71,18 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists b
return false, object.ErrObjectIsExpired
}
objKey := objectKey(addr.Object())
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
cnr := addr.Container()
key := make([]byte, bucketKeySize)
// if graveyard is empty, then check if object exists in primary bucket
if inBucket(tx, primaryBucketName(cnr), objKey) {
if inBucket(tx, primaryBucketName(cnr, key), objKey) {
return true, nil
}
// if primary bucket is empty, then check if object exists in parent bucket
if inBucket(tx, parentBucketName(cnr), objKey) {
if inBucket(tx, parentBucketName(cnr, key), objKey) {
splitInfo, err := getSplitInfo(tx, cnr, objKey)
if err != nil {
return false, err
@ -105,15 +106,16 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
// GC is expected to collect all the objects that have
// expired previously for less than the one epoch duration
rawOID := []byte(addr.Object().EncodeToString())
var expired bool
// bucket with objects that have expiration attr
expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch))
attrKey := make([]byte, bucketKeySize+len(objectV2.SysAttributeExpEpoch))
expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch, attrKey))
if expirationBucket != nil {
// bucket that contains objects that expire in the current epoch
prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10)))
if prevEpochBkt != nil {
rawOID := objectKey(addr.Object(), make([]byte, objectKeySize))
if prevEpochBkt.Get(rawOID) != nil {
expired = true
}
@ -126,7 +128,7 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
graveyardBkt := tx.Bucket(graveyardBucketName)
garbageBkt := tx.Bucket(garbageBucketName)
addrKey := addressKey(addr)
addrKey := addressKey(addr, make([]byte, addressKeySize))
return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt)
}
@ -175,7 +177,8 @@ func inBucket(tx *bbolt.Tx, name, key []byte) bool {
// getSplitInfo returns the SplitInfo structure from the root index. Returns an
// error if there is no `key` record in the root index.
func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) {
rawSplitInfo := getFromBucket(tx, rootBucketName(cnr), key)
bucketName := rootBucketName(cnr, make([]byte, bucketKeySize))
rawSplitInfo := getFromBucket(tx, bucketName, key)
if len(rawSplitInfo) == 0 {
return nil, ErrLackSplitInfo
}

View file

@ -53,7 +53,8 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) {
currEpoch := db.epochState.CurrentEpoch()
err = db.boltDB.View(func(tx *bbolt.Tx) error {
res.hdr, err = db.get(tx, prm.addr, true, prm.raw, currEpoch)
key := make([]byte, addressKeySize)
res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch)
return err
})
@ -61,9 +62,7 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) {
return
}
func (db *DB) get(tx *bbolt.Tx, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
key := objectKey(addr.Object())
func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
if checkStatus {
switch objectStatus(tx, addr, currEpoch) {
case 1:
@ -79,29 +78,31 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, checkStatus, raw bool, currEpo
}
}
key = objectKey(addr.Object(), key)
cnr := addr.Container()
obj := objectSDK.New()
bucketName := make([]byte, bucketKeySize)
// check in primary index
data := getFromBucket(tx, primaryBucketName(cnr), key)
data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key)
if len(data) != 0 {
return obj, obj.Unmarshal(data)
}
// if not found then check in tombstone index
data = getFromBucket(tx, tombstoneBucketName(cnr), key)
data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key)
if len(data) != 0 {
return obj, obj.Unmarshal(data)
}
// if not found then check in storage group index
data = getFromBucket(tx, storageGroupBucketName(cnr), key)
data = getFromBucket(tx, storageGroupBucketName(cnr, bucketName), key)
if len(data) != 0 {
return obj, obj.Unmarshal(data)
}
// if not found then check in locker index
data = getFromBucket(tx, bucketNameLockers(cnr), key)
data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key)
if len(data) != 0 {
return obj, obj.Unmarshal(data)
}
@ -124,7 +125,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
return nil, getSplitInfoError(tx, cnr, key)
}
parentBucket := tx.Bucket(parentBucketName(cnr))
bucketName := make([]byte, bucketKeySize)
parentBucket := tx.Bucket(parentBucketName(cnr, bucketName))
if parentBucket == nil {
var errNotFound apistatus.ObjectNotFound
@ -146,7 +148,7 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
// but later the list might be sorted, so the first or last value
// could be preferred
virtualOID := relativeLst[len(relativeLst)-1]
data := getFromBucket(tx, primaryBucketName(cnr), virtualOID)
data := getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID)
child := objectSDK.New()

View file

@ -174,7 +174,7 @@ func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address)
if offset == nil {
k, v = c.First()
} else {
rawAddr := addressKey(*offset)
rawAddr := addressKey(*offset, make([]byte, addressKeySize))
k, v = c.Seek(rawAddr)
if bytes.Equal(k, rawAddr) {
@ -222,6 +222,8 @@ func graveFromKV(k, v []byte) (res TombstonedObject, err error) {
//
// Returns any error that appeared during the deletion process.
func (db *DB) DropGraves(tss []TombstonedObject) error {
buf := make([]byte, addressKeySize)
return db.boltDB.Update(func(tx *bbolt.Tx) error {
bkt := tx.Bucket(graveyardBucketName)
if bkt == nil {
@ -229,7 +231,7 @@ func (db *DB) DropGraves(tss []TombstonedObject) error {
}
for _, ts := range tss {
err := bkt.Delete(addressKey(ts.Address()))
err := bkt.Delete(addressKey(ts.Address(), buf))
if err != nil {
return err
}

View file

@ -113,7 +113,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
if prm.tomb != nil {
bkt = graveyardBKT
tombKey := addressKey(*prm.tomb)
tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize))
// it is forbidden to have a tomb-on-tomb in NeoFS,
// so graveyard keys must not be addresses of tombstones
@ -131,6 +131,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
value = zeroValue
}
buf := make([]byte, addressKeySize)
for i := range prm.target {
id := prm.target[i].Object()
cnr := prm.target[i].Container()
@ -153,9 +154,8 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
lockWasChecked = true
}
targetKey := addressKey(prm.target[i])
obj, err := db.get(tx, prm.target[i], false, true, currEpoch)
obj, err := db.get(tx, prm.target[i], buf, false, true, currEpoch)
targetKey := addressKey(prm.target[i], buf)
if err == nil {
if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 {
// object is available, decrement the

View file

@ -55,6 +55,12 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler)
return nil
}
var cnrID cid.ID
err := cnrID.Decode(cidBytes)
if err != nil {
return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
}
return b.ForEach(func(expKey, _ []byte) error {
bktExpired := b.Bucket(expKey)
if bktExpired == nil {
@ -71,18 +77,11 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler)
return bktExpired.ForEach(func(idKey, _ []byte) error {
var id oid.ID
err = id.DecodeString(string(idKey))
err = id.Decode(idKey)
if err != nil {
return fmt.Errorf("could not parse ID of expired object: %w", err)
}
var cnrID cid.ID
err = cnrID.DecodeString(string(cidBytes))
if err != nil {
return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
}
// Ignore locked objects.
//
// To slightly optimize performance we can check only REGULAR objects
@ -131,7 +130,11 @@ func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Addres
}
err := bktGraveyard.ForEach(func(k, v []byte) error {
if _, ok := tss[string(v)]; ok {
var addr oid.Address
if err := decodeAddressFromKey(&addr, v); err != nil {
return err
}
if _, ok := tss[addr.EncodeToString()]; ok {
var addr oid.Address
err := decodeAddressFromKey(&addr, k)
@ -161,22 +164,22 @@ func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID) error) error {
var oid oid.ID
return tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
b58CID, postfix := parseContainerIDWithPostfix(&cid, name)
b58CID, postfix := parseContainerIDWithPrefix(&cid, name)
if len(b58CID) == 0 {
return nil
}
switch postfix {
case "",
storageGroupPostfix,
bucketNameSuffixLockers,
tombstonePostfix:
case primaryPrefix,
storageGroupPrefix,
lockersPrefix,
tombstonePrefix:
default:
return nil
}
return b.ForEach(func(k, v []byte) error {
if oid.DecodeString(string(k)) == nil {
if oid.Decode(k) == nil {
return f(cid, oid)
}

View file

@ -1,7 +1,6 @@
package meta
import (
"bytes"
"errors"
cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
@ -90,30 +89,28 @@ func (db *DB) listWithCursor(tx *bbolt.Tx, result []oid.Address, count int, curs
graveyardBkt := tx.Bucket(graveyardBucketName)
garbageBkt := tx.Bucket(garbageBucketName)
const idSize = 44 // size of the stringified object and container ids
var rawAddr = make([]byte, idSize*2+1)
var rawAddr = make([]byte, cidSize, addressKeySize)
loop:
for ; name != nil; name, _ = c.Next() {
b58CID, postfix := parseContainerIDWithPostfix(&containerID, name)
if b58CID == nil {
cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name)
if cidRaw == nil {
continue
}
switch postfix {
switch prefix {
case
"",
storageGroupPostfix,
bucketNameSuffixLockers,
tombstonePostfix:
primaryPrefix,
storageGroupPrefix,
lockersPrefix,
tombstonePrefix:
default:
continue
}
bkt := tx.Bucket(name)
if bkt != nil {
rawAddr = append(rawAddr[:0], b58CID...)
rawAddr = append(rawAddr, '/')
copy(rawAddr, cidRaw)
result, offset, cursor = selectNFromBucket(bkt, graveyardBkt, garbageBkt, rawAddr, containerID,
result, count, cursor, threshold)
}
@ -150,7 +147,7 @@ loop:
// object to start selecting from. Ignores inhumed objects.
func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets
addrRaw []byte, // container ID prefix, optimization
cidRaw []byte, // container ID prefix, optimization
cnt cid.ID, // container ID
to []oid.Address, // listing result
limit int, // stop listing at `limit` items in result
@ -178,12 +175,12 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
}
var obj oid.ID
if err := obj.DecodeString(string(k)); err != nil {
if err := obj.Decode(k); err != nil {
break
}
offset = k
if inGraveyardWithKey(append(addrRaw, k...), graveyardBkt, garbageBkt) > 0 {
if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 {
continue
}
@ -197,21 +194,16 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
return to, offset, cursor
}
func parseContainerIDWithPostfix(containerID *cid.ID, name []byte) ([]byte, string) {
var (
containerIDStr = name
postfix []byte
)
ind := bytes.IndexByte(name, invalidBase58String[0])
if ind > 0 {
postfix = containerIDStr[ind:]
containerIDStr = containerIDStr[:ind]
func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) {
if len(name) < bucketKeySize {
return nil, 0
}
if err := containerID.DecodeString(string(containerIDStr)); err != nil {
return nil, ""
rawID := name[1:bucketKeySize]
if err := containerID.Decode(rawID); err != nil {
return nil, 0
}
return containerIDStr, string(postfix)
return rawID, name[0]
}

View file

@ -11,15 +11,11 @@ import (
"go.etcd.io/bbolt"
)
// bucket name for locked objects.
var bucketNameLocked = []byte(invalidBase58String + "Locked")
// suffix for container buckets with objects of type LOCK.
const bucketNameSuffixLockers = invalidBase58String + "LOCKER"
var bucketNameLocked = []byte{lockedPrefix}
// returns name of the bucket with objects of type LOCK for specified container.
func bucketNameLockers(idCnr cid.ID) []byte {
return []byte(idCnr.EncodeToString() + bucketNameSuffixLockers)
func bucketNameLockers(idCnr cid.ID, key []byte) []byte {
return bucketName(idCnr, lockersPrefix, key)
}
// Lock marks objects as locked with another object. All objects are from the
@ -36,14 +32,14 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error {
panic("empty locked list")
}
// check if all objects are regular
bucketKeysLocked := make([][]byte, len(locked))
for i := range locked {
bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize))
}
key := make([]byte, cidSize)
return db.boltDB.Update(func(tx *bbolt.Tx) error {
// check if all objects are regular
bucketKeysLocked := make([][]byte, len(locked))
for i := range locked {
bucketKeysLocked[i] = objectKey(locked[i])
}
if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) != object.TypeRegular {
return apistatus.LockNonRegularObject{}
}
@ -53,12 +49,13 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error {
return fmt.Errorf("create global bucket for locked objects: %w", err)
}
bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists([]byte(cnr.EncodeToString()))
cnr.Encode(key)
bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key)
if err != nil {
return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err)
}
keyLocker := objectKey(locker)
keyLocker := objectKey(locker, key)
var exLockers [][]byte
var updLockers []byte
@ -114,9 +111,11 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) error {
func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
bucketLocked := tx.Bucket(bucketNameLocked)
if bucketLocked != nil {
bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString()))
key := make([]byte, cidSize)
idCnr.Encode(key)
bucketLockedContainer := bucketLocked.Bucket(key)
if bucketLockedContainer != nil {
return bucketLockedContainer.Get(objectKey(idObj)) != nil
return bucketLockedContainer.Get(objectKey(idObj, key)) != nil
}
}
@ -131,9 +130,12 @@ func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) error {
bucketLocked := tx.Bucket(bucketNameLocked)
if bucketLocked != nil {
bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString()))
key := make([]byte, cidSize)
idCnr.Encode(key)
bucketLockedContainer := bucketLocked.Bucket(key)
if bucketLockedContainer != nil {
keyLocker := objectKey(locker)
keyLocker := objectKey(locker, key)
return bucketLockedContainer.ForEach(func(k, v []byte) error {
keyLockers, err := decodeList(v)
if err != nil {

View file

@ -52,13 +52,16 @@ func (db *DB) ToMoveIt(prm ToMoveItPrm) (res ToMoveItRes, err error) {
db.modeMtx.RLock()
defer db.modeMtx.RUnlock()
key := make([]byte, addressKeySize)
key = addressKey(prm.addr, key)
err = db.boltDB.Update(func(tx *bbolt.Tx) error {
toMoveIt, err := tx.CreateBucketIfNotExists(toMoveItBucketName)
if err != nil {
return err
}
return toMoveIt.Put(addressKey(prm.addr), zeroValue)
return toMoveIt.Put(key, zeroValue)
})
return
@ -69,13 +72,16 @@ func (db *DB) DoNotMove(prm DoNotMovePrm) (res DoNotMoveRes, err error) {
db.modeMtx.RLock()
defer db.modeMtx.RUnlock()
key := make([]byte, addressKeySize)
key = addressKey(prm.addr, key)
err = db.boltDB.Update(func(tx *bbolt.Tx) error {
toMoveIt := tx.Bucket(toMoveItBucketName)
if toMoveIt == nil {
return nil
}
return toMoveIt.Delete(addressKey(prm.addr))
return toMoveIt.Delete(key)
})
return

View file

@ -168,21 +168,20 @@ func putUniqueIndexes(
isParent := si != nil
addr := object.AddressOf(obj)
cnr := addr.Container()
objKey := objectKey(addr.Object())
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
bucketName := make([]byte, bucketKeySize)
// add value to primary unique bucket
if !isParent {
var bucketName []byte
switch obj.Type() {
case objectSDK.TypeRegular:
bucketName = primaryBucketName(cnr)
bucketName = primaryBucketName(cnr, bucketName)
case objectSDK.TypeTombstone:
bucketName = tombstoneBucketName(cnr)
bucketName = tombstoneBucketName(cnr, bucketName)
case objectSDK.TypeStorageGroup:
bucketName = storageGroupBucketName(cnr)
bucketName = storageGroupBucketName(cnr, bucketName)
case objectSDK.TypeLock:
bucketName = bucketNameLockers(cnr)
bucketName = bucketNameLockers(cnr, bucketName)
default:
return ErrUnknownObjectType
}
@ -204,7 +203,7 @@ func putUniqueIndexes(
// index storageID if it is present
if id != nil {
err = putUniqueIndexItem(tx, namedBucketItem{
name: smallBucketName(cnr),
name: smallBucketName(cnr, bucketName),
key: objKey,
val: id,
})
@ -229,7 +228,7 @@ func putUniqueIndexes(
}
err = putUniqueIndexItem(tx, namedBucketItem{
name: rootBucketName(cnr),
name: rootBucketName(cnr, bucketName),
key: objKey,
val: splitInfo,
})
@ -246,13 +245,14 @@ type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error
func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
idObj, _ := obj.ID()
cnr, _ := obj.ContainerID()
objKey := objectKey(idObj)
objKey := objectKey(idObj, make([]byte, objectKeySize))
bucketName := make([]byte, bucketKeySize)
cs, _ := obj.PayloadChecksum()
// index payload hashes
err := f(tx, namedBucketItem{
name: payloadHashBucketName(cnr),
name: payloadHashBucketName(cnr, bucketName),
key: cs.Value(),
val: objKey,
})
@ -265,8 +265,8 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
// index parent ids
if ok {
err := f(tx, namedBucketItem{
name: parentBucketName(cnr),
key: objectKey(idParent),
name: parentBucketName(cnr, bucketName),
key: objectKey(idParent, make([]byte, objectKeySize)),
val: objKey,
})
if err != nil {
@ -277,7 +277,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
// index split ids
if obj.SplitID() != nil {
err := f(tx, namedBucketItem{
name: splitBucketName(cnr),
name: splitBucketName(cnr, bucketName),
key: obj.SplitID().ToV2(),
val: objKey,
})
@ -292,12 +292,13 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
id, _ := obj.ID()
cnr, _ := obj.ContainerID()
objKey := []byte(id.EncodeToString())
objKey := objectKey(id, make([]byte, objectKeySize))
attrs := obj.Attributes()
key := make([]byte, bucketKeySize)
err := f(tx, namedBucketItem{
name: ownerBucketName(cnr),
name: ownerBucketName(cnr, key),
key: []byte(obj.OwnerID().EncodeToString()),
val: objKey,
})
@ -307,8 +308,9 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
// user specified attributes
for i := range attrs {
key = attributeBucketName(cnr, attrs[i].Key(), key)
err := f(tx, namedBucketItem{
name: attributeBucketName(cnr, attrs[i].Key()),
name: key,
key: []byte(attrs[i].Value()),
val: objKey,
})
@ -437,25 +439,27 @@ func getVarUint(data []byte) (uint64, int, error) {
// updateStorageID for existing objects if they were moved from one
// storage location to another.
func updateStorageID(tx *bbolt.Tx, addr oid.Address, id []byte) error {
bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container()))
key := make([]byte, bucketKeySize)
bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container(), key))
if err != nil {
return err
}
return bkt.Put(objectKey(addr.Object()), id)
return bkt.Put(objectKey(addr.Object(), key), id)
}
// updateSplitInfo for existing objects if the storage is filled with extra
// information about the last object in a split hierarchy or a linking object.
func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error {
bkt := tx.Bucket(rootBucketName(addr.Container()))
key := make([]byte, bucketKeySize)
bkt := tx.Bucket(rootBucketName(addr.Container(), key))
if bkt == nil {
// if the object doesn't exist and we want to update split info on it,
// then ignore; this should never happen
return ErrIncorrectSplitInfoUpdate
}
objectKey := objectKey(addr.Object())
objectKey := objectKey(addr.Object(), key)
rawSplitInfo := bkt.Get(objectKey)
if len(rawSplitInfo) == 0 {

View file

@ -107,13 +107,16 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c
continue // ignore objects with unmatched fast filters
}
var addr oid.Address
err = decodeAddressFromKey(&addr, []byte(a))
var id oid.ID
err = id.Decode([]byte(a))
if err != nil {
return nil, err
}
var addr oid.Address
addr.SetContainer(cnr)
addr.SetObject(id)
if objectStatus(tx, addr, currEpoch) > 0 {
continue // ignore removed objects
}
@ -130,26 +133,24 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c
// selectAll adds to resulting cache all available objects in metabase.
func (db *DB) selectAll(tx *bbolt.Tx, cnr cid.ID, to map[string]int) {
prefix := cnr.EncodeToString() + "/"
selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, 0)
selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, 0)
selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, 0)
selectAllFromBucket(tx, parentBucketName(cnr), prefix, to, 0)
selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, 0)
bucketName := make([]byte, bucketKeySize)
selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, 0)
selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, 0)
selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, 0)
selectAllFromBucket(tx, parentBucketName(cnr, bucketName), to, 0)
selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, 0)
}
// selectAllFromBucket goes through all keys in the bucket and adds them to the
// resulting cache. Keys are expected to be binary-encoded object IDs.
func selectAllFromBucket(tx *bbolt.Tx, name []byte, prefix string, to map[string]int, fNum int) {
func selectAllFromBucket(tx *bbolt.Tx, name []byte, to map[string]int, fNum int) {
bkt := tx.Bucket(name)
if bkt == nil {
return
}
_ = bkt.ForEach(func(k, v []byte) error {
key := prefix + string(k) // consider using string builders from sync.Pool
markAddressInCache(to, fNum, key)
markAddressInCache(to, fNum, string(k))
return nil
})
@ -164,47 +165,46 @@ func (db *DB) selectFastFilter(
to map[string]int, // resulting cache
fNum int, // index of filter
) {
prefix := cnr.EncodeToString() + "/"
currEpoch := db.epochState.CurrentEpoch()
bucketName := make([]byte, bucketKeySize)
switch f.Header() {
case v2object.FilterHeaderObjectID:
db.selectObjectID(tx, f, cnr, to, fNum, currEpoch)
case v2object.FilterHeaderOwnerID:
bucketName := ownerBucketName(cnr)
db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum)
bucketName := ownerBucketName(cnr, bucketName)
db.selectFromFKBT(tx, bucketName, f, to, fNum)
case v2object.FilterHeaderPayloadHash:
bucketName := payloadHashBucketName(cnr)
db.selectFromList(tx, bucketName, f, prefix, to, fNum)
bucketName := payloadHashBucketName(cnr, bucketName)
db.selectFromList(tx, bucketName, f, to, fNum)
case v2object.FilterHeaderObjectType:
for _, bucketName := range bucketNamesForType(cnr, f.Operation(), f.Value()) {
selectAllFromBucket(tx, bucketName, prefix, to, fNum)
selectAllFromBucket(tx, bucketName, to, fNum)
}
case v2object.FilterHeaderParent:
bucketName := parentBucketName(cnr)
db.selectFromList(tx, bucketName, f, prefix, to, fNum)
bucketName := parentBucketName(cnr, bucketName)
db.selectFromList(tx, bucketName, f, to, fNum)
case v2object.FilterHeaderSplitID:
bucketName := splitBucketName(cnr)
db.selectFromList(tx, bucketName, f, prefix, to, fNum)
bucketName := splitBucketName(cnr, bucketName)
db.selectFromList(tx, bucketName, f, to, fNum)
case v2object.FilterPropertyRoot:
selectAllFromBucket(tx, rootBucketName(cnr), prefix, to, fNum)
selectAllFromBucket(tx, rootBucketName(cnr, bucketName), to, fNum)
case v2object.FilterPropertyPhy:
selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, fNum)
selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, fNum)
selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, fNum)
selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, fNum)
selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum)
selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum)
selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, fNum)
selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum)
default: // user attribute
bucketName := attributeBucketName(cnr, f.Header())
bucketName := attributeBucketName(cnr, f.Header(), bucketName)
if f.Operation() == object.MatchNotPresent {
selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, f, prefix, to, fNum)
selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum)
} else {
db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum)
db.selectFromFKBT(tx, bucketName, f, to, fNum)
}
}
}
var mBucketNaming = map[string][]func(cid.ID) []byte{
var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{
v2object.TypeRegular.String(): {primaryBucketName, parentBucketName},
v2object.TypeTombstone.String(): {tombstoneBucketName},
v2object.TypeStorageGroup.String(): {storageGroupBucketName},
@ -214,7 +214,7 @@ var mBucketNaming = map[string][]func(cid.ID) []byte{
func allBucketNames(cnr cid.ID) (names [][]byte) {
for _, fns := range mBucketNaming {
for _, fn := range fns {
names = append(names, fn(cnr))
names = append(names, fn(cnr, make([]byte, bucketKeySize)))
}
}
@ -226,7 +226,7 @@ func bucketNamesForType(cnr cid.ID, mType object.SearchMatchType, typeVal string
fns, ok := mBucketNaming[key]
if ok {
for _, fn := range fns {
names = append(names, fn(cnr))
names = append(names, fn(cnr, make([]byte, bucketKeySize)))
}
}
}
@ -258,7 +258,6 @@ func (db *DB) selectFromFKBT(
tx *bbolt.Tx,
name []byte, // fkbt root bucket name
f object.SearchFilter, // filter for operation and value
prefix string, // prefix to create addr from oid in index
to map[string]int, // resulting cache
fNum int, // index of filter
) { //
@ -281,8 +280,7 @@ func (db *DB) selectFromFKBT(
}
return fkbtLeaf.ForEach(func(k, _ []byte) error {
addr := prefix + string(k)
markAddressInCache(to, fNum, addr)
markAddressInCache(to, fNum, string(k))
return nil
})
@ -298,8 +296,6 @@ func selectOutsideFKBT(
tx *bbolt.Tx,
incl [][]byte, // buckets
name []byte, // fkbt root bucket name
f object.SearchFilter, // filter for operation and value
prefix string, // prefix to create addr from oid in index
to map[string]int, // resulting cache
fNum int, // index of filter
) {
@ -314,8 +310,7 @@ func selectOutsideFKBT(
}
return exclBktLeaf.ForEach(func(k, _ []byte) error {
addr := prefix + string(k)
mExcl[addr] = struct{}{}
mExcl[string(k)] = struct{}{}
return nil
})
@ -329,10 +324,8 @@ func selectOutsideFKBT(
}
_ = bktIncl.ForEach(func(k, _ []byte) error {
addr := prefix + string(k)
if _, ok := mExcl[addr]; !ok {
markAddressInCache(to, fNum, addr)
if _, ok := mExcl[string(k)]; !ok {
markAddressInCache(to, fNum, string(k))
}
return nil
@ -346,7 +339,6 @@ func (db *DB) selectFromList(
tx *bbolt.Tx,
name []byte, // list root bucket name
f object.SearchFilter, // filter for operation and value
prefix string, // prefix to create addr from oid in index
to map[string]int, // resulting cache
fNum int, // index of filter
) { //
@ -398,8 +390,7 @@ func (db *DB) selectFromList(
}
for i := range lst {
addr := prefix + string(lst[i])
markAddressInCache(to, fNum, addr)
markAddressInCache(to, fNum, string(lst[i]))
}
}
@ -412,30 +403,25 @@ func (db *DB) selectObjectID(
fNum int, // index of filter
currEpoch uint64,
) {
prefix := cnr.EncodeToString() + "/"
appendOID := func(strObj string) {
addrStr := prefix + strObj
appendOID := func(id oid.ID) {
var addr oid.Address
err := decodeAddressFromKey(&addr, []byte(addrStr))
if err != nil {
db.log.Debug("can't decode object id address",
zap.String("addr", addrStr),
zap.String("error", err.Error()))
return
}
addr.SetContainer(cnr)
addr.SetObject(id)
ok, err := db.exists(tx, addr, currEpoch)
if (err == nil && ok) || errors.As(err, &splitInfoError) {
markAddressInCache(to, fNum, addrStr)
raw := make([]byte, objectKeySize)
id.Encode(raw)
markAddressInCache(to, fNum, string(raw))
}
}
switch op := f.Operation(); op {
case object.MatchStringEqual:
appendOID(f.Value())
var id oid.ID
if err := id.DecodeString(f.Value()); err == nil {
appendOID(id)
}
default:
fMatch, ok := db.matchers[op]
if !ok {
@ -454,7 +440,10 @@ func (db *DB) selectObjectID(
}
err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error {
appendOID(string(k))
var id oid.ID
if err := id.Decode(k); err == nil {
appendOID(id)
}
return nil
})
if err != nil {
@ -472,7 +461,8 @@ func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f object.SearchFi
return true
}
obj, err := db.get(tx, addr, true, false, currEpoch)
buf := make([]byte, addressKeySize)
obj, err := db.get(tx, addr, buf, true, false, currEpoch)
if err != nil {
return false
}

View file

@ -6,7 +6,7 @@ import (
)
var (
shardInfoBucket = []byte(invalidBase58String + "i")
shardInfoBucket = []byte{shardInfoPrefix}
shardIDKey = []byte("id")
)

View file

@ -39,12 +39,13 @@ func (db *DB) StorageID(prm StorageIDPrm) (res StorageIDRes, err error) {
}
func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) {
smallBucket := tx.Bucket(smallBucketName(addr.Container()))
key := make([]byte, bucketKeySize)
smallBucket := tx.Bucket(smallBucketName(addr.Container(), key))
if smallBucket == nil {
return nil, nil
}
storageID := smallBucket.Get(objectKey(addr.Object()))
storageID := smallBucket.Get(objectKey(addr.Object(), key))
if storageID == nil {
return nil, nil
}

View file

@ -2,8 +2,8 @@ package meta
import (
"bytes"
"crypto/sha256"
"fmt"
"strings"
cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
"github.com/nspcc-dev/neofs-sdk-go/object"
@ -11,126 +11,214 @@ import (
"go.etcd.io/bbolt"
)
/*
We might increase performance by not using string representation of
identities and addresses. String representation requires base58 encoding, which
slows execution. Instead we can try to marshal these structures directly into
bytes. Check it later.
*/
const invalidBase58String = "_"
var (
// graveyardBucketName stores rows with the objects that have been
// covered with Tombstone objects. Those objects should not be returned
// from the node and should not be accepted by the node from other
// nodes.
graveyardBucketName = []byte(invalidBase58String + "Graveyard")
graveyardBucketName = []byte{graveyardPrefix}
// garbageBucketName stores rows with the objects that should be physically
// deleted by the node (Garbage Collector routine).
garbageBucketName = []byte(invalidBase58String + "Garbage")
toMoveItBucketName = []byte(invalidBase58String + "ToMoveIt")
containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize")
garbageBucketName = []byte{garbagePrefix}
toMoveItBucketName = []byte{toMoveItPrefix}
containerVolumeBucketName = []byte{containerVolumePrefix}
zeroValue = []byte{0xFF}
smallPostfix = invalidBase58String + "small"
storageGroupPostfix = invalidBase58String + "SG"
tombstonePostfix = invalidBase58String + "TS"
ownerPostfix = invalidBase58String + "ownerid"
payloadHashPostfix = invalidBase58String + "payloadhash"
rootPostfix = invalidBase58String + "root"
parentPostfix = invalidBase58String + "parent"
splitPostfix = invalidBase58String + "splitid"
userAttributePostfix = invalidBase58String + "attr_"
splitInfoError *object.SplitInfoError // for errors.As comparisons
)
// Prefix bytes for database keys. All ids and addresses are encoded in binary
// unless specified otherwise.
const (
// graveyardPrefix is used for the graveyard bucket.
// Key: object address
// Value: tombstone address
graveyardPrefix = iota
// garbagePrefix is used for the garbage bucket.
// Key: object address
// Value: dummy value
garbagePrefix
// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
// to another shard.
toMoveItPrefix
// containerVolumePrefix is used for storing container size estimations.
// Key: container ID
// Value: container size in bytes as little-endian uint64
containerVolumePrefix
// lockedPrefix is used for storing locked objects information.
// Key: container ID
// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
lockedPrefix
// shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container.
shardInfoPrefix
//======================
// Unique index buckets.
//======================
// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
// Key: object ID
// Value: marshaled object
primaryPrefix
// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
// Key: object ID
// Value: marshaled object
lockersPrefix
// storageGroupPrefix is used for prefixing buckets containing objects of STORAGEGROUP type.
// Key: object ID
// Value: marshaled object
storageGroupPrefix
// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
// Key: object ID
// Value: marshaled object
tombstonePrefix
// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
// Key: object ID
// Value: blobovnicza ID
smallPrefix
// rootPrefix is used for prefixing buckets mapping parent object to the split info.
// Key: object ID
// Value: split info
rootPrefix
//====================
// FKBT index buckets.
//====================
// ownerPrefix is used for prefixing FKBT index buckets mapping owner to object IDs.
// Key: owner ID
// Value: bucket containing object IDs as keys
ownerPrefix
// userAttributePrefix is used for prefixing FKBT index buckets containing objects.
// Key: attribute value
// Value: bucket containing object IDs as keys
userAttributePrefix
//====================
// List index buckets.
//====================
// payloadHashPrefix is used for prefixing List index buckets mapping payload hash to a list of object IDs.
// Key: payload hash
// Value: list of object IDs
payloadHashPrefix
// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
// Key: parent ID
// Value: list of object IDs
parentPrefix
// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
// Key: split ID
// Value: list of object IDs
splitPrefix
)
const (
cidSize = sha256.Size
bucketKeySize = 1 + cidSize
objectKeySize = sha256.Size
addressKeySize = cidSize + objectKeySize
)
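
As a usage sketch of the naming scheme above: the first byte of a bucket name selects the index type and the next 32 bytes carry the raw container ID. The helper below is hypothetical and mirrors parseContainerIDWithPrefix from this diff:

// describeBucket splits a v2 bucket name into its type prefix and the raw
// container ID that follows it. Attribute buckets may carry an additional
// attribute-key suffix after these bucketKeySize bytes.
func describeBucket(name []byte) (prefix byte, rawCID []byte, ok bool) {
	if len(name) < bucketKeySize { // 1-byte prefix + 32-byte container ID
		return 0, nil, false
	}
	return name[0], name[1:bucketKeySize], true
}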
var splitInfoError *object.SplitInfoError // for errors.As comparisons
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
key[0] = prefix
cnr.Encode(key[1:])
return key[:bucketKeySize]
}
// primaryBucketName returns <CID>.
func primaryBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString())
func primaryBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, primaryPrefix, key)
}
// tombstoneBucketName returns <CID>_TS.
func tombstoneBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + tombstonePostfix)
func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, tombstonePrefix, key)
}
// storageGroupBucketName returns <CID>_SG.
func storageGroupBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + storageGroupPostfix)
func storageGroupBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, storageGroupPrefix, key)
}
// smallBucketName returns <CID>_small.
func smallBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + smallPostfix) // consider caching output values
func smallBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, smallPrefix, key)
}
// attributeBucketName returns <CID>_attr_<attributeKey>.
func attributeBucketName(cnr cid.ID, attributeKey string) []byte {
sb := strings.Builder{} // consider getting string builders from sync.Pool
sb.WriteString(cnr.EncodeToString())
sb.WriteString(userAttributePostfix)
sb.WriteString(attributeKey)
return []byte(sb.String())
func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
key[0] = userAttributePrefix
cnr.Encode(key[1:])
return append(key[:bucketKeySize], attributeKey...)
}
// returns <CID> from attributeBucketName result, nil otherwise.
func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
suffix := []byte(userAttributePostfix + attributeKey)
if !bytes.HasSuffix(val, suffix) {
if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
return nil
}
return val[:len(val)-len(suffix)]
return val[1:bucketKeySize]
}
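
A round-trip sketch for the two helpers above (the function name is hypothetical; it relies only on the new signatures introduced in this diff):

// attributeBucketRoundTrip builds an attribute bucket name for cnr and then
// recovers the raw container ID from it; the recovered slice is always
// cidSize bytes long when the inputs are well-formed.
func attributeBucketRoundTrip(cnr cid.ID, attributeKey string) bool {
	name := attributeBucketName(cnr, attributeKey, make([]byte, bucketKeySize))
	raw := cidFromAttributeBucket(name, attributeKey)
	return len(raw) == cidSize
}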
// payloadHashBucketName returns <CID>_payloadhash.
func payloadHashBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + payloadHashPostfix)
func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, payloadHashPrefix, key)
}
// rootBucketName returns <CID>_root.
func rootBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + rootPostfix)
func rootBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, rootPrefix, key)
}
// ownerBucketName returns <CID>_ownerid.
func ownerBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + ownerPostfix)
func ownerBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, ownerPrefix, key)
}
// parentBucketName returns <CID>_parent.
func parentBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + parentPostfix)
func parentBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, parentPrefix, key)
}
// splitBucketName returns <CID>_splitid.
func splitBucketName(cnr cid.ID) []byte {
return []byte(cnr.EncodeToString() + splitPostfix)
func splitBucketName(cnr cid.ID, key []byte) []byte {
return bucketName(cnr, splitPrefix, key)
}
// addressKey returns key for K-V tables when key is a whole address.
func addressKey(addr oid.Address) []byte {
return []byte(addr.EncodeToString())
func addressKey(addr oid.Address, key []byte) []byte {
addr.Container().Encode(key)
addr.Object().Encode(key[cidSize:])
return key[:addressKeySize]
}
// parses object address formed by addressKey.
func decodeAddressFromKey(dst *oid.Address, k []byte) error {
err := dst.DecodeString(string(k))
if err != nil {
return fmt.Errorf("decode object address from db key: %w", err)
if len(k) != addressKeySize {
return fmt.Errorf("invalid length")
}
var cnr cid.ID
if err := cnr.Decode(k[:cidSize]); err != nil {
return err
}
var obj oid.ID
if err := obj.Decode(k[cidSize:]); err != nil {
return err
}
dst.SetObject(obj)
dst.SetContainer(cnr)
return nil
}
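
And a sanity sketch for addressKey/decodeAddressFromKey (hypothetical name; it uses only helpers defined in this file):

// addressRoundTrip encodes an address with addressKey and decodes it back;
// the decoded address must match the original.
func addressRoundTrip(addr oid.Address) (bool, error) {
	var out oid.Address
	key := addressKey(addr, make([]byte, addressKeySize))
	if err := decodeAddressFromKey(&out, key); err != nil {
		return false, err
	}
	return out.EncodeToString() == addr.EncodeToString(), nil
}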
// objectKey returns key for K-V tables when key is an object id.
func objectKey(obj oid.ID) []byte {
return []byte(obj.EncodeToString())
func objectKey(obj oid.ID, key []byte) []byte {
obj.Encode(key)
return key[:objectKeySize]
}
// removes all bucket elements.
@ -152,13 +240,15 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
panic("empty object list in firstIrregularObjectType")
}
var keys [3][1 + cidSize]byte
irregularTypeBuckets := [...]struct {
typ object.Type
name []byte
}{
{object.TypeTombstone, tombstoneBucketName(idCnr)},
{object.TypeStorageGroup, storageGroupBucketName(idCnr)},
{object.TypeLock, bucketNameLockers(idCnr)},
{object.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
{object.TypeStorageGroup, storageGroupBucketName(idCnr, keys[1][:])},
{object.TypeLock, bucketNameLockers(idCnr, keys[2][:])},
}
for i := range objs {
@ -174,5 +264,7 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
// returns true if the provided object is of LOCK type.
func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
return inBucket(tx, bucketNameLockers(idCnr), objectKey(obj))
return inBucket(tx,
bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
objectKey(obj, make([]byte, objectKeySize)))
}

View file

@ -8,7 +8,7 @@ import (
)
// version contains current metabase version.
const version = 1
const version = 2
var versionKey = []byte("version")
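
A minimal sketch of how the bump might be enforced on open, assuming the version is stored as a little-endian uint64 under versionKey in the shard info bucket (hypothetical helper; the repository's actual check lives in version.go and is not part of this diff):

// checkStoredVersion compares the version persisted in the database with the
// compiled-in one and rejects a mismatch.
func checkStoredVersion(tx *bbolt.Tx) error {
	b := tx.Bucket(shardInfoBucket)
	if b == nil {
		return nil // fresh database, nothing stored yet
	}
	data := b.Get(versionKey)
	if data == nil {
		return nil
	}
	if len(data) != 8 || binary.LittleEndian.Uint64(data) != version {
		return fmt.Errorf("unsupported metabase version: expected %d", version)
	}
	return nil
}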