[#1482] metabase: Encode database keys in binary

Signed-off-by: Evgenii Stratonikov <evgeniy@morphbits.ru>
Authored by Evgenii Stratonikov on 2022-09-08 14:54:21 +03:00; committed by fyrchik
parent d6fef68a62
commit ae1dab29bc
19 changed files with 392 additions and 276 deletions

View file

@@ -84,6 +84,15 @@ This file describes changes between the metabase versions.
 - Key: split ID
 - Value: list of object IDs

+# History
+
+## Version 2
+
+- Container ID is encoded as 32-byte slice
+- Object ID is encoded as 32-byte slice
+- Address is encoded as 64-byte slice, container ID + object ID
+- Bucket naming scheme is changed:
+  - container ID + suffix -> 1-byte prefix + container ID
+
 ## Version 1
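The version 2 layout in one self-contained sketch (the sizes and the primaryPrefix value 0x06 follow the constants this commit introduces further down; the sha256 sums and the bucketKey/addressKey helpers here are illustrative stand-ins, not the package's own code):

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

// Sizes mirror the constants introduced by this commit.
const (
	cidSize        = sha256.Size            // 32-byte container ID
	bucketKeySize  = 1 + cidSize            // 1-byte prefix + container ID
	objectKeySize  = sha256.Size            // 32-byte object ID
	addressKeySize = cidSize + objectKeySize // 64-byte address
)

// bucketKey builds a version 2 bucket name: prefix byte, then raw CID.
func bucketKey(prefix byte, rawCID []byte) []byte {
	key := make([]byte, bucketKeySize)
	key[0] = prefix
	copy(key[1:], rawCID)
	return key
}

// addressKey builds a version 2 address: raw CID, then raw OID.
func addressKey(rawCID, rawOID []byte) []byte {
	key := make([]byte, addressKeySize)
	copy(key, rawCID)
	copy(key[cidSize:], rawOID)
	return key
}

func main() {
	cnr := sha256.Sum256([]byte("container")) // stand-in for a real CID
	obj := sha256.Sum256([]byte("object"))    // stand-in for a real OID

	const primaryPrefix = 0x06 // position of primaryPrefix in the new iota block
	fmt.Printf("bucket name: %x\n", bucketKey(primaryPrefix, cnr[:]))
	fmt.Printf("address key: %x\n", addressKey(cnr[:], obj[:]))
}
```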

View file

@@ -1,9 +1,7 @@
 package meta

 import (
-	"crypto/sha256"
 	"encoding/binary"
-	"strings"

 	cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
 	"go.etcd.io/bbolt"
@@ -30,7 +28,7 @@ func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) {
 	err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error {
 		if parseContainerID(&cnr, name, unique) {
 			result = append(result, cnr)
-			unique[cnr.EncodeToString()] = struct{}{}
+			unique[string(name[1:bucketKeySize])] = struct{}{}
 		}

 		return nil
@@ -55,24 +53,20 @@ func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) {
 		return 0, err
 	}

-	key := make([]byte, sha256.Size)
+	key := make([]byte, cidSize)
 	id.Encode(key)

 	return parseContainerSize(containerVolume.Get(key)), nil
 }

 func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
-	strContainerID := strings.Split(string(name), invalidBase58String)[0]
-
-	if _, ok := ignore[strContainerID]; ok {
+	if len(name) != bucketKeySize {
 		return false
 	}
-
-	if err := dst.DecodeString(strContainerID); err != nil {
+	if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
 		return false
 	}
-
-	return true
+	return dst.Decode(name[1:bucketKeySize]) == nil
 }
@@ -89,7 +83,7 @@ func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) e
 		return err
 	}

-	key := make([]byte, sha256.Size)
+	key := make([]byte, cidSize)
 	id.Encode(key)

 	size := parseContainerSize(containerVolume.Get(key))
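The new `unique[string(name[1:bucketKeySize])]` works because converting a byte slice to a string copies the bytes verbatim and needs no valid UTF-8, so raw binary IDs can serve as map keys without a base58 round-trip. A tiny sketch (3-byte IDs stand in for the 32-byte ones):

```go
package main

import "fmt"

func main() {
	// Raw binary IDs can be used as map keys directly: string([]byte)
	// is an exact byte copy, valid UTF-8 or not.
	seen := make(map[string]struct{})
	id := []byte{0x01, 0xFF, 0x7A}

	seen[string(id)] = struct{}{}

	if _, ok := seen[string([]byte{0x01, 0xFF, 0x7A})]; ok {
		fmt.Println("duplicate container ID skipped")
	}
}
```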

View file

@@ -125,6 +125,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
 	graveyardBKT := tx.Bucket(graveyardBucketName)
 	garbageBKT := tx.Bucket(garbageBucketName)
+	key := make([]byte, addressKeySize)

 	err = iteratePhyObjects(tx, func(cnr cid.ID, obj oid.ID) error {
 		phyCounter++
@@ -134,7 +135,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
 		// check if an object is available: not with GCMark
 		// and not covered with a tombstone
-		if inGraveyardWithKey(addressKey(addr), graveyardBKT, garbageBKT) == 0 {
+		if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 {
 			logicCounter++
 		}

View file

@@ -11,6 +11,7 @@ import (
 	"sync"
 	"time"

+	"github.com/mr-tron/base58"
 	v2object "github.com/nspcc-dev/neofs-api-go/v2/object"
 	"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard/mode"
 	"github.com/nspcc-dev/neofs-node/pkg/util/logger"
@@ -111,6 +112,8 @@ func stringifyValue(key string, objVal []byte) string {
 	switch key {
 	default:
 		return string(objVal)
+	case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
+		return base58.Encode(objVal)
 	case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
 		return hex.EncodeToString(objVal)
 	case v2object.FilterHeaderCreationEpoch, v2object.FilterHeaderPayloadLength:
@@ -140,6 +143,9 @@ func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) {
 	switch key {
 	default:
 		return []byte(value), false, true
+	case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
+		v, err := base58.Decode(value)
+		return v, false, err == nil
 	case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
 		v, err := hex.DecodeString(value)
 		if err != nil {
@@ -254,6 +260,12 @@ func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []
 // in boltDB. Useful for getting filter values from unique and list indexes.
 func bucketKeyHelper(hdr string, val string) []byte {
 	switch hdr {
+	case v2object.FilterHeaderParent:
+		v, err := base58.Decode(val)
+		if err != nil {
+			return nil
+		}
+		return v
 	case v2object.FilterHeaderPayloadHash:
 		v, err := hex.DecodeString(val)
 		if err != nil {
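With IDs stored raw, base58 now appears only at this human-readable boundary. A sketch of the round-trip with the mr-tron/base58 package imported above (the 4-byte value stands in for a real 32-byte ID):

```go
package main

import (
	"fmt"

	"github.com/mr-tron/base58"
)

func main() {
	rawID := []byte{0xDE, 0xAD, 0xBE, 0xEF} // stand-in for a 32-byte binary ID

	// stringifyValue direction: binary ID -> printable filter value.
	s := base58.Encode(rawID)

	// destringifyValue / bucketKeyHelper direction: filter value -> binary ID.
	back, err := base58.Decode(s)
	if err != nil {
		panic(err)
	}

	fmt.Printf("%x -> %s -> %x\n", rawID, s, back)
}
```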

View file

@@ -139,7 +139,8 @@ func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (uint64, uint64, er
 // object was available before the removal (for calculating the logical object
 // counter).
 func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, error) {
-	addrKey := addressKey(addr)
+	key := make([]byte, addressKeySize)
+	addrKey := addressKey(addr, key)

 	garbageBKT := tx.Bucket(garbageBucketName)
 	graveyardBKT := tx.Bucket(graveyardBucketName)
@@ -154,7 +155,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
 	}

 	// unmarshal object, work only with physically stored (raw == true) objects
-	obj, err := db.get(tx, addr, false, true, currEpoch)
+	obj, err := db.get(tx, addr, key, false, true, currEpoch)
 	if err != nil {
 		if errors.As(err, new(apistatus.ObjectNotFound)) {
 			return false, false, nil
@@ -166,9 +167,10 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
 	// if object is an only link to a parent, then remove parent
 	if parent := obj.Parent(); parent != nil {
 		parAddr := object.AddressOf(parent)
-		sParAddr := parAddr.EncodeToString()
+		sParAddr := addressKey(parAddr, key)
+		k := string(sParAddr)

-		nRef, ok := refCounter[sParAddr]
+		nRef, ok := refCounter[k]
 		if !ok {
 			nRef = &referenceNumber{
 				all: parentLength(tx, parAddr),
@@ -176,7 +178,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter
 				obj: parent,
 			}

-			refCounter[sParAddr] = nRef
+			refCounter[k] = nRef
 		}

 		nRef.cur++
@@ -216,12 +218,14 @@ func (db *DB) deleteObject(
 // parentLength returns amount of available children from parentid index.
 func parentLength(tx *bbolt.Tx, addr oid.Address) int {
-	bkt := tx.Bucket(parentBucketName(addr.Container()))
+	bucketName := make([]byte, bucketKeySize)
+
+	bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:]))
 	if bkt == nil {
 		return 0
 	}

-	lst, err := decodeList(bkt.Get(objectKey(addr.Object())))
+	lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:])))
 	if err != nil {
 		return 0
 	}
@@ -291,23 +295,22 @@ func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
 func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error {
 	addr := object.AddressOf(obj)
-	objKey := objectKey(addr.Object())
-	addrKey := addressKey(addr)
+	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
+	addrKey := addressKey(addr, make([]byte, addressKeySize))
 	cnr := addr.Container()
+	bucketName := make([]byte, bucketKeySize)

 	// add value to primary unique bucket
 	if !isParent {
-		var bucketName []byte
-
 		switch obj.Type() {
 		case objectSDK.TypeRegular:
-			bucketName = primaryBucketName(cnr)
+			bucketName = primaryBucketName(cnr, bucketName)
 		case objectSDK.TypeTombstone:
-			bucketName = tombstoneBucketName(cnr)
+			bucketName = tombstoneBucketName(cnr, bucketName)
 		case objectSDK.TypeStorageGroup:
-			bucketName = storageGroupBucketName(cnr)
+			bucketName = storageGroupBucketName(cnr, bucketName)
 		case objectSDK.TypeLock:
-			bucketName = bucketNameLockers(cnr)
+			bucketName = bucketNameLockers(cnr, bucketName)
 		default:
 			return ErrUnknownObjectType
 		}
@@ -318,17 +321,17 @@ func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error
 		})
 	} else {
 		delUniqueIndexItem(tx, namedBucketItem{
-			name: parentBucketName(cnr),
+			name: parentBucketName(cnr, bucketName),
 			key:  objKey,
 		})
 	}

 	delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index
-		name: smallBucketName(cnr),
+		name: smallBucketName(cnr, bucketName),
 		key:  objKey,
 	})
 	delUniqueIndexItem(tx, namedBucketItem{ // remove from root index
-		name: rootBucketName(cnr),
+		name: rootBucketName(cnr, bucketName),
 		key:  objKey,
 	})
 	delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index
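The pattern recurring through this file is one scratch buffer threaded through every key-building call, trading per-key allocations for careful ownership. A sketch of the idea; fillAddressKey is a hypothetical stand-in for the package's addressKey helper, whose body is outside this excerpt:

```go
package main

import "fmt"

const (
	cidSize        = 32
	addressKeySize = cidSize + 32 // container ID + object ID
)

// fillAddressKey imitates a helper like addressKey(addr, buf): it writes
// into the caller-supplied buffer and returns a slice of it, so building a
// key allocates nothing.
func fillAddressKey(rawCID, rawOID, buf []byte) []byte {
	copy(buf, rawCID)
	copy(buf[cidSize:], rawOID)
	return buf[:addressKeySize]
}

func main() {
	buf := make([]byte, addressKeySize) // one allocation for the whole loop

	first := fillAddressKey(make([]byte, 32), make([]byte, 32), buf)
	fmt.Println("key len:", len(first))

	// Caveat: the result aliases buf, so the next call overwrites it.
	// Convert to string (which copies) before reuse, as delete() does
	// with k := string(sParAddr).
	saved := string(first)
	_ = fillAddressKey(make([]byte, 32), make([]byte, 32), buf)
	fmt.Println("saved copy still intact:", len(saved))
}
```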

View file

@@ -71,17 +71,18 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists b
 		return false, object.ErrObjectIsExpired
 	}

-	objKey := objectKey(addr.Object())
+	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))

 	cnr := addr.Container()
+	key := make([]byte, bucketKeySize)

 	// if graveyard is empty, then check if object exists in primary bucket
-	if inBucket(tx, primaryBucketName(cnr), objKey) {
+	if inBucket(tx, primaryBucketName(cnr, key), objKey) {
 		return true, nil
 	}

 	// if primary bucket is empty, then check if object exists in parent bucket
-	if inBucket(tx, parentBucketName(cnr), objKey) {
+	if inBucket(tx, parentBucketName(cnr, key), objKey) {
 		splitInfo, err := getSplitInfo(tx, cnr, objKey)
 		if err != nil {
 			return false, err
@@ -105,15 +106,16 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
 	// GC is expected to collect all the objects that have
 	// expired previously for less than the one epoch duration

-	rawOID := []byte(addr.Object().EncodeToString())
 	var expired bool

 	// bucket with objects that have expiration attr
-	expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch))
+	attrKey := make([]byte, bucketKeySize+len(objectV2.SysAttributeExpEpoch))
+	expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch, attrKey))
 	if expirationBucket != nil {
 		// bucket that contains objects that expire in the current epoch
 		prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10)))
 		if prevEpochBkt != nil {
+			rawOID := objectKey(addr.Object(), make([]byte, objectKeySize))
 			if prevEpochBkt.Get(rawOID) != nil {
 				expired = true
 			}
@@ -126,7 +128,7 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
 	graveyardBkt := tx.Bucket(graveyardBucketName)
 	garbageBkt := tx.Bucket(garbageBucketName)
-	addrKey := addressKey(addr)
+	addrKey := addressKey(addr, make([]byte, addressKeySize))
 	return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt)
 }
@@ -175,7 +177,8 @@ func inBucket(tx *bbolt.Tx, name, key []byte) bool {
 // getSplitInfo returns SplitInfo structure from root index. Returns error
 // if there is no `key` record in root index.
 func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) {
-	rawSplitInfo := getFromBucket(tx, rootBucketName(cnr), key)
+	bucketName := rootBucketName(cnr, make([]byte, bucketKeySize))
+	rawSplitInfo := getFromBucket(tx, bucketName, key)
 	if len(rawSplitInfo) == 0 {
 		return nil, ErrLackSplitInfo
 	}

View file

@@ -53,7 +53,8 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) {
 	currEpoch := db.epochState.CurrentEpoch()

 	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		res.hdr, err = db.get(tx, prm.addr, true, prm.raw, currEpoch)
+		key := make([]byte, addressKeySize)
+		res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch)

 		return err
 	})
@@ -61,9 +62,7 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) {
 	return
 }

-func (db *DB) get(tx *bbolt.Tx, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
-	key := objectKey(addr.Object())
-
+func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
 	if checkStatus {
 		switch objectStatus(tx, addr, currEpoch) {
 		case 1:
@@ -79,29 +78,31 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw b
 		}
 	}

+	key = objectKey(addr.Object(), key)
 	cnr := addr.Container()
 	obj := objectSDK.New()
+	bucketName := make([]byte, bucketKeySize)

 	// check in primary index
-	data := getFromBucket(tx, primaryBucketName(cnr), key)
+	data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key)
 	if len(data) != 0 {
 		return obj, obj.Unmarshal(data)
 	}

 	// if not found then check in tombstone index
-	data = getFromBucket(tx, tombstoneBucketName(cnr), key)
+	data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key)
 	if len(data) != 0 {
 		return obj, obj.Unmarshal(data)
 	}

 	// if not found then check in storage group index
-	data = getFromBucket(tx, storageGroupBucketName(cnr), key)
+	data = getFromBucket(tx, storageGroupBucketName(cnr, bucketName), key)
 	if len(data) != 0 {
 		return obj, obj.Unmarshal(data)
 	}

 	// if not found then check in locker index
-	data = getFromBucket(tx, bucketNameLockers(cnr), key)
+	data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key)
 	if len(data) != 0 {
 		return obj, obj.Unmarshal(data)
 	}
@@ -124,7 +125,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
 		return nil, getSplitInfoError(tx, cnr, key)
 	}

-	parentBucket := tx.Bucket(parentBucketName(cnr))
+	bucketName := make([]byte, bucketKeySize)
+	parentBucket := tx.Bucket(parentBucketName(cnr, bucketName))
 	if parentBucket == nil {
 		var errNotFound apistatus.ObjectNotFound

@@ -146,7 +148,7 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
 	// but later list might be sorted so first or last value can be more
 	// prioritized to choose
 	virtualOID := relativeLst[len(relativeLst)-1]
-	data := getFromBucket(tx, primaryBucketName(cnr), virtualOID)
+	data := getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID)

 	child := objectSDK.New()

View file

@@ -174,7 +174,7 @@ func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address)
 	if offset == nil {
 		k, v = c.First()
 	} else {
-		rawAddr := addressKey(*offset)
+		rawAddr := addressKey(*offset, make([]byte, addressKeySize))

 		k, v = c.Seek(rawAddr)
 		if bytes.Equal(k, rawAddr) {
@@ -222,6 +222,8 @@ func graveFromKV(k, v []byte) (res TombstonedObject, err error) {
 //
 // Returns any error appeared during deletion process.
 func (db *DB) DropGraves(tss []TombstonedObject) error {
+	buf := make([]byte, addressKeySize)
+
 	return db.boltDB.Update(func(tx *bbolt.Tx) error {
 		bkt := tx.Bucket(graveyardBucketName)
 		if bkt == nil {
@@ -229,7 +231,7 @@ func (db *DB) DropGraves(tss []TombstonedObject) error {
 		}

 		for _, ts := range tss {
-			err := bkt.Delete(addressKey(ts.Address()))
+			err := bkt.Delete(addressKey(ts.Address(), buf))
 			if err != nil {
 				return err
 			}

View file

@@ -113,7 +113,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
 		if prm.tomb != nil {
 			bkt = graveyardBKT
-			tombKey := addressKey(*prm.tomb)
+			tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize))

 			// it is forbidden to have a tomb-on-tomb in NeoFS,
 			// so graveyard keys must not be addresses of tombstones
@@ -131,6 +131,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
 			value = zeroValue
 		}

+		buf := make([]byte, addressKeySize)
 		for i := range prm.target {
 			id := prm.target[i].Object()
 			cnr := prm.target[i].Container()
@@ -153,9 +154,8 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) {
 				lockWasChecked = true
 			}

-			targetKey := addressKey(prm.target[i])
-
-			obj, err := db.get(tx, prm.target[i], false, true, currEpoch)
+			obj, err := db.get(tx, prm.target[i], buf, false, true, currEpoch)
+			targetKey := addressKey(prm.target[i], buf)
 			if err == nil {
 				if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 {
 					// object is available, decrement the

View file

@@ -55,6 +55,12 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler)
 			return nil
 		}

+		var cnrID cid.ID
+		err := cnrID.Decode(cidBytes)
+		if err != nil {
+			return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
+		}
+
 		return b.ForEach(func(expKey, _ []byte) error {
 			bktExpired := b.Bucket(expKey)
 			if bktExpired == nil {
@@ -71,18 +77,11 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler)
 			return bktExpired.ForEach(func(idKey, _ []byte) error {
 				var id oid.ID

-				err = id.DecodeString(string(idKey))
+				err = id.Decode(idKey)
 				if err != nil {
 					return fmt.Errorf("could not parse ID of expired object: %w", err)
 				}

-				var cnrID cid.ID
-
-				err = cnrID.DecodeString(string(cidBytes))
-				if err != nil {
-					return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
-				}
-
 				// Ignore locked objects.
 				//
 				// To slightly optimize performance we can check only REGULAR objects
@@ -131,7 +130,11 @@ func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Addres
 	}

 	err := bktGraveyard.ForEach(func(k, v []byte) error {
-		if _, ok := tss[string(v)]; ok {
+		var addr oid.Address
+		if err := decodeAddressFromKey(&addr, v); err != nil {
+			return err
+		}
+		if _, ok := tss[addr.EncodeToString()]; ok {
 			var addr oid.Address

 			err := decodeAddressFromKey(&addr, k)
@@ -161,22 +164,22 @@ func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID) error) error {
 	var oid oid.ID

 	return tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
-		b58CID, postfix := parseContainerIDWithPostfix(&cid, name)
+		b58CID, postfix := parseContainerIDWithPrefix(&cid, name)
 		if len(b58CID) == 0 {
 			return nil
 		}

 		switch postfix {
-		case "",
-			storageGroupPostfix,
-			bucketNameSuffixLockers,
-			tombstonePostfix:
+		case primaryPrefix,
+			storageGroupPrefix,
+			lockersPrefix,
+			tombstonePrefix:
 		default:
 			return nil
 		}

 		return b.ForEach(func(k, v []byte) error {
-			if oid.DecodeString(string(k)) == nil {
+			if oid.Decode(k) == nil {
 				return f(cid, oid)
 			}

View file

@@ -1,7 +1,6 @@
 package meta

 import (
-	"bytes"
 	"errors"

 	cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
@@ -90,30 +89,28 @@ func (db *DB) listWithCursor(tx *bbolt.Tx, result []oid.Address, count int, curs
 	graveyardBkt := tx.Bucket(graveyardBucketName)
 	garbageBkt := tx.Bucket(garbageBucketName)

-	const idSize = 44 // size of the stringified object and container ids
-	var rawAddr = make([]byte, idSize*2+1)
+	var rawAddr = make([]byte, cidSize, addressKeySize)

loop:
 	for ; name != nil; name, _ = c.Next() {
-		b58CID, postfix := parseContainerIDWithPostfix(&containerID, name)
-		if b58CID == nil {
+		cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name)
+		if cidRaw == nil {
 			continue
 		}

-		switch postfix {
+		switch prefix {
 		case
-			"",
-			storageGroupPostfix,
-			bucketNameSuffixLockers,
-			tombstonePostfix:
+			primaryPrefix,
+			storageGroupPrefix,
+			lockersPrefix,
+			tombstonePrefix:
 		default:
 			continue
 		}

 		bkt := tx.Bucket(name)
 		if bkt != nil {
-			rawAddr = append(rawAddr[:0], b58CID...)
-			rawAddr = append(rawAddr, '/')
+			copy(rawAddr, cidRaw)
 			result, offset, cursor = selectNFromBucket(bkt, graveyardBkt, garbageBkt, rawAddr, containerID,
 				result, count, cursor, threshold)
 		}
@@ -150,7 +147,7 @@ loop:
 // object to start selecting from. Ignores inhumed objects.
 func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
 	graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets
-	addrRaw []byte, // container ID prefix, optimization
+	cidRaw []byte, // container ID prefix, optimization
 	cnt cid.ID, // container ID
 	to []oid.Address, // listing result
 	limit int, // stop listing at `limit` items in result
@@ -178,12 +175,12 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
 		}

 		var obj oid.ID
-		if err := obj.DecodeString(string(k)); err != nil {
+		if err := obj.Decode(k); err != nil {
 			break
 		}

 		offset = k
-		if inGraveyardWithKey(append(addrRaw, k...), graveyardBkt, garbageBkt) > 0 {
+		if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 {
 			continue
 		}
@@ -197,21 +194,16 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
 	return to, offset, cursor
 }

-func parseContainerIDWithPostfix(containerID *cid.ID, name []byte) ([]byte, string) {
-	var (
-		containerIDStr = name
-		postfix        []byte
-	)
-
-	ind := bytes.IndexByte(name, invalidBase58String[0])
-	if ind > 0 {
-		postfix = containerIDStr[ind:]
-		containerIDStr = containerIDStr[:ind]
+func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) {
+	if len(name) < bucketKeySize {
+		return nil, 0
 	}

-	if err := containerID.DecodeString(string(containerIDStr)); err != nil {
-		return nil, ""
+	rawID := name[1:bucketKeySize]
+
+	if err := containerID.Decode(rawID); err != nil {
+		return nil, 0
 	}

-	return containerIDStr, string(postfix)
+	return rawID, name[0]
 }
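Bucket-name parsing thus reduces to a bounds check plus two slice expressions. The same logic as a standalone sketch (the SDK's Decode validation is replaced by plain slicing; the prefix value follows the new iota block):

```go
package main

import "fmt"

const (
	cidSize       = 32
	bucketKeySize = 1 + cidSize
	primaryPrefix = 6 // value of primaryPrefix in the new prefix iota block
)

// parseBucketName mirrors parseContainerIDWithPrefix: any well-formed bucket
// name is at least one prefix byte plus a raw 32-byte container ID.
// Attribute buckets are longer; the CID still sits at name[1:bucketKeySize].
func parseBucketName(name []byte) (prefix byte, rawCID []byte, ok bool) {
	if len(name) < bucketKeySize {
		return 0, nil, false
	}
	return name[0], name[1:bucketKeySize], true
}

func main() {
	name := make([]byte, bucketKeySize)
	name[0] = primaryPrefix
	name[1] = 0xAB // first byte of the (stand-in) container ID

	if prefix, rawCID, ok := parseBucketName(name); ok {
		fmt.Printf("prefix=%d, cid starts with %x\n", prefix, rawCID[0])
	}
}
```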

View file

@@ -11,15 +11,11 @@ import (
 	"go.etcd.io/bbolt"
 )

-// bucket name for locked objects.
-var bucketNameLocked = []byte(invalidBase58String + "Locked")
-
-// suffix for container buckets with objects of type LOCK.
-const bucketNameSuffixLockers = invalidBase58String + "LOCKER"
+var bucketNameLocked = []byte{lockedPrefix}

 // returns name of the bucket with objects of type LOCK for specified container.
-func bucketNameLockers(idCnr cid.ID) []byte {
-	return []byte(idCnr.EncodeToString() + bucketNameSuffixLockers)
+func bucketNameLockers(idCnr cid.ID, key []byte) []byte {
+	return bucketName(idCnr, lockersPrefix, key)
 }
@@ -36,14 +32,14 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error {
 		panic("empty locked list")
 	}

+	// check if all objects are regular
+	bucketKeysLocked := make([][]byte, len(locked))
+	for i := range locked {
+		bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize))
+	}
+	key := make([]byte, cidSize)
+
 	return db.boltDB.Update(func(tx *bbolt.Tx) error {
-		// check if all objects are regular
-		bucketKeysLocked := make([][]byte, len(locked))
-		for i := range locked {
-			bucketKeysLocked[i] = objectKey(locked[i])
-		}
-
 		if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) != object.TypeRegular {
 			return apistatus.LockNonRegularObject{}
 		}
@@ -53,12 +49,13 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error {
 			return fmt.Errorf("create global bucket for locked objects: %w", err)
 		}

-		bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists([]byte(cnr.EncodeToString()))
+		cnr.Encode(key)
+		bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key)
 		if err != nil {
 			return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err)
 		}

-		keyLocker := objectKey(locker)
+		keyLocker := objectKey(locker, key)
 		var exLockers [][]byte
 		var updLockers []byte
@@ -114,9 +111,11 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) error {
 func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
 	bucketLocked := tx.Bucket(bucketNameLocked)
 	if bucketLocked != nil {
-		bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString()))
+		key := make([]byte, cidSize)
+		idCnr.Encode(key)
+		bucketLockedContainer := bucketLocked.Bucket(key)
 		if bucketLockedContainer != nil {
-			return bucketLockedContainer.Get(objectKey(idObj)) != nil
+			return bucketLockedContainer.Get(objectKey(idObj, key)) != nil
 		}
 	}
@@ -131,9 +130,12 @@ func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
 func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) error {
 	bucketLocked := tx.Bucket(bucketNameLocked)
 	if bucketLocked != nil {
-		bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString()))
+		key := make([]byte, cidSize)
+		idCnr.Encode(key)
+		bucketLockedContainer := bucketLocked.Bucket(key)
 		if bucketLockedContainer != nil {
-			keyLocker := objectKey(locker)
+			keyLocker := objectKey(locker, key)
 			return bucketLockedContainer.ForEach(func(k, v []byte) error {
 				keyLockers, err := decodeList(v)
 				if err != nil {
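The locked-objects index is the one place where buckets nest: the global {lockedPrefix} bucket holds a sub-bucket per container keyed by raw CID, mapping each locked object ID to its list of lockers. A runnable bbolt sketch of that layout (the file name and the zeroed IDs are placeholders; the prefix value follows the new iota block):

```go
package main

import (
	"log"

	"go.etcd.io/bbolt"
)

func main() {
	db, err := bbolt.Open("meta.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	lockedPrefix := byte(4)        // value of lockedPrefix in the iota block
	rawCID := make([]byte, 32)     // placeholder container ID
	lockedOID := make([]byte, 32)  // placeholder locked object ID
	lockerList := []byte{}         // placeholder encoded list of locker IDs

	err = db.Update(func(tx *bbolt.Tx) error {
		// top-level bucket: one byte, no container ID
		root, err := tx.CreateBucketIfNotExists([]byte{lockedPrefix})
		if err != nil {
			return err
		}
		// nested per-container bucket, named by the raw 32-byte CID
		perCnr, err := root.CreateBucketIfNotExists(rawCID)
		if err != nil {
			return err
		}
		// locked object ID -> encoded list of LOCK object IDs
		return perCnr.Put(lockedOID, lockerList)
	})
	if err != nil {
		log.Fatal(err)
	}
}
```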

View file

@@ -52,13 +52,16 @@ func (db *DB) ToMoveIt(prm ToMoveItPrm) (res ToMoveItRes, err error) {
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()

+	key := make([]byte, addressKeySize)
+	key = addressKey(prm.addr, key)
+
 	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
 		toMoveIt, err := tx.CreateBucketIfNotExists(toMoveItBucketName)
 		if err != nil {
 			return err
 		}

-		return toMoveIt.Put(addressKey(prm.addr), zeroValue)
+		return toMoveIt.Put(key, zeroValue)
 	})

 	return
@@ -69,13 +72,16 @@ func (db *DB) DoNotMove(prm DoNotMovePrm) (res DoNotMoveRes, err error) {
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()

+	key := make([]byte, addressKeySize)
+	key = addressKey(prm.addr, key)
+
 	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
 		toMoveIt := tx.Bucket(toMoveItBucketName)
 		if toMoveIt == nil {
 			return nil
 		}

-		return toMoveIt.Delete(addressKey(prm.addr))
+		return toMoveIt.Delete(key)
 	})

 	return

View file

@@ -168,21 +168,20 @@ func putUniqueIndexes(
 	isParent := si != nil
 	addr := object.AddressOf(obj)
 	cnr := addr.Container()
-	objKey := objectKey(addr.Object())
+	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
+	bucketName := make([]byte, bucketKeySize)

 	// add value to primary unique bucket
 	if !isParent {
-		var bucketName []byte
-
 		switch obj.Type() {
 		case objectSDK.TypeRegular:
-			bucketName = primaryBucketName(cnr)
+			bucketName = primaryBucketName(cnr, bucketName)
 		case objectSDK.TypeTombstone:
-			bucketName = tombstoneBucketName(cnr)
+			bucketName = tombstoneBucketName(cnr, bucketName)
 		case objectSDK.TypeStorageGroup:
-			bucketName = storageGroupBucketName(cnr)
+			bucketName = storageGroupBucketName(cnr, bucketName)
 		case objectSDK.TypeLock:
-			bucketName = bucketNameLockers(cnr)
+			bucketName = bucketNameLockers(cnr, bucketName)
 		default:
 			return ErrUnknownObjectType
 		}
@@ -204,7 +203,7 @@ func putUniqueIndexes(
 	// index storageID if it is present
 	if id != nil {
 		err = putUniqueIndexItem(tx, namedBucketItem{
-			name: smallBucketName(cnr),
+			name: smallBucketName(cnr, bucketName),
 			key:  objKey,
 			val:  id,
 		})
@@ -229,7 +228,7 @@ func putUniqueIndexes(
 		}

 		err = putUniqueIndexItem(tx, namedBucketItem{
-			name: rootBucketName(cnr),
+			name: rootBucketName(cnr, bucketName),
 			key:  objKey,
 			val:  splitInfo,
 		})
@@ -246,13 +245,14 @@ type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error
 func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
 	idObj, _ := obj.ID()
 	cnr, _ := obj.ContainerID()
-	objKey := objectKey(idObj)
+	objKey := objectKey(idObj, make([]byte, objectKeySize))
+	bucketName := make([]byte, bucketKeySize)

 	cs, _ := obj.PayloadChecksum()

 	// index payload hashes
 	err := f(tx, namedBucketItem{
-		name: payloadHashBucketName(cnr),
+		name: payloadHashBucketName(cnr, bucketName),
 		key:  cs.Value(),
 		val:  objKey,
 	})
@@ -265,8 +265,8 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
 	// index parent ids
 	if ok {
 		err := f(tx, namedBucketItem{
-			name: parentBucketName(cnr),
-			key:  objectKey(idParent),
+			name: parentBucketName(cnr, bucketName),
+			key:  objectKey(idParent, make([]byte, objectKeySize)),
 			val:  objKey,
 		})
 		if err != nil {
@@ -277,7 +277,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
 	// index split ids
 	if obj.SplitID() != nil {
 		err := f(tx, namedBucketItem{
-			name: splitBucketName(cnr),
+			name: splitBucketName(cnr, bucketName),
 			key:  obj.SplitID().ToV2(),
 			val:  objKey,
 		})
@@ -292,12 +292,13 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
 func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
 	id, _ := obj.ID()
 	cnr, _ := obj.ContainerID()
-	objKey := []byte(id.EncodeToString())
+	objKey := objectKey(id, make([]byte, objectKeySize))

 	attrs := obj.Attributes()
+	key := make([]byte, bucketKeySize)

 	err := f(tx, namedBucketItem{
-		name: ownerBucketName(cnr),
+		name: ownerBucketName(cnr, key),
 		key:  []byte(obj.OwnerID().EncodeToString()),
 		val:  objKey,
 	})
@@ -307,8 +308,9 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
 	// user specified attributes
 	for i := range attrs {
+		key = attributeBucketName(cnr, attrs[i].Key(), key)
 		err := f(tx, namedBucketItem{
-			name: attributeBucketName(cnr, attrs[i].Key()),
+			name: key,
 			key:  []byte(attrs[i].Value()),
 			val:  objKey,
 		})
@@ -437,25 +439,27 @@ func getVarUint(data []byte) (uint64, int, error) {
 // updateStorageID for existing objects if they were moved from one
 // storage location to another.
 func updateStorageID(tx *bbolt.Tx, addr oid.Address, id []byte) error {
-	bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container()))
+	key := make([]byte, bucketKeySize)
+	bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container(), key))
 	if err != nil {
 		return err
 	}

-	return bkt.Put(objectKey(addr.Object()), id)
+	return bkt.Put(objectKey(addr.Object(), key), id)
 }

 // updateSpliInfo for existing objects if storage filled with extra information
 // about last object in split hierarchy or linking object.
 func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error {
-	bkt := tx.Bucket(rootBucketName(addr.Container()))
+	key := make([]byte, bucketKeySize)
+	bkt := tx.Bucket(rootBucketName(addr.Container(), key))
 	if bkt == nil {
 		// if object doesn't exists and we want to update split info on it
 		// then ignore, this should never happen
 		return ErrIncorrectSplitInfoUpdate
 	}

-	objectKey := objectKey(addr.Object())
+	objectKey := objectKey(addr.Object(), key)

 	rawSplitInfo := bkt.Get(objectKey)
 	if len(rawSplitInfo) == 0 {

View file

@@ -107,13 +107,16 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c
 			continue // ignore objects with unmatched fast filters
 		}

-		var addr oid.Address
-
-		err = decodeAddressFromKey(&addr, []byte(a))
+		var id oid.ID
+		err = id.Decode([]byte(a))
 		if err != nil {
 			return nil, err
 		}

+		var addr oid.Address
+		addr.SetContainer(cnr)
+		addr.SetObject(id)
+
 		if objectStatus(tx, addr, currEpoch) > 0 {
 			continue // ignore removed objects
 		}
@@ -130,26 +133,24 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c
 // selectAll adds to resulting cache all available objects in metabase.
 func (db *DB) selectAll(tx *bbolt.Tx, cnr cid.ID, to map[string]int) {
-	prefix := cnr.EncodeToString() + "/"
-
-	selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, 0)
-	selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, 0)
-	selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, 0)
-	selectAllFromBucket(tx, parentBucketName(cnr), prefix, to, 0)
-	selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, 0)
+	bucketName := make([]byte, bucketKeySize)
+	selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, 0)
+	selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, 0)
+	selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, 0)
+	selectAllFromBucket(tx, parentBucketName(cnr, bucketName), to, 0)
+	selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, 0)
 }

 // selectAllFromBucket goes through all keys in bucket and adds them in a
 // resulting cache. Keys should be stringed object ids.
-func selectAllFromBucket(tx *bbolt.Tx, name []byte, prefix string, to map[string]int, fNum int) {
+func selectAllFromBucket(tx *bbolt.Tx, name []byte, to map[string]int, fNum int) {
 	bkt := tx.Bucket(name)
 	if bkt == nil {
 		return
 	}

 	_ = bkt.ForEach(func(k, v []byte) error {
-		key := prefix + string(k) // consider using string builders from sync.Pool
-		markAddressInCache(to, fNum, key)
+		markAddressInCache(to, fNum, string(k))

 		return nil
 	})
@@ -164,47 +165,46 @@
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) {
-	prefix := cnr.EncodeToString() + "/"
 	currEpoch := db.epochState.CurrentEpoch()
+	bucketName := make([]byte, bucketKeySize)

 	switch f.Header() {
 	case v2object.FilterHeaderObjectID:
 		db.selectObjectID(tx, f, cnr, to, fNum, currEpoch)
 	case v2object.FilterHeaderOwnerID:
-		bucketName := ownerBucketName(cnr)
-		db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum)
+		bucketName := ownerBucketName(cnr, bucketName)
+		db.selectFromFKBT(tx, bucketName, f, to, fNum)
 	case v2object.FilterHeaderPayloadHash:
-		bucketName := payloadHashBucketName(cnr)
-		db.selectFromList(tx, bucketName, f, prefix, to, fNum)
+		bucketName := payloadHashBucketName(cnr, bucketName)
+		db.selectFromList(tx, bucketName, f, to, fNum)
 	case v2object.FilterHeaderObjectType:
 		for _, bucketName := range bucketNamesForType(cnr, f.Operation(), f.Value()) {
-			selectAllFromBucket(tx, bucketName, prefix, to, fNum)
+			selectAllFromBucket(tx, bucketName, to, fNum)
 		}
 	case v2object.FilterHeaderParent:
-		bucketName := parentBucketName(cnr)
-		db.selectFromList(tx, bucketName, f, prefix, to, fNum)
+		bucketName := parentBucketName(cnr, bucketName)
+		db.selectFromList(tx, bucketName, f, to, fNum)
 	case v2object.FilterHeaderSplitID:
-		bucketName := splitBucketName(cnr)
-		db.selectFromList(tx, bucketName, f, prefix, to, fNum)
+		bucketName := splitBucketName(cnr, bucketName)
+		db.selectFromList(tx, bucketName, f, to, fNum)
 	case v2object.FilterPropertyRoot:
-		selectAllFromBucket(tx, rootBucketName(cnr), prefix, to, fNum)
+		selectAllFromBucket(tx, rootBucketName(cnr, bucketName), to, fNum)
 	case v2object.FilterPropertyPhy:
-		selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, fNum)
-		selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, fNum)
-		selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, fNum)
-		selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, fNum)
+		selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum)
+		selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum)
+		selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, fNum)
+		selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum)
 	default: // user attribute
-		bucketName := attributeBucketName(cnr, f.Header())
+		bucketName := attributeBucketName(cnr, f.Header(), bucketName)

 		if f.Operation() == object.MatchNotPresent {
-			selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, f, prefix, to, fNum)
+			selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum)
 		} else {
-			db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum)
+			db.selectFromFKBT(tx, bucketName, f, to, fNum)
 		}
 	}
 }

-var mBucketNaming = map[string][]func(cid.ID) []byte{
+var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{
 	v2object.TypeRegular.String():      {primaryBucketName, parentBucketName},
 	v2object.TypeTombstone.String():    {tombstoneBucketName},
 	v2object.TypeStorageGroup.String(): {storageGroupBucketName},
@@ -214,7 +214,7 @@ var mBucketNaming = map[string][]func(cid.ID) []byte{
 func allBucketNames(cnr cid.ID) (names [][]byte) {
 	for _, fns := range mBucketNaming {
 		for _, fn := range fns {
-			names = append(names, fn(cnr))
+			names = append(names, fn(cnr, make([]byte, bucketKeySize)))
 		}
 	}
@@ -226,7 +226,7 @@ func bucketNamesForType(cnr cid.ID, mType object.SearchMatchType, typeVal string
 		fns, ok := mBucketNaming[key]
 		if ok {
 			for _, fn := range fns {
-				names = append(names, fn(cnr))
+				names = append(names, fn(cnr, make([]byte, bucketKeySize)))
 			}
 		}
 	}
@@ -258,7 +258,6 @@ func (db *DB) selectFromFKBT(
 	tx *bbolt.Tx,
 	name []byte, // fkbt root bucket name
 	f object.SearchFilter, // filter for operation and value
-	prefix string, // prefix to create addr from oid in index
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) { //
@@ -281,8 +280,7 @@ func (db *DB) selectFromFKBT(
 		}

 		return fkbtLeaf.ForEach(func(k, _ []byte) error {
-			addr := prefix + string(k)
-			markAddressInCache(to, fNum, addr)
+			markAddressInCache(to, fNum, string(k))

 			return nil
 		})
@@ -298,8 +296,6 @@ func selectOutsideFKBT(
 	tx *bbolt.Tx,
 	incl [][]byte, // buckets
 	name []byte, // fkbt root bucket name
-	f object.SearchFilter, // filter for operation and value
-	prefix string, // prefix to create addr from oid in index
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) {
@@ -314,8 +310,7 @@ func selectOutsideFKBT(
 		}

 		return exclBktLeaf.ForEach(func(k, _ []byte) error {
-			addr := prefix + string(k)
-			mExcl[addr] = struct{}{}
+			mExcl[string(k)] = struct{}{}

 			return nil
 		})
@@ -329,10 +324,8 @@ func selectOutsideFKBT(
 	}

 	_ = bktIncl.ForEach(func(k, _ []byte) error {
-		addr := prefix + string(k)
-
-		if _, ok := mExcl[addr]; !ok {
-			markAddressInCache(to, fNum, addr)
+		if _, ok := mExcl[string(k)]; !ok {
+			markAddressInCache(to, fNum, string(k))
 		}

 		return nil
@@ -346,7 +339,6 @@ func (db *DB) selectFromList(
 	tx *bbolt.Tx,
 	name []byte, // list root bucket name
 	f object.SearchFilter, // filter for operation and value
-	prefix string, // prefix to create addr from oid in index
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) { //
@@ -398,8 +390,7 @@ func (db *DB) selectFromList(
 	}

 	for i := range lst {
-		addr := prefix + string(lst[i])
-		markAddressInCache(to, fNum, addr)
+		markAddressInCache(to, fNum, string(lst[i]))
 	}
 }
@@ -412,30 +403,25 @@ func (db *DB) selectObjectID(
 	fNum int, // index of filter
 	currEpoch uint64,
 ) {
-	prefix := cnr.EncodeToString() + "/"
-
-	appendOID := func(strObj string) {
-		addrStr := prefix + strObj
+	appendOID := func(id oid.ID) {
 		var addr oid.Address
+		addr.SetContainer(cnr)
+		addr.SetObject(id)

-		err := decodeAddressFromKey(&addr, []byte(addrStr))
-		if err != nil {
-			db.log.Debug("can't decode object id address",
-				zap.String("addr", addrStr),
-				zap.String("error", err.Error()))
-			return
-		}
-
 		ok, err := db.exists(tx, addr, currEpoch)
 		if (err == nil && ok) || errors.As(err, &splitInfoError) {
-			markAddressInCache(to, fNum, addrStr)
+			raw := make([]byte, objectKeySize)
+			id.Encode(raw)
+			markAddressInCache(to, fNum, string(raw))
 		}
 	}

 	switch op := f.Operation(); op {
 	case object.MatchStringEqual:
-		appendOID(f.Value())
+		var id oid.ID
+		if err := id.DecodeString(f.Value()); err == nil {
+			appendOID(id)
+		}
 	default:
 		fMatch, ok := db.matchers[op]
 		if !ok {
@@ -454,7 +440,10 @@ func (db *DB) selectObjectID(
 		}

 		err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error {
-			appendOID(string(k))
+			var id oid.ID
+			if err := id.Decode(k); err == nil {
+				appendOID(id)
+			}
 			return nil
 		})
 		if err != nil {
@@ -472,7 +461,8 @@ func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f object.SearchFi
 		return true
 	}

-	obj, err := db.get(tx, addr, true, false, currEpoch)
+	buf := make([]byte, addressKeySize)
+	obj, err := db.get(tx, addr, buf, true, false, currEpoch)
 	if err != nil {
 		return false
 	}

View file

@@ -6,7 +6,7 @@ import (
 )

 var (
-	shardInfoBucket = []byte(invalidBase58String + "i")
+	shardInfoBucket = []byte{shardInfoPrefix}
 	shardIDKey      = []byte("id")
 )

View file

@@ -39,12 +39,13 @@ func (db *DB) StorageID(prm StorageIDPrm) (res StorageIDRes, err error) {
 }

 func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) {
-	smallBucket := tx.Bucket(smallBucketName(addr.Container()))
+	key := make([]byte, bucketKeySize)
+	smallBucket := tx.Bucket(smallBucketName(addr.Container(), key))
 	if smallBucket == nil {
 		return nil, nil
 	}

-	storageID := smallBucket.Get(objectKey(addr.Object()))
+	storageID := smallBucket.Get(objectKey(addr.Object(), key))
 	if storageID == nil {
 		return nil, nil
 	}

View file

@ -2,8 +2,8 @@ package meta
import ( import (
"bytes" "bytes"
"crypto/sha256"
"fmt" "fmt"
"strings"
cid "github.com/nspcc-dev/neofs-sdk-go/container/id" cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
"github.com/nspcc-dev/neofs-sdk-go/object" "github.com/nspcc-dev/neofs-sdk-go/object"
@ -11,126 +11,214 @@ import (
"go.etcd.io/bbolt" "go.etcd.io/bbolt"
) )
/*
We might increase performance by not using string representation of
identities and addresses. String representation require base58 encoding that
slows execution. Instead we can try to marshal these structures directly into
bytes. Check it later.
*/
const invalidBase58String = "_"
var ( var (
// graveyardBucketName stores rows with the objects that have been // graveyardBucketName stores rows with the objects that have been
// covered with Tombstone objects. That objects should not be returned // covered with Tombstone objects. That objects should not be returned
// from the node and should not be accepted by the node from other // from the node and should not be accepted by the node from other
// nodes. // nodes.
graveyardBucketName = []byte(invalidBase58String + "Graveyard") graveyardBucketName = []byte{graveyardPrefix}
// garbageBucketName stores rows with the objects that should be physically // garbageBucketName stores rows with the objects that should be physically
// deleted by the node (Garbage Collector routine). // deleted by the node (Garbage Collector routine).
garbageBucketName = []byte(invalidBase58String + "Garbage") garbageBucketName = []byte{garbagePrefix}
toMoveItBucketName = []byte(invalidBase58String + "ToMoveIt") toMoveItBucketName = []byte{toMoveItPrefix}
containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize") containerVolumeBucketName = []byte{containerVolumePrefix}
zeroValue = []byte{0xFF} zeroValue = []byte{0xFF}
smallPostfix = invalidBase58String + "small"
storageGroupPostfix = invalidBase58String + "SG"
tombstonePostfix = invalidBase58String + "TS"
ownerPostfix = invalidBase58String + "ownerid"
payloadHashPostfix = invalidBase58String + "payloadhash"
rootPostfix = invalidBase58String + "root"
parentPostfix = invalidBase58String + "parent"
splitPostfix = invalidBase58String + "splitid"
userAttributePostfix = invalidBase58String + "attr_"
splitInfoError *object.SplitInfoError // for errors.As comparisons
) )
// Prefix bytes for database keys. All IDs and addresses are encoded in
// binary unless specified otherwise.
const (
	// graveyardPrefix is used for the graveyard bucket.
	// Key: object address
	// Value: tombstone address
	graveyardPrefix = iota
	// garbagePrefix is used for the garbage bucket.
	// Key: object address
	// Value: dummy value
	garbagePrefix
	// toMoveItPrefix is used for the bucket containing IDs of objects
	// that are candidates for moving to another shard.
	toMoveItPrefix
	// containerVolumePrefix is used for storing container size estimations.
	// Key: container ID
	// Value: container size in bytes as little-endian uint64
	containerVolumePrefix
	// lockedPrefix is used for storing locked objects information.
	// Key: container ID
	// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
	lockedPrefix
	// shardInfoPrefix is used for storing the shard ID. All keys are custom
	// and are not tied to any container.
	shardInfoPrefix

	//======================
	// Unique index buckets.
	//======================

	// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
	// Key: object ID
	// Value: marshaled object
	primaryPrefix
	// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
	// Key: object ID
	// Value: marshaled object
	lockersPrefix
	// storageGroupPrefix is used for prefixing buckets containing objects of STORAGEGROUP type.
	// Key: object ID
	// Value: marshaled object
	storageGroupPrefix
	// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
	// Key: object ID
	// Value: marshaled object
	tombstonePrefix
	// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
	// Key: object ID
	// Value: blobovnicza ID
	smallPrefix
	// rootPrefix is used for prefixing buckets mapping parent object to the split info.
	// Key: object ID
	// Value: split info
	rootPrefix

	//====================
	// FKBT index buckets.
	//====================

	// ownerPrefix is used for prefixing FKBT index buckets mapping owner to object IDs.
	// Key: owner ID
	// Value: bucket containing object IDs as keys
	ownerPrefix
	// userAttributePrefix is used for prefixing FKBT index buckets mapping attribute values to object IDs.
	// Key: attribute value
	// Value: bucket containing object IDs as keys
	userAttributePrefix

	//====================
	// List index buckets.
	//====================

	// payloadHashPrefix is used for prefixing List index buckets mapping payload hash to a list of object IDs.
	// Key: payload hash
	// Value: list of object IDs
	payloadHashPrefix
	// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
	// Key: parent ID
	// Value: list of object IDs
	parentPrefix
	// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
	// Key: split ID
	// Value: list of object IDs
	splitPrefix
)
const (
	cidSize        = sha256.Size
	bucketKeySize  = 1 + cidSize
	objectKeySize  = sha256.Size
	addressKeySize = cidSize + objectKeySize
)
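For concreteness, with sha256.Size equal to 32: cidSize is 32 bytes (a raw container ID), bucketKeySize is 33 bytes (one prefix byte plus the container ID), objectKeySize is 32 bytes (a raw object ID), and addressKeySize is 64 bytes (container ID followed by object ID).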
var splitInfoError *object.SplitInfoError // for errors.As comparisons
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
	key[0] = prefix
	cnr.Encode(key[1:])
	return key[:bucketKeySize]
}
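A usage sketch: the helpers take a caller-provided `key` buffer so that one `bucketKeySize` allocation can serve several lookups within a transaction (bbolt does not keep a reference to the name slice passed to `tx.Bucket`). The `countContainerObjects` helper below is hypothetical, not part of this commit.

```go
// countContainerObjects counts REGULAR and TOMBSTONE records of one
// container, reusing a single bucket-name buffer for both lookups.
func countContainerObjects(tx *bbolt.Tx, cnr cid.ID) int {
	buf := make([]byte, bucketKeySize)
	n := 0
	if b := tx.Bucket(primaryBucketName(cnr, buf)); b != nil {
		n += b.Stats().KeyN
	}
	if b := tx.Bucket(tombstoneBucketName(cnr, buf)); b != nil { // buf safely reused
		n += b.Stats().KeyN
	}
	return n
}
```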
// primaryBucketName returns <CID>.
-func primaryBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString())
+func primaryBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, primaryPrefix, key)
}
// tombstoneBucketName returns <CID>_TS.
-func tombstoneBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + tombstonePostfix)
+func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, tombstonePrefix, key)
}
// storageGroupBucketName returns <CID>_SG.
-func storageGroupBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + storageGroupPostfix)
+func storageGroupBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, storageGroupPrefix, key)
}
// smallBucketName returns <CID>_small.
-func smallBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + smallPostfix) // consider caching output values
+func smallBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, smallPrefix, key)
}
// attributeBucketName returns <CID>_attr_<attributeKey>.
-func attributeBucketName(cnr cid.ID, attributeKey string) []byte {
-	sb := strings.Builder{} // consider getting string builders from sync.Pool
-	sb.WriteString(cnr.EncodeToString())
-	sb.WriteString(userAttributePostfix)
-	sb.WriteString(attributeKey)
-	return []byte(sb.String())
+func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
+	key[0] = userAttributePrefix
+	cnr.Encode(key[1:])
+	return append(key[:bucketKeySize], attributeKey...)
}
// returns <CID> from attributeBucketName result, nil otherwise.
func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
-	suffix := []byte(userAttributePostfix + attributeKey)
-	if !bytes.HasSuffix(val, suffix) {
+	if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
		return nil
	}
-	return val[:len(val)-len(suffix)]
+	return val[1:bucketKeySize]
}
// payloadHashBucketName returns <CID>_payloadhash.
-func payloadHashBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + payloadHashPostfix)
+func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, payloadHashPrefix, key)
}
// rootBucketName returns <CID>_root.
-func rootBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + rootPostfix)
+func rootBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, rootPrefix, key)
}
// ownerBucketName returns <CID>_ownerid.
-func ownerBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + ownerPostfix)
+func ownerBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, ownerPrefix, key)
}
// parentBucketName returns <CID>_parent.
-func parentBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + parentPostfix)
+func parentBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, parentPrefix, key)
}
// splitBucketName returns <CID>_splitid.
-func splitBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + splitPostfix)
+func splitBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, splitPrefix, key)
}
// addressKey returns key for K-V tables when key is a whole address.
-func addressKey(addr oid.Address) []byte {
-	return []byte(addr.EncodeToString())
+func addressKey(addr oid.Address, key []byte) []byte {
+	addr.Container().Encode(key)
+	addr.Object().Encode(key[cidSize:])
+	return key[:addressKeySize]
}
// parses object address formed by addressKey.
func decodeAddressFromKey(dst *oid.Address, k []byte) error {
-	err := dst.DecodeString(string(k))
-	if err != nil {
-		return fmt.Errorf("decode object address from db key: %w", err)
+	if len(k) != addressKeySize {
+		return fmt.Errorf("invalid length")
	}
+	var cnr cid.ID
+	if err := cnr.Decode(k[:cidSize]); err != nil {
+		return err
+	}
+	var obj oid.ID
+	if err := obj.Decode(k[cidSize:]); err != nil {
+		return err
+	}
+	dst.SetObject(obj)
+	dst.SetContainer(cnr)
	return nil
}
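A round-trip sketch for the address pair above (the `addressRoundTrip` helper is hypothetical): an address encoded by `addressKey` decodes back to an equal address, and keys of the wrong length are rejected before any ID parsing.

```go
// addressRoundTrip checks that encode + decode preserves the address.
func addressRoundTrip(addr oid.Address) error {
	k := addressKey(addr, make([]byte, addressKeySize))

	var out oid.Address
	if err := decodeAddressFromKey(&out, k); err != nil {
		return err
	}
	if out.EncodeToString() != addr.EncodeToString() {
		return fmt.Errorf("round-trip mismatch")
	}
	return nil
}
```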
// objectKey returns key for K-V tables when key is an object id.
-func objectKey(obj oid.ID) []byte {
-	return []byte(obj.EncodeToString())
+func objectKey(obj oid.ID, key []byte) []byte {
+	obj.Encode(key)
+	return key[:objectKeySize]
}
// removes all bucket elements.
@@ -152,13 +240,15 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
panic("empty object list in firstIrregularObjectType") panic("empty object list in firstIrregularObjectType")
} }
var keys [3][1 + cidSize]byte
irregularTypeBuckets := [...]struct { irregularTypeBuckets := [...]struct {
typ object.Type typ object.Type
name []byte name []byte
}{ }{
{object.TypeTombstone, tombstoneBucketName(idCnr)}, {object.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
{object.TypeStorageGroup, storageGroupBucketName(idCnr)}, {object.TypeStorageGroup, storageGroupBucketName(idCnr, keys[1][:])},
{object.TypeLock, bucketNameLockers(idCnr)}, {object.TypeLock, bucketNameLockers(idCnr, keys[2][:])},
} }
	for i := range objs {
@@ -174,5 +264,7 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
// return true if provided object is of LOCK type.
func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
-	return inBucket(tx, bucketNameLockers(idCnr), objectKey(obj))
+	return inBucket(tx,
+		bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
+		objectKey(obj, make([]byte, objectKeySize)))
}
@@ -8,7 +8,7 @@ import (
)

// version contains current metabase version.
-const version = 1
+const version = 2

var versionKey = []byte("version")