[#1482] metabase: Encode database keys in binary
Signed-off-by: Evgenii Stratonikov <evgeniy@morphbits.ru>
This commit is contained in:
parent
d6fef68a62
commit
ae1dab29bc
19 changed files with 392 additions and 276 deletions
|
@ -2,8 +2,8 @@ package meta
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
|
||||
"github.com/nspcc-dev/neofs-sdk-go/object"
|
||||
|
@ -11,126 +11,214 @@ import (
|
|||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
/*
|
||||
We might increase performance by not using string representation of
|
||||
identities and addresses. String representation require base58 encoding that
|
||||
slows execution. Instead we can try to marshal these structures directly into
|
||||
bytes. Check it later.
|
||||
*/
|
||||
|
||||
const invalidBase58String = "_"
|
||||
|
||||
var (
|
||||
// graveyardBucketName stores rows with the objects that have been
|
||||
// covered with Tombstone objects. That objects should not be returned
|
||||
// from the node and should not be accepted by the node from other
|
||||
// nodes.
|
||||
graveyardBucketName = []byte(invalidBase58String + "Graveyard")
|
||||
graveyardBucketName = []byte{graveyardPrefix}
|
||||
// garbageBucketName stores rows with the objects that should be physically
|
||||
// deleted by the node (Garbage Collector routine).
|
||||
garbageBucketName = []byte(invalidBase58String + "Garbage")
|
||||
toMoveItBucketName = []byte(invalidBase58String + "ToMoveIt")
|
||||
containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize")
|
||||
garbageBucketName = []byte{garbagePrefix}
|
||||
toMoveItBucketName = []byte{toMoveItPrefix}
|
||||
containerVolumeBucketName = []byte{containerVolumePrefix}
|
||||
|
||||
zeroValue = []byte{0xFF}
|
||||
|
||||
smallPostfix = invalidBase58String + "small"
|
||||
storageGroupPostfix = invalidBase58String + "SG"
|
||||
tombstonePostfix = invalidBase58String + "TS"
|
||||
ownerPostfix = invalidBase58String + "ownerid"
|
||||
payloadHashPostfix = invalidBase58String + "payloadhash"
|
||||
rootPostfix = invalidBase58String + "root"
|
||||
parentPostfix = invalidBase58String + "parent"
|
||||
splitPostfix = invalidBase58String + "splitid"
|
||||
|
||||
userAttributePostfix = invalidBase58String + "attr_"
|
||||
|
||||
splitInfoError *object.SplitInfoError // for errors.As comparisons
|
||||
)
|
||||
|
||||
// Prefix bytes for database keys. All ids and addresses are encoded in binary
|
||||
// unless specified otherwise.
|
||||
const (
|
||||
// graveyardPrefix is used for the graveyard bucket.
|
||||
// Key: object address
|
||||
// Value: tombstone address
|
||||
graveyardPrefix = iota
|
||||
// garbagePrefix is used for the garbage bucket.
|
||||
// Key: object address
|
||||
// Value: dummy value
|
||||
garbagePrefix
|
||||
// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
|
||||
// to another shard.
|
||||
toMoveItPrefix
|
||||
// containerVolumePrefix is used for storing container size estimations.
|
||||
// Key: container ID
|
||||
// Value: container size in bytes as little-endian uint64
|
||||
containerVolumePrefix
|
||||
// lockedPrefix is used for storing locked objects information.
|
||||
// Key: container ID
|
||||
// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
|
||||
lockedPrefix
|
||||
// shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container.
|
||||
shardInfoPrefix
|
||||
|
||||
//======================
|
||||
// Unique index buckets.
|
||||
//======================
|
||||
|
||||
// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
|
||||
// Key: object ID
|
||||
// Value: marshalled object
|
||||
primaryPrefix
|
||||
// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
|
||||
// Key: object ID
|
||||
// Value: marshalled object
|
||||
lockersPrefix
|
||||
// storageGroupPrefix is used for prefixing buckets containing objects of STORAGEGROUP type.
|
||||
// Key: object ID
|
||||
// Value: marshaled object
|
||||
storageGroupPrefix
|
||||
// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
|
||||
// Key: object ID
|
||||
// Value: marshaled object
|
||||
tombstonePrefix
|
||||
// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
|
||||
// Key: object ID
|
||||
// Value: blobovnicza ID
|
||||
smallPrefix
|
||||
// rootPrefix is used for prefixing buckets mapping parent object to the split info.
|
||||
// Key: object ID
|
||||
// Value: split info
|
||||
rootPrefix
|
||||
|
||||
//====================
|
||||
// FKBT index buckets.
|
||||
//====================
|
||||
|
||||
// ownerPrefix is used for prefixing FKBT index buckets mapping owner to object IDs.
|
||||
// Key: owner ID
|
||||
// Value: bucket containing object IDs as keys
|
||||
ownerPrefix
|
||||
// userAttributePrefix is used for prefixing FKBT index buckets containing objects.
|
||||
// Key: attribute value
|
||||
// Value: bucket containing object IDs as keys
|
||||
userAttributePrefix
|
||||
|
||||
//====================
|
||||
// List index buckets.
|
||||
//====================
|
||||
|
||||
// payloadHashPrefix is used for prefixing List index buckets mapping payload hash to a list of object IDs.
|
||||
// Key: payload hash
|
||||
// Value: list of object IDs
|
||||
payloadHashPrefix
|
||||
// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
|
||||
// Key: parent ID
|
||||
// Value: list of object IDs
|
||||
parentPrefix
|
||||
// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
|
||||
// Key: split ID
|
||||
// Value: list of object IDs
|
||||
splitPrefix
|
||||
)
|
||||
|
||||
const (
|
||||
cidSize = sha256.Size
|
||||
bucketKeySize = 1 + cidSize
|
||||
objectKeySize = sha256.Size
|
||||
addressKeySize = cidSize + objectKeySize
|
||||
)
|
||||
|
||||
var splitInfoError *object.SplitInfoError // for errors.As comparisons
|
||||
|
||||
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
|
||||
key[0] = prefix
|
||||
cnr.Encode(key[1:])
|
||||
return key[:bucketKeySize]
|
||||
}
|
||||
|
||||
// primaryBucketName returns <CID>.
|
||||
func primaryBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString())
|
||||
func primaryBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, primaryPrefix, key)
|
||||
}
|
||||
|
||||
// tombstoneBucketName returns <CID>_TS.
|
||||
func tombstoneBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + tombstonePostfix)
|
||||
func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, tombstonePrefix, key)
|
||||
}
|
||||
|
||||
// storageGroupBucketName returns <CID>_SG.
|
||||
func storageGroupBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + storageGroupPostfix)
|
||||
func storageGroupBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, storageGroupPrefix, key)
|
||||
}
|
||||
|
||||
// smallBucketName returns <CID>_small.
|
||||
func smallBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + smallPostfix) // consider caching output values
|
||||
func smallBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, smallPrefix, key)
|
||||
}
|
||||
|
||||
// attributeBucketName returns <CID>_attr_<attributeKey>.
|
||||
func attributeBucketName(cnr cid.ID, attributeKey string) []byte {
|
||||
sb := strings.Builder{} // consider getting string builders from sync.Pool
|
||||
sb.WriteString(cnr.EncodeToString())
|
||||
sb.WriteString(userAttributePostfix)
|
||||
sb.WriteString(attributeKey)
|
||||
|
||||
return []byte(sb.String())
|
||||
func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
|
||||
key[0] = userAttributePrefix
|
||||
cnr.Encode(key[1:])
|
||||
return append(key[:bucketKeySize], attributeKey...)
|
||||
}
|
||||
|
||||
// returns <CID> from attributeBucketName result, nil otherwise.
|
||||
func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
|
||||
suffix := []byte(userAttributePostfix + attributeKey)
|
||||
if !bytes.HasSuffix(val, suffix) {
|
||||
if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return val[:len(val)-len(suffix)]
|
||||
return val[1:bucketKeySize]
|
||||
}
|
||||
|
||||
// payloadHashBucketName returns <CID>_payloadhash.
|
||||
func payloadHashBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + payloadHashPostfix)
|
||||
func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, payloadHashPrefix, key)
|
||||
}
|
||||
|
||||
// rootBucketName returns <CID>_root.
|
||||
func rootBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + rootPostfix)
|
||||
func rootBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, rootPrefix, key)
|
||||
}
|
||||
|
||||
// ownerBucketName returns <CID>_ownerid.
|
||||
func ownerBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + ownerPostfix)
|
||||
func ownerBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, ownerPrefix, key)
|
||||
}
|
||||
|
||||
// parentBucketName returns <CID>_parent.
|
||||
func parentBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + parentPostfix)
|
||||
func parentBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, parentPrefix, key)
|
||||
}
|
||||
|
||||
// splitBucketName returns <CID>_splitid.
|
||||
func splitBucketName(cnr cid.ID) []byte {
|
||||
return []byte(cnr.EncodeToString() + splitPostfix)
|
||||
func splitBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, splitPrefix, key)
|
||||
}
|
||||
|
||||
// addressKey returns key for K-V tables when key is a whole address.
|
||||
func addressKey(addr oid.Address) []byte {
|
||||
return []byte(addr.EncodeToString())
|
||||
func addressKey(addr oid.Address, key []byte) []byte {
|
||||
addr.Container().Encode(key)
|
||||
addr.Object().Encode(key[cidSize:])
|
||||
return key[:addressKeySize]
|
||||
}
|
||||
|
||||
// parses object address formed by addressKey.
|
||||
func decodeAddressFromKey(dst *oid.Address, k []byte) error {
|
||||
err := dst.DecodeString(string(k))
|
||||
if err != nil {
|
||||
return fmt.Errorf("decode object address from db key: %w", err)
|
||||
if len(k) != addressKeySize {
|
||||
return fmt.Errorf("invalid length")
|
||||
}
|
||||
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(k[:cidSize]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(k[cidSize:]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dst.SetObject(obj)
|
||||
dst.SetContainer(cnr)
|
||||
return nil
|
||||
}
|
||||
|
||||
// objectKey returns key for K-V tables when key is an object id.
|
||||
func objectKey(obj oid.ID) []byte {
|
||||
return []byte(obj.EncodeToString())
|
||||
func objectKey(obj oid.ID, key []byte) []byte {
|
||||
obj.Encode(key)
|
||||
return key[:objectKeySize]
|
||||
}
|
||||
|
||||
// removes all bucket elements.
|
||||
|
@ -152,13 +240,15 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
|
|||
panic("empty object list in firstIrregularObjectType")
|
||||
}
|
||||
|
||||
var keys [3][1 + cidSize]byte
|
||||
|
||||
irregularTypeBuckets := [...]struct {
|
||||
typ object.Type
|
||||
name []byte
|
||||
}{
|
||||
{object.TypeTombstone, tombstoneBucketName(idCnr)},
|
||||
{object.TypeStorageGroup, storageGroupBucketName(idCnr)},
|
||||
{object.TypeLock, bucketNameLockers(idCnr)},
|
||||
{object.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
|
||||
{object.TypeStorageGroup, storageGroupBucketName(idCnr, keys[1][:])},
|
||||
{object.TypeLock, bucketNameLockers(idCnr, keys[2][:])},
|
||||
}
|
||||
|
||||
for i := range objs {
|
||||
|
@ -174,5 +264,7 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
|
|||
|
||||
// return true if provided object is of LOCK type.
|
||||
func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
|
||||
return inBucket(tx, bucketNameLockers(idCnr), objectKey(obj))
|
||||
return inBucket(tx,
|
||||
bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
|
||||
objectKey(obj, make([]byte, objectKeySize)))
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue