[#1482] metabase: Encode database keys in binary

Signed-off-by: Evgenii Stratonikov <evgeniy@morphbits.ru>
2022-09-08 14:54:21 +03:00 · 2022-09-08 14:54:21 +03:00 · ae1dab29bc
commit ae1dab29bc
parent d6fef68a62
19 changed files with 392 additions and 276 deletions
--- a/pkg/local_object_storage/metabase/util.go
+++ b/pkg/local_object_storage/metabase/util.go
@ -2,8 +2,8 @@ package meta

 import (
 	"bytes"
+	"crypto/sha256"
 	"fmt"
-	"strings"

 	cid "github.com/nspcc-dev/neofs-sdk-go/container/id"
 	"github.com/nspcc-dev/neofs-sdk-go/object"
@ -11,126 +11,214 @@ import (
 	"go.etcd.io/bbolt"
 )

-/*
-We might increase performance by not using string representation of
-identities and addresses. String representation require base58 encoding that
-slows execution. Instead we can try to marshal these structures directly into
-bytes. Check it later.
-*/
-
-const invalidBase58String = "_"
-
 var (
 	// graveyardBucketName stores rows with the objects that have been
 	// covered with Tombstone objects. That objects should not be returned
 	// from the node and should not be accepted by the node from other
 	// nodes.
-	graveyardBucketName = []byte(invalidBase58String + "Graveyard")
+	graveyardBucketName = []byte{graveyardPrefix}
 	// garbageBucketName stores rows with the objects that should be physically
 	// deleted by the node (Garbage Collector routine).
-	garbageBucketName         = []byte(invalidBase58String + "Garbage")
-	toMoveItBucketName        = []byte(invalidBase58String + "ToMoveIt")
-	containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize")
+	garbageBucketName         = []byte{garbagePrefix}
+	toMoveItBucketName        = []byte{toMoveItPrefix}
+	containerVolumeBucketName = []byte{containerVolumePrefix}

 	zeroValue = []byte{0xFF}
-
-	smallPostfix        = invalidBase58String + "small"
-	storageGroupPostfix = invalidBase58String + "SG"
-	tombstonePostfix    = invalidBase58String + "TS"
-	ownerPostfix        = invalidBase58String + "ownerid"
-	payloadHashPostfix  = invalidBase58String + "payloadhash"
-	rootPostfix         = invalidBase58String + "root"
-	parentPostfix       = invalidBase58String + "parent"
-	splitPostfix        = invalidBase58String + "splitid"
-
-	userAttributePostfix = invalidBase58String + "attr_"
-
-	splitInfoError *object.SplitInfoError // for errors.As comparisons
 )

+// Prefix bytes for database keys. All ids and addresses are encoded in binary
+// unless specified otherwise.
+const (
+	// graveyardPrefix is used for the graveyard bucket.
+	// 	Key: object address
+	// 	Value: tombstone address
+	graveyardPrefix = iota
+	// garbagePrefix is used for the garbage bucket.
+	// 	Key: object address
+	// 	Value: dummy value
+	garbagePrefix
+	// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
+	// to another shard.
+	toMoveItPrefix
+	// containerVolumePrefix is used for storing container size estimations.
+	//	Key: container ID
+	//  Value: container size in bytes as little-endian uint64
+	containerVolumePrefix
+	// lockedPrefix is used for storing locked objects information.
+	//  Key: container ID
+	//  Value: bucket mapping objects locked to the list of corresponding LOCK objects.
+	lockedPrefix
+	// shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container.
+	shardInfoPrefix
+
+	//======================
+	// Unique index buckets.
+	//======================
+
+	// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
+	//  Key: object ID
+	//  Value: marshalled object
+	primaryPrefix
+	// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
+	//  Key: object ID
+	//  Value: marshalled object
+	lockersPrefix
+	// storageGroupPrefix is used for prefixing buckets containing objects of STORAGEGROUP type.
+	//  Key: object ID
+	//  Value: marshaled object
+	storageGroupPrefix
+	// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
+	//  Key: object ID
+	//  Value: marshaled object
+	tombstonePrefix
+	// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
+	//  Key: object ID
+	//  Value: blobovnicza ID
+	smallPrefix
+	// rootPrefix is used for prefixing buckets mapping parent object to the split info.
+	//  Key: object ID
+	//  Value: split info
+	rootPrefix
+
+	//====================
+	// FKBT index buckets.
+	//====================
+
+	// ownerPrefix is used for prefixing FKBT index buckets mapping owner to object IDs.
+	// Key: owner ID
+	// Value: bucket containing object IDs as keys
+	ownerPrefix
+	// userAttributePrefix is used for prefixing FKBT index buckets containing objects.
+	// Key: attribute value
+	// Value: bucket containing object IDs as keys
+	userAttributePrefix
+
+	//====================
+	// List index buckets.
+	//====================
+
+	// payloadHashPrefix is used for prefixing List index buckets mapping payload hash to a list of object IDs.
+	//  Key: payload hash
+	//  Value: list of object IDs
+	payloadHashPrefix
+	// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
+	//  Key: parent ID
+	//  Value: list of object IDs
+	parentPrefix
+	// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
+	//  Key: split ID
+	//  Value: list of object IDs
+	splitPrefix
+)
+
+const (
+	cidSize        = sha256.Size
+	bucketKeySize  = 1 + cidSize
+	objectKeySize  = sha256.Size
+	addressKeySize = cidSize + objectKeySize
+)
+
+var splitInfoError *object.SplitInfoError // for errors.As comparisons
+
+func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
+	key[0] = prefix
+	cnr.Encode(key[1:])
+	return key[:bucketKeySize]
+}
+
 // primaryBucketName returns <CID>.
-func primaryBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString())
+func primaryBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, primaryPrefix, key)
 }

 // tombstoneBucketName returns <CID>_TS.
-func tombstoneBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + tombstonePostfix)
+func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, tombstonePrefix, key)
 }

 // storageGroupBucketName returns <CID>_SG.
-func storageGroupBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + storageGroupPostfix)
+func storageGroupBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, storageGroupPrefix, key)
 }

 // smallBucketName returns <CID>_small.
-func smallBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + smallPostfix) // consider caching output values
+func smallBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, smallPrefix, key)
 }

 // attributeBucketName returns <CID>_attr_<attributeKey>.
-func attributeBucketName(cnr cid.ID, attributeKey string) []byte {
-	sb := strings.Builder{} // consider getting string builders from sync.Pool
-	sb.WriteString(cnr.EncodeToString())
-	sb.WriteString(userAttributePostfix)
-	sb.WriteString(attributeKey)
-
-	return []byte(sb.String())
+func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
+	key[0] = userAttributePrefix
+	cnr.Encode(key[1:])
+	return append(key[:bucketKeySize], attributeKey...)
 }

 // returns <CID> from attributeBucketName result, nil otherwise.
 func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
-	suffix := []byte(userAttributePostfix + attributeKey)
-	if !bytes.HasSuffix(val, suffix) {
+	if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
 		return nil
 	}

-	return val[:len(val)-len(suffix)]
+	return val[1:bucketKeySize]
 }

 // payloadHashBucketName returns <CID>_payloadhash.
-func payloadHashBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + payloadHashPostfix)
+func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, payloadHashPrefix, key)
 }

 // rootBucketName returns <CID>_root.
-func rootBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + rootPostfix)
+func rootBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, rootPrefix, key)
 }

 // ownerBucketName returns <CID>_ownerid.
-func ownerBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + ownerPostfix)
+func ownerBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, ownerPrefix, key)
 }

 // parentBucketName returns <CID>_parent.
-func parentBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + parentPostfix)
+func parentBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, parentPrefix, key)
 }

 // splitBucketName returns <CID>_splitid.
-func splitBucketName(cnr cid.ID) []byte {
-	return []byte(cnr.EncodeToString() + splitPostfix)
+func splitBucketName(cnr cid.ID, key []byte) []byte {
+	return bucketName(cnr, splitPrefix, key)
 }

 // addressKey returns key for K-V tables when key is a whole address.
-func addressKey(addr oid.Address) []byte {
-	return []byte(addr.EncodeToString())
+func addressKey(addr oid.Address, key []byte) []byte {
+	addr.Container().Encode(key)
+	addr.Object().Encode(key[cidSize:])
+	return key[:addressKeySize]
 }

 // parses object address formed by addressKey.
 func decodeAddressFromKey(dst *oid.Address, k []byte) error {
-	err := dst.DecodeString(string(k))
-	if err != nil {
-		return fmt.Errorf("decode object address from db key: %w", err)
+	if len(k) != addressKeySize {
+		return fmt.Errorf("invalid length")
 	}

+	var cnr cid.ID
+	if err := cnr.Decode(k[:cidSize]); err != nil {
+		return err
+	}
+
+	var obj oid.ID
+	if err := obj.Decode(k[cidSize:]); err != nil {
+		return err
+	}
+
+	dst.SetObject(obj)
+	dst.SetContainer(cnr)
 	return nil
 }

 // objectKey returns key for K-V tables when key is an object id.
-func objectKey(obj oid.ID) []byte {
-	return []byte(obj.EncodeToString())
+func objectKey(obj oid.ID, key []byte) []byte {
+	obj.Encode(key)
+	return key[:objectKeySize]
 }

 // removes all bucket elements.
@ -152,13 +240,15 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object
 		panic("empty object list in firstIrregularObjectType")
 	}

+	var keys [3][1 + cidSize]byte
+
 	irregularTypeBuckets := [...]struct {
 		typ  object.Type
 		name []byte
 	}{
-		{object.TypeTombstone, tombstoneBucketName(idCnr)},
-		{object.TypeStorageGroup, storageGroupBucketName(idCnr)},
-		{object.TypeLock, bucketNameLockers(idCnr)},
+		{object.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
+		{object.TypeStorageGroup, storageGroupBucketName(idCnr, keys[1][:])},
+		{object.TypeLock, bucketNameLockers(idCnr, keys[2][:])},
 	}

 	for i := range objs {
@ -174,5 +264,7 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object

 // return true if provided object is of LOCK type.
 func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
-	return inBucket(tx, bucketNameLockers(idCnr), objectKey(obj))
+	return inBucket(tx,
+		bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
+		objectKey(obj, make([]byte, objectKeySize)))
 }