From ae1dab29bc1cd8946cc2b520470964d1b2b32df1 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 8 Sep 2022 14:54:21 +0300 Subject: [PATCH] [#1482] metabase: Encode database keys in binary Signed-off-by: Evgenii Stratonikov --- pkg/local_object_storage/metabase/VERSION.md | 9 + .../metabase/containers.go | 18 +- pkg/local_object_storage/metabase/counter.go | 3 +- pkg/local_object_storage/metabase/db.go | 12 + pkg/local_object_storage/metabase/delete.go | 39 +-- pkg/local_object_storage/metabase/exists.go | 17 +- pkg/local_object_storage/metabase/get.go | 22 +- .../metabase/graveyard.go | 6 +- pkg/local_object_storage/metabase/inhume.go | 8 +- .../metabase/iterators.go | 33 +-- pkg/local_object_storage/metabase/list.go | 48 ++-- pkg/local_object_storage/metabase/lock.go | 42 ++-- pkg/local_object_storage/metabase/movable.go | 10 +- pkg/local_object_storage/metabase/put.go | 46 ++-- pkg/local_object_storage/metabase/select.go | 122 +++++----- pkg/local_object_storage/metabase/shard_id.go | 2 +- .../metabase/storage_id.go | 5 +- pkg/local_object_storage/metabase/util.go | 224 ++++++++++++------ pkg/local_object_storage/metabase/version.go | 2 +- 19 files changed, 392 insertions(+), 276 deletions(-) diff --git a/pkg/local_object_storage/metabase/VERSION.md b/pkg/local_object_storage/metabase/VERSION.md index 0affc36c9..c9707c007 100644 --- a/pkg/local_object_storage/metabase/VERSION.md +++ b/pkg/local_object_storage/metabase/VERSION.md @@ -84,6 +84,15 @@ This file describes changes between the metabase versions. - Key: split ID - Value: list of object IDs +# History + +## Version 2 + +- Container ID is encoded as 32-byte slice +- Object ID is encoded as 32-byte slice +- Address is encoded as 64-byte slice, container ID + object ID +- Bucket naming scheme is changed: + - container ID + suffix -> 1-byte prefix + container ID ## Version 1 diff --git a/pkg/local_object_storage/metabase/containers.go b/pkg/local_object_storage/metabase/containers.go index da7cb0f2b..fb1b48790 100644 --- a/pkg/local_object_storage/metabase/containers.go +++ b/pkg/local_object_storage/metabase/containers.go @@ -1,9 +1,7 @@ package meta import ( - "crypto/sha256" "encoding/binary" - "strings" cid "github.com/nspcc-dev/neofs-sdk-go/container/id" "go.etcd.io/bbolt" @@ -30,7 +28,7 @@ func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) { err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error { if parseContainerID(&cnr, name, unique) { result = append(result, cnr) - unique[cnr.EncodeToString()] = struct{}{} + unique[string(name[1:bucketKeySize])] = struct{}{} } return nil @@ -55,24 +53,20 @@ func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) { return 0, err } - key := make([]byte, sha256.Size) + key := make([]byte, cidSize) id.Encode(key) return parseContainerSize(containerVolume.Get(key)), nil } func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool { - strContainerID := strings.Split(string(name), invalidBase58String)[0] - - if _, ok := ignore[strContainerID]; ok { + if len(name) != bucketKeySize { return false } - - if err := dst.DecodeString(strContainerID); err != nil { + if _, ok := ignore[string(name[1:bucketKeySize])]; ok { return false } - - return true + return dst.Decode(name[1:bucketKeySize]) == nil } func parseContainerSize(v []byte) uint64 { @@ -89,7 +83,7 @@ func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) e return err } - key := make([]byte, sha256.Size) + key := make([]byte, cidSize) id.Encode(key) size := 
parseContainerSize(containerVolume.Get(key)) diff --git a/pkg/local_object_storage/metabase/counter.go b/pkg/local_object_storage/metabase/counter.go index 0ab1d9805..5085c3ee0 100644 --- a/pkg/local_object_storage/metabase/counter.go +++ b/pkg/local_object_storage/metabase/counter.go @@ -125,6 +125,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error { graveyardBKT := tx.Bucket(graveyardBucketName) garbageBKT := tx.Bucket(garbageBucketName) + key := make([]byte, addressKeySize) err = iteratePhyObjects(tx, func(cnr cid.ID, obj oid.ID) error { phyCounter++ @@ -134,7 +135,7 @@ func syncCounter(tx *bbolt.Tx, force bool) error { // check if an object is available: not with GCMark // and not covered with a tombstone - if inGraveyardWithKey(addressKey(addr), graveyardBKT, garbageBKT) == 0 { + if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 { logicCounter++ } diff --git a/pkg/local_object_storage/metabase/db.go b/pkg/local_object_storage/metabase/db.go index a05edf6cb..29ba942c0 100644 --- a/pkg/local_object_storage/metabase/db.go +++ b/pkg/local_object_storage/metabase/db.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/mr-tron/base58" v2object "github.com/nspcc-dev/neofs-api-go/v2/object" "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard/mode" "github.com/nspcc-dev/neofs-node/pkg/util/logger" @@ -111,6 +112,8 @@ func stringifyValue(key string, objVal []byte) string { switch key { default: return string(objVal) + case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent: + return base58.Encode(objVal) case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash: return hex.EncodeToString(objVal) case v2object.FilterHeaderCreationEpoch, v2object.FilterHeaderPayloadLength: @@ -140,6 +143,9 @@ func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) { switch key { default: return []byte(value), false, true + case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent: + v, err := base58.Decode(value) + return v, false, err == nil case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash: v, err := hex.DecodeString(value) if err != nil { @@ -254,6 +260,12 @@ func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, [] // in boltDB. Useful for getting filter values from unique and list indexes. func bucketKeyHelper(hdr string, val string) []byte { switch hdr { + case v2object.FilterHeaderParent: + v, err := base58.Decode(val) + if err != nil { + return nil + } + return v case v2object.FilterHeaderPayloadHash: v, err := hex.DecodeString(val) if err != nil { diff --git a/pkg/local_object_storage/metabase/delete.go b/pkg/local_object_storage/metabase/delete.go index 01398153b..30c804900 100644 --- a/pkg/local_object_storage/metabase/delete.go +++ b/pkg/local_object_storage/metabase/delete.go @@ -139,7 +139,8 @@ func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (uint64, uint64, er // object was available before the removal (for calculating the logical object // counter). 
func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, error) { - addrKey := addressKey(addr) + key := make([]byte, addressKeySize) + addrKey := addressKey(addr, key) garbageBKT := tx.Bucket(garbageBucketName) graveyardBKT := tx.Bucket(graveyardBucketName) @@ -154,7 +155,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter } // unmarshal object, work only with physically stored (raw == true) objects - obj, err := db.get(tx, addr, false, true, currEpoch) + obj, err := db.get(tx, addr, key, false, true, currEpoch) if err != nil { if errors.As(err, new(apistatus.ObjectNotFound)) { return false, false, nil @@ -166,9 +167,10 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter // if object is an only link to a parent, then remove parent if parent := obj.Parent(); parent != nil { parAddr := object.AddressOf(parent) - sParAddr := parAddr.EncodeToString() + sParAddr := addressKey(parAddr, key) + k := string(sParAddr) - nRef, ok := refCounter[sParAddr] + nRef, ok := refCounter[k] if !ok { nRef = &referenceNumber{ all: parentLength(tx, parAddr), @@ -176,7 +178,7 @@ func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter obj: parent, } - refCounter[sParAddr] = nRef + refCounter[k] = nRef } nRef.cur++ @@ -216,12 +218,14 @@ func (db *DB) deleteObject( // parentLength returns amount of available children from parentid index. func parentLength(tx *bbolt.Tx, addr oid.Address) int { - bkt := tx.Bucket(parentBucketName(addr.Container())) + bucketName := make([]byte, bucketKeySize) + + bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:])) if bkt == nil { return 0 } - lst, err := decodeList(bkt.Get(objectKey(addr.Object()))) + lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:]))) if err != nil { return 0 } @@ -291,23 +295,22 @@ func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error { func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error { addr := object.AddressOf(obj) - objKey := objectKey(addr.Object()) - addrKey := addressKey(addr) + objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) + addrKey := addressKey(addr, make([]byte, addressKeySize)) cnr := addr.Container() + bucketName := make([]byte, bucketKeySize) // add value to primary unique bucket if !isParent { - var bucketName []byte - switch obj.Type() { case objectSDK.TypeRegular: - bucketName = primaryBucketName(cnr) + bucketName = primaryBucketName(cnr, bucketName) case objectSDK.TypeTombstone: - bucketName = tombstoneBucketName(cnr) + bucketName = tombstoneBucketName(cnr, bucketName) case objectSDK.TypeStorageGroup: - bucketName = storageGroupBucketName(cnr) + bucketName = storageGroupBucketName(cnr, bucketName) case objectSDK.TypeLock: - bucketName = bucketNameLockers(cnr) + bucketName = bucketNameLockers(cnr, bucketName) default: return ErrUnknownObjectType } @@ -318,17 +321,17 @@ func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error }) } else { delUniqueIndexItem(tx, namedBucketItem{ - name: parentBucketName(cnr), + name: parentBucketName(cnr, bucketName), key: objKey, }) } delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index - name: smallBucketName(cnr), + name: smallBucketName(cnr, bucketName), key: objKey, }) delUniqueIndexItem(tx, namedBucketItem{ // remove from root index - name: rootBucketName(cnr), + name: rootBucketName(cnr, bucketName), key: objKey, }) 
delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index diff --git a/pkg/local_object_storage/metabase/exists.go b/pkg/local_object_storage/metabase/exists.go index 00965d758..34da4d206 100644 --- a/pkg/local_object_storage/metabase/exists.go +++ b/pkg/local_object_storage/metabase/exists.go @@ -71,17 +71,18 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists b return false, object.ErrObjectIsExpired } - objKey := objectKey(addr.Object()) + objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) cnr := addr.Container() + key := make([]byte, bucketKeySize) // if graveyard is empty, then check if object exists in primary bucket - if inBucket(tx, primaryBucketName(cnr), objKey) { + if inBucket(tx, primaryBucketName(cnr, key), objKey) { return true, nil } // if primary bucket is empty, then check if object exists in parent bucket - if inBucket(tx, parentBucketName(cnr), objKey) { + if inBucket(tx, parentBucketName(cnr, key), objKey) { splitInfo, err := getSplitInfo(tx, cnr, objKey) if err != nil { return false, err @@ -105,15 +106,16 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 { // GC is expected to collect all the objects that have // expired previously for less than the one epoch duration - rawOID := []byte(addr.Object().EncodeToString()) var expired bool // bucket with objects that have expiration attr - expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch)) + attrKey := make([]byte, bucketKeySize+len(objectV2.SysAttributeExpEpoch)) + expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), objectV2.SysAttributeExpEpoch, attrKey)) if expirationBucket != nil { // bucket that contains objects that expire in the current epoch prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10))) if prevEpochBkt != nil { + rawOID := objectKey(addr.Object(), make([]byte, objectKeySize)) if prevEpochBkt.Get(rawOID) != nil { expired = true } @@ -126,7 +128,7 @@ func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 { graveyardBkt := tx.Bucket(graveyardBucketName) garbageBkt := tx.Bucket(garbageBucketName) - addrKey := addressKey(addr) + addrKey := addressKey(addr, make([]byte, addressKeySize)) return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt) } @@ -175,7 +177,8 @@ func inBucket(tx *bbolt.Tx, name, key []byte) bool { // getSplitInfo returns SplitInfo structure from root index. Returns error // if there is no `key` record in root index. 
func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) { - rawSplitInfo := getFromBucket(tx, rootBucketName(cnr), key) + bucketName := rootBucketName(cnr, make([]byte, bucketKeySize)) + rawSplitInfo := getFromBucket(tx, bucketName, key) if len(rawSplitInfo) == 0 { return nil, ErrLackSplitInfo } diff --git a/pkg/local_object_storage/metabase/get.go b/pkg/local_object_storage/metabase/get.go index 2b67823dc..1a34b398f 100644 --- a/pkg/local_object_storage/metabase/get.go +++ b/pkg/local_object_storage/metabase/get.go @@ -53,7 +53,8 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) { currEpoch := db.epochState.CurrentEpoch() err = db.boltDB.View(func(tx *bbolt.Tx) error { - res.hdr, err = db.get(tx, prm.addr, true, prm.raw, currEpoch) + key := make([]byte, addressKeySize) + res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch) return err }) @@ -61,9 +62,7 @@ func (db *DB) Get(prm GetPrm) (res GetRes, err error) { return } -func (db *DB) get(tx *bbolt.Tx, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) { - key := objectKey(addr.Object()) - +func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) { if checkStatus { switch objectStatus(tx, addr, currEpoch) { case 1: @@ -79,29 +78,31 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, checkStatus, raw bool, currEpo } } + key = objectKey(addr.Object(), key) cnr := addr.Container() obj := objectSDK.New() + bucketName := make([]byte, bucketKeySize) // check in primary index - data := getFromBucket(tx, primaryBucketName(cnr), key) + data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key) if len(data) != 0 { return obj, obj.Unmarshal(data) } // if not found then check in tombstone index - data = getFromBucket(tx, tombstoneBucketName(cnr), key) + data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key) if len(data) != 0 { return obj, obj.Unmarshal(data) } // if not found then check in storage group index - data = getFromBucket(tx, storageGroupBucketName(cnr), key) + data = getFromBucket(tx, storageGroupBucketName(cnr, bucketName), key) if len(data) != 0 { return obj, obj.Unmarshal(data) } // if not found then check in locker index - data = getFromBucket(tx, bucketNameLockers(cnr), key) + data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key) if len(data) != 0 { return obj, obj.Unmarshal(data) } @@ -124,7 +125,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD return nil, getSplitInfoError(tx, cnr, key) } - parentBucket := tx.Bucket(parentBucketName(cnr)) + bucketName := make([]byte, bucketKeySize) + parentBucket := tx.Bucket(parentBucketName(cnr, bucketName)) if parentBucket == nil { var errNotFound apistatus.ObjectNotFound @@ -146,7 +148,7 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD // but later list might be sorted so first or last value can be more // prioritized to choose virtualOID := relativeLst[len(relativeLst)-1] - data := getFromBucket(tx, primaryBucketName(cnr), virtualOID) + data := getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID) child := objectSDK.New() diff --git a/pkg/local_object_storage/metabase/graveyard.go b/pkg/local_object_storage/metabase/graveyard.go index 3b96bcb5f..dcf7d1e83 100644 --- a/pkg/local_object_storage/metabase/graveyard.go +++ b/pkg/local_object_storage/metabase/graveyard.go @@ -174,7 +174,7 @@ func (db *DB) iterateDeletedObj(tx 
*bbolt.Tx, h kvHandler, offset *oid.Address) if offset == nil { k, v = c.First() } else { - rawAddr := addressKey(*offset) + rawAddr := addressKey(*offset, make([]byte, addressKeySize)) k, v = c.Seek(rawAddr) if bytes.Equal(k, rawAddr) { @@ -222,6 +222,8 @@ func graveFromKV(k, v []byte) (res TombstonedObject, err error) { // // Returns any error appeared during deletion process. func (db *DB) DropGraves(tss []TombstonedObject) error { + buf := make([]byte, addressKeySize) + return db.boltDB.Update(func(tx *bbolt.Tx) error { bkt := tx.Bucket(graveyardBucketName) if bkt == nil { @@ -229,7 +231,7 @@ func (db *DB) DropGraves(tss []TombstonedObject) error { } for _, ts := range tss { - err := bkt.Delete(addressKey(ts.Address())) + err := bkt.Delete(addressKey(ts.Address(), buf)) if err != nil { return err } diff --git a/pkg/local_object_storage/metabase/inhume.go b/pkg/local_object_storage/metabase/inhume.go index 344259b61..296e3bd6c 100644 --- a/pkg/local_object_storage/metabase/inhume.go +++ b/pkg/local_object_storage/metabase/inhume.go @@ -113,7 +113,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) { if prm.tomb != nil { bkt = graveyardBKT - tombKey := addressKey(*prm.tomb) + tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize)) // it is forbidden to have a tomb-on-tomb in NeoFS, // so graveyard keys must not be addresses of tombstones @@ -131,6 +131,7 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) { value = zeroValue } + buf := make([]byte, addressKeySize) for i := range prm.target { id := prm.target[i].Object() cnr := prm.target[i].Container() @@ -153,9 +154,8 @@ func (db *DB) Inhume(prm InhumePrm) (res InhumeRes, err error) { lockWasChecked = true } - targetKey := addressKey(prm.target[i]) - - obj, err := db.get(tx, prm.target[i], false, true, currEpoch) + obj, err := db.get(tx, prm.target[i], buf, false, true, currEpoch) + targetKey := addressKey(prm.target[i], buf) if err == nil { if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 { // object is available, decrement the diff --git a/pkg/local_object_storage/metabase/iterators.go b/pkg/local_object_storage/metabase/iterators.go index 85ed49f23..cc9b032b1 100644 --- a/pkg/local_object_storage/metabase/iterators.go +++ b/pkg/local_object_storage/metabase/iterators.go @@ -55,6 +55,12 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler) return nil } + var cnrID cid.ID + err := cnrID.Decode(cidBytes) + if err != nil { + return fmt.Errorf("could not parse container ID of expired bucket: %w", err) + } + return b.ForEach(func(expKey, _ []byte) error { bktExpired := b.Bucket(expKey) if bktExpired == nil { @@ -71,18 +77,11 @@ func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler) return bktExpired.ForEach(func(idKey, _ []byte) error { var id oid.ID - err = id.DecodeString(string(idKey)) + err = id.Decode(idKey) if err != nil { return fmt.Errorf("could not parse ID of expired object: %w", err) } - var cnrID cid.ID - - err = cnrID.DecodeString(string(cidBytes)) - if err != nil { - return fmt.Errorf("could not parse container ID of expired bucket: %w", err) - } - // Ignore locked objects. 
// // To slightly optimize performance we can check only REGULAR objects @@ -131,7 +130,11 @@ func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Addres } err := bktGraveyard.ForEach(func(k, v []byte) error { - if _, ok := tss[string(v)]; ok { + var addr oid.Address + if err := decodeAddressFromKey(&addr, v); err != nil { + return err + } + if _, ok := tss[addr.EncodeToString()]; ok { var addr oid.Address err := decodeAddressFromKey(&addr, k) @@ -161,22 +164,22 @@ func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID) error) error { var oid oid.ID return tx.ForEach(func(name []byte, b *bbolt.Bucket) error { - b58CID, postfix := parseContainerIDWithPostfix(&cid, name) + b58CID, postfix := parseContainerIDWithPrefix(&cid, name) if len(b58CID) == 0 { return nil } switch postfix { - case "", - storageGroupPostfix, - bucketNameSuffixLockers, - tombstonePostfix: + case primaryPrefix, + storageGroupPrefix, + lockersPrefix, + tombstonePrefix: default: return nil } return b.ForEach(func(k, v []byte) error { - if oid.DecodeString(string(k)) == nil { + if oid.Decode(k) == nil { return f(cid, oid) } diff --git a/pkg/local_object_storage/metabase/list.go b/pkg/local_object_storage/metabase/list.go index 1b2acc319..5ab883045 100644 --- a/pkg/local_object_storage/metabase/list.go +++ b/pkg/local_object_storage/metabase/list.go @@ -1,7 +1,6 @@ package meta import ( - "bytes" "errors" cid "github.com/nspcc-dev/neofs-sdk-go/container/id" @@ -90,30 +89,28 @@ func (db *DB) listWithCursor(tx *bbolt.Tx, result []oid.Address, count int, curs graveyardBkt := tx.Bucket(graveyardBucketName) garbageBkt := tx.Bucket(garbageBucketName) - const idSize = 44 // size of the stringified object and container ids - var rawAddr = make([]byte, idSize*2+1) + var rawAddr = make([]byte, cidSize, addressKeySize) loop: for ; name != nil; name, _ = c.Next() { - b58CID, postfix := parseContainerIDWithPostfix(&containerID, name) - if b58CID == nil { + cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name) + if cidRaw == nil { continue } - switch postfix { + switch prefix { case - "", - storageGroupPostfix, - bucketNameSuffixLockers, - tombstonePostfix: + primaryPrefix, + storageGroupPrefix, + lockersPrefix, + tombstonePrefix: default: continue } bkt := tx.Bucket(name) if bkt != nil { - rawAddr = append(rawAddr[:0], b58CID...) - rawAddr = append(rawAddr, '/') + copy(rawAddr, cidRaw) result, offset, cursor = selectNFromBucket(bkt, graveyardBkt, garbageBkt, rawAddr, containerID, result, count, cursor, threshold) } @@ -150,7 +147,7 @@ loop: // object to start selecting from. Ignores inhumed objects. 
func selectNFromBucket(bkt *bbolt.Bucket, // main bucket graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets - addrRaw []byte, // container ID prefix, optimization + cidRaw []byte, // container ID prefix, optimization cnt cid.ID, // container ID to []oid.Address, // listing result limit int, // stop listing at `limit` items in result @@ -178,12 +175,12 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket } var obj oid.ID - if err := obj.DecodeString(string(k)); err != nil { + if err := obj.Decode(k); err != nil { break } offset = k - if inGraveyardWithKey(append(addrRaw, k...), graveyardBkt, garbageBkt) > 0 { + if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 { continue } @@ -197,21 +194,16 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket return to, offset, cursor } -func parseContainerIDWithPostfix(containerID *cid.ID, name []byte) ([]byte, string) { - var ( - containerIDStr = name - postfix []byte - ) - - ind := bytes.IndexByte(name, invalidBase58String[0]) - if ind > 0 { - postfix = containerIDStr[ind:] - containerIDStr = containerIDStr[:ind] +func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) { + if len(name) < bucketKeySize { + return nil, 0 } - if err := containerID.DecodeString(string(containerIDStr)); err != nil { - return nil, "" + rawID := name[1:bucketKeySize] + + if err := containerID.Decode(rawID); err != nil { + return nil, 0 } - return containerIDStr, string(postfix) + return rawID, name[0] } diff --git a/pkg/local_object_storage/metabase/lock.go b/pkg/local_object_storage/metabase/lock.go index ab83ee59f..c9e0ff5db 100644 --- a/pkg/local_object_storage/metabase/lock.go +++ b/pkg/local_object_storage/metabase/lock.go @@ -11,15 +11,11 @@ import ( "go.etcd.io/bbolt" ) -// bucket name for locked objects. -var bucketNameLocked = []byte(invalidBase58String + "Locked") - -// suffix for container buckets with objects of type LOCK. -const bucketNameSuffixLockers = invalidBase58String + "LOCKER" +var bucketNameLocked = []byte{lockedPrefix} // returns name of the bucket with objects of type LOCK for specified container. -func bucketNameLockers(idCnr cid.ID) []byte { - return []byte(idCnr.EncodeToString() + bucketNameSuffixLockers) +func bucketNameLockers(idCnr cid.ID, key []byte) []byte { + return bucketName(idCnr, lockersPrefix, key) } // Lock marks objects as locked with another object. All objects are from the @@ -36,14 +32,14 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error { panic("empty locked list") } + // check if all objects are regular + bucketKeysLocked := make([][]byte, len(locked)) + for i := range locked { + bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize)) + } + key := make([]byte, cidSize) + return db.boltDB.Update(func(tx *bbolt.Tx) error { - // check if all objects are regular - bucketKeysLocked := make([][]byte, len(locked)) - - for i := range locked { - bucketKeysLocked[i] = objectKey(locked[i]) - } - if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) 
!= object.TypeRegular { return apistatus.LockNonRegularObject{} } @@ -53,12 +49,13 @@ func (db *DB) Lock(cnr cid.ID, locker oid.ID, locked []oid.ID) error { return fmt.Errorf("create global bucket for locked objects: %w", err) } - bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists([]byte(cnr.EncodeToString())) + cnr.Encode(key) + bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key) if err != nil { return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err) } - keyLocker := objectKey(locker) + keyLocker := objectKey(locker, key) var exLockers [][]byte var updLockers []byte @@ -114,9 +111,11 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) error { func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool { bucketLocked := tx.Bucket(bucketNameLocked) if bucketLocked != nil { - bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString())) + key := make([]byte, cidSize) + idCnr.Encode(key) + bucketLockedContainer := bucketLocked.Bucket(key) if bucketLockedContainer != nil { - return bucketLockedContainer.Get(objectKey(idObj)) != nil + return bucketLockedContainer.Get(objectKey(idObj, key)) != nil } } @@ -131,9 +130,12 @@ func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool { func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) error { bucketLocked := tx.Bucket(bucketNameLocked) if bucketLocked != nil { - bucketLockedContainer := bucketLocked.Bucket([]byte(idCnr.EncodeToString())) + key := make([]byte, cidSize) + idCnr.Encode(key) + + bucketLockedContainer := bucketLocked.Bucket(key) if bucketLockedContainer != nil { - keyLocker := objectKey(locker) + keyLocker := objectKey(locker, key) return bucketLockedContainer.ForEach(func(k, v []byte) error { keyLockers, err := decodeList(v) if err != nil { diff --git a/pkg/local_object_storage/metabase/movable.go b/pkg/local_object_storage/metabase/movable.go index 42935e391..8011490d2 100644 --- a/pkg/local_object_storage/metabase/movable.go +++ b/pkg/local_object_storage/metabase/movable.go @@ -52,13 +52,16 @@ func (db *DB) ToMoveIt(prm ToMoveItPrm) (res ToMoveItRes, err error) { db.modeMtx.RLock() defer db.modeMtx.RUnlock() + key := make([]byte, addressKeySize) + key = addressKey(prm.addr, key) + err = db.boltDB.Update(func(tx *bbolt.Tx) error { toMoveIt, err := tx.CreateBucketIfNotExists(toMoveItBucketName) if err != nil { return err } - return toMoveIt.Put(addressKey(prm.addr), zeroValue) + return toMoveIt.Put(key, zeroValue) }) return @@ -69,13 +72,16 @@ func (db *DB) DoNotMove(prm DoNotMovePrm) (res DoNotMoveRes, err error) { db.modeMtx.RLock() defer db.modeMtx.RUnlock() + key := make([]byte, addressKeySize) + key = addressKey(prm.addr, key) + err = db.boltDB.Update(func(tx *bbolt.Tx) error { toMoveIt := tx.Bucket(toMoveItBucketName) if toMoveIt == nil { return nil } - return toMoveIt.Delete(addressKey(prm.addr)) + return toMoveIt.Delete(key) }) return diff --git a/pkg/local_object_storage/metabase/put.go b/pkg/local_object_storage/metabase/put.go index 3f439e3ba..b39005633 100644 --- a/pkg/local_object_storage/metabase/put.go +++ b/pkg/local_object_storage/metabase/put.go @@ -168,21 +168,20 @@ func putUniqueIndexes( isParent := si != nil addr := object.AddressOf(obj) cnr := addr.Container() - objKey := objectKey(addr.Object()) + objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) + bucketName := make([]byte, bucketKeySize) // add value to primary unique bucket if !isParent { - var bucketName []byte - switch obj.Type() { case 
objectSDK.TypeRegular: - bucketName = primaryBucketName(cnr) + bucketName = primaryBucketName(cnr, bucketName) case objectSDK.TypeTombstone: - bucketName = tombstoneBucketName(cnr) + bucketName = tombstoneBucketName(cnr, bucketName) case objectSDK.TypeStorageGroup: - bucketName = storageGroupBucketName(cnr) + bucketName = storageGroupBucketName(cnr, bucketName) case objectSDK.TypeLock: - bucketName = bucketNameLockers(cnr) + bucketName = bucketNameLockers(cnr, bucketName) default: return ErrUnknownObjectType } @@ -204,7 +203,7 @@ func putUniqueIndexes( // index storageID if it is present if id != nil { err = putUniqueIndexItem(tx, namedBucketItem{ - name: smallBucketName(cnr), + name: smallBucketName(cnr, bucketName), key: objKey, val: id, }) @@ -229,7 +228,7 @@ func putUniqueIndexes( } err = putUniqueIndexItem(tx, namedBucketItem{ - name: rootBucketName(cnr), + name: rootBucketName(cnr, bucketName), key: objKey, val: splitInfo, }) @@ -246,13 +245,14 @@ type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error { idObj, _ := obj.ID() cnr, _ := obj.ContainerID() - objKey := objectKey(idObj) + objKey := objectKey(idObj, make([]byte, objectKeySize)) + bucketName := make([]byte, bucketKeySize) cs, _ := obj.PayloadChecksum() // index payload hashes err := f(tx, namedBucketItem{ - name: payloadHashBucketName(cnr), + name: payloadHashBucketName(cnr, bucketName), key: cs.Value(), val: objKey, }) @@ -265,8 +265,8 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun // index parent ids if ok { err := f(tx, namedBucketItem{ - name: parentBucketName(cnr), - key: objectKey(idParent), + name: parentBucketName(cnr, bucketName), + key: objectKey(idParent, make([]byte, objectKeySize)), val: objKey, }) if err != nil { @@ -277,7 +277,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun // index split ids if obj.SplitID() != nil { err := f(tx, namedBucketItem{ - name: splitBucketName(cnr), + name: splitBucketName(cnr, bucketName), key: obj.SplitID().ToV2(), val: objKey, }) @@ -292,12 +292,13 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error { id, _ := obj.ID() cnr, _ := obj.ContainerID() - objKey := []byte(id.EncodeToString()) + objKey := objectKey(id, make([]byte, objectKeySize)) attrs := obj.Attributes() + key := make([]byte, bucketKeySize) err := f(tx, namedBucketItem{ - name: ownerBucketName(cnr), + name: ownerBucketName(cnr, key), key: []byte(obj.OwnerID().EncodeToString()), val: objKey, }) @@ -307,8 +308,9 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun // user specified attributes for i := range attrs { + key = attributeBucketName(cnr, attrs[i].Key(), key) err := f(tx, namedBucketItem{ - name: attributeBucketName(cnr, attrs[i].Key()), + name: key, key: []byte(attrs[i].Value()), val: objKey, }) @@ -437,25 +439,27 @@ func getVarUint(data []byte) (uint64, int, error) { // updateStorageID for existing objects if they were moved from one // storage location to another. 
func updateStorageID(tx *bbolt.Tx, addr oid.Address, id []byte) error { - bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container())) + key := make([]byte, bucketKeySize) + bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container(), key)) if err != nil { return err } - return bkt.Put(objectKey(addr.Object()), id) + return bkt.Put(objectKey(addr.Object(), key), id) } // updateSpliInfo for existing objects if storage filled with extra information // about last object in split hierarchy or linking object. func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error { - bkt := tx.Bucket(rootBucketName(addr.Container())) + key := make([]byte, bucketKeySize) + bkt := tx.Bucket(rootBucketName(addr.Container(), key)) if bkt == nil { // if object doesn't exists and we want to update split info on it // then ignore, this should never happen return ErrIncorrectSplitInfoUpdate } - objectKey := objectKey(addr.Object()) + objectKey := objectKey(addr.Object(), key) rawSplitInfo := bkt.Get(objectKey) if len(rawSplitInfo) == 0 { diff --git a/pkg/local_object_storage/metabase/select.go b/pkg/local_object_storage/metabase/select.go index a8b40b152..1da56b12f 100644 --- a/pkg/local_object_storage/metabase/select.go +++ b/pkg/local_object_storage/metabase/select.go @@ -107,13 +107,16 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c continue // ignore objects with unmatched fast filters } - var addr oid.Address - - err = decodeAddressFromKey(&addr, []byte(a)) + var id oid.ID + err = id.Decode([]byte(a)) if err != nil { return nil, err } + var addr oid.Address + addr.SetContainer(cnr) + addr.SetObject(id) + if objectStatus(tx, addr, currEpoch) > 0 { continue // ignore removed objects } @@ -130,26 +133,24 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs object.SearchFilters, c // selectAll adds to resulting cache all available objects in metabase. func (db *DB) selectAll(tx *bbolt.Tx, cnr cid.ID, to map[string]int) { - prefix := cnr.EncodeToString() + "/" - - selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, 0) - selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, 0) - selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, 0) - selectAllFromBucket(tx, parentBucketName(cnr), prefix, to, 0) - selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, 0) + bucketName := make([]byte, bucketKeySize) + selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, 0) + selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, 0) + selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, 0) + selectAllFromBucket(tx, parentBucketName(cnr, bucketName), to, 0) + selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, 0) } // selectAllFromBucket goes through all keys in bucket and adds them in a // resulting cache. Keys should be stringed object ids. 
-func selectAllFromBucket(tx *bbolt.Tx, name []byte, prefix string, to map[string]int, fNum int) { +func selectAllFromBucket(tx *bbolt.Tx, name []byte, to map[string]int, fNum int) { bkt := tx.Bucket(name) if bkt == nil { return } _ = bkt.ForEach(func(k, v []byte) error { - key := prefix + string(k) // consider using string builders from sync.Pool - markAddressInCache(to, fNum, key) + markAddressInCache(to, fNum, string(k)) return nil }) @@ -164,47 +165,46 @@ func (db *DB) selectFastFilter( to map[string]int, // resulting cache fNum int, // index of filter ) { - prefix := cnr.EncodeToString() + "/" currEpoch := db.epochState.CurrentEpoch() - + bucketName := make([]byte, bucketKeySize) switch f.Header() { case v2object.FilterHeaderObjectID: db.selectObjectID(tx, f, cnr, to, fNum, currEpoch) case v2object.FilterHeaderOwnerID: - bucketName := ownerBucketName(cnr) - db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum) + bucketName := ownerBucketName(cnr, bucketName) + db.selectFromFKBT(tx, bucketName, f, to, fNum) case v2object.FilterHeaderPayloadHash: - bucketName := payloadHashBucketName(cnr) - db.selectFromList(tx, bucketName, f, prefix, to, fNum) + bucketName := payloadHashBucketName(cnr, bucketName) + db.selectFromList(tx, bucketName, f, to, fNum) case v2object.FilterHeaderObjectType: for _, bucketName := range bucketNamesForType(cnr, f.Operation(), f.Value()) { - selectAllFromBucket(tx, bucketName, prefix, to, fNum) + selectAllFromBucket(tx, bucketName, to, fNum) } case v2object.FilterHeaderParent: - bucketName := parentBucketName(cnr) - db.selectFromList(tx, bucketName, f, prefix, to, fNum) + bucketName := parentBucketName(cnr, bucketName) + db.selectFromList(tx, bucketName, f, to, fNum) case v2object.FilterHeaderSplitID: - bucketName := splitBucketName(cnr) - db.selectFromList(tx, bucketName, f, prefix, to, fNum) + bucketName := splitBucketName(cnr, bucketName) + db.selectFromList(tx, bucketName, f, to, fNum) case v2object.FilterPropertyRoot: - selectAllFromBucket(tx, rootBucketName(cnr), prefix, to, fNum) + selectAllFromBucket(tx, rootBucketName(cnr, bucketName), to, fNum) case v2object.FilterPropertyPhy: - selectAllFromBucket(tx, primaryBucketName(cnr), prefix, to, fNum) - selectAllFromBucket(tx, tombstoneBucketName(cnr), prefix, to, fNum) - selectAllFromBucket(tx, storageGroupBucketName(cnr), prefix, to, fNum) - selectAllFromBucket(tx, bucketNameLockers(cnr), prefix, to, fNum) + selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum) + selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum) + selectAllFromBucket(tx, storageGroupBucketName(cnr, bucketName), to, fNum) + selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum) default: // user attribute - bucketName := attributeBucketName(cnr, f.Header()) + bucketName := attributeBucketName(cnr, f.Header(), bucketName) if f.Operation() == object.MatchNotPresent { - selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, f, prefix, to, fNum) + selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum) } else { - db.selectFromFKBT(tx, bucketName, f, prefix, to, fNum) + db.selectFromFKBT(tx, bucketName, f, to, fNum) } } } -var mBucketNaming = map[string][]func(cid.ID) []byte{ +var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{ v2object.TypeRegular.String(): {primaryBucketName, parentBucketName}, v2object.TypeTombstone.String(): {tombstoneBucketName}, v2object.TypeStorageGroup.String(): {storageGroupBucketName}, @@ -214,7 +214,7 @@ var mBucketNaming = map[string][]func(cid.ID) 
[]byte{ func allBucketNames(cnr cid.ID) (names [][]byte) { for _, fns := range mBucketNaming { for _, fn := range fns { - names = append(names, fn(cnr)) + names = append(names, fn(cnr, make([]byte, bucketKeySize))) } } @@ -226,7 +226,7 @@ func bucketNamesForType(cnr cid.ID, mType object.SearchMatchType, typeVal string fns, ok := mBucketNaming[key] if ok { for _, fn := range fns { - names = append(names, fn(cnr)) + names = append(names, fn(cnr, make([]byte, bucketKeySize))) } } } @@ -258,7 +258,6 @@ func (db *DB) selectFromFKBT( tx *bbolt.Tx, name []byte, // fkbt root bucket name f object.SearchFilter, // filter for operation and value - prefix string, // prefix to create addr from oid in index to map[string]int, // resulting cache fNum int, // index of filter ) { // @@ -281,8 +280,7 @@ func (db *DB) selectFromFKBT( } return fkbtLeaf.ForEach(func(k, _ []byte) error { - addr := prefix + string(k) - markAddressInCache(to, fNum, addr) + markAddressInCache(to, fNum, string(k)) return nil }) @@ -298,8 +296,6 @@ func selectOutsideFKBT( tx *bbolt.Tx, incl [][]byte, // buckets name []byte, // fkbt root bucket name - f object.SearchFilter, // filter for operation and value - prefix string, // prefix to create addr from oid in index to map[string]int, // resulting cache fNum int, // index of filter ) { @@ -314,8 +310,7 @@ func selectOutsideFKBT( } return exclBktLeaf.ForEach(func(k, _ []byte) error { - addr := prefix + string(k) - mExcl[addr] = struct{}{} + mExcl[string(k)] = struct{}{} return nil }) @@ -329,10 +324,8 @@ func selectOutsideFKBT( } _ = bktIncl.ForEach(func(k, _ []byte) error { - addr := prefix + string(k) - - if _, ok := mExcl[addr]; !ok { - markAddressInCache(to, fNum, addr) + if _, ok := mExcl[string(k)]; !ok { + markAddressInCache(to, fNum, string(k)) } return nil @@ -346,7 +339,6 @@ func (db *DB) selectFromList( tx *bbolt.Tx, name []byte, // list root bucket name f object.SearchFilter, // filter for operation and value - prefix string, // prefix to create addr from oid in index to map[string]int, // resulting cache fNum int, // index of filter ) { // @@ -398,8 +390,7 @@ func (db *DB) selectFromList( } for i := range lst { - addr := prefix + string(lst[i]) - markAddressInCache(to, fNum, addr) + markAddressInCache(to, fNum, string(lst[i])) } } @@ -412,30 +403,25 @@ func (db *DB) selectObjectID( fNum int, // index of filter currEpoch uint64, ) { - prefix := cnr.EncodeToString() + "/" - - appendOID := func(strObj string) { - addrStr := prefix + strObj + appendOID := func(id oid.ID) { var addr oid.Address - - err := decodeAddressFromKey(&addr, []byte(addrStr)) - if err != nil { - db.log.Debug("can't decode object id address", - zap.String("addr", addrStr), - zap.String("error", err.Error())) - - return - } + addr.SetContainer(cnr) + addr.SetObject(id) ok, err := db.exists(tx, addr, currEpoch) if (err == nil && ok) || errors.As(err, &splitInfoError) { - markAddressInCache(to, fNum, addrStr) + raw := make([]byte, objectKeySize) + id.Encode(raw) + markAddressInCache(to, fNum, string(raw)) } } switch op := f.Operation(); op { case object.MatchStringEqual: - appendOID(f.Value()) + var id oid.ID + if err := id.DecodeString(f.Value()); err == nil { + appendOID(id) + } default: fMatch, ok := db.matchers[op] if !ok { @@ -454,7 +440,10 @@ func (db *DB) selectObjectID( } err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error { - appendOID(string(k)) + var id oid.ID + if err := id.Decode(k); err == nil { + appendOID(id) + } return nil }) if err != nil { @@ -472,7 +461,8 @@ 
func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f object.SearchFi return true } - obj, err := db.get(tx, addr, true, false, currEpoch) + buf := make([]byte, addressKeySize) + obj, err := db.get(tx, addr, buf, true, false, currEpoch) if err != nil { return false } diff --git a/pkg/local_object_storage/metabase/shard_id.go b/pkg/local_object_storage/metabase/shard_id.go index 93313032c..312f3900c 100644 --- a/pkg/local_object_storage/metabase/shard_id.go +++ b/pkg/local_object_storage/metabase/shard_id.go @@ -6,7 +6,7 @@ import ( ) var ( - shardInfoBucket = []byte(invalidBase58String + "i") + shardInfoBucket = []byte{shardInfoPrefix} shardIDKey = []byte("id") ) diff --git a/pkg/local_object_storage/metabase/storage_id.go b/pkg/local_object_storage/metabase/storage_id.go index ce893cffd..e8cca5b26 100644 --- a/pkg/local_object_storage/metabase/storage_id.go +++ b/pkg/local_object_storage/metabase/storage_id.go @@ -39,12 +39,13 @@ func (db *DB) StorageID(prm StorageIDPrm) (res StorageIDRes, err error) { } func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) { - smallBucket := tx.Bucket(smallBucketName(addr.Container())) + key := make([]byte, bucketKeySize) + smallBucket := tx.Bucket(smallBucketName(addr.Container(), key)) if smallBucket == nil { return nil, nil } - storageID := smallBucket.Get(objectKey(addr.Object())) + storageID := smallBucket.Get(objectKey(addr.Object(), key)) if storageID == nil { return nil, nil } diff --git a/pkg/local_object_storage/metabase/util.go b/pkg/local_object_storage/metabase/util.go index b18549b90..eae4f49f4 100644 --- a/pkg/local_object_storage/metabase/util.go +++ b/pkg/local_object_storage/metabase/util.go @@ -2,8 +2,8 @@ package meta import ( "bytes" + "crypto/sha256" "fmt" - "strings" cid "github.com/nspcc-dev/neofs-sdk-go/container/id" "github.com/nspcc-dev/neofs-sdk-go/object" @@ -11,126 +11,214 @@ import ( "go.etcd.io/bbolt" ) -/* -We might increase performance by not using string representation of -identities and addresses. String representation require base58 encoding that -slows execution. Instead we can try to marshal these structures directly into -bytes. Check it later. -*/ - -const invalidBase58String = "_" - var ( // graveyardBucketName stores rows with the objects that have been // covered with Tombstone objects. That objects should not be returned // from the node and should not be accepted by the node from other // nodes. - graveyardBucketName = []byte(invalidBase58String + "Graveyard") + graveyardBucketName = []byte{graveyardPrefix} // garbageBucketName stores rows with the objects that should be physically // deleted by the node (Garbage Collector routine). 
- garbageBucketName = []byte(invalidBase58String + "Garbage") - toMoveItBucketName = []byte(invalidBase58String + "ToMoveIt") - containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize") + garbageBucketName = []byte{garbagePrefix} + toMoveItBucketName = []byte{toMoveItPrefix} + containerVolumeBucketName = []byte{containerVolumePrefix} zeroValue = []byte{0xFF} - - smallPostfix = invalidBase58String + "small" - storageGroupPostfix = invalidBase58String + "SG" - tombstonePostfix = invalidBase58String + "TS" - ownerPostfix = invalidBase58String + "ownerid" - payloadHashPostfix = invalidBase58String + "payloadhash" - rootPostfix = invalidBase58String + "root" - parentPostfix = invalidBase58String + "parent" - splitPostfix = invalidBase58String + "splitid" - - userAttributePostfix = invalidBase58String + "attr_" - - splitInfoError *object.SplitInfoError // for errors.As comparisons ) +// Prefix bytes for database keys. All ids and addresses are encoded in binary +// unless specified otherwise. +const ( + // graveyardPrefix is used for the graveyard bucket. + // Key: object address + // Value: tombstone address + graveyardPrefix = iota + // garbagePrefix is used for the garbage bucket. + // Key: object address + // Value: dummy value + garbagePrefix + // toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving + // to another shard. + toMoveItPrefix + // containerVolumePrefix is used for storing container size estimations. + // Key: container ID + // Value: container size in bytes as little-endian uint64 + containerVolumePrefix + // lockedPrefix is used for storing locked objects information. + // Key: container ID + // Value: bucket mapping objects locked to the list of corresponding LOCK objects. + lockedPrefix + // shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container. + shardInfoPrefix + + //====================== + // Unique index buckets. + //====================== + + // primaryPrefix is used for prefixing buckets containing objects of REGULAR type. + // Key: object ID + // Value: marshalled object + primaryPrefix + // lockersPrefix is used for prefixing buckets containing objects of LOCK type. + // Key: object ID + // Value: marshalled object + lockersPrefix + // storageGroupPrefix is used for prefixing buckets containing objects of STORAGEGROUP type. + // Key: object ID + // Value: marshaled object + storageGroupPrefix + // tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type. + // Key: object ID + // Value: marshaled object + tombstonePrefix + // smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in. + // Key: object ID + // Value: blobovnicza ID + smallPrefix + // rootPrefix is used for prefixing buckets mapping parent object to the split info. + // Key: object ID + // Value: split info + rootPrefix + + //==================== + // FKBT index buckets. + //==================== + + // ownerPrefix is used for prefixing FKBT index buckets mapping owner to object IDs. + // Key: owner ID + // Value: bucket containing object IDs as keys + ownerPrefix + // userAttributePrefix is used for prefixing FKBT index buckets containing objects. + // Key: attribute value + // Value: bucket containing object IDs as keys + userAttributePrefix + + //==================== + // List index buckets. + //==================== + + // payloadHashPrefix is used for prefixing List index buckets mapping payload hash to a list of object IDs. 
+ // Key: payload hash + // Value: list of object IDs + payloadHashPrefix + // parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs. + // Key: parent ID + // Value: list of object IDs + parentPrefix + // splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs. + // Key: split ID + // Value: list of object IDs + splitPrefix +) + +const ( + cidSize = sha256.Size + bucketKeySize = 1 + cidSize + objectKeySize = sha256.Size + addressKeySize = cidSize + objectKeySize +) + +var splitInfoError *object.SplitInfoError // for errors.As comparisons + +func bucketName(cnr cid.ID, prefix byte, key []byte) []byte { + key[0] = prefix + cnr.Encode(key[1:]) + return key[:bucketKeySize] +} + // primaryBucketName returns . -func primaryBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString()) +func primaryBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, primaryPrefix, key) } // tombstoneBucketName returns _TS. -func tombstoneBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + tombstonePostfix) +func tombstoneBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, tombstonePrefix, key) } // storageGroupBucketName returns _SG. -func storageGroupBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + storageGroupPostfix) +func storageGroupBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, storageGroupPrefix, key) } // smallBucketName returns _small. -func smallBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + smallPostfix) // consider caching output values +func smallBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, smallPrefix, key) } // attributeBucketName returns _attr_. -func attributeBucketName(cnr cid.ID, attributeKey string) []byte { - sb := strings.Builder{} // consider getting string builders from sync.Pool - sb.WriteString(cnr.EncodeToString()) - sb.WriteString(userAttributePostfix) - sb.WriteString(attributeKey) - - return []byte(sb.String()) +func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte { + key[0] = userAttributePrefix + cnr.Encode(key[1:]) + return append(key[:bucketKeySize], attributeKey...) } // returns from attributeBucketName result, nil otherwise. func cidFromAttributeBucket(val []byte, attributeKey string) []byte { - suffix := []byte(userAttributePostfix + attributeKey) - if !bytes.HasSuffix(val, suffix) { + if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) { return nil } - return val[:len(val)-len(suffix)] + return val[1:bucketKeySize] } // payloadHashBucketName returns _payloadhash. -func payloadHashBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + payloadHashPostfix) +func payloadHashBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, payloadHashPrefix, key) } // rootBucketName returns _root. -func rootBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + rootPostfix) +func rootBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, rootPrefix, key) } // ownerBucketName returns _ownerid. -func ownerBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + ownerPostfix) +func ownerBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, ownerPrefix, key) } // parentBucketName returns _parent. 
-func parentBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + parentPostfix) +func parentBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, parentPrefix, key) } // splitBucketName returns _splitid. -func splitBucketName(cnr cid.ID) []byte { - return []byte(cnr.EncodeToString() + splitPostfix) +func splitBucketName(cnr cid.ID, key []byte) []byte { + return bucketName(cnr, splitPrefix, key) } // addressKey returns key for K-V tables when key is a whole address. -func addressKey(addr oid.Address) []byte { - return []byte(addr.EncodeToString()) +func addressKey(addr oid.Address, key []byte) []byte { + addr.Container().Encode(key) + addr.Object().Encode(key[cidSize:]) + return key[:addressKeySize] } // parses object address formed by addressKey. func decodeAddressFromKey(dst *oid.Address, k []byte) error { - err := dst.DecodeString(string(k)) - if err != nil { - return fmt.Errorf("decode object address from db key: %w", err) + if len(k) != addressKeySize { + return fmt.Errorf("invalid length") } + var cnr cid.ID + if err := cnr.Decode(k[:cidSize]); err != nil { + return err + } + + var obj oid.ID + if err := obj.Decode(k[cidSize:]); err != nil { + return err + } + + dst.SetObject(obj) + dst.SetContainer(cnr) return nil } // objectKey returns key for K-V tables when key is an object id. -func objectKey(obj oid.ID) []byte { - return []byte(obj.EncodeToString()) +func objectKey(obj oid.ID, key []byte) []byte { + obj.Encode(key) + return key[:objectKeySize] } // removes all bucket elements. @@ -152,13 +240,15 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object panic("empty object list in firstIrregularObjectType") } + var keys [3][1 + cidSize]byte + irregularTypeBuckets := [...]struct { typ object.Type name []byte }{ - {object.TypeTombstone, tombstoneBucketName(idCnr)}, - {object.TypeStorageGroup, storageGroupBucketName(idCnr)}, - {object.TypeLock, bucketNameLockers(idCnr)}, + {object.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])}, + {object.TypeStorageGroup, storageGroupBucketName(idCnr, keys[1][:])}, + {object.TypeLock, bucketNameLockers(idCnr, keys[2][:])}, } for i := range objs { @@ -174,5 +264,7 @@ func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) object // return true if provided object is of LOCK type. func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool { - return inBucket(tx, bucketNameLockers(idCnr), objectKey(obj)) + return inBucket(tx, + bucketNameLockers(idCnr, make([]byte, bucketKeySize)), + objectKey(obj, make([]byte, objectKeySize))) } diff --git a/pkg/local_object_storage/metabase/version.go b/pkg/local_object_storage/metabase/version.go index 78b4ed3f4..f5af18c12 100644 --- a/pkg/local_object_storage/metabase/version.go +++ b/pkg/local_object_storage/metabase/version.go @@ -8,7 +8,7 @@ import ( ) // version contains current metabase version. -const version = 1 +const version = 2 var versionKey = []byte("version")
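
Appendix: sketches of the new key scheme (review notes, not part of the patch).

The sketch below restates the layout this patch introduces in util.go, for review only. Plain [cidSize]byte arrays stand in for the SDK's cid.ID and oid.ID (the patch itself relies on their Encode/Decode methods reading and writing exactly 32 bytes); the constant names and the primaryPrefix value mirror util.go's declarations.

package main

import "fmt"

const (
	cidSize        = 32          // raw container ID (sha256.Size)
	objectKeySize  = 32          // raw object ID
	bucketKeySize  = 1 + cidSize // type-prefix byte + container ID
	addressKeySize = cidSize + objectKeySize
)

// bucketName lays out <prefix><container ID>, the shape of every
// per-container bucket name, into a caller-provided buffer.
func bucketName(cnr [cidSize]byte, prefix byte, key []byte) []byte {
	key[0] = prefix
	copy(key[1:], cnr[:])
	return key[:bucketKeySize]
}

// addressKey lays out <container ID><object ID>, the record key used by
// the graveyard, garbage and ToMoveIt buckets.
func addressKey(cnr, obj [cidSize]byte, key []byte) []byte {
	copy(key, cnr[:])
	copy(key[cidSize:], obj[:])
	return key[:addressKeySize]
}

func main() {
	var cnr, obj [cidSize]byte
	cnr[0], obj[0] = 0xAA, 0xBB // dummy IDs, for illustration only

	const primaryPrefix = 6 // value primaryPrefix gets from util.go's iota block

	fmt.Printf("bucket name: %x\n", bucketName(cnr, primaryPrefix, make([]byte, bucketKeySize)))
	fmt.Printf("address key: %x\n", addressKey(cnr, obj, make([]byte, addressKeySize)))
}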
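
Going the other way, listing code no longer splits a base58 string on "_": ListWithCursor and iteratePhyObjects read the type byte and the raw container ID straight out of the bucket name. A sketch of that reverse step, using the same constants as above; the prefix values are those produced by util.go's iota block:

// parseBucketName undoes bucketName: byte 0 selects the bucket type and
// the next 32 bytes are the raw container ID. Fixed-purpose buckets have
// len(name) == bucketKeySize; attribute buckets are longer because the
// attribute name is appended, hence the >= check here (parseContainerID
// in containers.go uses == and so skips attribute buckets entirely).
func parseBucketName(name []byte) (prefix byte, rawCID []byte, ok bool) {
	if len(name) < bucketKeySize {
		return 0, nil, false
	}
	return name[0], name[1:bucketKeySize], true
}

// holdsObjects mirrors the prefix switch in ListWithCursor and
// iteratePhyObjects: only these four bucket types store whole objects.
func holdsObjects(prefix byte) bool {
	const ( // values as they fall out of util.go's iota block
		primaryPrefix      = 6
		lockersPrefix      = 7
		storageGroupPrefix = 8
		tombstonePrefix    = 9
	)
	switch prefix {
	case primaryPrefix, lockersPrefix, storageGroupPrefix, tombstonePrefix:
		return true
	}
	return false
}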
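
Attribute buckets are the one variable-length name: the attribute key is appended after the container ID. That is why attributeBucketName now takes a buffer of bucketKeySize+len(attributeKey) bytes, and why cidFromAttributeBucket matches on the leading prefix byte plus the trailing attribute name rather than on a string suffix. Roughly, with the helper names shortened for this sketch:

const userAttributePrefix = 13 // position of userAttributePrefix in util.go's iota block

// attrBucketName lays out <userAttributePrefix><container ID><attribute key>.
func attrBucketName(cnr [cidSize]byte, attributeKey string, key []byte) []byte {
	key[0] = userAttributePrefix
	copy(key[1:], cnr[:])
	return append(key[:bucketKeySize], attributeKey...)
}

// cidFromAttrBucket recovers the container ID, returning nil for any
// bucket that is not the attribute bucket asked about.
func cidFromAttrBucket(name []byte, attributeKey string) []byte {
	if len(name) < bucketKeySize || name[0] != userAttributePrefix ||
		string(name[bucketKeySize:]) != attributeKey {
		return nil
	}
	return name[1:bucketKeySize]
}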
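
Finally, a note on the buffer-reuse convention visible throughout the diff: the helpers write into a caller-provided slice precisely so that one allocation can serve a whole transaction, which means each call overwrites the previous result. delete.go, for instance, re-derives the parent address key after db.get has reused the buffer. A made-up caller (hasBoth exists only in this sketch; it needs import "go.etcd.io/bbolt") spells the aliasing out:

// hasBoth checks two per-container buckets inside one bbolt transaction
// using a single scratch buffer, the pattern the patch uses throughout.
func hasBoth(tx *bbolt.Tx, cnr [cidSize]byte) bool {
	const (
		primaryPrefix   = 6 // from util.go's iota block
		tombstonePrefix = 9
	)

	key := make([]byte, bucketKeySize) // one buffer for the whole call

	inPrimary := tx.Bucket(bucketName(cnr, primaryPrefix, key)) != nil
	// bucketName has just overwritten key, so the primary-bucket name is
	// gone; any use of the first name must happen before this point.
	inTombstone := tx.Bucket(bucketName(cnr, tombstonePrefix, key)) != nil

	return inPrimary && inTombstone
}

The same convention explains signatures such as objectKey(obj, key) and addressKey(addr, key) across the diff: the second argument is a scratch buffer, not data.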