From c7955c63def6eed4ba88c6c0a53e8218c8ef9f38 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Mon, 1 Jul 2024 19:17:26 +0300 Subject: [PATCH] [#9999] metabase: Fix db engine to pebble in exists.go Signed-off-by: Dmitrii Stepanov --- pkg/local_object_storage/metabase/counter.go | 7 +- pkg/local_object_storage/metabase/db.go | 2 +- pkg/local_object_storage/metabase/exists.go | 177 +++-- pkg/local_object_storage/metabase/expired.go | 41 ++ pkg/local_object_storage/metabase/get.go | 60 +- pkg/local_object_storage/metabase/lock.go | 28 +- pkg/local_object_storage/metabase/pebble.go | 48 +- pkg/local_object_storage/metabase/put.go | 7 +- pkg/local_object_storage/metabase/util.go | 662 +++++++++++++++---- 9 files changed, 795 insertions(+), 237 deletions(-) diff --git a/pkg/local_object_storage/metabase/counter.go b/pkg/local_object_storage/metabase/counter.go index da3cfae26..c007811d0 100644 --- a/pkg/local_object_storage/metabase/counter.go +++ b/pkg/local_object_storage/metabase/counter.go @@ -25,8 +25,11 @@ var ( ) var ( - errInvalidKeyLenght = errors.New("invalid key length") - errInvalidValueLenght = errors.New("invalid value length") + errInvalidKeyLenght = errors.New("invalid key length") + errInvalidKeyPrefix = errors.New("invalid key prefix") + errInvalidValueLenght = errors.New("invalid value length") + errInvalidContainerIDValue = errors.New("invalid container ID value") + errInvalidAttributeKey = errors.New("invalid userr attribute key") ) type objectType uint8 diff --git a/pkg/local_object_storage/metabase/db.go b/pkg/local_object_storage/metabase/db.go index 21f5d34c2..159913137 100644 --- a/pkg/local_object_storage/metabase/db.go +++ b/pkg/local_object_storage/metabase/db.go @@ -188,7 +188,7 @@ func stringEqualMatcherBucket(r pebble.Reader, fKey string, fValue string, f fun if !ok { return nil } - data, err := getSafe(r, val) + data, err := valueSafe(r, val) if err != nil { return err } diff --git a/pkg/local_object_storage/metabase/exists.go b/pkg/local_object_storage/metabase/exists.go index de52fe71d..9b8d239e2 100644 --- a/pkg/local_object_storage/metabase/exists.go +++ b/pkg/local_object_storage/metabase/exists.go @@ -1,11 +1,11 @@ package meta import ( + "bytes" "context" "fmt" "time" - objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" @@ -13,6 +13,7 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/cockroachdb/pebble" "go.etcd.io/bbolt" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -81,8 +82,8 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err currEpoch := db.epochState.CurrentEpoch() - err = db.database.View(func(tx *bbolt.Tx) error { - res.exists, res.locked, err = db.exists(tx, prm.addr, prm.paddr, currEpoch) + err = db.snapshot(func(s *pebble.Snapshot) error { + res.exists, res.locked, err = db.exists(ctx, s, prm.addr, prm.paddr, currEpoch) return err }) @@ -90,13 +91,21 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err return res, metaerr.Wrap(err) } -func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) { 
+func (db *DB) exists(ctx context.Context, r pebble.Reader, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) { var locked bool + var err error if !parent.Equals(oid.Address{}) { - locked = objectLocked(tx, parent.Container(), parent.Object()) + locked, err = objectLocked(ctx, r, parent.Container(), parent.Object()) + if err != nil { + return false, false, err + } } // check graveyard and object expiration first - switch objectStatus(tx, addr, currEpoch) { + st, err := objectStatus(ctx, r, addr, currEpoch) + if err != nil { + return false, false, err + } + switch st { case 1: return false, locked, logicerr.Wrap(new(apistatus.ObjectNotFound)) case 2: @@ -105,32 +114,41 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo return false, locked, ErrObjectIsExpired } - objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) - - cnr := addr.Container() - key := make([]byte, bucketKeySize) - - // if graveyard is empty, then check if object exists in primary bucket - if inBucket(tx, primaryBucketName(cnr, key), objKey) { + v, err := valueSafe(r, primaryKey(addr.Container(), addr.Object())) + if err != nil { + return false, false, err + } + if v != nil { return true, locked, nil } - // if primary bucket is empty, then check if object exists in parent bucket - if inBucket(tx, parentBucketName(cnr, key), objKey) { - splitInfo, err := getSplitInfo(tx, cnr, objKey) + children, err := selectByPrefixBatch(ctx, r, parentKeyLongPrefix(addr.Container(), addr.Object()), 1) // try to found any child + if err != nil { + return false, false, err + } + if len(children) > 0 { + splitInfo, err := getSplitInfo(r, addr) if err != nil { - return false, locked, err + return false, false, err } - return false, locked, logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo)) } - // if parent bucket is empty, then check if object exists in ec bucket - if data := getFromBucket(tx, ecInfoBucketName(cnr, key), objKey); len(data) != 0 { - return false, locked, getECInfoError(tx, cnr, data) + + // if parent bucket is empty, then check if object exists with ec prefix + children, err = selectByPrefixBatch(ctx, r, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), 1) // try to found any child + if err != nil { + return false, locked, err + } + if len(children) > 0 { + return false, locked, getECInfoError(ctx, r, addr) } - // if parent bucket is empty, then check if object exists in typed buckets - return firstIrregularObjectType(tx, cnr, objKey) != objectSDK.TypeRegular, locked, nil + t, err := firstIrregularObjectType(r, addr.Container(), addr.Object()) + if err != nil { + return false, false, err + } + + return t != objectSDK.TypeRegular, locked, nil } // objectStatus returns: @@ -138,59 +156,52 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo // - 1 if object with GC mark; // - 2 if object is covered with tombstone; // - 3 if object is expired. 
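// Illustrative aside, not part of the patch: how a caller of Exists is
// expected to interpret the outcomes produced above after the pebble rewrite.
// The prm setup and result getters are elided; the error types are the ones
// already used in this file, and unwrapping via errors.As/errors.Is is
// assumed to survive metaerr.Wrap.
//
//	res, err := db.Exists(ctx, prm)
//	var splitErr *objectSDK.SplitInfoError
//	switch {
//	case err == nil:
//		// res reports whether the object is stored here and whether it is locked
//	case errors.As(err, &splitErr):
//		// virtual parent object: only its children are stored,
//		// resolve them via splitErr.SplitInfo()
//	case errors.Is(err, ErrObjectIsExpired):
//		// the object is past its expiration epoch
//	}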
-func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 { +func objectStatus(ctx context.Context, r pebble.Reader, addr oid.Address, currEpoch uint64) (uint8, error) { // locked object could not be removed/marked with GC/expired - if objectLocked(tx, addr.Container(), addr.Object()) { - return 0 + locked, err := objectLocked(ctx, r, addr.Container(), addr.Object()) + if err != nil { + return 0, err + } + if locked { + return 0, nil } - // we check only if the object is expired in the current - // epoch since it is considered the only corner case: the - // GC is expected to collect all the objects that have - // expired previously for less than the one epoch duration + st, err := inGraveyardWithKey(r, addr) + if err != nil { + return 0, err + } + if st > 0 { + return st, nil + } - expired := isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpoch, addr, currEpoch) - if !expired { - expired = isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpochNeoFS, addr, currEpoch) + expired, err := isExpired(ctx, r, addr, currEpoch) + if err != nil { + return 0, err } if expired { - return 3 + return 3, nil } - graveyardBkt := tx.Bucket(graveyardBucketName) - garbageBkt := tx.Bucket(garbageBucketName) - addrKey := addressKey(addr, make([]byte, addressKeySize)) - return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt) + return 0, nil } -func inGraveyardWithKey(addrKey []byte, graveyard, garbageBCK *bbolt.Bucket) uint8 { - if graveyard == nil { - // incorrect metabase state, does not make - // sense to check garbage bucket - return 0 +func inGraveyardWithKey(r pebble.Reader, addr oid.Address) (uint8, error) { + v, err := valueSafe(r, graveyardKey(addr.Container(), addr.Object())) + if err != nil { + return 0, err } - - val := graveyard.Get(addrKey) - if val == nil { - if garbageBCK == nil { - // incorrect node state - return 0 - } - - val = garbageBCK.Get(addrKey) - if val != nil { - // object has been marked with GC - return 1 - } - - // neither in the graveyard - // nor was marked with GC mark - return 0 + if v != nil { + return 2, nil } - - // object in the graveyard - return 2 + v, err = valueSafe(r, garbageKey(addr.Container(), addr.Object())) + if err != nil { + return 0, err + } + if v != nil { + return 1, nil + } + return 0, nil } // inBucket checks if key is present in bucket . @@ -208,19 +219,45 @@ func inBucket(tx *bbolt.Tx, name, key []byte) bool { // getSplitInfo returns SplitInfo structure from root index. Returns error // if there is no `key` record in root index. -func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) { - bucketName := rootBucketName(cnr, make([]byte, bucketKeySize)) - rawSplitInfo := getFromBucket(tx, bucketName, key) - if len(rawSplitInfo) == 0 { +func getSplitInfo(r pebble.Reader, addr oid.Address) (*objectSDK.SplitInfo, error) { + rawSplitInfo, err := valueSafe(r, rootKey(addr.Container(), addr.Object())) + if err != nil { + return nil, err + } + if len(rawSplitInfo) == 0 || bytes.Equal(zeroValue, rawSplitInfo) { return nil, ErrLackSplitInfo } - splitInfo := objectSDK.NewSplitInfo() - - err := splitInfo.Unmarshal(rawSplitInfo) + err = splitInfo.Unmarshal(rawSplitInfo) if err != nil { return nil, fmt.Errorf("can't unmarshal split info from root index: %w", err) } return splitInfo, nil } + +// if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular. +// +// firstIrregularObjectType(tx, cnr, obj) usage allows getting object type. 
+func firstIrregularObjectType(r pebble.Reader, idCnr cid.ID, objs ...oid.ID) (objectSDK.Type, error) {
+	for _, objID := range objs {
+		key := tombstoneKey(idCnr, objID)
+		v, err := valueSafe(r, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeTombstone, nil
+		}
+		key = lockersKey(idCnr, objID)
+		v, err = valueSafe(r, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeLock, nil
+		}
+	}
+
+	return objectSDK.TypeRegular, nil
+}
diff --git a/pkg/local_object_storage/metabase/expired.go b/pkg/local_object_storage/metabase/expired.go
index f5ff131b6..d6c455832 100644
--- a/pkg/local_object_storage/metabase/expired.go
+++ b/pkg/local_object_storage/metabase/expired.go
@@ -1,6 +1,7 @@
 package meta
 
 import (
+	"bytes"
 	"context"
 	"errors"
 	"fmt"
@@ -12,6 +13,7 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
+	"github.com/cockroachdb/pebble"
 	"go.etcd.io/bbolt"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
@@ -163,3 +165,45 @@ func selectExpiredObjectIDs(tx *bbolt.Tx, attr string, epoch uint64, containerID
 
 	return result, nil
 }
+
+func isExpired(ctx context.Context, r pebble.Reader, addr oid.Address, currEpoch uint64) (res bool, err error) {
+	prefix := []byte{expiredPrefix}
+	it, err := r.NewIter(&pebble.IterOptions{
+		LowerBound:                []byte{expiredPrefix},
+		OnlyReadGuaranteedDurable: true,
+	})
+	if err != nil {
+		return false, err
+	}
+	defer func() {
+		err = errors.Join(err, it.Close())
+	}()
+
+	// Iteration goes in ascending order by expiration epoch.
+	// GC collects expired objects every epoch, so there should not be many items here.
+	for v := it.First(); v && bytes.HasPrefix(it.Key(), prefix); v = it.Next() {
+		select {
+		case <-ctx.Done():
+			return false, ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Key())
+		if err != nil {
+			return false, err
+		}
+
+		if expEpoch >= currEpoch {
+			return false, nil // keys are ordered by epoch, so the remaining items cannot be expired either
+ } + + curAddr, err := addressFromExpiredKey(it.Key()) + if err != nil { + return false, err + } + if curAddr == addr { + return true, nil + } + } + return false, nil +} diff --git a/pkg/local_object_storage/metabase/get.go b/pkg/local_object_storage/metabase/get.go index 8df92d871..7a8dd006d 100644 --- a/pkg/local_object_storage/metabase/get.go +++ b/pkg/local_object_storage/metabase/get.go @@ -12,6 +12,7 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/cockroachdb/pebble" "go.etcd.io/bbolt" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -191,27 +192,50 @@ func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error { return logicerr.Wrap(new(apistatus.ObjectNotFound)) } -func getECInfoError(tx *bbolt.Tx, cnr cid.ID, data []byte) error { - keys, err := decodeList(data) - if err != nil { - return err - } - ecInfo := objectSDK.NewECInfo() - for _, key := range keys { - // check in primary index - ojbData := getFromBucket(tx, primaryBucketName(cnr, make([]byte, bucketKeySize)), key) - if len(ojbData) != 0 { - obj := objectSDK.New() - if err := obj.Unmarshal(ojbData); err != nil { +func getECInfoError(ctx context.Context, r pebble.Reader, addr oid.Address) error { + var chunkAddresses []oid.Address + for { + keys, err := selectByPrefixBatch(ctx, r, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), batchSize) + if err != nil { + return err + } + for _, key := range keys { + chunkAddress, err := addressOfChunkFromECInfoKey(key) + if err != nil { return err } - chunk := objectSDK.ECChunk{} - id, _ := obj.ID() - chunk.SetID(id) - chunk.Index = obj.ECHeader().Index() - chunk.Total = obj.ECHeader().Total() - ecInfo.AddChunk(chunk) + chunkAddresses = append(chunkAddresses, chunkAddress) } + if len(keys) < batchSize { + break + } + } + ecInfo := objectSDK.NewECInfo() + for _, chunkAddress := range chunkAddresses { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + objData, err := valueSafe(r, primaryKey(chunkAddress.Container(), chunkAddress.Object())) + if err != nil { + return err + } + if len(objData) == 0 { + continue + } + + obj := objectSDK.New() + if err := obj.Unmarshal(objData); err != nil { + return err + } + chunk := objectSDK.ECChunk{} + id, _ := obj.ID() + chunk.SetID(id) + chunk.Index = obj.ECHeader().Index() + chunk.Total = obj.ECHeader().Total() + ecInfo.AddChunk(chunk) } return logicerr.Wrap(objectSDK.NewECInfoError(ecInfo)) } diff --git a/pkg/local_object_storage/metabase/lock.go b/pkg/local_object_storage/metabase/lock.go index e8e724dd1..3f08d5ec4 100644 --- a/pkg/local_object_storage/metabase/lock.go +++ b/pkg/local_object_storage/metabase/lock.go @@ -13,6 +13,7 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/cockroachdb/pebble" "go.etcd.io/bbolt" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -161,18 +162,13 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) { } // checks if specified object is locked in the specified container. 
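// Illustrative sketch, not part of the patch: a lock relation is stored as a
// single flat key
//
//	lockedPrefix || container ID || locked object ID || locker object ID
//
// so "is this object locked?" reduces to "does any key start with
// lockedPrefix || container ID || locked object ID?". The helper below shows
// that probe with plain pebble calls (the name is hypothetical and it assumes
// the bytes/pebble imports already used in this package); objectLocked below
// performs the same probe via selectByPrefixBatch with a batch size of 1.
func hasAnyKeyWithPrefix(r pebble.Reader, prefix []byte) (bool, error) {
	it, err := r.NewIter(&pebble.IterOptions{LowerBound: prefix})
	if err != nil {
		return false, err
	}
	found := it.First() && bytes.HasPrefix(it.Key(), prefix)
	return found, it.Close()
}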
-func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool { - bucketLocked := tx.Bucket(bucketNameLocked) - if bucketLocked != nil { - key := make([]byte, cidSize) - idCnr.Encode(key) - bucketLockedContainer := bucketLocked.Bucket(key) - if bucketLockedContainer != nil { - return bucketLockedContainer.Get(objectKey(idObj, key)) != nil - } +func objectLocked(ctx context.Context, r pebble.Reader, idCnr cid.ID, idObj oid.ID) (bool, error) { + prefix := lockedKeyLongPrefix(idCnr, idObj) + items, err := selectByPrefixBatch(ctx, r, prefix, 1) + if err != nil { + return false, err } - - return false + return len(items) > 0, nil } // return `LOCK` id's if specified object is locked in the specified container. @@ -383,3 +379,13 @@ func (db *DB) GetLocked(ctx context.Context, addr oid.Address) (res []oid.ID, er success = err == nil return res, err } + +// return true if provided object is of LOCK type. +func isLockObject(r pebble.Reader, idCnr cid.ID, obj oid.ID) (bool, error) { + key := lockersKey(idCnr, obj) + v, err := valueSafe(r, key) + if err != nil { + return false, err + } + return v != nil, nil +} diff --git a/pkg/local_object_storage/metabase/pebble.go b/pkg/local_object_storage/metabase/pebble.go index d1cfda5c3..9921ce119 100644 --- a/pkg/local_object_storage/metabase/pebble.go +++ b/pkg/local_object_storage/metabase/pebble.go @@ -2,12 +2,15 @@ package meta import ( "bytes" + "context" "errors" "github.com/cockroachdb/pebble" ) -func getSafe(r pebble.Reader, key []byte) ([]byte, error) { +const batchSize = 1000 + +func valueSafe(r pebble.Reader, key []byte) ([]byte, error) { data, closer, err := r.Get(key) if err != nil { if errors.Is(err, pebble.ErrNotFound) { @@ -21,3 +24,46 @@ func getSafe(r pebble.Reader, key []byte) ([]byte, error) { } return result, nil } + +func (db *DB) batch(f func(batch *pebble.Batch) error) error { + b := db.database.NewIndexedBatch() + err := f(b) + if err != nil { + return errors.Join(err, b.Close()) + } + return errors.Join(b.Commit(pebble.Sync), b.Close()) +} + +func (db *DB) snapshot(f func(*pebble.Snapshot) error) error { + s := db.database.NewSnapshot() + err := f(s) + if err != nil { + return errors.Join(err, s.Close()) + } + return s.Close() +} + +func selectByPrefixBatch(ctx context.Context, r pebble.Reader, prefix []byte, batchSize int) ([][]byte, error) { + it, err := r.NewIter(&pebble.IterOptions{ + LowerBound: prefix, + OnlyReadGuaranteedDurable: true, + }) + if err != nil { + return nil, err + } + + var result [][]byte + for v := it.First(); v && bytes.HasPrefix(it.Key(), prefix); v = it.Next() { + select { + case <-ctx.Done(): + return nil, errors.Join(ctx.Err(), it.Close()) + default: + } + + result = append(result, bytes.Clone(it.Key())) + if len(result) == batchSize { + return result, it.Close() + } + } + return result, it.Close() +} diff --git a/pkg/local_object_storage/metabase/put.go b/pkg/local_object_storage/metabase/put.go index cf7fa9cb7..d9af2a681 100644 --- a/pkg/local_object_storage/metabase/put.go +++ b/pkg/local_object_storage/metabase/put.go @@ -16,6 +16,7 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/cockroachdb/pebble" "github.com/nspcc-dev/neo-go/pkg/io" "go.etcd.io/bbolt" "go.opentelemetry.io/otel/attribute" @@ -86,9 +87,9 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) { currEpoch := db.epochState.CurrentEpoch() - err = 
db.database.Batch(func(tx *bbolt.Tx) error { + err = db.batch(func(b *pebble.Batch) error { var e error - res, e = db.put(tx, prm.obj, prm.id, nil, currEpoch) + res, e = db.put(b, prm.obj, prm.id, nil, currEpoch) return e }) if err == nil { @@ -101,7 +102,7 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) { return res, metaerr.Wrap(err) } -func (db *DB) put(tx *bbolt.Tx, +func (db *DB) put(batch *pebble.Batch, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, diff --git a/pkg/local_object_storage/metabase/util.go b/pkg/local_object_storage/metabase/util.go index 9249ae49b..cb7f03ddc 100644 --- a/pkg/local_object_storage/metabase/util.go +++ b/pkg/local_object_storage/metabase/util.go @@ -3,31 +3,14 @@ package meta import ( "bytes" "crypto/sha256" - "errors" + "encoding/binary" + "fmt" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" - objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" ) -var ( - // graveyardBucketName stores rows with the objects that have been - // covered with Tombstone objects. That objects should not be returned - // from the node and should not be accepted by the node from other - // nodes. - graveyardBucketName = []byte{graveyardPrefix} - // garbageBucketName stores rows with the objects that should be physically - // deleted by the node (Garbage Collector routine). - garbageBucketName = []byte{garbagePrefix} - toMoveItBucketName = []byte{toMoveItPrefix} - containerVolumeBucketName = []byte{containerVolumePrefix} - containerCounterBucketName = []byte{containerCountersPrefix} - - zeroValue = []byte{0xFF} - - errInvalidLength = errors.New("invalid length") -) +var zeroValue = []byte{0xFF} // Prefix bytes for database keys. All ids and addresses are encoded in binary // unless specified otherwise. @@ -42,13 +25,13 @@ const ( // Key: object address // Value: dummy value garbagePrefix - // toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving + // _ Previous usage was for for bucket containing IDs of objects that are candidates for moving // to another shard. - toMoveItPrefix - // containerVolumePrefix is used for storing container size estimations. + _ + // containerSizePrefix is used for storing container size estimations. // Key: container ID // Value: container size in bytes as little-endian uint64 - containerVolumePrefix + containerSizePrefix // lockedPrefix is used for storing locked objects information. // Key: container ID // Value: bucket mapping objects locked to the list of corresponding LOCK objects. @@ -124,6 +107,9 @@ const ( // Key: container ID + type // Value: Object id ecInfoPrefix + + // expiredPrefix used to store expiration info. + expiredPrefix ) const ( @@ -133,139 +119,553 @@ const ( addressKeySize = cidSize + objectKeySize ) -func bucketName(cnr cid.ID, prefix byte, key []byte) []byte { - key[0] = prefix - cnr.Encode(key[1:]) - return key[:bucketKeySize] +func keyPrefix(cnr cid.ID, prefix byte) []byte { + result := make([]byte, 1+cidSize) + result[0] = prefix + cnr.Encode(result[1:]) + return result } -// primaryBucketName returns . 
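// Illustrative sketch, not part of the patch: with the bbolt bucket hierarchy
// gone, every index entry becomes one flat pebble key built from a one-byte
// prefix plus fixed-size IDs. For example, a primary-index entry for object O
// in container C is the 65-byte key
//
//	primaryPrefix || C (32 bytes) || O (32 bytes)
//
// and all primary entries of C share the 33-byte prefix produced by
// keyPrefix(C, primaryPrefix), which is what the range scans in this patch
// rely on (assuming cidSize and objectKeySize stay equal to sha256.Size):
//
//	key := keyObject(primaryPrefix, cnr, obj)
//	_ = bytes.HasPrefix(key, keyPrefix(cnr, primaryPrefix)) // always true
//	_ = len(key) == 1+cidSize+objectKeySize                 // always true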
-func primaryBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, primaryPrefix, key) +func keyObject(prefix byte, cnr cid.ID, objID oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize) + result[0] = prefix + cnr.Encode(result[1:]) + objID.Encode(result[1+cidSize:]) + return result } -// tombstoneBucketName returns _TS. -func tombstoneBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, tombstonePrefix, key) +func addressFromKey(prefix byte, key []byte) (oid.Address, error) { + if len(key) != 1+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != prefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cont cid.ID + if err := cont.Decode(key[1 : 1+cidSize]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err) + } + var obj oid.ID + if err := obj.Decode(key[1+cidSize:]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err) + } + var result oid.Address + result.SetContainer(cont) + result.SetObject(obj) + return result, nil } -// smallBucketName returns _small. -func smallBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, smallPrefix, key) +// primaryKeyPrefix returns primaryPrefix_. +func primaryKeyPrefix(cnr cid.ID) []byte { + return keyPrefix(cnr, primaryPrefix) } -// attributeBucketName returns _attr_. -func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte { - key[0] = userAttributePrefix - cnr.Encode(key[1:]) - return append(key[:bucketKeySize], attributeKey...) +func primaryKey(cnr cid.ID, objID oid.ID) []byte { + return keyObject(primaryPrefix, cnr, objID) } -// returns from attributeBucketName result, nil otherwise. -func cidFromAttributeBucket(val []byte, attributeKey string) []byte { - if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) { - return nil +func addressFromPrimaryKey(v []byte) (oid.Address, error) { + return addressFromKey(primaryPrefix, v) +} + +// tombstoneKeyPrefix returns tombstonePrefix_. +func tombstoneKeyPrefix(cnr cid.ID) []byte { + return keyPrefix(cnr, tombstonePrefix) +} + +func tombstoneKey(cnr cid.ID, objID oid.ID) []byte { + return keyObject(tombstonePrefix, cnr, objID) +} + +func addressFromTombstoneKey(v []byte) (oid.Address, error) { + return addressFromKey(tombstonePrefix, v) +} + +func garbageKey(cnr cid.ID, objID oid.ID) []byte { + return keyObject(garbagePrefix, cnr, objID) +} + +func addressFromGarbageKey(v []byte) (oid.Address, error) { + return addressFromKey(garbagePrefix, v) +} + +func graveyardKey(cnr cid.ID, objID oid.ID) []byte { + return keyObject(graveyardPrefix, cnr, objID) +} + +func addressFromGraveyardKey(v []byte) (oid.Address, error) { + return addressFromKey(graveyardPrefix, v) +} + +func smallKey(cnr cid.ID, obj oid.ID) []byte { + return keyObject(smallPrefix, cnr, obj) +} + +// attributeKeyPrefix returns userAttributePrefix___. +func attributeKeyPrefix(cnr cid.ID, attributeKey, attributeValue string) []byte { + result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue)) + result[0] = userAttributePrefix + copy(result[1:], []byte(attributeKey)) + cnr.Encode(result[1+len(attributeKey):]) + copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue)) + return result +} + +// userAttributePrefix+attributeKey++attributeValue+. 
+func attributeKey(cnr cid.ID, objID oid.ID, attributeKey, attributeValue string) []byte { + result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue)+objectKeySize) + result[0] = userAttributePrefix + copy(result[1:], []byte(attributeKey)) + cnr.Encode(result[1+len(attributeKey):]) + copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue)) + objID.Encode(result[1+cidSize+len(attributeKey)+len(attributeValue):]) + return result +} + +// returns attributeValue from attributeKey result, nil otherwise. +func attributeValueFromAttributeKey(key []byte, attributeKey string) ([]byte, error) { + if len(key) < 1+len(attributeKey)+cidSize+objectKeySize { + return nil, errInvalidKeyLenght + } + if key[0] != userAttributePrefix { + return nil, errInvalidKeyPrefix + } + if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) { + return nil, errInvalidAttributeKey } - return val[1:bucketKeySize] + return key[1+len(attributeKey)+cidSize : len(key)-objectKeySize], nil } -// payloadHashBucketName returns _payloadhash. -func payloadHashBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, payloadHashPrefix, key) -} - -// rootBucketName returns _root. -func rootBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, rootPrefix, key) -} - -// ownerBucketName returns _ownerid. -func ownerBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, ownerPrefix, key) -} - -// parentBucketName returns _parent. -func parentBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, parentPrefix, key) -} - -// splitBucketName returns _splitid. -func splitBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, splitPrefix, key) -} - -// ecInfoBucketName returns _ecinfo. -func ecInfoBucketName(cnr cid.ID, key []byte) []byte { - return bucketName(cnr, ecInfoPrefix, key) -} - -// addressKey returns key for K-V tables when key is a whole address. -func addressKey(addr oid.Address, key []byte) []byte { - addr.Container().Encode(key) - addr.Object().Encode(key[cidSize:]) - return key[:addressKeySize] -} - -// parses object address formed by addressKey. -func decodeAddressFromKey(dst *oid.Address, k []byte) error { - if len(k) != addressKeySize { - return errInvalidLength +func addressFromAttributeKey(key []byte, attributeKey string) (oid.Address, error) { + if len(key) < 1+len(attributeKey)+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght } + if key[0] != userAttributePrefix { + return oid.Address{}, errInvalidKeyPrefix + } + if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) { + return oid.Address{}, errInvalidAttributeKey + } + var cnrID cid.ID + if err := cnrID.Decode(key[1+len(attributeKey) : 1+len(attributeKey)+cidSize]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err) + } + var objID oid.ID + if err := objID.Decode(key[len(key)-objectKeySize:]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err) + } + var result oid.Address + result.SetContainer(cnrID) + result.SetObject(objID) + return result, nil +} +// payloadHashKeyLongPrefix returns payloadHashPrefix__hash. +func payloadHashKeyLongPrefix(cnr cid.ID, hash []byte) []byte { + result := make([]byte, 1+cidSize+len(hash)) + result[0] = payloadHashPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], hash) + return result +} + +// payloadHashKeyShortPrefix returns payloadHashPrefix_. 
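// Illustrative worked example, not part of the patch: for attribute
// "FileName" = "cat.jpg", attributeKey above builds
//
//	userAttributePrefix || "FileName" || container ID || "cat.jpg" || object ID
//	         1          +      8      +      32       +     7      +    32     = 80 bytes
//
// The value length is not stored, so the parsers above recover the value and
// the object ID by slicing from both ends, which works because the container
// and object ID sizes are fixed:
//
//	k := attributeKey(cnr, obj, "FileName", "cat.jpg")
//	value := k[1+len("FileName")+cidSize : len(k)-objectKeySize] // "cat.jpg"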
+func payloadHashKeyShortPrefix(cnr cid.ID) []byte { + result := make([]byte, 1+cidSize) + result[0] = payloadHashPrefix + cnr.Encode(result[1:]) + return result +} + +// payloadHashKey returns payloadHashPrefix__hash_. +func payloadHashKey(cnr cid.ID, obj oid.ID, hash []byte) []byte { + result := make([]byte, 1+cidSize+len(hash)+objectKeySize) + result[0] = payloadHashPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], hash) + obj.Encode(result[1+cidSize+len(hash):]) + return result +} + +func addressFromPayloadHashKey(k []byte) (oid.Address, error) { + if len(k) < 1+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if k[0] != payloadHashPrefix { + return oid.Address{}, errInvalidKeyPrefix + } var cnr cid.ID - if err := cnr.Decode(k[:cidSize]); err != nil { - return err + if err := cnr.Decode(k[1 : 1+cidSize]); err != nil { + return oid.Address{}, err } var obj oid.ID - if err := obj.Decode(k[cidSize:]); err != nil { - return err + if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil { + return oid.Address{}, err } - dst.SetObject(obj) - dst.SetContainer(cnr) - return nil + var result oid.Address + result.SetObject(obj) + result.SetContainer(cnr) + return result, nil } -// objectKey returns key for K-V tables when key is an object id. -func objectKey(obj oid.ID, key []byte) []byte { - obj.Encode(key) - return key[:objectKeySize] -} - -// if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular. -// -// firstIrregularObjectType(tx, cnr, obj) usage allows getting object type. -func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type { - if len(objs) == 0 { - panic("empty object list in firstIrregularObjectType") +func payloadHashFromPayloadHashKey(k []byte) ([]byte, error) { + if len(k) < 1+cidSize+objectKeySize { + return nil, errInvalidKeyLenght + } + if k[0] != payloadHashPrefix { + return nil, errInvalidKeyPrefix } - var keys [2][1 + cidSize]byte - - irregularTypeBuckets := [...]struct { - typ objectSDK.Type - name []byte - }{ - {objectSDK.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])}, - {objectSDK.TypeLock, bucketNameLockers(idCnr, keys[1][:])}, - } - - for i := range objs { - for j := range irregularTypeBuckets { - if inBucket(tx, irregularTypeBuckets[j].name, objs[i]) { - return irregularTypeBuckets[j].typ - } - } - } - - return objectSDK.TypeRegular + return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil } -// return true if provided object is of LOCK type. -func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool { - return inBucket(tx, - bucketNameLockers(idCnr, make([]byte, bucketKeySize)), - objectKey(obj, make([]byte, objectKeySize))) +// rootBucketName returns rootPrefix_. +func rootKeyPrefix(cnr cid.ID) []byte { + return keyPrefix(cnr, rootPrefix) +} + +func rootKey(cnr cid.ID, objID oid.ID) []byte { + return keyObject(rootPrefix, cnr, objID) +} + +func addressFromRootKey(key []byte) (oid.Address, error) { + return addressFromKey(rootPrefix, key) +} + +// ownerKey returns ownerPrefix__owner_. +func ownerKey(cnr cid.ID, obj oid.ID, owner []byte) []byte { + result := make([]byte, 1+cidSize+len(owner)+objectKeySize) + result[0] = ownerPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], owner) + obj.Encode(result[1+cidSize+len(owner):]) + return result +} + +// ownerKeyShortPrefix returns ownerPrefix_. 
+func ownerKeyShortPrefix(cnr cid.ID) []byte { + result := make([]byte, 1+cidSize) + result[0] = ownerPrefix + cnr.Encode(result[1:]) + return result +} + +// ownerKeyLongPrefix returns ownerPrefix__owner. +func ownerKeyLongPrefix(cnr cid.ID, owner []byte) []byte { + result := make([]byte, 1+cidSize+len(owner)) + result[0] = ownerPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], owner) + return result +} + +func addressFromOwnerKey(k []byte) (oid.Address, error) { + if len(k) < 1+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if k[0] != ownerPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cnr cid.ID + if err := cnr.Decode(k[1 : 1+cidSize]); err != nil { + return oid.Address{}, err + } + + var obj oid.ID + if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil { + return oid.Address{}, err + } + + var result oid.Address + result.SetObject(obj) + result.SetContainer(cnr) + return result, nil +} + +func ownerFromOwnerKey(k []byte) ([]byte, error) { + if len(k) < 1+cidSize+objectKeySize { + return nil, errInvalidKeyLenght + } + if k[0] != ownerPrefix { + return nil, errInvalidKeyPrefix + } + return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil +} + +// ecInfoLongKeyPrefix returns ecInfoPrefix__. +func ecInfoLongKeyPrefix(cnr cid.ID, parent oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize) + result[0] = ecInfoPrefix + cnr.Encode(result[1:]) + parent.Encode(result[1+cidSize:]) + return result +} + +// ecInfoShortKeyPrefix returns ecInfoPrefix_. +func ecInfoShortKeyPrefix(cnr cid.ID) []byte { + result := make([]byte, 1+cidSize) + result[0] = ecInfoPrefix + cnr.Encode(result[1:]) + return result +} + +func ecInfoKey(cnr cid.ID, parent, chunk oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize+objectKeySize) + result[0] = ecInfoPrefix + cnr.Encode(result[1:]) + parent.Encode(result[1+cidSize:]) + chunk.Encode(result[1+cidSize+objectKeySize:]) + return result +} + +func addressOfParentFromECInfoKey(key []byte) (oid.Address, error) { + return addressFromKey(ecInfoPrefix, key[:1+cidSize+objectKeySize]) +} + +func addressOfChunkFromECInfoKey(key []byte) (oid.Address, error) { + if len(key) != 1+cidSize+objectKeySize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != ecInfoPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cont cid.ID + if err := cont.Decode(key[1 : 1+cidSize]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err) + } + var obj oid.ID + if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err) + } + var result oid.Address + result.SetContainer(cont) + result.SetObject(obj) + return result, nil +} + +// parentKeyShortPrefix returns parentPrefix_. 
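// Illustrative sketch, not part of the patch: EC chunks are indexed as
//
//	ecInfoPrefix || container ID || parent object ID || chunk object ID
//
// so enumerating the chunks of a parent is a scan over
// ecInfoLongKeyPrefix(cnr, parent) with the chunk ID recovered from the key
// tail, which is what getECInfoError earlier in this patch does in batches:
//
//	keys, err := selectByPrefixBatch(ctx, r, ecInfoLongKeyPrefix(cnr, parent), batchSize)
//	// ... handle err, then:
//	for _, k := range keys {
//		chunkAddr, err := addressOfChunkFromECInfoKey(k) // container ID + chunk object ID
//		...
//	}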
+func parentKeyShortPrefix(cnr cid.ID) []byte { + result := make([]byte, 1+cidSize) + result[0] = parentPrefix + cnr.Encode(result[1:]) + return result +} + +func addressOfParentFromParentKey(key []byte) (oid.Address, error) { + return addressFromKey(parentPrefix, key[:1+cidSize+objectKeySize]) +} + +func addressOfTargetFromParentKey(key []byte) (oid.Address, error) { + if len(key) != 1+cidSize+objectKeySize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != parentPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cont cid.ID + if err := cont.Decode(key[1 : 1+cidSize]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err) + } + var obj oid.ID + if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil { + return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err) + } + var result oid.Address + result.SetContainer(cont) + result.SetObject(obj) + return result, nil +} + +// parentKeyLongPrefix returns parentPrefix__. +func parentKeyLongPrefix(cnr cid.ID, parentObj oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize) + result[0] = parentPrefix + cnr.Encode(result[1:]) + parentObj.Encode(result[bucketKeySize:]) + return result +} + +func parentKey(cnr cid.ID, parentObj, obj oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize+objectKeySize) + result[0] = parentPrefix + cnr.Encode(result[1:]) + parentObj.Encode(result[1+cidSize:]) + obj.Encode(result[1+cidSize+objectKeySize:]) + return result +} + +// splitKeyLongPrefix returns splitPrefix__splitID. +func splitKeyLongPrefix(cnr cid.ID, splitID []byte) []byte { + result := make([]byte, 1+cidSize+len(splitID)) + result[0] = splitPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], splitID) + return result +} + +// splitKeyShortPrefix returns splitPrefix_. +func splitKeyShortPrefix(cnr cid.ID) []byte { + result := make([]byte, 1+cidSize) + result[0] = splitPrefix + cnr.Encode(result[1:]) + return result +} + +// splitKey returns splitPrefix__splitID_. +func splitKey(cnr cid.ID, obj oid.ID, splitID []byte) []byte { + result := make([]byte, 1+cidSize+len(splitID)+objectKeySize) + result[0] = splitPrefix + cnr.Encode(result[1:]) + copy(result[1+cidSize:], splitID) + obj.Encode(result[1+cidSize+len(splitID):]) + return result +} + +func addressFromSplitKey(key []byte) (oid.Address, error) { + if len(key) < 1+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != splitPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cnr cid.ID + if err := cnr.Decode(key[1 : 1+cidSize]); err != nil { + return oid.Address{}, err + } + + var obj oid.ID + if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil { + return oid.Address{}, err + } + + var result oid.Address + result.SetObject(obj) + result.SetContainer(cnr) + return result, nil +} + +func splitIDFromSplitKey(key []byte) ([]byte, error) { + if len(key) < 1+cidSize+objectKeySize { + return nil, errInvalidKeyLenght + } + if key[0] != splitPrefix { + return nil, errInvalidKeyPrefix + } + + return bytes.Clone(key[1+cidSize : len(key)-objectKeySize]), nil +} + +// returns prefix of the keys with objects of type LOCK for specified container. 
+func lockersKeyPrefix(idCnr cid.ID) []byte { + return keyPrefix(idCnr, lockersPrefix) +} + +func lockersKey(cnrID cid.ID, objID oid.ID) []byte { + return keyObject(lockersPrefix, cnrID, objID) +} + +func addressFromLockersKey(v []byte) (oid.Address, error) { + return addressFromKey(lockersPrefix, v) +} + +// returns lockedPrefix__. +func lockedKeyLongPrefix(cnrID cid.ID, objID oid.ID) []byte { + prefix := make([]byte, 1+cidSize+objectKeySize) + prefix[0] = lockedPrefix + cnrID.Encode(prefix[1:]) + objID.Encode(prefix[1+cidSize:]) + return prefix +} + +// returns lockedPrefix_. +func lockedKeyShortPrefix(cnrID cid.ID) []byte { + prefix := make([]byte, 1+cidSize) + prefix[0] = lockedPrefix + cnrID.Encode(prefix[1:]) + return prefix +} + +// returns lockedPrefix___. +func lockedKey(cnrID cid.ID, objID, lockerObjID oid.ID) []byte { + result := make([]byte, 1+cidSize+objectKeySize+objectKeySize) + result[0] = lockedPrefix + cnrID.Encode(result[1:]) + objID.Encode(result[1+cidSize:]) + lockerObjID.Encode(result[1+cidSize+objectKeySize:]) + return result +} + +func lockerObjectIDFromLockedKey(k []byte) (oid.ID, error) { + if len(k) != 1+cidSize+objectKeySize+objectKeySize { + return oid.ID{}, errInvalidKeyLenght + } + if k[0] != lockedPrefix { + return oid.ID{}, errInvalidKeyPrefix + } + var result oid.ID + if err := result.Decode(k[1+cidSize+objectKeySize:]); err != nil { + return oid.ID{}, fmt.Errorf("failed to decode lockers object ID: %w", err) + } + return result, nil +} + +func objectIDFromLockedKey(k []byte) (oid.ID, error) { + if len(k) != 1+cidSize+objectKeySize+objectKeySize { + return oid.ID{}, errInvalidKeyLenght + } + if k[0] != lockedPrefix { + return oid.ID{}, errInvalidKeyPrefix + } + var result oid.ID + if err := result.Decode(k[1+cidSize : 1+cidSize+objectKeySize]); err != nil { + return oid.ID{}, fmt.Errorf("failed to decode locked object ID: %w", err) + } + return result, nil +} + +func expiredKey(cnr cid.ID, obj oid.ID, epoch uint64) []byte { + result := make([]byte, 1+8+cidSize+objectKeySize) + result[0] = expiredPrefix + // BigEndian is important for iteration order + binary.BigEndian.PutUint64(result[1:1+8], epoch) + cnr.Encode(result[1+8 : 1+8+cidSize]) + obj.Encode(result[1+8+cidSize:]) + return result +} + +func expirationEpochFromExpiredKey(key []byte) (uint64, error) { + if len(key) != 1+8+cidSize+objectKeySize { + return 0, errInvalidKeyLenght + } + if key[0] != expiredPrefix { + return 0, errInvalidKeyPrefix + } + // BigEndian is important for iteration order + return binary.BigEndian.Uint64(key[1 : 1+8]), nil +} + +func addressFromExpiredKey(key []byte) (oid.Address, error) { + if len(key) != 1+8+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != expiredPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cnr cid.ID + if err := cnr.Decode(key[1+8 : 1+8+cidSize]); err != nil { + return oid.Address{}, err + } + + var obj oid.ID + if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil { + return oid.Address{}, err + } + + var result oid.Address + result.SetObject(obj) + result.SetContainer(cnr) + return result, nil }
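// Illustrative, self-contained example (not part of the patch): why expiredKey
// stores the expiration epoch big-endian. Lexicographic key order then matches
// numeric epoch order, which is what allows isExpired to stop at the first key
// whose epoch is not less than the current one.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	a := make([]byte, 8)
	b := make([]byte, 8)

	binary.BigEndian.PutUint64(a, 1)
	binary.BigEndian.PutUint64(b, 256)
	fmt.Println(bytes.Compare(a, b)) // -1: byte order agrees with 1 < 256

	binary.LittleEndian.PutUint64(a, 1)
	binary.LittleEndian.PutUint64(b, 256)
	fmt.Println(bytes.Compare(a, b)) // 1: little-endian would break the iteration order
}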