frostfs-node/pkg/local_object_storage/metabase/util.go

package meta

import (
	"crypto/sha256"
	"encoding/binary"
	"errors"
	"fmt"

	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
	"go.etcd.io/bbolt"
)

// Names of the top-level buckets. Each name is a single prefix byte; no
// container ID is appended to it.
var (
	// graveyardBucketName is the bucket with rows for the objects that have
	// been covered with Tombstone objects. Such objects should not be returned
	// from the node and should not be accepted by the node from other nodes.
	graveyardBucketName = []byte{graveyardPrefix}
	// garbageBucketName is the bucket with rows for the objects that should be
	// physically deleted by the node (Garbage Collector routine).
	garbageBucketName = []byte{garbagePrefix}
	// toMoveItBucketName is the bucket with IDs of objects that are candidates
	// for moving to another shard.
	toMoveItBucketName = []byte{toMoveItPrefix}
	// containerVolumeBucketName is the bucket with container size estimations.
	containerVolumeBucketName = []byte{containerVolumePrefix}
	// containerCounterBucketName is the bucket with container object counters.
	containerCounterBucketName = []byte{containerCountersPrefix}
	// expEpochToObjectBucketName is the bucket relating expiration epochs to
	// objects; see expirationEpochKey for the key layout.
	expEpochToObjectBucketName = []byte{expirationEpochToObjectPrefix}
)

var (
	// zeroValue is a one-byte placeholder. Despite its name the byte is 0xFF,
	// not 0x00.
	// NOTE(review): presumably stored where only the key matters (the "dummy
	// value" / "zero" rows described for the prefixes) - usage is not visible
	// in this file, confirm against callers.
	zeroValue = []byte{0xFF}

	// errInvalidLength is returned by decodeAddressFromKey when the key is not
	// exactly addressKeySize bytes long.
	errInvalidLength = errors.New("invalid length")
)

// Prefix bytes for database keys. All ids and addresses are encoded in binary
// unless specified otherwise.
//
// The values are assigned by iota, so the position of a name in this block
// determines its prefix byte: inserting, removing or reordering entries changes
// the value of every prefix declared after that point. Note the `_` placeholder
// that keeps the slot of a prefix that is no longer used.
//
//nolint:godot
const (
	// graveyardPrefix is used for the graveyard bucket.
	//  Key: object address
	//  Value: tombstone address
	graveyardPrefix = iota
	// garbagePrefix is used for the garbage bucket.
	//  Key: object address
	//  Value: dummy value
	garbagePrefix
	// toMoveItPrefix is used for the bucket containing IDs of objects that are candidates for moving
	// to another shard.
	toMoveItPrefix
	// containerVolumePrefix is used for storing container size estimations.
	//  Key: container ID
	//  Value: container size in bytes as little-endian uint64
	containerVolumePrefix
	// lockedPrefix is used for storing locked objects information.
	//  Key: container ID
	//  Value: bucket mapping locked objects to the list of corresponding LOCK objects.
	lockedPrefix
	// shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container.
	shardInfoPrefix

	// ======================
	// Unique index buckets.
	// ======================

	// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
	//  Key: object ID
	//  Value: marshaled object
	primaryPrefix
	// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
	//  Key: object ID
	//  Value: marshaled object
	lockersPrefix
	// _ is unused. Previous usage was for prefixing buckets containing objects of STORAGEGROUP type.
	// The slot is kept so that the prefixes declared after it keep their values.
	//  Key: object ID
	//  Value: marshaled object
	_
	// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
	//  Key: object ID
	//  Value: marshaled object
	tombstonePrefix
	// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
	//  Key: object ID
	//  Value: blobovnicza ID
	smallPrefix
	// rootPrefix is used for prefixing buckets mapping parent object to the split info.
	//  Key: object ID
	//  Value: split info
	rootPrefix

	// ====================
	// FKBT index buckets.
	// ====================

	// ownerPrefix was used for prefixing FKBT index buckets mapping owner to object IDs.
	//  Key: owner ID
	//  Value: bucket containing object IDs as keys
	// Removed in version 3.
	ownerPrefix
	// userAttributePrefix is used by attributeBucketName for prefixing FKBT index buckets containing objects.
	//  Key: attribute value
	//  Value: bucket containing object IDs as keys
	userAttributePrefix

	// ====================
	// List index buckets.
	// ====================

	// payloadHashPrefix was used for prefixing List index buckets mapping payload hash to a list of object IDs.
	//  Key: payload hash
	//  Value: list of object IDs
	// Removed in version 3.
	payloadHashPrefix
	// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
	//  Key: parent ID
	//  Value: list of object IDs
	parentPrefix
	// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
	//  Key: split ID
	//  Value: list of object IDs
	splitPrefix

	// containerCountersPrefix is used for storing container object counters.
	//  Key: container ID + type
	//  Value: container size in bytes as little-endian uint64
	// NOTE(review): the value description is identical to the one of
	// containerVolumePrefix and may be a copy-paste leftover; confirm the
	// actual counter encoding.
	containerCountersPrefix

	// ecInfoPrefix is used for storing relation between EC parent id and chunk id.
	//  Key: container ID + type
	//  Value: Object id
	// NOTE(review): the key description does not match the "EC parent id to
	// chunk id" summary and repeats the one of containerCountersPrefix; confirm.
	ecInfoPrefix

	// expirationEpochToObjectPrefix is used for storing relation between expiration epoch and object id.
	//  Key: expiration epoch + object address (see expirationEpochKey)
	//  Value: zero
	expirationEpochToObjectPrefix

	// objectToExpirationEpochPrefix is used for storing relation between object id and its expiration epoch,
	// i.e. the reverse direction of expirationEpochToObjectPrefix.
	//  Key: object address
	//  Value: expiration epoch
	// NOTE(review): objectToExpirationEpochBucketName builds a per-container
	// bucket name from this prefix, so the key inside that bucket may be the
	// object ID alone rather than the whole address; confirm.
	objectToExpirationEpochPrefix
)

// Sizes, in bytes, of the fixed-width parts that bucket names and database
// keys in this file are assembled from.
const (
	// cidSize is the length of an encoded container ID; it equals the size of
	// a SHA-256 digest.
	cidSize = sha256.Size
	// objectKeySize is the length of an encoded object ID; it equals the size
	// of a SHA-256 digest.
	objectKeySize = sha256.Size
	// bucketKeySize is the length of a per-container bucket name: one prefix
	// byte followed by the container ID.
	bucketKeySize = cidSize + 1
	// addressKeySize is the length of an address key: the container ID
	// followed by the object ID.
	addressKeySize = objectKeySize + cidSize
	// epochSize is the length of an epoch stored as a uint64.
	epochSize = 8
)

// bucketName assembles the name of a per-container bucket inside key: the
// prefix byte goes into key[0] and the encoded container ID right after it.
//
// The result is key[:bucketKeySize], i.e. it shares memory with key, so key
// must provide room for at least bucketKeySize bytes.
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
	key[0] = prefix
	idPart := key[1:]
	cnr.Encode(idPart)
	name := key[:bucketKeySize]
	return name
}

// primaryBucketName returns the name of the container's bucket with objects
// of REGULAR type: the primaryPrefix byte followed by the encoded container ID.
// The name is built in, and aliases, key (see bucketName).
func primaryBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = primaryPrefix
	return bucketName(cnr, prefix, key)
}

// tombstoneBucketName returns the name of the container's bucket with objects
// of TOMBSTONE type: the tombstonePrefix byte followed by the encoded
// container ID. The name is built in, and aliases, key (see bucketName).
func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = tombstonePrefix
	return bucketName(cnr, prefix, key)
}

// smallBucketName returns the name of the container's bucket mapping objects
// to the blobovniczas they are stored in: the smallPrefix byte followed by the
// encoded container ID. The name is built in, and aliases, key (see bucketName).
func smallBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = smallPrefix
	return bucketName(cnr, prefix, key)
}

// attributeBucketName returns the name of the container's bucket for the given
// attribute: the userAttributePrefix byte, the encoded container ID and then
// the bytes of attributeKey.
//
// The first bucketKeySize bytes are written into key; attributeKey is appended
// to that slice, so the result reuses key's backing array when its capacity
// allows and is a freshly allocated slice otherwise.
func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
	containerPart := bucketName(cnr, userAttributePrefix, key)
	return append(containerPart, attributeKey...)
}

// rootBucketName returns the name of the container's bucket mapping parent
// objects to their split info: the rootPrefix byte followed by the encoded
// container ID. The name is built in, and aliases, key (see bucketName).
func rootBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = rootPrefix
	return bucketName(cnr, prefix, key)
}

// parentBucketName returns the name of the container's bucket mapping a parent
// ID to the list of its children IDs: the parentPrefix byte followed by the
// encoded container ID. The name is built in, and aliases, key (see bucketName).
func parentBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = parentPrefix
	return bucketName(cnr, prefix, key)
}

// splitBucketName returns the name of the container's bucket mapping a split
// ID to a list of object IDs: the splitPrefix byte followed by the encoded
// container ID. The name is built in, and aliases, key (see bucketName).
func splitBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = splitPrefix
	return bucketName(cnr, prefix, key)
}

// ecInfoBucketName returns the name of the container's bucket relating EC
// parent IDs to chunk IDs: the ecInfoPrefix byte followed by the encoded
// container ID. The name is built in, and aliases, key (see bucketName).
func ecInfoBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = ecInfoPrefix
	return bucketName(cnr, prefix, key)
}

// objectToExpirationEpochBucketName returns the name of the container's bucket
// relating objects to their expiration epoch: the objectToExpirationEpochPrefix
// byte followed by the encoded container ID. The name is built in, and
// aliases, key (see bucketName).
func objectToExpirationEpochBucketName(cnr cid.ID, key []byte) []byte {
	const prefix byte = objectToExpirationEpochPrefix
	return bucketName(cnr, prefix, key)
}

// expirationEpochKey builds a newly allocated key of epochSize+addressKeySize
// bytes with the layout:
//
//	big-endian epoch | encoded container ID | encoded object ID
//
// parseExpirationEpochKey is the inverse operation.
func expirationEpochKey(epoch uint64, cnr cid.ID, obj oid.ID) []byte {
	const (
		cnrFrom = epochSize
		objFrom = epochSize + cidSize
	)

	key := make([]byte, epochSize+addressKeySize)
	binary.BigEndian.PutUint64(key, epoch)
	cnr.Encode(key[cnrFrom:])
	obj.Encode(key[objFrom:])

	return key
}

// parseExpirationEpochKey splits a key with the layout produced by
// expirationEpochKey into the expiration epoch, the container ID and the
// object ID.
//
// An error is returned if key is not exactly epochSize+addressKeySize bytes
// long or if either ID fails to decode; all other results are zero values in
// that case.
func parseExpirationEpochKey(key []byte) (uint64, cid.ID, oid.ID, error) {
	const (
		cnrFrom = epochSize
		objFrom = epochSize + cidSize
	)

	if n := len(key); n != epochSize+addressKeySize {
		return 0, cid.ID{}, oid.ID{}, fmt.Errorf("unexpected expiration epoch to object key length: %d", n)
	}

	var (
		cnr cid.ID
		obj oid.ID
	)
	if err := cnr.Decode(key[cnrFrom:objFrom]); err != nil {
		return 0, cid.ID{}, oid.ID{}, fmt.Errorf("failed to decode expiration epoch to object key (container ID): %w", err)
	}
	if err := obj.Decode(key[objFrom:]); err != nil {
		return 0, cid.ID{}, oid.ID{}, fmt.Errorf("failed to decode expiration epoch to object key (object ID): %w", err)
	}

	// The epoch occupies the leading epochSize bytes in big-endian order.
	return binary.BigEndian.Uint64(key), cnr, obj, nil
}

// addressKey encodes addr into key for K-V tables keyed by a whole address:
// the container ID first, the object ID right after it.
//
// The result is key[:addressKeySize], i.e. it shares memory with key, so key
// must provide room for at least addressKeySize bytes. decodeAddressFromKey
// parses the result back.
func addressKey(addr oid.Address, key []byte) []byte {
	cnr := addr.Container()
	cnr.Encode(key)

	obj := addr.Object()
	obj.Encode(key[cidSize:])

	encoded := key[:addressKeySize]
	return encoded
}

// decodeAddressFromKey parses an object address from k, which must have the
// layout produced by addressKey, and stores it in dst.
//
// It returns errInvalidLength if k is not exactly addressKeySize bytes long,
// or the decoding error of the container or object ID. dst is left untouched
// on error.
func decodeAddressFromKey(dst *oid.Address, k []byte) error {
	if len(k) != addressKeySize {
		return errInvalidLength
	}

	var (
		cnr cid.ID
		obj oid.ID
	)

	// Decode both halves first so that dst is only modified on full success.
	err := cnr.Decode(k[:cidSize])
	if err == nil {
		err = obj.Decode(k[cidSize:])
	}
	if err != nil {
		return err
	}

	dst.SetObject(obj)
	dst.SetContainer(cnr)

	return nil
}

// objectKey encodes obj into key for K-V tables keyed by an object ID.
//
// The result is key[:objectKeySize], i.e. it shares memory with key, so key
// must provide room for at least objectKeySize bytes.
func objectKey(obj oid.ID, key []byte) []byte {
	obj.Encode(key)
	encoded := key[:objectKeySize]
	return encoded
}

// firstIrregularObjectType checks the keys from objs, in order, against the
// container's TOMBSTONE bucket and then its LOCK bucket (via inBucket) and
// returns the type of the first bucket that reports a hit. If no key is found
// in either bucket, objectSDK.TypeRegular is returned.
//
// Calling firstIrregularObjectType(tx, cnr, obj) with a single key therefore
// yields the type of that object. It panics if objs is empty.
func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type {
	if len(objs) == 0 {
		panic("empty object list in firstIrregularObjectType")
	}

	// Separate buffers: both bucket names must stay valid for the whole loop.
	var tombstoneBuf, lockersBuf [bucketKeySize]byte
	tombstones := tombstoneBucketName(idCnr, tombstoneBuf[:])
	lockers := bucketNameLockers(idCnr, lockersBuf[:])

	for _, objKey := range objs {
		switch {
		case inBucket(tx, tombstones, objKey):
			return objectSDK.TypeTombstone
		case inBucket(tx, lockers, objKey):
			return objectSDK.TypeLock
		}
	}

	return objectSDK.TypeRegular
}

// isLockObject reports whether inBucket finds the key of obj in the
// container's bucket of LOCK objects, i.e. whether obj is of LOCK type.
func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
	lockers := bucketNameLockers(idCnr, make([]byte, bucketKeySize))
	objKey := objectKey(obj, make([]byte, objectKeySize))

	return inBucket(tx, lockers, objKey)
}