package meta

import (
	"crypto/sha256"
	"encoding/binary"
	"errors"
	"fmt"

	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
	"go.etcd.io/bbolt"
)

var (
	// graveyardBucketName stores rows with the objects that have been
	// covered with Tombstone objects. Those objects should not be returned
	// from the node and should not be accepted by the node from other
	// nodes.
	graveyardBucketName = []byte{graveyardPrefix}
	// garbageBucketName stores rows with the objects that should be physically
	// deleted by the node (Garbage Collector routine).
	garbageBucketName = []byte{garbagePrefix}
	// toMoveItBucketName is the single-byte bucket name built from toMoveItPrefix.
	toMoveItBucketName = []byte{toMoveItPrefix}
	// containerVolumeBucketName is the single-byte bucket name built from
	// containerVolumePrefix.
	containerVolumeBucketName = []byte{containerVolumePrefix}
	// containerCounterBucketName is the single-byte bucket name built from
	// containerCountersPrefix.
	containerCounterBucketName = []byte{containerCountersPrefix}
	// expEpochToObjectBucketName is the single-byte bucket name built from
	// expirationEpochToObjectPrefix.
	expEpochToObjectBucketName = []byte{expirationEpochToObjectPrefix}

	// zeroValue is a single 0xFF byte.
	// NOTE(review): presumably the placeholder stored where only the key
	// matters (the "dummy value" / "zero" mentioned for the buckets below);
	// confirm against the callers.
	zeroValue = []byte{0xFF}

	// errInvalidLength is returned by the decoders and encoders in this file
	// when a key or buffer does not have the expected length.
	errInvalidLength = errors.New("invalid length")
)

// Prefix bytes for database keys. All ids and addresses are encoded in binary
// unless specified otherwise.
//
// The values are assigned by iota, so the position of every entry defines its
// prefix byte: never reorder or delete entries, retire them in place (see the
// blank identifier below) and append new ones at the end.
//
//nolint:godot
const (
	// graveyardPrefix is used for the graveyard bucket.
	// Key: object address
	// Value: tombstone address
	graveyardPrefix = iota
	// garbagePrefix is used for the garbage bucket.
	// Key: object address
	// Value: dummy value
	garbagePrefix
	// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
	// to another shard.
	toMoveItPrefix
	// containerVolumePrefix is used for storing container size estimations.
	// Key: container ID
	// Value: container size in bytes as little-endian uint64
	containerVolumePrefix
	// lockedPrefix is used for storing locked objects information.
	// Key: container ID
	// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
	lockedPrefix
	// shardInfoPrefix is used for storing shard ID. All keys are custom and are not connected to the container.
	shardInfoPrefix

	// ======================
	//  Unique index buckets.
	// ======================

	// primaryPrefix is used for prefixing buckets containing objects of REGULAR type.
	// Key: object ID
	// Value: marshalled object
	primaryPrefix
	// lockersPrefix is used for prefixing buckets containing objects of LOCK type.
	// Key: object ID
	// Value: marshalled object
	lockersPrefix
	// _ is unused. Previous usage was for prefixing buckets containing objects of STORAGEGROUP type.
	// Key: object ID
	// Value: marshaled object
	_
	// tombstonePrefix is used for prefixing buckets containing objects of TOMBSTONE type.
	// Key: object ID
	// Value: marshaled object
	tombstonePrefix
	// smallPrefix is used for prefixing buckets mapping objects to the blobovniczas they are stored in.
	// Key: object ID
	// Value: blobovnicza ID
	smallPrefix
	// rootPrefix is used for prefixing buckets mapping parent object to the split info.
	// Key: object ID
	// Value: split info
	rootPrefix

	// ====================
	//  FKBT index buckets.
	// ====================

	// ownerPrefix was used for prefixing FKBT index buckets mapping owner to object IDs.
	// Key: owner ID
	// Value: bucket containing object IDs as keys
	// removed in version 3
	ownerPrefix
	// userAttributePrefix was used for prefixing FKBT index buckets containing objects.
	// Key: attribute value
	// Value: bucket containing object IDs as keys
	// NOTE(review): still referenced by attributeBucketName and the
	// *FromAttributeBucket helpers in this file; confirm whether "was used" is accurate.
	userAttributePrefix

	// ====================
	//  List index buckets.
	// ====================

	// payloadHashPrefix was used for prefixing List index buckets mapping payload hash to a list of object IDs.
	// Key: payload hash
	// Value: list of object IDs
	// removed in version 3
	payloadHashPrefix
	// parentPrefix is used for prefixing List index buckets mapping parent ID to a list of children IDs.
	// Key: parent ID
	// Value: list of object IDs
	parentPrefix
	// splitPrefix is used for prefixing List index buckets mapping split ID to a list of object IDs.
	// Key: split ID
	// Value: list of object IDs
	splitPrefix

	// containerCountersPrefix is used for storing container object counters.
	// Key: container ID + type
	// Value: container size in bytes as little-endian uint64
	// NOTE(review): the Value line reads like a copy of containerVolumePrefix;
	// confirm the real counter encoding.
	containerCountersPrefix

	// ecInfoPrefix is used for storing relation between EC parent id and chunk id.
	// Key: container ID + type
	// Value: Object id
	// NOTE(review): the Key line looks copied from containerCountersPrefix;
	// confirm against the code that fills this bucket.
	ecInfoPrefix

	// expirationEpochToObjectPrefix is used for storing relation between expiration epoch and object id.
	// Key: expiration epoch + object address
	// Value: zero
	expirationEpochToObjectPrefix

	// objectToExpirationEpochPrefix is used for storing relation between object id and its expiration epoch.
	// Key: object address
	// Value: expiration epoch
	objectToExpirationEpochPrefix
)

// Sizes, in bytes, of the binary key components used in this file:
// a container ID (cidSize) and an object ID (objectKeySize) are both
// sha256.Size bytes, a bucket name is one prefix byte plus a container ID,
// an address is a container ID followed by an object ID, and an epoch is a
// uint64.
const (
	cidSize        = sha256.Size
	bucketKeySize  = 1 + cidSize
	objectKeySize  = sha256.Size
	addressKeySize = cidSize + objectKeySize
	epochSize      = 8
)

// bucketName builds a per-container bucket name inside key and returns it:
// the prefix byte followed by the binary container ID, bucketKeySize bytes in
// total.
//
// key is used as the output buffer and is overwritten; it must provide at
// least bucketKeySize bytes. The returned slice aliases key.
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
	key[0] = prefix
	cnr.Encode(key[1:])
	return key[:bucketKeySize]
}

// primaryBucketName returns the primaryPrefix bucket name of container cnr,
// built in key (see bucketName).
func primaryBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, primaryPrefix, key)
}

// tombstoneBucketName returns the tombstonePrefix bucket name of container
// cnr, built in key (see bucketName).
func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, tombstonePrefix, key)
}

// smallBucketName returns the smallPrefix bucket name of container cnr, built
// in key (see bucketName).
func smallBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, smallPrefix, key)
}

// attributeBucketName returns the bucket name of a user attribute:
// userAttributePrefix, the binary container ID and then the raw bytes of
// attributeKey.
//
// The fixed-size head is written into key (see bucketName). The attribute key
// is appended to it, so the result reuses key's backing array when its
// capacity allows and is a fresh allocation otherwise.
func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
	return append(bucketName(cnr, userAttributePrefix, key), attributeKey...)
}

// cidFromAttributeBucket extracts the container ID from a bucket name of the
// form produced by attributeBucketName.
//
// The boolean result is false, and the returned ID is the zero value, when
// bucketName is shorter than bucketKeySize, does not start with
// userAttributePrefix, or holds bytes that do not decode as a container ID.
func cidFromAttributeBucket(bucketName []byte) (cid.ID, bool) {
	if len(bucketName) < bucketKeySize || bucketName[0] != userAttributePrefix {
		return cid.ID{}, false
	}

	// Decode before building the result list: reading the variable and
	// mutating it through Decode in one return statement would rely on an
	// evaluation order the language does not specify.
	var result cid.ID
	if err := result.Decode(bucketName[1:bucketKeySize]); err != nil {
		return cid.ID{}, false
	}
	return result, true
}

// attributeFromAttributeBucket extracts the attribute key from a bucket name
// of the form produced by attributeBucketName: everything after the first
// bucketKeySize bytes.
//
// The boolean result is false when bucketName is shorter than bucketKeySize
// or does not start with userAttributePrefix.
func attributeFromAttributeBucket(bucketName []byte) (string, bool) {
	if len(bucketName) < bucketKeySize || bucketName[0] != userAttributePrefix {
		return "", false
	}
	return string(bucketName[bucketKeySize:]), true
}

// rootBucketName returns the rootPrefix bucket name of container cnr, built
// in key (see bucketName).
func rootBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, rootPrefix, key)
}

// parentBucketName returns the parentPrefix bucket name of container cnr,
// built in key (see bucketName).
func parentBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, parentPrefix, key)
}

// splitBucketName returns the splitPrefix bucket name of container cnr, built
// in key (see bucketName).
func splitBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, splitPrefix, key)
}

// ecInfoBucketName returns the ecInfoPrefix bucket name of container cnr,
// built in key (see bucketName).
func ecInfoBucketName(cnr cid.ID, key []byte) []byte {
	return bucketName(cnr, ecInfoPrefix, key)
}

// objectToExpirationEpochBucketName returns the objectToExpirationEpochPrefix
// bucket name of container cnr, built in key (see bucketName).
func objectToExpirationEpochBucketName(cnr cid.ID, key []byte) []byte { return bucketName(cnr, objectToExpirationEpochPrefix, key) } func expirationEpochKey(epoch uint64, cnr cid.ID, obj oid.ID) []byte { result := make([]byte, epochSize+addressKeySize) binary.BigEndian.PutUint64(result, epoch) cnr.Encode(result[epochSize:]) obj.Encode(result[epochSize+cidSize:]) return result } func parseExpirationEpochKey(key []byte) (uint64, cid.ID, oid.ID, error) { if len(key) != epochSize+addressKeySize { return 0, cid.ID{}, oid.ID{}, fmt.Errorf("unexpected expiration epoch to object key length: %d", len(key)) } epoch := binary.BigEndian.Uint64(key) var cnr cid.ID if err := cnr.Decode(key[epochSize : epochSize+cidSize]); err != nil { return 0, cid.ID{}, oid.ID{}, fmt.Errorf("failed to decode expiration epoch to object key (container ID): %w", err) } var obj oid.ID if err := obj.Decode(key[epochSize+cidSize:]); err != nil { return 0, cid.ID{}, oid.ID{}, fmt.Errorf("failed to decode expiration epoch to object key (object ID): %w", err) } return epoch, cnr, obj, nil } // addressKey returns key for K-V tables when key is a whole address. func addressKey(addr oid.Address, key []byte) []byte { addr.Container().Encode(key) addr.Object().Encode(key[cidSize:]) return key[:addressKeySize] } // parses object address formed by addressKey. func decodeAddressFromKey(dst *oid.Address, k []byte) error { if len(k) != addressKeySize { return errInvalidLength } var cnr cid.ID if err := cnr.Decode(k[:cidSize]); err != nil { return err } var obj oid.ID if err := obj.Decode(k[cidSize:]); err != nil { return err } dst.SetObject(obj) dst.SetContainer(cnr) return nil } // objectKey returns key for K-V tables when key is an object id. func objectKey(obj oid.ID, key []byte) []byte { obj.Encode(key) return key[:objectKeySize] } // if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular. 
// // firstIrregularObjectType(tx, cnr, obj) usage allows getting object type. func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type { if len(objs) == 0 { panic("empty object list in firstIrregularObjectType") } var keys [2][1 + cidSize]byte irregularTypeBuckets := [...]struct { typ objectSDK.Type name []byte }{ {objectSDK.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])}, {objectSDK.TypeLock, bucketNameLockers(idCnr, keys[1][:])}, } for i := range objs { for j := range irregularTypeBuckets { if inBucket(tx, irregularTypeBuckets[j].name, objs[i]) { return irregularTypeBuckets[j].typ } } } return objectSDK.TypeRegular } // return true if provided object is of LOCK type. func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool { return inBucket(tx, bucketNameLockers(idCnr, make([]byte, bucketKeySize)), objectKey(obj, make([]byte, objectKeySize))) } const NoExpirationEpoch uint64 = 0 // encodeTombstoneWithExpEpoch encodes a tombstone label in the following // format: tombstone_address + expiration_epoch. // // Returns an error if the buffer length isn't 32. // // The expiration epoch shouldn't be [NoExpirationEpoch], as tombstone labels // are intended to have a valid expiration epoch. // // The use of [NoExpirationEpoch] is allowed only for test purposes. func encodeTombstoneWithExpEpoch(addr oid.Address, expEpoch uint64, dst []byte) error { if len(dst) != addressKeySize+epochSize { return errInvalidLength } addr.Container().Encode(dst[:cidSize]) addr.Object().Encode(dst[cidSize:addressKeySize]) binary.LittleEndian.PutUint64(dst[addressKeySize:], expEpoch) return nil } // decodeTombstoneWithExpEpoch decodes a tombstone label in the following // formats: tombstone address or tombstone address + expiration epoch. // // Expiration epoch is set to [NoExpirationEpoch] if the label doesn't have one. 
func decodeTombstoneWithExpEpoch(addr *oid.Address, expEpoch *uint64, src []byte) error { if len(src) != addressKeySize && len(src) != addressKeySize+epochSize { return errInvalidLength } var cnt cid.ID if err := cnt.Decode(src[:cidSize]); err != nil { return err } var obj oid.ID if err := obj.Decode(src[cidSize:addressKeySize]); err != nil { return err } addr.SetContainer(cnt) addr.SetObject(obj) if len(src) > addressKeySize { *expEpoch = binary.LittleEndian.Uint64(src[addressKeySize:]) } else { *expEpoch = NoExpirationEpoch } return nil } // lockWithExpEpoch contains the ID and expiration epoch of the lock. type lockWithExpEpoch struct { id [objectKeySize]byte expEpoch [epochSize]byte } // decode decodes the ID and expiration epoch of the lock. // // If the lock has no expiration epoch, uses [NoExpirationEpoch] instead. func (l lockWithExpEpoch) decode(id *oid.ID, expEpoch *uint64) error { if err := id.Decode(l.id[:]); err != nil { return err } *expEpoch = binary.LittleEndian.Uint64(l.expEpoch[:]) return nil } // encode encodes the ID and expiration epoch of the lock. func (l *lockWithExpEpoch) encode(id oid.ID, expEpoch uint64) { id.Encode(l.id[:]) binary.LittleEndian.PutUint64(l.expEpoch[:], expEpoch) } // decodeLockWithExpEpochList decodes the lock list encoded with // [encodeLockWithExpEpochList]. // // If some locks have no expiration epoch, uses [NoExpirationEpoch] instead. 
func decodeLockWithExpEpochList(data []byte) (locks []lockWithExpEpoch, err error) { xs, err := decodeList(data) if err != nil { return nil, fmt.Errorf("couldn't decode list: %w", err) } var id []byte var idMatched bool for _, x := range xs { switch size := len(x); { case size == objectKeySize && !idMatched: id = x idMatched = true case size == objectKeySize && idMatched: var lock lockWithExpEpoch copy(lock.id[:], id) locks = append(locks, lock) id = x case size == epochSize && idMatched: var lock lockWithExpEpoch copy(lock.id[:], id) copy(lock.expEpoch[:], x) locks = append(locks, lock) idMatched = false case size == epochSize && !idMatched: return nil, errors.New("found expiration epoch but expected lock") default: return nil, fmt.Errorf("unexpected list element size %d", size) } } if idMatched { var lock lockWithExpEpoch copy(lock.id[:], id) locks = append(locks, lock) } return locks, nil } // encodeLockWithExpEpochList encodes the lock list. // // If some locks have [NoExpirationEpoch], encodes only their IDs. func encodeLockWithExpEpochList(locks []lockWithExpEpoch) (data []byte, err error) { var noEpoch [epochSize]byte var xs [][]byte for _, lock := range locks { xs = append(xs, lock.id[:]) if lock.expEpoch != noEpoch { xs = append(xs, lock.expEpoch[:]) } } if data, err = encodeList(xs); err != nil { return nil, fmt.Errorf("couldn't encode list: %w", err) } return }