frostfs-node/pkg/local_object_storage/metabase/inhume.go

444 lines
12 KiB
Go
Raw Normal View History

package meta
import (
"bytes"
"context"
"errors"
"fmt"
"time"
storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"go.etcd.io/bbolt"
)
// InhumePrm encapsulates parameters for Inhume operation.
type InhumePrm struct {
tomb *oid.Address
target []oid.Address
lockObjectHandling bool
forceRemoval bool
expEpoch uint64
}
// DeletionInfo contains details on deleted object.
type DeletionInfo struct {
Size uint64
CID cid.ID
IsUser bool
}
// InhumeRes encapsulates results of Inhume operation.
type InhumeRes struct {
deletedLockObj []oid.Address
logicInhumed uint64
userInhumed uint64
inhumedByCnrID map[cid.ID]ObjectCounters
deletionDetails []DeletionInfo
}
// LogicInhumed return number of logic object
// that have been inhumed.
func (i InhumeRes) LogicInhumed() uint64 {
return i.logicInhumed
}
func (i InhumeRes) UserInhumed() uint64 {
return i.userInhumed
}
// InhumedByCnrID return number of object
// that have been inhumed by container ID.
func (i InhumeRes) InhumedByCnrID() map[cid.ID]ObjectCounters {
return i.inhumedByCnrID
}
// DeletedLockObjects returns deleted object of LOCK
// type. Returns always nil if WithoutLockObjectHandling
// was provided to the InhumePrm.
func (i InhumeRes) DeletedLockObjects() []oid.Address {
return i.deletedLockObj
}
// GetDeletionInfoLength returns amount of stored elements
// in deleted sizes array.
func (i InhumeRes) GetDeletionInfoLength() int {
return len(i.deletionDetails)
}
// GetDeletionInfoByIndex returns both deleted object sizes and
// associated container ID by index.
func (i InhumeRes) GetDeletionInfoByIndex(target int) DeletionInfo {
return i.deletionDetails[target]
}
// StoreDeletionInfo stores size of deleted object and associated container ID
// in corresponding arrays.
func (i *InhumeRes) storeDeletionInfo(containerID cid.ID, deletedSize uint64, isUser bool) {
i.deletionDetails = append(i.deletionDetails, DeletionInfo{
Size: deletedSize,
CID: containerID,
IsUser: isUser,
})
i.logicInhumed++
if isUser {
i.userInhumed++
}
if v, ok := i.inhumedByCnrID[containerID]; ok {
v.Logic++
if isUser {
v.User++
}
i.inhumedByCnrID[containerID] = v
} else {
v = ObjectCounters{
Logic: 1,
}
if isUser {
v.User = 1
}
i.inhumedByCnrID[containerID] = v
}
}
// SetAddresses sets a list of object addresses that should be inhumed.
func (p *InhumePrm) SetAddresses(addrs ...oid.Address) {
p.target = addrs
}
// SetTombstoneAddress sets tombstone address as the reason for inhume operation.
//
// addr should not be nil.
// Should not be called along with SetGCMark.
func (p *InhumePrm) SetTombstoneAddress(addr oid.Address, expEpoch uint64) {
p.tomb = &addr
p.expEpoch = expEpoch
}
// SetGCMark marks the object to be physically removed.
//
// Should not be called along with SetTombstoneAddress.
func (p *InhumePrm) SetGCMark() {
p.tomb = nil
}
// SetLockObjectHandling checks if there were
// any LOCK object among the targets set via WithAddresses.
func (p *InhumePrm) SetLockObjectHandling() {
p.lockObjectHandling = true
}
// SetForceGCMark allows removal any object. Expected to be
// called only in control service.
func (p *InhumePrm) SetForceGCMark() {
p.tomb = nil
p.forceRemoval = true
}
func (p *InhumePrm) validate() error {
if p == nil {
return nil
}
if p.tomb != nil {
for _, addr := range p.target {
if addr.Container() != p.tomb.Container() {
return fmt.Errorf("object %s and tombstone %s have different container ID", addr, p.tomb)
}
}
}
return nil
}
var errBreakBucketForEach = errors.New("bucket ForEach break")
// ErrLockObjectRemoval is returned when inhume operation is being
// performed on lock object, and it is not a forced object removal.
var ErrLockObjectRemoval = logicerr.New("lock object removal")
// Inhume marks objects as removed but not removes it from metabase.
//
// Allows inhuming non-locked objects only. Returns apistatus.ObjectLocked
// if at least one object is locked. Returns ErrLockObjectRemoval if inhuming
// is being performed on lock (not locked) object.
//
// NOTE: Marks any object with GC mark (despite any prohibitions on operations
// with that object) if WithForceGCMark option has been provided.
func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
var (
startedAt = time.Now()
success = false
)
defer func() {
db.metrics.AddMethodDuration("Inhume", time.Since(startedAt), success)
}()
_, span := tracing.StartSpanFromContext(ctx, "metabase.Inhume")
defer span.End()
db.modeMtx.RLock()
defer db.modeMtx.RUnlock()
if err := prm.validate(); err != nil {
return InhumeRes{}, err
}
if db.mode.NoMetabase() {
return InhumeRes{}, ErrDegradedMode
} else if db.mode.ReadOnly() {
return InhumeRes{}, ErrReadOnlyMode
}
res := InhumeRes{
inhumedByCnrID: make(map[cid.ID]ObjectCounters),
}
currEpoch := db.epochState.CurrentEpoch()
err := db.boltDB.Batch(func(tx *bbolt.Tx) error {
return db.inhumeTx(tx, currEpoch, prm, &res)
})
success = err == nil
if success {
for _, addr := range prm.target {
storagelog.Write(ctx, db.log,
storagelog.AddressField(addr),
storagelog.OpField("metabase INHUME"))
}
}
return res, metaerr.Wrap(err)
}
func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes) error {
garbageBKT := tx.Bucket(garbageBucketName)
graveyardBKT := tx.Bucket(graveyardBucketName)
bkt, value, err := db.getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT, prm)
if err != nil {
return err
}
buf := make([]byte, addressKeySize)
for i := range prm.target {
if err := db.inhumeTxSingle(bkt, value, graveyardBKT, garbageBKT, prm.target[i], buf, epoch, prm, res); err != nil {
return err
}
}
return db.applyInhumeResToCounters(tx, res)
}
func (db *DB) inhumeTxSingle(bkt *bbolt.Bucket, value []byte, graveyardBKT, garbageBKT *bbolt.Bucket, addr oid.Address, buf []byte, epoch uint64, prm InhumePrm, res *InhumeRes) error {
id := addr.Object()
cnr := addr.Container()
tx := bkt.Tx()
// prevent locked objects to be inhumed
if !prm.forceRemoval && objectLocked(tx, cnr, id) {
return new(apistatus.ObjectLocked)
}
var lockWasChecked bool
// prevent lock objects to be inhumed
// if `Inhume` was called not with the
// `WithForceGCMark` option
if !prm.forceRemoval {
if isLockObject(tx, cnr, id) {
return ErrLockObjectRemoval
}
lockWasChecked = true
}
obj, err := db.get(tx, addr, buf, false, true, epoch)
targetKey := addressKey(addr, buf)
var ecErr *objectSDK.ECInfoError
if err == nil {
err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, obj, res)
if err != nil {
return err
}
} else if errors.As(err, &ecErr) {
err = db.inhumeECInfo(tx, epoch, prm.tomb, res, garbageBKT, graveyardBKT, ecErr.ECInfo(), cnr, bkt, value)
if err != nil {
return err
}
}
if prm.tomb != nil {
var isTomb bool
isTomb, err = db.markAsGC(graveyardBKT, garbageBKT, targetKey)
if err != nil {
return err
}
if isTomb {
return nil
}
}
// consider checking if target is already in graveyard?
err = bkt.Put(targetKey, value)
if err != nil {
return err
}
if prm.lockObjectHandling {
// do not perform lock check if
// it was already called
if lockWasChecked {
// inhumed object is not of
// the LOCK type
return nil
}
// This condition should be checked in the beginning because
// of a possible race with GC.
if isLockObject(tx, cnr, id) {
res.deletedLockObj = append(res.deletedLockObj, addr)
}
}
return nil
}
func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *InhumeRes,
garbageBKT *bbolt.Bucket, graveyardBKT *bbolt.Bucket,
ecInfo *objectSDK.ECInfo, cnr cid.ID, targetBucket *bbolt.Bucket, value []byte,
) error {
for _, chunk := range ecInfo.Chunks {
chunkBuf := make([]byte, addressKeySize)
var chunkAddr oid.Address
chunkAddr.SetContainer(cnr)
var chunkID oid.ID
err := chunkID.ReadFromV2(chunk.ID)
if err != nil {
return err
}
chunkAddr.SetObject(chunkID)
chunkObj, err := db.get(tx, chunkAddr, chunkBuf, false, true, epoch)
if err != nil {
return err
}
chunkKey := addressKey(chunkAddr, chunkBuf)
err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, chunkKey, cnr, chunkObj, res)
if err != nil {
return err
}
if tomb != nil {
_, err = db.markAsGC(graveyardBKT, garbageBKT, chunkKey)
if err != nil {
return err
}
}
err = targetBucket.Put(chunkKey, value)
if err != nil {
return err
}
}
return nil
}
func (db *DB) applyInhumeResToCounters(tx *bbolt.Tx, res *InhumeRes) error {
if err := db.updateShardObjectCounter(tx, logical, res.LogicInhumed(), false); err != nil {
return err
}
if err := db.updateShardObjectCounter(tx, user, res.UserInhumed(), false); err != nil {
return err
}
return db.updateContainerCounter(tx, res.inhumedByCnrID, false)
}
// getInhumeTargetBucketAndValue return target bucket to store inhume result and value that will be put in the bucket.
//
// target bucket of the operation, one of the:
// 1. Graveyard if Inhume was called with a Tombstone
// 2. Garbage if Inhume was called with a GC mark
//
// value that will be put in the bucket, one of the:
// 1. tombstone address if Inhume was called with
// a Tombstone
// 2. zeroValue if Inhume was called with a GC mark
func (db *DB) getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT *bbolt.Bucket, prm InhumePrm) (targetBucket *bbolt.Bucket, value []byte, err error) {
if prm.tomb == nil {
return garbageBKT, zeroValue, nil
}
tombKey := make([]byte, addressKeySize+epochSize)
if err = encodeTombstoneWithExpEpoch(*prm.tomb, prm.expEpoch, tombKey); err != nil {
return nil, nil, fmt.Errorf("encode tombstone with expiration epoch: %w", err)
}
// it is forbidden to have a tomb-on-tomb in FrostFS,
// so graveyard keys must not be addresses of tombstones
if err := graveyardBKT.Delete(tombKey); err != nil {
return nil, nil, fmt.Errorf("could not remove grave with tombstone key: %w", err)
}
// because it can be a tombstone without expiration epoch (old tombstone format)
if err := graveyardBKT.Delete(tombKey[:addressKeySize]); err != nil {
return nil, nil, fmt.Errorf("could not remove grave with tombstone key: %w", err)
}
return graveyardBKT, tombKey, nil
}
func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, addressKey []byte) (bool, error) {
targetIsTomb := isTomb(graveyardBKT, addressKey)
// do not add grave if target is a tombstone
if targetIsTomb {
return true, nil
}
// if tombstone appears object must be
// additionally marked with GC
return false, garbageBKT.Put(addressKey, zeroValue)
}
func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Bucket, targetKey []byte, cnr cid.ID, obj *objectSDK.Object, res *InhumeRes) error {
containerID, _ := obj.ContainerID()
if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 {
res.storeDeletionInfo(containerID, obj.PayloadSize(), IsUserObject(obj))
}
// if object is stored, and it is regular object then update bucket
// with container size estimations
if obj.Type() == objectSDK.TypeRegular {
err := changeContainerSize(tx, cnr, obj.PayloadSize(), false)
if err != nil {
return err
}
}
return nil
}
func isTomb(graveyardBucket *bbolt.Bucket, addressKey []byte) bool {
targetIsTomb := false
// because it can contain an expiration epoch (new tombstone format)
addressKey = addressKey[:addressKeySize]
// iterate over graveyard and check if target address
// is the address of tombstone in graveyard.
// tombstone must have the same container ID as key.
c := graveyardBucket.Cursor()
containerPrefix := addressKey[:cidSize]
for k, v := c.Seek(containerPrefix); k != nil && bytes.HasPrefix(k, containerPrefix); k, v = c.Next() {
// check if graveyard has record with key corresponding
// to tombstone address (at least one)
targetIsTomb = bytes.Equal(v[:addressKeySize], addressKey)
if targetIsTomb {
break
}
}
return targetIsTomb
}