forked from TrueCloudLab/frostfs-node
390 lines
10 KiB
Go
390 lines
10 KiB
Go
package meta
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
|
storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
|
apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
|
|
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
|
"go.etcd.io/bbolt"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/trace"
|
|
)
|
|
|
|
// DeletePrm groups the parameters of Delete operation.
|
|
type DeletePrm struct {
|
|
addrs []oid.Address
|
|
}
|
|
|
|
// DeleteRes holds the values produced by the Delete operation.
type DeleteRes struct {
	// Counters of removed objects: physically stored ones and the ones that
	// were still available at the time of removal.
	rawRemoved       uint64
	availableRemoved uint64

	// Payload sizes of removed objects. Both slices are indexed by the
	// position of the address in the request; entries of objects that were not
	// removed stay zero.
	sizes          []uint64
	availableSizes []uint64
}

// AvailableObjectsRemoved reports how many of the removed objects were
// available.
func (r DeleteRes) AvailableObjectsRemoved() uint64 { return r.availableRemoved }

// RawObjectsRemoved reports how many raw objects were removed.
func (r DeleteRes) RawObjectsRemoved() uint64 { return r.rawRemoved }

// RemovedPhysicalObjectSizes reports the payload sizes of the removed
// physical objects. The internal slice is returned without copying.
func (r DeleteRes) RemovedPhysicalObjectSizes() []uint64 { return r.sizes }

// RemovedLogicalObjectSizes reports the payload sizes of the removed logical
// objects. The internal slice is returned without copying.
func (r DeleteRes) RemovedLogicalObjectSizes() []uint64 { return r.availableSizes }
|
|
|
|
// SetAddresses is a Delete option to set the addresses of the objects to delete.
|
|
//
|
|
// Option is required.
|
|
func (p *DeletePrm) SetAddresses(addrs ...oid.Address) {
|
|
p.addrs = addrs
|
|
}
|
|
|
|
type referenceNumber struct {
|
|
all, cur int
|
|
|
|
addr oid.Address
|
|
|
|
obj *objectSDK.Object
|
|
}
|
|
|
|
type referenceCounter map[string]*referenceNumber
|
|
|
|
// Delete removed object records from metabase indexes.
|
|
func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
|
|
var (
|
|
startedAt = time.Now()
|
|
deleted = false
|
|
)
|
|
defer func() {
|
|
db.metrics.AddMethodDuration("Delete", time.Since(startedAt), deleted)
|
|
}()
|
|
|
|
_, span := tracing.StartSpanFromContext(ctx, "metabase.Delete",
|
|
trace.WithAttributes(
|
|
attribute.Int("addr_count", len(prm.addrs)),
|
|
))
|
|
defer span.End()
|
|
|
|
db.modeMtx.RLock()
|
|
defer db.modeMtx.RUnlock()
|
|
|
|
if db.mode.NoMetabase() {
|
|
return DeleteRes{}, ErrDegradedMode
|
|
} else if db.mode.ReadOnly() {
|
|
return DeleteRes{}, ErrReadOnlyMode
|
|
}
|
|
|
|
var rawRemoved uint64
|
|
var availableRemoved uint64
|
|
var err error
|
|
var sizes = make([]uint64, len(prm.addrs))
|
|
var availableSizes = make([]uint64, len(prm.addrs))
|
|
|
|
err = db.boltDB.Update(func(tx *bbolt.Tx) error {
|
|
// We need to clear slice because tx can try to execute multiple times.
|
|
rawRemoved, availableRemoved, err = db.deleteGroup(tx, prm.addrs, sizes, availableSizes)
|
|
return err
|
|
})
|
|
if err == nil {
|
|
deleted = true
|
|
for i := range prm.addrs {
|
|
storagelog.Write(db.log,
|
|
storagelog.AddressField(prm.addrs[i]),
|
|
storagelog.OpField("metabase DELETE"))
|
|
}
|
|
}
|
|
return DeleteRes{
|
|
rawRemoved: rawRemoved,
|
|
availableRemoved: availableRemoved,
|
|
sizes: sizes,
|
|
availableSizes: availableSizes,
|
|
}, metaerr.Wrap(err)
|
|
}
|
|
|
|
// deleteGroup deletes object from the metabase. Handles removal of the
|
|
// references of the split objects.
|
|
// The first return value is a physical objects removed number: physical
|
|
// objects that were stored. The second return value is a logical objects
|
|
// removed number: objects that were available (without Tombstones, GCMarks
|
|
// non-expired, etc.)
|
|
func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address, sizes []uint64, availableSizes []uint64) (uint64, uint64, error) {
|
|
refCounter := make(referenceCounter, len(addrs))
|
|
currEpoch := db.epochState.CurrentEpoch()
|
|
|
|
var rawDeleted uint64
|
|
var availableDeleted uint64
|
|
|
|
for i := range addrs {
|
|
removed, available, size, err := db.delete(tx, addrs[i], refCounter, currEpoch)
|
|
if err != nil {
|
|
return 0, 0, err // maybe log and continue?
|
|
}
|
|
|
|
if removed {
|
|
rawDeleted++
|
|
sizes[i] = size
|
|
}
|
|
|
|
if available {
|
|
availableDeleted++
|
|
availableSizes[i] = size
|
|
}
|
|
}
|
|
|
|
if rawDeleted > 0 {
|
|
err := db.updateCounter(tx, phy, rawDeleted, false)
|
|
if err != nil {
|
|
return 0, 0, fmt.Errorf("could not decrease phy object counter: %w", err)
|
|
}
|
|
}
|
|
|
|
if availableDeleted > 0 {
|
|
err := db.updateCounter(tx, logical, availableDeleted, false)
|
|
if err != nil {
|
|
return 0, 0, fmt.Errorf("could not decrease logical object counter: %w", err)
|
|
}
|
|
}
|
|
|
|
for _, refNum := range refCounter {
|
|
if refNum.cur == refNum.all {
|
|
err := db.deleteObject(tx, refNum.obj, true)
|
|
if err != nil {
|
|
return rawDeleted, availableDeleted, err // maybe log and continue?
|
|
}
|
|
}
|
|
}
|
|
|
|
return rawDeleted, availableDeleted, nil
|
|
}
|
|
|
|
// delete removes object indexes from the metabase. Counts the references
|
|
// of the object that is being removed.
|
|
// The first return value indicates if an object has been removed. (removing a
|
|
// non-exist object is error-free). The second return value indicates if an
|
|
// object was available before the removal (for calculating the logical object
|
|
// counter). The third return value is removed object payload size.
|
|
func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, uint64, error) {
|
|
key := make([]byte, addressKeySize)
|
|
addrKey := addressKey(addr, key)
|
|
garbageBKT := tx.Bucket(garbageBucketName)
|
|
graveyardBKT := tx.Bucket(graveyardBucketName)
|
|
|
|
removeAvailableObject := inGraveyardWithKey(addrKey, graveyardBKT, garbageBKT) == 0
|
|
|
|
// remove record from the garbage bucket
|
|
if garbageBKT != nil {
|
|
err := garbageBKT.Delete(addrKey)
|
|
if err != nil {
|
|
return false, false, 0, fmt.Errorf("could not remove from garbage bucket: %w", err)
|
|
}
|
|
}
|
|
|
|
// unmarshal object, work only with physically stored (raw == true) objects
|
|
obj, err := db.get(tx, addr, key, false, true, currEpoch)
|
|
if err != nil {
|
|
var siErr *objectSDK.SplitInfoError
|
|
var notFoundErr apistatus.ObjectNotFound
|
|
|
|
if errors.As(err, ¬FoundErr) || errors.As(err, &siErr) {
|
|
return false, false, 0, nil
|
|
}
|
|
|
|
return false, false, 0, err
|
|
}
|
|
|
|
// if object is an only link to a parent, then remove parent
|
|
if parent := obj.Parent(); parent != nil {
|
|
parAddr := object.AddressOf(parent)
|
|
sParAddr := addressKey(parAddr, key)
|
|
k := string(sParAddr)
|
|
|
|
nRef, ok := refCounter[k]
|
|
if !ok {
|
|
nRef = &referenceNumber{
|
|
all: parentLength(tx, parAddr),
|
|
addr: parAddr,
|
|
obj: parent,
|
|
}
|
|
|
|
refCounter[k] = nRef
|
|
}
|
|
|
|
nRef.cur++
|
|
}
|
|
|
|
// remove object
|
|
err = db.deleteObject(tx, obj, false)
|
|
if err != nil {
|
|
return false, false, 0, fmt.Errorf("could not remove object: %w", err)
|
|
}
|
|
|
|
return true, removeAvailableObject, obj.PayloadSize(), nil
|
|
}
|
|
|
|
func (db *DB) deleteObject(
|
|
tx *bbolt.Tx,
|
|
obj *objectSDK.Object,
|
|
isParent bool,
|
|
) error {
|
|
err := delUniqueIndexes(tx, obj, isParent)
|
|
if err != nil {
|
|
return fmt.Errorf("can't remove unique indexes")
|
|
}
|
|
|
|
err = updateListIndexes(tx, obj, delListIndexItem)
|
|
if err != nil {
|
|
return fmt.Errorf("can't remove list indexes: %w", err)
|
|
}
|
|
|
|
err = updateFKBTIndexes(tx, obj, delFKBTIndexItem)
|
|
if err != nil {
|
|
return fmt.Errorf("can't remove fake bucket tree indexes: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// parentLength returns amount of available children from parentid index.
|
|
func parentLength(tx *bbolt.Tx, addr oid.Address) int {
|
|
bucketName := make([]byte, bucketKeySize)
|
|
|
|
bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:]))
|
|
if bkt == nil {
|
|
return 0
|
|
}
|
|
|
|
lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:])))
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
|
|
return len(lst)
|
|
}
|
|
|
|
func delUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) {
|
|
bkt := tx.Bucket(item.name)
|
|
if bkt != nil {
|
|
_ = bkt.Delete(item.key) // ignore error, best effort there
|
|
}
|
|
}
|
|
|
|
func delFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
|
bkt := tx.Bucket(item.name)
|
|
if bkt == nil {
|
|
return nil
|
|
}
|
|
|
|
fkbtRoot := bkt.Bucket(item.key)
|
|
if fkbtRoot == nil {
|
|
return nil
|
|
}
|
|
|
|
_ = fkbtRoot.Delete(item.val) // ignore error, best effort there
|
|
return nil
|
|
}
|
|
|
|
func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
|
bkt := tx.Bucket(item.name)
|
|
if bkt == nil {
|
|
return nil
|
|
}
|
|
|
|
lst, err := decodeList(bkt.Get(item.key))
|
|
if err != nil || len(lst) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// remove element from the list
|
|
for i := range lst {
|
|
if bytes.Equal(item.val, lst[i]) {
|
|
copy(lst[i:], lst[i+1:])
|
|
lst = lst[:len(lst)-1]
|
|
break
|
|
}
|
|
}
|
|
|
|
// if list empty, remove the key from <list> bucket
|
|
if len(lst) == 0 {
|
|
_ = bkt.Delete(item.key) // ignore error, best effort there
|
|
|
|
return nil
|
|
}
|
|
|
|
// if list is not empty, then update it
|
|
encodedLst, err := encodeList(lst)
|
|
if err != nil {
|
|
return nil // ignore error, best effort there
|
|
}
|
|
|
|
_ = bkt.Put(item.key, encodedLst) // ignore error, best effort there
|
|
return nil
|
|
}
|
|
|
|
func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error {
|
|
addr := object.AddressOf(obj)
|
|
|
|
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
|
|
addrKey := addressKey(addr, make([]byte, addressKeySize))
|
|
cnr := addr.Container()
|
|
bucketName := make([]byte, bucketKeySize)
|
|
|
|
// add value to primary unique bucket
|
|
if !isParent {
|
|
switch obj.Type() {
|
|
case objectSDK.TypeRegular:
|
|
bucketName = primaryBucketName(cnr, bucketName)
|
|
case objectSDK.TypeTombstone:
|
|
bucketName = tombstoneBucketName(cnr, bucketName)
|
|
case objectSDK.TypeLock:
|
|
bucketName = bucketNameLockers(cnr, bucketName)
|
|
default:
|
|
return ErrUnknownObjectType
|
|
}
|
|
|
|
delUniqueIndexItem(tx, namedBucketItem{
|
|
name: bucketName,
|
|
key: objKey,
|
|
})
|
|
} else {
|
|
delUniqueIndexItem(tx, namedBucketItem{
|
|
name: parentBucketName(cnr, bucketName),
|
|
key: objKey,
|
|
})
|
|
}
|
|
|
|
delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index
|
|
name: smallBucketName(cnr, bucketName),
|
|
key: objKey,
|
|
})
|
|
delUniqueIndexItem(tx, namedBucketItem{ // remove from root index
|
|
name: rootBucketName(cnr, bucketName),
|
|
key: objKey,
|
|
})
|
|
delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index
|
|
name: toMoveItBucketName,
|
|
key: addrKey,
|
|
})
|
|
|
|
return nil
|
|
}
|