package meta import ( "bytes" "context" "errors" "fmt" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" "go.etcd.io/bbolt" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) // DeletePrm groups the parameters of Delete operation. type DeletePrm struct { addrs []oid.Address } // DeleteRes groups the resulting values of Delete operation. type DeleteRes struct { rawRemoved uint64 availableRemoved uint64 sizes []uint64 availableSizes []uint64 removedByCnrID map[cid.ID]ObjectCounters } // AvailableObjectsRemoved returns the number of removed available // objects. func (d DeleteRes) AvailableObjectsRemoved() uint64 { return d.availableRemoved } // RemovedByCnrID returns the number of removed objects by container ID. func (d DeleteRes) RemovedByCnrID() map[cid.ID]ObjectCounters { return d.removedByCnrID } // RawObjectsRemoved returns the number of removed raw objects. func (d DeleteRes) RawObjectsRemoved() uint64 { return d.rawRemoved } // RemovedPhysicalObjectSizes returns the sizes of removed physical objects. func (d DeleteRes) RemovedPhysicalObjectSizes() []uint64 { return d.sizes } // RemovedLogicalObjectSizes returns the sizes of removed logical objects. func (d DeleteRes) RemovedLogicalObjectSizes() []uint64 { return d.availableSizes } // SetAddresses is a Delete option to set the addresses of the objects to delete. // // Option is required. func (p *DeletePrm) SetAddresses(addrs ...oid.Address) { p.addrs = addrs } type referenceNumber struct { all, cur int addr oid.Address obj *objectSDK.Object } type referenceCounter map[string]*referenceNumber // Delete removed object records from metabase indexes. func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { var ( startedAt = time.Now() deleted = false ) defer func() { db.metrics.AddMethodDuration("Delete", time.Since(startedAt), deleted) }() _, span := tracing.StartSpanFromContext(ctx, "metabase.Delete", trace.WithAttributes( attribute.Int("addr_count", len(prm.addrs)), )) defer span.End() db.modeMtx.RLock() defer db.modeMtx.RUnlock() if db.mode.NoMetabase() { return DeleteRes{}, ErrDegradedMode } else if db.mode.ReadOnly() { return DeleteRes{}, ErrReadOnlyMode } var err error var res DeleteRes err = db.boltDB.Update(func(tx *bbolt.Tx) error { res, err = db.deleteGroup(tx, prm.addrs) return err }) if err == nil { deleted = true for i := range prm.addrs { storagelog.Write(db.log, storagelog.AddressField(prm.addrs[i]), storagelog.OpField("metabase DELETE")) } } return res, metaerr.Wrap(err) } // deleteGroup deletes object from the metabase. Handles removal of the // references of the split objects. func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (DeleteRes, error) { res := DeleteRes{ sizes: make([]uint64, len(addrs)), availableSizes: make([]uint64, len(addrs)), removedByCnrID: make(map[cid.ID]ObjectCounters), } refCounter := make(referenceCounter, len(addrs)) currEpoch := db.epochState.CurrentEpoch() for i := range addrs { removed, available, size, err := db.delete(tx, addrs[i], refCounter, currEpoch) if err != nil { return DeleteRes{}, err // maybe log and continue? } if removed { if v, ok := res.removedByCnrID[addrs[i].Container()]; ok { v.Phy++ res.removedByCnrID[addrs[i].Container()] = v } else { res.removedByCnrID[addrs[i].Container()] = ObjectCounters{ Phy: 1, } } res.rawRemoved++ res.sizes[i] = size } if available { if v, ok := res.removedByCnrID[addrs[i].Container()]; ok { v.Logic++ res.removedByCnrID[addrs[i].Container()] = v } else { res.removedByCnrID[addrs[i].Container()] = ObjectCounters{ Logic: 1, } } res.availableRemoved++ res.availableSizes[i] = size } } if res.rawRemoved > 0 { err := db.updateShardObjectCounter(tx, phy, res.rawRemoved, false) if err != nil { return DeleteRes{}, fmt.Errorf("could not decrease phy object counter: %w", err) } } if res.availableRemoved > 0 { err := db.updateShardObjectCounter(tx, logical, res.availableRemoved, false) if err != nil { return DeleteRes{}, fmt.Errorf("could not decrease logical object counter: %w", err) } } if err := db.updateContainerCounter(tx, res.removedByCnrID, false); err != nil { return DeleteRes{}, fmt.Errorf("could not decrease container object counter: %w", err) } for _, refNum := range refCounter { if refNum.cur == refNum.all { err := db.deleteObject(tx, refNum.obj, true) if err != nil { return DeleteRes{}, err // maybe log and continue? } } } return res, nil } // delete removes object indexes from the metabase. Counts the references // of the object that is being removed. // The first return value indicates if an object has been removed. (removing a // non-exist object is error-free). The second return value indicates if an // object was available before the removal (for calculating the logical object // counter). The third return value is removed object payload size. func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, uint64, error) { key := make([]byte, addressKeySize) addrKey := addressKey(addr, key) garbageBKT := tx.Bucket(garbageBucketName) graveyardBKT := tx.Bucket(graveyardBucketName) removeAvailableObject := inGraveyardWithKey(addrKey, graveyardBKT, garbageBKT) == 0 // remove record from the garbage bucket if garbageBKT != nil { err := garbageBKT.Delete(addrKey) if err != nil { return false, false, 0, fmt.Errorf("could not remove from garbage bucket: %w", err) } } // unmarshal object, work only with physically stored (raw == true) objects obj, err := db.get(tx, addr, key, false, true, currEpoch) if err != nil { var siErr *objectSDK.SplitInfoError if client.IsErrObjectNotFound(err) || errors.As(err, &siErr) { return false, false, 0, nil } return false, false, 0, err } // if object is an only link to a parent, then remove parent if parent := obj.Parent(); parent != nil { parAddr := object.AddressOf(parent) sParAddr := addressKey(parAddr, key) k := string(sParAddr) nRef, ok := refCounter[k] if !ok { nRef = &referenceNumber{ all: parentLength(tx, parAddr), addr: parAddr, obj: parent, } refCounter[k] = nRef } nRef.cur++ } // remove object err = db.deleteObject(tx, obj, false) if err != nil { return false, false, 0, fmt.Errorf("could not remove object: %w", err) } return true, removeAvailableObject, obj.PayloadSize(), nil } func (db *DB) deleteObject( tx *bbolt.Tx, obj *objectSDK.Object, isParent bool, ) error { err := delUniqueIndexes(tx, obj, isParent) if err != nil { return fmt.Errorf("can't remove unique indexes") } err = updateListIndexes(tx, obj, delListIndexItem) if err != nil { return fmt.Errorf("can't remove list indexes: %w", err) } err = updateFKBTIndexes(tx, obj, delFKBTIndexItem) if err != nil { return fmt.Errorf("can't remove fake bucket tree indexes: %w", err) } return nil } // parentLength returns amount of available children from parentid index. func parentLength(tx *bbolt.Tx, addr oid.Address) int { bucketName := make([]byte, bucketKeySize) bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:])) if bkt == nil { return 0 } lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:]))) if err != nil { return 0 } return len(lst) } func delUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) { bkt := tx.Bucket(item.name) if bkt != nil { _ = bkt.Delete(item.key) // ignore error, best effort there } } func delFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error { bkt := tx.Bucket(item.name) if bkt == nil { return nil } fkbtRoot := bkt.Bucket(item.key) if fkbtRoot == nil { return nil } _ = fkbtRoot.Delete(item.val) // ignore error, best effort there return nil } func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error { bkt := tx.Bucket(item.name) if bkt == nil { return nil } lst, err := decodeList(bkt.Get(item.key)) if err != nil || len(lst) == 0 { return nil } // remove element from the list for i := range lst { if bytes.Equal(item.val, lst[i]) { copy(lst[i:], lst[i+1:]) lst = lst[:len(lst)-1] break } } // if list empty, remove the key from bucket if len(lst) == 0 { _ = bkt.Delete(item.key) // ignore error, best effort there return nil } // if list is not empty, then update it encodedLst, err := encodeList(lst) if err != nil { return nil // ignore error, best effort there } _ = bkt.Put(item.key, encodedLst) // ignore error, best effort there return nil } func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error { addr := object.AddressOf(obj) objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) addrKey := addressKey(addr, make([]byte, addressKeySize)) cnr := addr.Container() bucketName := make([]byte, bucketKeySize) // add value to primary unique bucket if !isParent { switch obj.Type() { case objectSDK.TypeRegular: bucketName = primaryBucketName(cnr, bucketName) case objectSDK.TypeTombstone: bucketName = tombstoneBucketName(cnr, bucketName) case objectSDK.TypeLock: bucketName = bucketNameLockers(cnr, bucketName) default: return ErrUnknownObjectType } delUniqueIndexItem(tx, namedBucketItem{ name: bucketName, key: objKey, }) } else { delUniqueIndexItem(tx, namedBucketItem{ name: parentBucketName(cnr, bucketName), key: objKey, }) } delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index name: smallBucketName(cnr, bucketName), key: objKey, }) delUniqueIndexItem(tx, namedBucketItem{ // remove from root index name: rootBucketName(cnr, bucketName), key: objKey, }) delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index name: toMoveItBucketName, key: addrKey, }) return nil }