From 261335100845817f99133eac2bb5d658c3cd3d9e Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 23 May 2023 15:02:38 +0300 Subject: [PATCH] [#387] gc: Cancel GC is change mode requested Signed-off-by: Dmitrii Stepanov --- internal/logs/logs.go | 1 + pkg/local_object_storage/shard/control.go | 16 +- pkg/local_object_storage/shard/delete.go | 120 ++++++++------- pkg/local_object_storage/shard/gc.go | 16 +- .../shard/gc_internal_test.go | 144 ++++++++++++++++++ pkg/local_object_storage/shard/mode.go | 4 +- pkg/local_object_storage/shard/shard.go | 8 +- 7 files changed, 244 insertions(+), 65 deletions(-) create mode 100644 pkg/local_object_storage/shard/gc_internal_test.go diff --git a/internal/logs/logs.go b/internal/logs/logs.go index b84746a72..a862e745a 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -489,4 +489,5 @@ const ( EngineShardsEvacuationFailedToReadObject = "failed to read object to evacuate" EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node" ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked" + ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache" ) diff --git a/pkg/local_object_storage/shard/control.go b/pkg/local_object_storage/shard/control.go index e74f235f8..f885451af 100644 --- a/pkg/local_object_storage/shard/control.go +++ b/pkg/local_object_storage/shard/control.go @@ -304,8 +304,8 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error { opts[i](&c) } - s.m.Lock() - defer s.m.Unlock() + unlock := s.lockExclusive() + defer unlock() ok, err := s.metaBase.Reload(c.metaOpts...) if err != nil { @@ -335,3 +335,15 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error { s.log.Info(logs.ShardTryingToRestoreReadwriteMode) return s.setMode(mode.ReadWrite) } + +func (s *Shard) lockExclusive() func() { + s.setModeRequested.Store(true) + val := s.gcCancel.Load() + if val != nil { + cancelGC := val.(context.CancelFunc) + cancelGC() + } + s.m.Lock() + s.setModeRequested.Store(false) + return s.m.Unlock +} diff --git a/pkg/local_object_storage/shard/delete.go b/pkg/local_object_storage/shard/delete.go index f086aa30f..4eb7ad6af 100644 --- a/pkg/local_object_storage/shard/delete.go +++ b/pkg/local_object_storage/shard/delete.go @@ -53,70 +53,74 @@ func (s *Shard) delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { return DeleteRes{}, ErrDegradedMode } - ln := len(prm.addr) - - smalls := make(map[oid.Address][]byte, ln) - - for i := range prm.addr { - if s.hasWriteCache() { - err := s.writeCache.Delete(ctx, prm.addr[i]) - if err != nil && !IsErrNotFound(err) && !errors.Is(err, writecache.ErrReadOnly) { - s.log.Warn(logs.ShardCantDeleteObjectFromWriteCache, zap.String("error", err.Error())) - } + for _, addr := range prm.addr { + select { + case <-ctx.Done(): + return DeleteRes{}, ctx.Err() + default: } - var sPrm meta.StorageIDPrm - sPrm.SetAddress(prm.addr[i]) + s.deleteObjectFromWriteCacheSafe(ctx, addr) - res, err := s.metaBase.StorageID(ctx, sPrm) - if err != nil { - s.log.Debug(logs.ShardCantGetStorageIDFromMetabase, - zap.Stringer("object", prm.addr[i]), - zap.String("error", err.Error())) + s.deleteFromBlobstorSafe(ctx, addr) - continue - } - - if res.StorageID() != nil { - smalls[prm.addr[i]] = res.StorageID() - } - } - - var delPrm meta.DeletePrm - delPrm.SetAddresses(prm.addr...) - - res, err := s.metaBase.Delete(ctx, delPrm) - if err != nil { - return DeleteRes{}, err // stop on metabase error ? - } - - var totalRemovedPayload uint64 - - s.decObjectCounterBy(physical, res.RawObjectsRemoved()) - s.decObjectCounterBy(logical, res.AvailableObjectsRemoved()) - for i := range prm.addr { - removedPayload := res.RemovedPhysicalObjectSizes()[i] - totalRemovedPayload += removedPayload - logicalRemovedPayload := res.RemovedLogicalObjectSizes()[i] - if logicalRemovedPayload > 0 { - s.addToContainerSize(prm.addr[i].Container().EncodeToString(), -int64(logicalRemovedPayload)) - } - } - s.addToPayloadSize(-int64(totalRemovedPayload)) - - for i := range prm.addr { - var delPrm common.DeletePrm - delPrm.Address = prm.addr[i] - id := smalls[prm.addr[i]] - delPrm.StorageID = id - - _, err = s.blobStor.Delete(ctx, delPrm) - if err != nil { - s.log.Debug(logs.ShardCantRemoveObjectFromBlobStor, - zap.Stringer("object_address", prm.addr[i]), - zap.String("error", err.Error())) + if err := s.deleteFromMetabase(ctx, addr); err != nil { + return DeleteRes{}, err // stop on metabase error ? } } return DeleteRes{}, nil } + +func (s *Shard) deleteObjectFromWriteCacheSafe(ctx context.Context, addr oid.Address) { + if s.hasWriteCache() { + err := s.writeCache.Delete(ctx, addr) + if err != nil && !IsErrNotFound(err) && !errors.Is(err, writecache.ErrReadOnly) { + s.log.Warn(logs.ShardCantDeleteObjectFromWriteCache, zap.Error(err)) + } + } +} + +func (s *Shard) deleteFromBlobstorSafe(ctx context.Context, addr oid.Address) { + var sPrm meta.StorageIDPrm + sPrm.SetAddress(addr) + + res, err := s.metaBase.StorageID(ctx, sPrm) + if err != nil { + s.log.Debug("can't get storage ID from metabase", + zap.Stringer("object", addr), + zap.String("error", err.Error())) + } + storageID := res.StorageID() + + var delPrm common.DeletePrm + delPrm.Address = addr + delPrm.StorageID = storageID + + _, err = s.blobStor.Delete(ctx, delPrm) + if err != nil { + s.log.Debug("can't remove object from blobStor", + zap.Stringer("object_address", addr), + zap.String("error", err.Error())) + } +} + +func (s *Shard) deleteFromMetabase(ctx context.Context, addr oid.Address) error { + var delPrm meta.DeletePrm + delPrm.SetAddresses(addr) + + res, err := s.metaBase.Delete(ctx, delPrm) + if err != nil { + return err + } + s.decObjectCounterBy(physical, res.RawObjectsRemoved()) + s.decObjectCounterBy(logical, res.AvailableObjectsRemoved()) + removedPayload := res.RemovedPhysicalObjectSizes()[0] + logicalRemovedPayload := res.RemovedLogicalObjectSizes()[0] + if logicalRemovedPayload > 0 { + s.addToContainerSize(addr.Container().EncodeToString(), -int64(logicalRemovedPayload)) + } + s.addToPayloadSize(-int64(removedPayload)) + + return nil +} diff --git a/pkg/local_object_storage/shard/gc.go b/pkg/local_object_storage/shard/gc.go index f4f7a21e2..97899acc2 100644 --- a/pkg/local_object_storage/shard/gc.go +++ b/pkg/local_object_storage/shard/gc.go @@ -197,6 +197,14 @@ func (gc *gc) stop() { // with GC-marked graves. // Does nothing if shard is in "read-only" mode. func (s *Shard) removeGarbage() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + s.gcCancel.Store(cancel) + if s.setModeRequested.Load() { + return + } + s.m.RLock() defer s.m.RUnlock() @@ -211,6 +219,12 @@ func (s *Shard) removeGarbage() { var iterPrm meta.GarbageIterationPrm iterPrm.SetHandler(func(g meta.GarbageObject) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + buf = append(buf, g.Address()) if len(buf) == s.rmBatchSize { @@ -237,7 +251,7 @@ func (s *Shard) removeGarbage() { deletePrm.SetAddresses(buf...) // delete accumulated objects - _, err = s.delete(context.TODO(), deletePrm) + _, err = s.delete(ctx, deletePrm) if err != nil { s.log.Warn(logs.ShardCouldNotDeleteTheObjects, zap.String("error", err.Error()), diff --git a/pkg/local_object_storage/shard/gc_internal_test.go b/pkg/local_object_storage/shard/gc_internal_test.go new file mode 100644 index 000000000..d94029976 --- /dev/null +++ b/pkg/local_object_storage/shard/gc_internal_test.go @@ -0,0 +1,144 @@ +package shard + +import ( + "context" + "path/filepath" + "testing" + "time" + + objectCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/blobovniczatree" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/common" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/fstree" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil" + meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger" + cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/panjf2000/ants/v2" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zaptest" +) + +func Test_ObjectNotFoundIfNotDeletedFromMetabase(t *testing.T) { + t.Parallel() + + rootPath := t.TempDir() + + var sh *Shard + + l := &logger.Logger{Logger: zaptest.NewLogger(t)} + blobOpts := []blobstor.Option{ + blobstor.WithLogger(&logger.Logger{Logger: zaptest.NewLogger(t)}), + blobstor.WithStorages([]blobstor.SubStorage{ + { + Storage: blobovniczatree.NewBlobovniczaTree( + blobovniczatree.WithLogger(&logger.Logger{Logger: zaptest.NewLogger(t)}), + blobovniczatree.WithRootPath(filepath.Join(rootPath, "blob", "blobovnicza")), + blobovniczatree.WithBlobovniczaShallowDepth(1), + blobovniczatree.WithBlobovniczaShallowWidth(1)), + Policy: func(_ *object.Object, data []byte) bool { + return len(data) <= 1<<20 + }, + }, + { + Storage: fstree.New( + fstree.WithPath(filepath.Join(rootPath, "blob"))), + }, + }), + } + + opts := []Option{ + WithID(NewIDFromBytes([]byte{})), + WithLogger(l), + WithBlobStorOptions(blobOpts...), + WithMetaBaseOptions( + meta.WithPath(filepath.Join(rootPath, "meta")), + meta.WithEpochState(epochState{}), + ), + WithPiloramaOptions(pilorama.WithPath(filepath.Join(rootPath, "pilorama"))), + WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) { + sh.HandleDeletedLocks(addresses) + }), + WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) { + sh.HandleExpiredLocks(ctx, epoch, a) + }), + WithGCWorkerPoolInitializer(func(sz int) util.WorkerPool { + pool, err := ants.NewPool(sz) + require.NoError(t, err) + return pool + }), + WithGCRemoverSleepInterval(1 * time.Second), + } + + sh = New(opts...) + + require.NoError(t, sh.Open()) + require.NoError(t, sh.Init(context.Background())) + + t.Cleanup(func() { + require.NoError(t, sh.Close()) + }) + + cnr := cidtest.ID() + obj := testutil.GenerateObjectWithCID(cnr) + objID, _ := obj.ID() + var addr oid.Address + addr.SetContainer(cnr) + addr.SetObject(objID) + + var putPrm PutPrm + putPrm.SetObject(obj) + + _, err := sh.Put(context.Background(), putPrm) + require.NoError(t, err) + + var getPrm GetPrm + getPrm.SetAddress(objectCore.AddressOf(obj)) + _, err = sh.Get(context.Background(), getPrm) + require.NoError(t, err, "failed to get") + + //inhume + var inhumePrm InhumePrm + inhumePrm.MarkAsGarbage(addr) + _, err = sh.Inhume(context.Background(), inhumePrm) + require.NoError(t, err, "failed to inhume") + _, err = sh.Get(context.Background(), getPrm) + require.Error(t, err, "get returned error") + require.True(t, IsErrNotFound(err), "invalid error type") + + //storageID + var metaStIDPrm meta.StorageIDPrm + metaStIDPrm.SetAddress(addr) + storageID, err := sh.metaBase.StorageID(context.Background(), metaStIDPrm) + require.NoError(t, err, "failed to get storage ID") + + //check existance in blobstore + var bsExisted common.ExistsPrm + bsExisted.Address = addr + bsExisted.StorageID = storageID.StorageID() + exRes, err := sh.blobStor.Exists(context.Background(), bsExisted) + require.NoError(t, err, "failed to check blobstore existance") + require.True(t, exRes.Exists, "invalid blobstore existance result") + + //drop from blobstor + var bsDeletePrm common.DeletePrm + bsDeletePrm.Address = addr + bsDeletePrm.StorageID = storageID.StorageID() + _, err = sh.blobStor.Delete(context.Background(), bsDeletePrm) + require.NoError(t, err, "failed to delete from blobstore") + + //check existance in blobstore + exRes, err = sh.blobStor.Exists(context.Background(), bsExisted) + require.NoError(t, err, "failed to check blobstore existance") + require.False(t, exRes.Exists, "invalid blobstore existance result") + + //get should return object not found + _, err = sh.Get(context.Background(), getPrm) + require.Error(t, err, "get returned no error") + require.True(t, IsErrNotFound(err), "invalid error type") +} diff --git a/pkg/local_object_storage/shard/mode.go b/pkg/local_object_storage/shard/mode.go index 50c52accc..efd41863b 100644 --- a/pkg/local_object_storage/shard/mode.go +++ b/pkg/local_object_storage/shard/mode.go @@ -19,8 +19,8 @@ var ErrDegradedMode = logicerr.New("shard is in degraded mode") // Returns any error encountered that did not allow // setting shard mode. func (s *Shard) SetMode(m mode.Mode) error { - s.m.Lock() - defer s.m.Unlock() + unlock := s.lockExclusive() + defer unlock() return s.setMode(m) } diff --git a/pkg/local_object_storage/shard/shard.go b/pkg/local_object_storage/shard/shard.go index e0059105f..65cc1ef55 100644 --- a/pkg/local_object_storage/shard/shard.go +++ b/pkg/local_object_storage/shard/shard.go @@ -3,6 +3,7 @@ package shard import ( "context" "sync" + "sync/atomic" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" @@ -32,6 +33,9 @@ type Shard struct { metaBase *meta.DB tsSource TombstoneSource + + gcCancel atomic.Value + setModeRequested atomic.Bool } // Option represents Shard's constructor option. @@ -217,12 +221,12 @@ func WithWriteCache(use bool) Option { } // hasWriteCache returns bool if write cache exists on shards. -func (s Shard) hasWriteCache() bool { +func (s *Shard) hasWriteCache() bool { return s.cfg.useWriteCache } // needRefillMetabase returns true if metabase is needed to be refilled. -func (s Shard) needRefillMetabase() bool { +func (s *Shard) needRefillMetabase() bool { return s.cfg.refillMetabase }