Cancel GC is change mode requested #387

Merged
fyrchik merged 1 commit from dstepanov-yadro/frostfs-node:fix/fix/cancel_gc_on_set_mode into master 2023-05-25 10:47:10 +00:00
7 changed files with 244 additions and 65 deletions

View file

@ -489,4 +489,5 @@ const (
EngineShardsEvacuationFailedToReadObject = "failed to read object to evacuate"
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
)

View file

@ -304,8 +304,8 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error {
opts[i](&c)
}
s.m.Lock()
defer s.m.Unlock()
unlock := s.lockExclusive()
defer unlock()
ok, err := s.metaBase.Reload(c.metaOpts...)
if err != nil {
@ -335,3 +335,15 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error {
s.log.Info(logs.ShardTryingToRestoreReadwriteMode)
return s.setMode(mode.ReadWrite)
}
func (s *Shard) lockExclusive() func() {
s.setModeRequested.Store(true)
val := s.gcCancel.Load()
if val != nil {
cancelGC := val.(context.CancelFunc)
cancelGC()
}
s.m.Lock()
s.setModeRequested.Store(false)
return s.m.Unlock
}

View file

@ -53,70 +53,74 @@ func (s *Shard) delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
return DeleteRes{}, ErrDegradedMode
}
ln := len(prm.addr)
smalls := make(map[oid.Address][]byte, ln)
for i := range prm.addr {
if s.hasWriteCache() {
err := s.writeCache.Delete(ctx, prm.addr[i])
if err != nil && !IsErrNotFound(err) && !errors.Is(err, writecache.ErrReadOnly) {
s.log.Warn(logs.ShardCantDeleteObjectFromWriteCache, zap.String("error", err.Error()))
}
for _, addr := range prm.addr {
select {
case <-ctx.Done():
return DeleteRes{}, ctx.Err()
default:
}
var sPrm meta.StorageIDPrm
sPrm.SetAddress(prm.addr[i])
s.deleteObjectFromWriteCacheSafe(ctx, addr)
res, err := s.metaBase.StorageID(ctx, sPrm)
if err != nil {
s.log.Debug(logs.ShardCantGetStorageIDFromMetabase,
zap.Stringer("object", prm.addr[i]),
zap.String("error", err.Error()))
s.deleteFromBlobstorSafe(ctx, addr)
continue
}
if res.StorageID() != nil {
smalls[prm.addr[i]] = res.StorageID()
}
}
var delPrm meta.DeletePrm
delPrm.SetAddresses(prm.addr...)
res, err := s.metaBase.Delete(ctx, delPrm)
if err != nil {
return DeleteRes{}, err // stop on metabase error ?
}
var totalRemovedPayload uint64
s.decObjectCounterBy(physical, res.RawObjectsRemoved())
s.decObjectCounterBy(logical, res.AvailableObjectsRemoved())
for i := range prm.addr {
removedPayload := res.RemovedPhysicalObjectSizes()[i]
totalRemovedPayload += removedPayload
logicalRemovedPayload := res.RemovedLogicalObjectSizes()[i]
if logicalRemovedPayload > 0 {
s.addToContainerSize(prm.addr[i].Container().EncodeToString(), -int64(logicalRemovedPayload))
}
}
s.addToPayloadSize(-int64(totalRemovedPayload))
for i := range prm.addr {
var delPrm common.DeletePrm
delPrm.Address = prm.addr[i]
id := smalls[prm.addr[i]]
delPrm.StorageID = id
_, err = s.blobStor.Delete(ctx, delPrm)
if err != nil {
s.log.Debug(logs.ShardCantRemoveObjectFromBlobStor,
zap.Stringer("object_address", prm.addr[i]),
zap.String("error", err.Error()))
if err := s.deleteFromMetabase(ctx, addr); err != nil {
return DeleteRes{}, err // stop on metabase error ?
}
}
return DeleteRes{}, nil
}
func (s *Shard) deleteObjectFromWriteCacheSafe(ctx context.Context, addr oid.Address) {
if s.hasWriteCache() {
err := s.writeCache.Delete(ctx, addr)
if err != nil && !IsErrNotFound(err) && !errors.Is(err, writecache.ErrReadOnly) {
s.log.Warn(logs.ShardCantDeleteObjectFromWriteCache, zap.Error(err))
}
}
}
func (s *Shard) deleteFromBlobstorSafe(ctx context.Context, addr oid.Address) {
var sPrm meta.StorageIDPrm
sPrm.SetAddress(addr)
res, err := s.metaBase.StorageID(ctx, sPrm)
if err != nil {
s.log.Debug("can't get storage ID from metabase",
zap.Stringer("object", addr),
zap.String("error", err.Error()))
}
storageID := res.StorageID()
var delPrm common.DeletePrm
delPrm.Address = addr
delPrm.StorageID = storageID
_, err = s.blobStor.Delete(ctx, delPrm)
if err != nil {
s.log.Debug("can't remove object from blobStor",
zap.Stringer("object_address", addr),
zap.String("error", err.Error()))
}
}
func (s *Shard) deleteFromMetabase(ctx context.Context, addr oid.Address) error {
var delPrm meta.DeletePrm
delPrm.SetAddresses(addr)
res, err := s.metaBase.Delete(ctx, delPrm)
if err != nil {
return err
}
s.decObjectCounterBy(physical, res.RawObjectsRemoved())
s.decObjectCounterBy(logical, res.AvailableObjectsRemoved())
removedPayload := res.RemovedPhysicalObjectSizes()[0]
logicalRemovedPayload := res.RemovedLogicalObjectSizes()[0]
if logicalRemovedPayload > 0 {
s.addToContainerSize(addr.Container().EncodeToString(), -int64(logicalRemovedPayload))
}
s.addToPayloadSize(-int64(removedPayload))
return nil
}

View file

@ -197,6 +197,14 @@ func (gc *gc) stop() {
// with GC-marked graves.
// Does nothing if shard is in "read-only" mode.
func (s *Shard) removeGarbage() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.gcCancel.Store(cancel)
if s.setModeRequested.Load() {
return
}
s.m.RLock()
defer s.m.RUnlock()
@ -211,6 +219,12 @@ func (s *Shard) removeGarbage() {
var iterPrm meta.GarbageIterationPrm
iterPrm.SetHandler(func(g meta.GarbageObject) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
buf = append(buf, g.Address())
if len(buf) == s.rmBatchSize {
@ -237,7 +251,7 @@ func (s *Shard) removeGarbage() {
deletePrm.SetAddresses(buf...)
// delete accumulated objects
_, err = s.delete(context.TODO(), deletePrm)
_, err = s.delete(ctx, deletePrm)
if err != nil {
s.log.Warn(logs.ShardCouldNotDeleteTheObjects,
zap.String("error", err.Error()),

View file

@ -0,0 +1,144 @@
package shard
import (
"context"
"path/filepath"
"testing"
"time"
objectCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/blobovniczatree"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/common"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/fstree"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"github.com/panjf2000/ants/v2"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
)
func Test_ObjectNotFoundIfNotDeletedFromMetabase(t *testing.T) {
t.Parallel()
rootPath := t.TempDir()
var sh *Shard
l := &logger.Logger{Logger: zaptest.NewLogger(t)}
blobOpts := []blobstor.Option{
blobstor.WithLogger(&logger.Logger{Logger: zaptest.NewLogger(t)}),
blobstor.WithStorages([]blobstor.SubStorage{
{
Storage: blobovniczatree.NewBlobovniczaTree(
blobovniczatree.WithLogger(&logger.Logger{Logger: zaptest.NewLogger(t)}),
blobovniczatree.WithRootPath(filepath.Join(rootPath, "blob", "blobovnicza")),
blobovniczatree.WithBlobovniczaShallowDepth(1),
blobovniczatree.WithBlobovniczaShallowWidth(1)),
Policy: func(_ *object.Object, data []byte) bool {
return len(data) <= 1<<20
},
},
{
Storage: fstree.New(
fstree.WithPath(filepath.Join(rootPath, "blob"))),
},
}),
}
opts := []Option{
WithID(NewIDFromBytes([]byte{})),
WithLogger(l),
WithBlobStorOptions(blobOpts...),
WithMetaBaseOptions(
meta.WithPath(filepath.Join(rootPath, "meta")),
meta.WithEpochState(epochState{}),
),
WithPiloramaOptions(pilorama.WithPath(filepath.Join(rootPath, "pilorama"))),
WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) {
sh.HandleDeletedLocks(addresses)
}),
WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) {
sh.HandleExpiredLocks(ctx, epoch, a)
}),
WithGCWorkerPoolInitializer(func(sz int) util.WorkerPool {
pool, err := ants.NewPool(sz)
require.NoError(t, err)
return pool
}),
WithGCRemoverSleepInterval(1 * time.Second),
}
sh = New(opts...)
require.NoError(t, sh.Open())
require.NoError(t, sh.Init(context.Background()))
t.Cleanup(func() {
require.NoError(t, sh.Close())
})
cnr := cidtest.ID()
obj := testutil.GenerateObjectWithCID(cnr)
objID, _ := obj.ID()
var addr oid.Address
addr.SetContainer(cnr)
addr.SetObject(objID)
var putPrm PutPrm
putPrm.SetObject(obj)
_, err := sh.Put(context.Background(), putPrm)
require.NoError(t, err)
var getPrm GetPrm
getPrm.SetAddress(objectCore.AddressOf(obj))
_, err = sh.Get(context.Background(), getPrm)
require.NoError(t, err, "failed to get")
//inhume
var inhumePrm InhumePrm
inhumePrm.MarkAsGarbage(addr)
_, err = sh.Inhume(context.Background(), inhumePrm)
require.NoError(t, err, "failed to inhume")
_, err = sh.Get(context.Background(), getPrm)
require.Error(t, err, "get returned error")
require.True(t, IsErrNotFound(err), "invalid error type")
//storageID
var metaStIDPrm meta.StorageIDPrm
metaStIDPrm.SetAddress(addr)
storageID, err := sh.metaBase.StorageID(context.Background(), metaStIDPrm)
require.NoError(t, err, "failed to get storage ID")
//check existance in blobstore
var bsExisted common.ExistsPrm
bsExisted.Address = addr
bsExisted.StorageID = storageID.StorageID()
exRes, err := sh.blobStor.Exists(context.Background(), bsExisted)
require.NoError(t, err, "failed to check blobstore existance")
require.True(t, exRes.Exists, "invalid blobstore existance result")
//drop from blobstor
var bsDeletePrm common.DeletePrm
bsDeletePrm.Address = addr
bsDeletePrm.StorageID = storageID.StorageID()
_, err = sh.blobStor.Delete(context.Background(), bsDeletePrm)
require.NoError(t, err, "failed to delete from blobstore")
//check existance in blobstore
exRes, err = sh.blobStor.Exists(context.Background(), bsExisted)
require.NoError(t, err, "failed to check blobstore existance")
require.False(t, exRes.Exists, "invalid blobstore existance result")
//get should return object not found
_, err = sh.Get(context.Background(), getPrm)
require.Error(t, err, "get returned no error")
require.True(t, IsErrNotFound(err), "invalid error type")
}

View file

@ -19,8 +19,8 @@ var ErrDegradedMode = logicerr.New("shard is in degraded mode")
// Returns any error encountered that did not allow
// setting shard mode.
func (s *Shard) SetMode(m mode.Mode) error {
s.m.Lock()
defer s.m.Unlock()
unlock := s.lockExclusive()
defer unlock()
return s.setMode(m)
}

View file

@ -3,6 +3,7 @@ package shard
import (
"context"
"sync"
"sync/atomic"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
@ -32,6 +33,9 @@ type Shard struct {
metaBase *meta.DB
tsSource TombstoneSource
gcCancel atomic.Value
setModeRequested atomic.Bool
}
// Option represents Shard's constructor option.
@ -217,12 +221,12 @@ func WithWriteCache(use bool) Option {
}
// hasWriteCache returns bool if write cache exists on shards.
func (s Shard) hasWriteCache() bool {
func (s *Shard) hasWriteCache() bool {
return s.cfg.useWriteCache
}
// needRefillMetabase returns true if metabase is needed to be refilled.
func (s Shard) needRefillMetabase() bool {
func (s *Shard) needRefillMetabase() bool {
return s.cfg.refillMetabase
}