frostfs-node/pkg/local_object_storage/shard/control.go
Dmitrii Stepanov b01f05c951
All checks were successful
Vulncheck / Vulncheck (push) Successful in 1m15s
Pre-commit hooks / Pre-commit (push) Successful in 1m40s
Build / Build Components (push) Successful in 1m46s
Tests and linters / Run gofumpt (push) Successful in 3m36s
Tests and linters / Lint (push) Successful in 3m55s
Tests and linters / Staticcheck (push) Successful in 3m59s
Tests and linters / Tests (push) Successful in 4m19s
Tests and linters / Tests with -race (push) Successful in 4m33s
Tests and linters / gopls check (push) Successful in 4m40s
OCI image / Build container images (push) Successful in 5m17s
[#1689] shard: Use single qos.Limiter instance for shard and writecache
Before fix shard and writecache used the same instance of qos.Limiter.
In case of SIGHUP signal shard was closing qos.Limiter, but didn't
update writecache's pointer.

Now shard uses atomic pointer to qos.Limiter and shares it with writecache.
On SIGHUP shard updates atomic pointer value and closes old qos.Limiter.

Change-Id: Ic2ab62441d3872e71c5771f54d070e0ca48fe375
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2025-05-06 12:46:48 +03:00

484 lines
12 KiB
Go

package shard
import (
"context"
"errors"
"fmt"
"slices"
"sync"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
func (s *Shard) handleMetabaseFailure(ctx context.Context, stage string, err error) error {
s.log.Error(ctx, logs.ShardMetabaseFailureSwitchingMode,
zap.String("stage", stage),
zap.Stringer("mode", mode.ReadOnly),
zap.Error(err))
err = s.SetMode(ctx, mode.ReadOnly)
if err == nil {
return nil
}
s.log.Error(ctx, logs.ShardCantMoveShardToReadonlySwitchMode,
zap.String("stage", stage),
zap.Stringer("mode", mode.DegradedReadOnly),
zap.Error(err))
err = s.SetMode(ctx, mode.DegradedReadOnly)
if err != nil {
return fmt.Errorf("switch to mode %s", mode.DegradedReadOnly)
}
return nil
}
// Open opens all Shard's components.
func (s *Shard) Open(ctx context.Context) error {
components := []interface {
Open(context.Context, mode.Mode) error
}{
s.blobStor,
}
m := s.GetMode()
if !m.NoMetabase() {
components = append(components, s.metaBase)
}
if s.hasWriteCache() && !m.NoMetabase() {
components = append(components, s.writeCache)
}
if s.pilorama != nil {
components = append(components, s.pilorama)
}
for i, component := range components {
if err := component.Open(ctx, m); err != nil {
if component == s.metaBase {
// We must first open all other components to avoid
// opening non-existent DB in read-only mode.
for j := i + 1; j < len(components); j++ {
if err := components[j].Open(ctx, m); err != nil {
// Other components must be opened, fail.
return fmt.Errorf("open %T: %w", components[j], err)
}
}
err = s.handleMetabaseFailure(ctx, "open", err)
if err != nil {
return err
}
break
}
return fmt.Errorf("open %T: %w", component, err)
}
}
return nil
}
type metabaseSynchronizer Shard
func (x *metabaseSynchronizer) Init(ctx context.Context) error {
ctx, span := tracing.StartSpanFromContext(ctx, "metabaseSynchronizer.Init")
defer span.End()
return (*Shard)(x).refillMetabase(ctx)
}
// Init initializes all Shard's components.
func (s *Shard) Init(ctx context.Context) error {
m := s.GetMode()
if err := s.initializeComponents(ctx, m); err != nil {
return err
}
s.updateMetrics(ctx)
s.gc = &gc{
gcCfg: &s.gcCfg,
remover: s.removeGarbage,
stopChannel: make(chan struct{}),
newEpochChan: make(chan uint64),
newEpochHandlers: &newEpochHandlers{
cancelFunc: func() {},
handlers: []newEpochHandler{
s.collectExpiredLocks,
s.collectExpiredObjects,
s.collectExpiredTombstones,
s.collectExpiredMetrics,
},
},
}
if s.gc.metrics != nil {
s.gc.metrics.SetShardID(s.info.ID.String())
}
s.gc.init(ctx)
s.rb = newRebuilder()
if !m.NoMetabase() {
s.rb.Start(ctx, s.blobStor, s.metaBase, s.log)
}
s.writecacheSealCancel.Store(dummyCancel)
return nil
}
func (s *Shard) initializeComponents(ctx context.Context, m mode.Mode) error {
type initializer interface {
Init(context.Context) error
}
var components []initializer
if !m.NoMetabase() {
var initMetabase initializer
if s.NeedRefillMetabase() {
initMetabase = (*metabaseSynchronizer)(s)
} else {
initMetabase = s.metaBase
}
components = []initializer{
s.blobStor, initMetabase,
}
} else {
components = []initializer{s.blobStor}
}
if s.hasWriteCache() && !m.NoMetabase() {
components = append(components, s.writeCache)
}
if s.pilorama != nil {
components = append(components, s.pilorama)
}
for _, component := range components {
if err := component.Init(ctx); err != nil {
if component == s.metaBase {
if errors.Is(err, meta.ErrOutdatedVersion) || errors.Is(err, meta.ErrIncompletedUpgrade) {
return fmt.Errorf("metabase initialization: %w", err)
}
err = s.handleMetabaseFailure(ctx, "init", err)
if err != nil {
return err
}
break
}
return fmt.Errorf("initialize %T: %w", component, err)
}
}
return nil
}
func (s *Shard) refillMetabase(ctx context.Context) error {
path := s.metaBase.DumpInfo().Path
s.metricsWriter.SetRefillStatus(path, "running")
s.metricsWriter.SetRefillPercent(path, 0)
var success bool
defer func() {
if success {
s.metricsWriter.SetRefillStatus(path, "completed")
} else {
s.metricsWriter.SetRefillStatus(path, "failed")
}
}()
err := s.metaBase.Reset()
if err != nil {
return fmt.Errorf("reset metabase: %w", err)
}
withCount := true
totalObjects, err := s.blobStor.ObjectsCount(ctx)
if err != nil {
s.log.Warn(ctx, logs.EngineRefillFailedToGetObjectsCount, zap.Error(err))
withCount = false
}
eg, egCtx := errgroup.WithContext(ctx)
if s.refillMetabaseWorkersCount > 0 {
eg.SetLimit(s.refillMetabaseWorkersCount)
}
var completedCount uint64
var metricGuard sync.Mutex
itErr := blobstor.IterateBinaryObjects(egCtx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error {
eg.Go(func() error {
var success bool
defer func() {
s.metricsWriter.IncRefillObjectsCount(path, len(data), success)
if withCount {
metricGuard.Lock()
completedCount++
s.metricsWriter.SetRefillPercent(path, uint32(completedCount*100/totalObjects))
metricGuard.Unlock()
}
}()
if err := s.refillObject(egCtx, data, addr, descriptor); err != nil {
return err
}
success = true
return nil
})
select {
case <-egCtx.Done():
return egCtx.Err()
default:
return nil
}
})
egErr := eg.Wait()
err = errors.Join(egErr, itErr)
if err != nil {
return fmt.Errorf("put objects to the meta: %w", err)
}
err = s.metaBase.SyncCounters()
if err != nil {
return fmt.Errorf("sync object counters: %w", err)
}
success = true
s.metricsWriter.SetRefillPercent(path, 100)
return nil
}
func (s *Shard) refillObject(ctx context.Context, data []byte, addr oid.Address, descriptor []byte) error {
obj := objectSDK.New()
if err := obj.Unmarshal(data); err != nil {
s.log.Warn(ctx, logs.ShardCouldNotUnmarshalObject,
zap.Stringer("address", addr),
zap.Error(err))
return nil
}
hasIndexedAttribute := slices.IndexFunc(obj.Attributes(), func(attr objectSDK.Attribute) bool { return meta.IsAtrributeIndexed(attr.Key()) }) > 0
var isIndexedContainer bool
if hasIndexedAttribute {
info, err := s.containerInfo.Info(ctx, addr.Container())
if err != nil {
return err
}
if info.Removed {
s.log.Debug(ctx, logs.ShardSkipObjectFromResyncContainerDeleted, zap.Stringer("address", addr))
return nil
}
isIndexedContainer = info.Indexed
}
var err error
switch obj.Type() {
case objectSDK.TypeTombstone:
err = s.refillTombstoneObject(ctx, obj)
case objectSDK.TypeLock:
err = s.refillLockObject(ctx, obj)
default:
}
if err != nil {
return err
}
var mPrm meta.PutPrm
mPrm.SetObject(obj)
mPrm.SetStorageID(descriptor)
mPrm.SetIndexAttributes(hasIndexedAttribute && isIndexedContainer)
_, err = s.metaBase.Put(ctx, mPrm)
if err != nil && !client.IsErrObjectAlreadyRemoved(err) && !errors.Is(err, meta.ErrObjectIsExpired) {
return err
}
return nil
}
func (s *Shard) refillLockObject(ctx context.Context, obj *objectSDK.Object) error {
var lock objectSDK.Lock
if err := lock.Unmarshal(obj.Payload()); err != nil {
return fmt.Errorf("unmarshal lock content: %w", err)
}
locked := make([]oid.ID, lock.NumberOfMembers())
lock.ReadMembers(locked)
cnr, _ := obj.ContainerID()
id, _ := obj.ID()
err := s.metaBase.Lock(ctx, cnr, id, locked)
if err != nil {
return fmt.Errorf("lock objects: %w", err)
}
return nil
}
func (s *Shard) refillTombstoneObject(ctx context.Context, obj *objectSDK.Object) error {
tombstone := objectSDK.NewTombstone()
if err := tombstone.Unmarshal(obj.Payload()); err != nil {
return fmt.Errorf("unmarshal tombstone content: %w", err)
}
tombAddr := object.AddressOf(obj)
memberIDs := tombstone.Members()
tombMembers := make([]oid.Address, 0, len(memberIDs))
for i := range memberIDs {
a := tombAddr
a.SetObject(memberIDs[i])
tombMembers = append(tombMembers, a)
}
var inhumePrm meta.InhumePrm
inhumePrm.SetTombstoneAddress(tombAddr)
inhumePrm.SetAddresses(tombMembers...)
_, err := s.metaBase.Inhume(ctx, inhumePrm)
if err != nil {
return fmt.Errorf("inhume objects: %w", err)
}
return nil
}
// Close releases all Shard's components.
func (s *Shard) Close(ctx context.Context) error {
unlock := s.lockExclusive()
if s.rb != nil {
s.rb.Stop(ctx, s.log)
}
var components []interface{ Close(context.Context) error }
if s.pilorama != nil {
components = append(components, s.pilorama)
}
if s.hasWriteCache() {
prev := s.writecacheSealCancel.Swap(notInitializedCancel)
prev.cancel() // no need to wait: writecache.Seal and writecache.Close lock the same mutex
components = append(components, s.writeCache)
}
components = append(components, s.blobStor, s.metaBase)
var lastErr error
for _, component := range components {
if err := component.Close(ctx); err != nil {
lastErr = err
s.log.Error(ctx, logs.ShardCouldNotCloseShardComponent, zap.Error(err))
}
}
if s.opsLimiter != nil {
s.opsLimiter.Close()
}
unlock()
// GC waits for handlers and remover to complete. Handlers may try to lock shard's lock.
// So to prevent deadlock GC stopping is outside of exclusive lock.
// If Init/Open was unsuccessful gc can be nil.
if s.gc != nil {
s.gc.stop(ctx)
}
return lastErr
}
// Reload reloads configuration portions that are necessary.
// If a config option is invalid, it logs an error and returns nil.
// If there was a problem with applying new configuration, an error is returned.
func (s *Shard) Reload(ctx context.Context, opts ...Option) error {
ctx, span := tracing.StartSpanFromContext(ctx, "Shard.Reload")
defer span.End()
// Do not use defaultCfg here missing options need not be reloaded.
var c cfg
for i := range opts {
opts[i](&c)
}
unlock := s.lockExclusive()
defer unlock()
s.rb.Stop(ctx, s.log)
if !s.info.Mode.NoMetabase() {
defer func() {
s.rb.Start(ctx, s.blobStor, s.metaBase, s.log)
}()
}
ok, err := s.metaBase.Reload(ctx, c.metaOpts...)
if err != nil {
if errors.Is(err, meta.ErrDegradedMode) {
s.log.Error(ctx, logs.ShardCantOpenMetabaseMoveToADegradedMode, zap.Error(err))
_ = s.setMode(ctx, mode.DegradedReadOnly)
}
return err
}
if ok {
var err error
if c.refillMetabase {
// Here we refill metabase only if a new instance was opened. This is a feature,
// we don't want to hang for some time just because we forgot to change
// config after the node was updated.
err = s.refillMetabase(ctx)
} else {
err = s.metaBase.Init(ctx)
}
if err != nil {
s.log.Error(ctx, logs.ShardCantInitializeMetabaseMoveToADegradedreadonlyMode, zap.Error(err))
_ = s.setMode(ctx, mode.DegradedReadOnly)
return err
}
}
if err := s.setMode(ctx, c.info.Mode); err != nil {
return err
}
s.reloadOpsLimiter(&c)
return nil
}
func (s *Shard) reloadOpsLimiter(c *cfg) {
if c.configOpsLimiter != nil {
old := s.opsLimiter.ptr.Swap(&qosLimiterHolder{Limiter: c.configOpsLimiter})
old.Close()
s.opsLimiter.SetParentID(s.info.ID.String())
}
}
func (s *Shard) lockExclusive() func() {
s.setModeRequested.Store(true)
val := s.gcCancel.Load()
if val != nil {
cancelGC := val.(context.CancelFunc)
cancelGC()
}
if c := s.writecacheSealCancel.Load(); c != nil {
c.cancel()
}
s.m.Lock()
s.setModeRequested.Store(false)
return s.m.Unlock
}