All checks were successful
Vulncheck / Vulncheck (push) Successful in 1m15s
Pre-commit hooks / Pre-commit (push) Successful in 1m40s
Build / Build Components (push) Successful in 1m46s
Tests and linters / Run gofumpt (push) Successful in 3m36s
Tests and linters / Lint (push) Successful in 3m55s
Tests and linters / Staticcheck (push) Successful in 3m59s
Tests and linters / Tests (push) Successful in 4m19s
Tests and linters / Tests with -race (push) Successful in 4m33s
Tests and linters / gopls check (push) Successful in 4m40s
OCI image / Build container images (push) Successful in 5m17s
Before this fix, the shard and the writecache used the same qos.Limiter instance: on SIGHUP the shard closed the qos.Limiter but did not update the writecache's pointer. Now the shard keeps the qos.Limiter behind an atomic pointer and shares it with the writecache; on SIGHUP the shard swaps the atomic pointer to the new qos.Limiter and then closes the old one.
Change-Id: Ic2ab62441d3872e71c5771f54d070e0ca48fe375
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
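A minimal, illustrative sketch of the pattern described above: both components share a holder with an atomic pointer, a reload swaps the pointer and only then closes the old limiter, so readers loading through the holder never keep a closed instance. The Limiter and limiterHolder names here are made up for the example and are not the actual frostfs-node types.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// Limiter stands in for qos.Limiter in this sketch.
	type Limiter struct{ rps int }

	func (l *Limiter) Close() { fmt.Println("closing limiter with rps", l.rps) }

	// limiterHolder is shared by both the shard and the writecache.
	type limiterHolder struct {
		ptr atomic.Pointer[Limiter]
	}

	// Current returns the limiter currently published in the holder.
	func (h *limiterHolder) Current() *Limiter { return h.ptr.Load() }

	// Reload atomically publishes the new limiter and closes the old one,
	// so other components holding the same holder never observe a closed limiter.
	func (h *limiterHolder) Reload(newL *Limiter) {
		old := h.ptr.Swap(newL)
		if old != nil {
			old.Close()
		}
	}

	func main() {
		h := &limiterHolder{}
		h.Reload(&Limiter{rps: 100}) // initial configuration
		h.Reload(&Limiter{rps: 200}) // SIGHUP: swap first, close the old limiter afterwards
		fmt.Println("current rps:", h.Current().rps)
	}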
484 lines
12 KiB
Go
package shard

import (
	"context"
	"errors"
	"fmt"
	"slices"
	"sync"

	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

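// handleMetabaseFailure logs the metabase failure at the given stage and
// switches the shard to read-only mode; if that fails too, it switches to
// degraded read-only mode.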
func (s *Shard) handleMetabaseFailure(ctx context.Context, stage string, err error) error {
	s.log.Error(ctx, logs.ShardMetabaseFailureSwitchingMode,
		zap.String("stage", stage),
		zap.Stringer("mode", mode.ReadOnly),
		zap.Error(err))

	err = s.SetMode(ctx, mode.ReadOnly)
	if err == nil {
		return nil
	}

	s.log.Error(ctx, logs.ShardCantMoveShardToReadonlySwitchMode,
		zap.String("stage", stage),
		zap.Stringer("mode", mode.DegradedReadOnly),
		zap.Error(err))

	err = s.SetMode(ctx, mode.DegradedReadOnly)
	if err != nil {
		return fmt.Errorf("switch to mode %s", mode.DegradedReadOnly)
	}
	return nil
}

// Open opens all Shard's components.
func (s *Shard) Open(ctx context.Context) error {
	components := []interface {
		Open(context.Context, mode.Mode) error
	}{
		s.blobStor,
	}
	m := s.GetMode()

	if !m.NoMetabase() {
		components = append(components, s.metaBase)
	}

	if s.hasWriteCache() && !m.NoMetabase() {
		components = append(components, s.writeCache)
	}

	if s.pilorama != nil {
		components = append(components, s.pilorama)
	}

	for i, component := range components {
		if err := component.Open(ctx, m); err != nil {
			if component == s.metaBase {
				// We must first open all other components to avoid
				// opening non-existent DB in read-only mode.
				for j := i + 1; j < len(components); j++ {
					if err := components[j].Open(ctx, m); err != nil {
						// Other components must be opened, fail.
						return fmt.Errorf("open %T: %w", components[j], err)
					}
				}
				err = s.handleMetabaseFailure(ctx, "open", err)
				if err != nil {
					return err
				}

				break
			}

			return fmt.Errorf("open %T: %w", component, err)
		}
	}
	return nil
}

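// metabaseSynchronizer is a Shard alias whose Init refills the metabase from
// the blobstor instead of performing a regular metabase initialization.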
type metabaseSynchronizer Shard

func (x *metabaseSynchronizer) Init(ctx context.Context) error {
	ctx, span := tracing.StartSpanFromContext(ctx, "metabaseSynchronizer.Init")
	defer span.End()

	return (*Shard)(x).refillMetabase(ctx)
}

// Init initializes all Shard's components.
func (s *Shard) Init(ctx context.Context) error {
	m := s.GetMode()
	if err := s.initializeComponents(ctx, m); err != nil {
		return err
	}

	s.updateMetrics(ctx)

	s.gc = &gc{
		gcCfg:            &s.gcCfg,
		remover:          s.removeGarbage,
		stopChannel:      make(chan struct{}),
		newEpochChan:     make(chan uint64),
		newEpochHandlers: &newEpochHandlers{
			cancelFunc: func() {},
			handlers: []newEpochHandler{
				s.collectExpiredLocks,
				s.collectExpiredObjects,
				s.collectExpiredTombstones,
				s.collectExpiredMetrics,
			},
		},
	}
	if s.gc.metrics != nil {
		s.gc.metrics.SetShardID(s.info.ID.String())
	}

	s.gc.init(ctx)

	s.rb = newRebuilder()
	if !m.NoMetabase() {
		s.rb.Start(ctx, s.blobStor, s.metaBase, s.log)
	}
	s.writecacheSealCancel.Store(dummyCancel)
	return nil
}

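// initializeComponents initializes the shard components required for the given
// mode (blobstor, metabase or its synchronizer, writecache, pilorama), falling
// back to a degraded mode if metabase initialization fails.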
func (s *Shard) initializeComponents(ctx context.Context, m mode.Mode) error {
	type initializer interface {
		Init(context.Context) error
	}

	var components []initializer

	if !m.NoMetabase() {
		var initMetabase initializer

		if s.NeedRefillMetabase() {
			initMetabase = (*metabaseSynchronizer)(s)
		} else {
			initMetabase = s.metaBase
		}

		components = []initializer{
			s.blobStor, initMetabase,
		}
	} else {
		components = []initializer{s.blobStor}
	}

	if s.hasWriteCache() && !m.NoMetabase() {
		components = append(components, s.writeCache)
	}

	if s.pilorama != nil {
		components = append(components, s.pilorama)
	}

	for _, component := range components {
		if err := component.Init(ctx); err != nil {
			if component == s.metaBase {
				if errors.Is(err, meta.ErrOutdatedVersion) || errors.Is(err, meta.ErrIncompletedUpgrade) {
					return fmt.Errorf("metabase initialization: %w", err)
				}

				err = s.handleMetabaseFailure(ctx, "init", err)
				if err != nil {
					return err
				}

				break
			}

			return fmt.Errorf("initialize %T: %w", component, err)
		}
	}
	return nil
}

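// refillMetabase resets the metabase and repopulates it by iterating over all
// binary objects in the blobstor, tracking progress via the refill metrics.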
func (s *Shard) refillMetabase(ctx context.Context) error {
	path := s.metaBase.DumpInfo().Path
	s.metricsWriter.SetRefillStatus(path, "running")
	s.metricsWriter.SetRefillPercent(path, 0)
	var success bool
	defer func() {
		if success {
			s.metricsWriter.SetRefillStatus(path, "completed")
		} else {
			s.metricsWriter.SetRefillStatus(path, "failed")
		}
	}()

	err := s.metaBase.Reset()
	if err != nil {
		return fmt.Errorf("reset metabase: %w", err)
	}

	withCount := true
	totalObjects, err := s.blobStor.ObjectsCount(ctx)
	if err != nil {
		s.log.Warn(ctx, logs.EngineRefillFailedToGetObjectsCount, zap.Error(err))
		withCount = false
	}

	eg, egCtx := errgroup.WithContext(ctx)
	if s.refillMetabaseWorkersCount > 0 {
		eg.SetLimit(s.refillMetabaseWorkersCount)
	}

	var completedCount uint64
	var metricGuard sync.Mutex
	itErr := blobstor.IterateBinaryObjects(egCtx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error {
		eg.Go(func() error {
			var success bool
			defer func() {
				s.metricsWriter.IncRefillObjectsCount(path, len(data), success)
				if withCount {
					metricGuard.Lock()
					completedCount++
					s.metricsWriter.SetRefillPercent(path, uint32(completedCount*100/totalObjects))
					metricGuard.Unlock()
				}
			}()

			if err := s.refillObject(egCtx, data, addr, descriptor); err != nil {
				return err
			}
			success = true
			return nil
		})

		select {
		case <-egCtx.Done():
			return egCtx.Err()
		default:
			return nil
		}
	})

	egErr := eg.Wait()

	err = errors.Join(egErr, itErr)
	if err != nil {
		return fmt.Errorf("put objects to the meta: %w", err)
	}

	err = s.metaBase.SyncCounters()
	if err != nil {
		return fmt.Errorf("sync object counters: %w", err)
	}

	success = true
	s.metricsWriter.SetRefillPercent(path, 100)
	return nil
}

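// refillObject unmarshals a binary object and writes its metadata to the
// metabase; tombstone and lock objects additionally have their inhume and lock
// records replayed.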
func (s *Shard) refillObject(ctx context.Context, data []byte, addr oid.Address, descriptor []byte) error {
	obj := objectSDK.New()
	if err := obj.Unmarshal(data); err != nil {
		s.log.Warn(ctx, logs.ShardCouldNotUnmarshalObject,
			zap.Stringer("address", addr),
			zap.Error(err))
		return nil
	}

	hasIndexedAttribute := slices.IndexFunc(obj.Attributes(), func(attr objectSDK.Attribute) bool { return meta.IsAtrributeIndexed(attr.Key()) }) > 0

	var isIndexedContainer bool
	if hasIndexedAttribute {
		info, err := s.containerInfo.Info(ctx, addr.Container())
		if err != nil {
			return err
		}
		if info.Removed {
			s.log.Debug(ctx, logs.ShardSkipObjectFromResyncContainerDeleted, zap.Stringer("address", addr))
			return nil
		}
		isIndexedContainer = info.Indexed
	}

	var err error
	switch obj.Type() {
	case objectSDK.TypeTombstone:
		err = s.refillTombstoneObject(ctx, obj)
	case objectSDK.TypeLock:
		err = s.refillLockObject(ctx, obj)
	default:
	}
	if err != nil {
		return err
	}

	var mPrm meta.PutPrm
	mPrm.SetObject(obj)
	mPrm.SetStorageID(descriptor)
	mPrm.SetIndexAttributes(hasIndexedAttribute && isIndexedContainer)

	_, err = s.metaBase.Put(ctx, mPrm)
	if err != nil && !client.IsErrObjectAlreadyRemoved(err) && !errors.Is(err, meta.ErrObjectIsExpired) {
		return err
	}
	return nil
}

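// refillLockObject restores in the metabase the lock records described by a
// LOCK object.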
func (s *Shard) refillLockObject(ctx context.Context, obj *objectSDK.Object) error {
	var lock objectSDK.Lock
	if err := lock.Unmarshal(obj.Payload()); err != nil {
		return fmt.Errorf("unmarshal lock content: %w", err)
	}

	locked := make([]oid.ID, lock.NumberOfMembers())
	lock.ReadMembers(locked)

	cnr, _ := obj.ContainerID()
	id, _ := obj.ID()
	err := s.metaBase.Lock(ctx, cnr, id, locked)
	if err != nil {
		return fmt.Errorf("lock objects: %w", err)
	}
	return nil
}

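// refillTombstoneObject re-inhumes the members of a tombstone object in the
// metabase.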
func (s *Shard) refillTombstoneObject(ctx context.Context, obj *objectSDK.Object) error {
	tombstone := objectSDK.NewTombstone()

	if err := tombstone.Unmarshal(obj.Payload()); err != nil {
		return fmt.Errorf("unmarshal tombstone content: %w", err)
	}

	tombAddr := object.AddressOf(obj)
	memberIDs := tombstone.Members()
	tombMembers := make([]oid.Address, 0, len(memberIDs))

	for i := range memberIDs {
		a := tombAddr
		a.SetObject(memberIDs[i])

		tombMembers = append(tombMembers, a)
	}

	var inhumePrm meta.InhumePrm

	inhumePrm.SetTombstoneAddress(tombAddr)
	inhumePrm.SetAddresses(tombMembers...)

	_, err := s.metaBase.Inhume(ctx, inhumePrm)
	if err != nil {
		return fmt.Errorf("inhume objects: %w", err)
	}
	return nil
}

// Close releases all Shard's components.
func (s *Shard) Close(ctx context.Context) error {
	unlock := s.lockExclusive()
	if s.rb != nil {
		s.rb.Stop(ctx, s.log)
	}
	var components []interface{ Close(context.Context) error }

	if s.pilorama != nil {
		components = append(components, s.pilorama)
	}

	if s.hasWriteCache() {
		prev := s.writecacheSealCancel.Swap(notInitializedCancel)
		prev.cancel() // no need to wait: writecache.Seal and writecache.Close lock the same mutex
		components = append(components, s.writeCache)
	}

	components = append(components, s.blobStor, s.metaBase)

	var lastErr error
	for _, component := range components {
		if err := component.Close(ctx); err != nil {
			lastErr = err
			s.log.Error(ctx, logs.ShardCouldNotCloseShardComponent, zap.Error(err))
		}
	}

	if s.opsLimiter != nil {
		s.opsLimiter.Close()
	}

	unlock()

	// GC waits for handlers and the remover to complete, and handlers may try to take the shard's lock.
	// So, to prevent a deadlock, GC is stopped outside of the exclusive lock.
	// If Init/Open was unsuccessful, gc can be nil.
	if s.gc != nil {
		s.gc.stop(ctx)
	}

	return lastErr
}

// Reload reloads the configuration portions that are necessary.
// If a config option is invalid, it logs an error and returns nil.
// If there was a problem with applying the new configuration, an error is returned.
func (s *Shard) Reload(ctx context.Context, opts ...Option) error {
	ctx, span := tracing.StartSpanFromContext(ctx, "Shard.Reload")
	defer span.End()

	// Do not use defaultCfg here: missing options need not be reloaded.
	var c cfg
	for i := range opts {
		opts[i](&c)
	}

	unlock := s.lockExclusive()
	defer unlock()

	s.rb.Stop(ctx, s.log)
	if !s.info.Mode.NoMetabase() {
		defer func() {
			s.rb.Start(ctx, s.blobStor, s.metaBase, s.log)
		}()
	}

	ok, err := s.metaBase.Reload(ctx, c.metaOpts...)
	if err != nil {
		if errors.Is(err, meta.ErrDegradedMode) {
			s.log.Error(ctx, logs.ShardCantOpenMetabaseMoveToADegradedMode, zap.Error(err))
			_ = s.setMode(ctx, mode.DegradedReadOnly)
		}
		return err
	}
	if ok {
		var err error
		if c.refillMetabase {
			// Here we refill the metabase only if a new instance was opened. This is a feature:
			// we don't want to hang for some time just because we forgot to change
			// the config after the node was updated.
			err = s.refillMetabase(ctx)
		} else {
			err = s.metaBase.Init(ctx)
		}
		if err != nil {
			s.log.Error(ctx, logs.ShardCantInitializeMetabaseMoveToADegradedreadonlyMode, zap.Error(err))
			_ = s.setMode(ctx, mode.DegradedReadOnly)
			return err
		}
	}
	if err := s.setMode(ctx, c.info.Mode); err != nil {
		return err
	}
	s.reloadOpsLimiter(&c)

	return nil
}

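// reloadOpsLimiter atomically publishes the limiter from the new configuration
// and closes the previous one, so components sharing the same holder (such as
// the writecache) never keep a closed limiter.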
func (s *Shard) reloadOpsLimiter(c *cfg) {
	if c.configOpsLimiter != nil {
		old := s.opsLimiter.ptr.Swap(&qosLimiterHolder{Limiter: c.configOpsLimiter})
		old.Close()
		s.opsLimiter.SetParentID(s.info.ID.String())
	}
}

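// lockExclusive cancels background GC handlers and writecache sealing, takes
// the shard mutex and returns its unlock function.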
func (s *Shard) lockExclusive() func() {
	s.setModeRequested.Store(true)
	val := s.gcCancel.Load()
	if val != nil {
		cancelGC := val.(context.CancelFunc)
		cancelGC()
	}
	if c := s.writecacheSealCancel.Load(); c != nil {
		c.cancel()
	}
	s.m.Lock()
	s.setModeRequested.Store(false)
	return s.m.Unlock
}