frostfs-node/pkg/local_object_storage/engine/evacuate.go
Dmitrii Stepanov 15d853ea22 [#947] controlSvc: Return tree evacuation stat
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
2024-02-09 11:20:39 +03:00

500 lines
14 KiB
Go

package engine
import (
"context"
"errors"
"fmt"
"strings"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
tracingPkg "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/tracing"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"git.frostfs.info/TrueCloudLab/hrw"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)
var (
ErrMustBeReadOnly = logicerr.New("shard must be in read-only mode")
evacuationOperationLogField = zap.String("operation", "evacuation")
)
// EvacuateScope is an evacuation scope. Keep in sync with pkg/services/control/service.proto.
type EvacuateScope uint32
var (
EvacuateScopeObjects EvacuateScope = 1
EvacuateScopeTrees EvacuateScope = 2
)
func (s EvacuateScope) String() string {
var sb strings.Builder
first := true
if s&EvacuateScopeObjects == EvacuateScopeObjects {
if !first {
sb.WriteString(";")
}
sb.WriteString("objects")
first = false
}
if s&EvacuateScopeTrees == EvacuateScopeTrees {
if !first {
sb.WriteString(";")
}
sb.WriteString("trees")
}
return sb.String()
}
// EvacuateShardPrm represents parameters for the EvacuateShard operation.
type EvacuateShardPrm struct {
ShardID []*shard.ID
ObjectsHandler func(context.Context, oid.Address, *objectSDK.Object) error
IgnoreErrors bool
Async bool
Scope EvacuateScope
}
// EvacuateShardRes represents result of the EvacuateShard operation.
type EvacuateShardRes struct {
objEvacuated *atomic.Uint64
objTotal *atomic.Uint64
objFailed *atomic.Uint64
objSkipped *atomic.Uint64
trEvacuated *atomic.Uint64
trTotal *atomic.Uint64
trFailed *atomic.Uint64
}
// NewEvacuateShardRes creates new EvacuateShardRes instance.
func NewEvacuateShardRes() *EvacuateShardRes {
return &EvacuateShardRes{
objEvacuated: new(atomic.Uint64),
objTotal: new(atomic.Uint64),
objFailed: new(atomic.Uint64),
objSkipped: new(atomic.Uint64),
trEvacuated: new(atomic.Uint64),
trTotal: new(atomic.Uint64),
trFailed: new(atomic.Uint64),
}
}
// ObjectsEvacuated returns amount of evacuated objects.
// Objects for which handler returned no error are also assumed evacuated.
func (p *EvacuateShardRes) ObjectsEvacuated() uint64 {
if p == nil {
return 0
}
return p.objEvacuated.Load()
}
// ObjectsTotal returns total count objects to evacuate.
func (p *EvacuateShardRes) ObjectsTotal() uint64 {
if p == nil {
return 0
}
return p.objTotal.Load()
}
// ObjectsFailed returns count of failed objects to evacuate.
func (p *EvacuateShardRes) ObjectsFailed() uint64 {
if p == nil {
return 0
}
return p.objFailed.Load()
}
// ObjectsSkipped returns count of skipped objects.
func (p *EvacuateShardRes) ObjectsSkipped() uint64 {
if p == nil {
return 0
}
return p.objSkipped.Load()
}
// TreesEvacuated returns amount of evacuated trees.
func (p *EvacuateShardRes) TreesEvacuated() uint64 {
if p == nil {
return 0
}
return p.trEvacuated.Load()
}
// TreesTotal returns total count trees to evacuate.
func (p *EvacuateShardRes) TreesTotal() uint64 {
if p == nil {
return 0
}
return p.trTotal.Load()
}
// TreesFailed returns count of failed trees to evacuate.
func (p *EvacuateShardRes) TreesFailed() uint64 {
if p == nil {
return 0
}
return p.trFailed.Load()
}
// DeepCopy returns deep copy of result instance.
func (p *EvacuateShardRes) DeepCopy() *EvacuateShardRes {
if p == nil {
return nil
}
res := &EvacuateShardRes{
objEvacuated: new(atomic.Uint64),
objTotal: new(atomic.Uint64),
objFailed: new(atomic.Uint64),
objSkipped: new(atomic.Uint64),
trEvacuated: new(atomic.Uint64),
trTotal: new(atomic.Uint64),
trFailed: new(atomic.Uint64),
}
res.objEvacuated.Store(p.objEvacuated.Load())
res.objTotal.Store(p.objTotal.Load())
res.objFailed.Store(p.objFailed.Load())
res.objSkipped.Store(p.objSkipped.Load())
res.trTotal.Store(p.trTotal.Load())
res.trEvacuated.Store(p.trEvacuated.Load())
res.trFailed.Store(p.trFailed.Load())
return res
}
const defaultEvacuateBatchSize = 100
type pooledShard struct {
hashedShard
pool util.WorkerPool
}
var errMustHaveTwoShards = errors.New("must have at least 1 spare shard")
// Evacuate moves data from one shard to the others.
// The shard being moved must be in read-only mode.
func (e *StorageEngine) Evacuate(ctx context.Context, prm EvacuateShardPrm) (*EvacuateShardRes, error) {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
shardIDs := make([]string, len(prm.ShardID))
for i := range prm.ShardID {
shardIDs[i] = prm.ShardID[i].String()
}
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.Evacuate",
trace.WithAttributes(
attribute.StringSlice("shardIDs", shardIDs),
attribute.Bool("async", prm.Async),
attribute.Bool("ignoreErrors", prm.IgnoreErrors),
attribute.Stringer("scope", prm.Scope),
))
defer span.End()
shards, weights, err := e.getActualShards(shardIDs, prm)
if err != nil {
return nil, err
}
shardsToEvacuate := make(map[string]*shard.Shard)
for i := range shardIDs {
for j := range shards {
if shards[j].ID().String() == shardIDs[i] {
shardsToEvacuate[shardIDs[i]] = shards[j].Shard
}
}
}
res := NewEvacuateShardRes()
ctx = ctxOrBackground(ctx, prm.Async)
eg, egCtx, err := e.evacuateLimiter.TryStart(ctx, shardIDs, res)
if err != nil {
return nil, err
}
eg.Go(func() error {
return e.evacuateShards(egCtx, shardIDs, prm, res, shards, weights, shardsToEvacuate)
})
if prm.Async {
return nil, nil
}
return res, eg.Wait()
}
func ctxOrBackground(ctx context.Context, background bool) context.Context {
if background {
return context.Background()
}
return ctx
}
func (e *StorageEngine) evacuateShards(ctx context.Context, shardIDs []string, prm EvacuateShardPrm, res *EvacuateShardRes,
shards []pooledShard, weights []float64, shardsToEvacuate map[string]*shard.Shard,
) error {
var err error
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.evacuateShards",
trace.WithAttributes(
attribute.StringSlice("shardIDs", shardIDs),
attribute.Bool("async", prm.Async),
attribute.Bool("ignoreErrors", prm.IgnoreErrors),
attribute.Stringer("scope", prm.Scope),
))
defer func() {
span.End()
e.evacuateLimiter.Complete(err)
}()
e.log.Info(logs.EngineStartedShardsEvacuation, zap.Strings("shard_ids", shardIDs), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)), zap.Stringer("scope", prm.Scope))
err = e.getTotalObjectsCount(ctx, shardsToEvacuate, res)
if err != nil {
e.log.Error(logs.EngineShardsEvacuationFailedToCount, zap.Strings("shard_ids", shardIDs), zap.Error(err), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)), zap.Stringer("scope", prm.Scope))
return err
}
for _, shardID := range shardIDs {
if err = e.evacuateShard(ctx, shardID, prm, res, shards, weights, shardsToEvacuate); err != nil {
e.log.Error(logs.EngineFinishedWithErrorShardsEvacuation, zap.Error(err), zap.Strings("shard_ids", shardIDs), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)), zap.Stringer("scope", prm.Scope))
return err
}
}
e.log.Info(logs.EngineFinishedSuccessfullyShardsEvacuation,
zap.Strings("shard_ids", shardIDs),
evacuationOperationLogField,
zap.Uint64("total_objects", res.ObjectsTotal()),
zap.Uint64("evacuated_objects", res.ObjectsEvacuated()),
zap.Uint64("failed_objects", res.ObjectsFailed()),
zap.Uint64("skipped_objects", res.ObjectsSkipped()),
zap.Uint64("total_trees", res.TreesTotal()),
zap.Uint64("evacuated_trees", res.TreesEvacuated()),
zap.Uint64("failed_trees", res.TreesFailed()),
)
return nil
}
func (e *StorageEngine) getTotalObjectsCount(ctx context.Context, shardsToEvacuate map[string]*shard.Shard, res *EvacuateShardRes) error {
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.getTotalObjectsCount")
defer span.End()
for _, sh := range shardsToEvacuate {
cnt, err := sh.LogicalObjectsCount(ctx)
if err != nil {
if errors.Is(err, shard.ErrDegradedMode) {
continue
}
return err
}
res.objTotal.Add(cnt)
}
return nil
}
func (e *StorageEngine) evacuateShard(ctx context.Context, shardID string, prm EvacuateShardPrm, res *EvacuateShardRes,
shards []pooledShard, weights []float64, shardsToEvacuate map[string]*shard.Shard,
) error {
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.evacuateShard",
trace.WithAttributes(
attribute.String("shardID", shardID),
))
defer span.End()
var listPrm shard.ListWithCursorPrm
listPrm.WithCount(defaultEvacuateBatchSize)
sh := shardsToEvacuate[shardID]
var c *meta.Cursor
for {
listPrm.WithCursor(c)
// TODO (@fyrchik): #1731 this approach doesn't work in degraded modes
// because ListWithCursor works only with the metabase.
listRes, err := sh.ListWithCursor(ctx, listPrm)
if err != nil {
if errors.Is(err, meta.ErrEndOfListing) || errors.Is(err, shard.ErrDegradedMode) {
break
}
e.log.Error(logs.EngineShardsEvacuationFailedToListObjects, zap.String("shard_id", shardID), zap.Error(err), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return err
}
if err = e.evacuateObjects(ctx, sh, listRes.AddressList(), prm, res, shards, weights, shardsToEvacuate); err != nil {
return err
}
c = listRes.Cursor()
}
return nil
}
func (e *StorageEngine) getActualShards(shardIDs []string, prm EvacuateShardPrm) ([]pooledShard, []float64, error) {
e.mtx.RLock()
defer e.mtx.RUnlock()
for i := range shardIDs {
sh, ok := e.shards[shardIDs[i]]
if !ok {
return nil, nil, errShardNotFound
}
if !sh.GetMode().ReadOnly() {
return nil, nil, ErrMustBeReadOnly
}
}
if len(e.shards)-len(shardIDs) < 1 && prm.ObjectsHandler == nil {
return nil, nil, errMustHaveTwoShards
}
// We must have all shards, to have correct information about their
// indexes in a sorted slice and set appropriate marks in the metabase.
// Evacuated shard is skipped during put.
shards := make([]pooledShard, 0, len(e.shards))
for id := range e.shards {
shards = append(shards, pooledShard{
hashedShard: hashedShard(e.shards[id]),
pool: e.shardPools[id],
})
}
weights := make([]float64, 0, len(shards))
for i := range shards {
weights = append(weights, e.shardWeight(shards[i].Shard))
}
return shards, weights, nil
}
func (e *StorageEngine) evacuateObjects(ctx context.Context, sh *shard.Shard, toEvacuate []object.AddressWithType, prm EvacuateShardPrm, res *EvacuateShardRes,
shards []pooledShard, weights []float64, shardsToEvacuate map[string]*shard.Shard,
) error {
ctx, span := tracing.StartSpanFromContext(ctx, "StorageEngine.evacuateObjects",
trace.WithAttributes(
attribute.Int("objects_count", len(toEvacuate)),
))
defer span.End()
for i := range toEvacuate {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
addr := toEvacuate[i].Address
var getPrm shard.GetPrm
getPrm.SetAddress(addr)
getRes, err := sh.Get(ctx, getPrm)
if err != nil {
if prm.IgnoreErrors {
res.objFailed.Add(1)
continue
}
e.log.Error(logs.EngineShardsEvacuationFailedToReadObject, zap.String("address", addr.EncodeToString()), zap.Error(err), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return err
}
evacuatedLocal, err := e.tryEvacuateObjectLocal(ctx, addr, getRes.Object(), sh, shards, weights, shardsToEvacuate, res)
if err != nil {
return err
}
if evacuatedLocal {
continue
}
if prm.ObjectsHandler == nil {
// Do not check ignoreErrors flag here because
// ignoring errors on put make this command kinda useless.
return fmt.Errorf("%w: %s", errPutShard, toEvacuate[i])
}
err = prm.ObjectsHandler(ctx, addr, getRes.Object())
if err != nil {
e.log.Error(logs.EngineShardsEvacuationFailedToMoveObject, zap.String("address", addr.EncodeToString()), zap.Error(err), evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return err
}
res.objEvacuated.Add(1)
}
return nil
}
func (e *StorageEngine) tryEvacuateObjectLocal(ctx context.Context, addr oid.Address, object *objectSDK.Object, sh *shard.Shard,
shards []pooledShard, weights []float64, shardsToEvacuate map[string]*shard.Shard, res *EvacuateShardRes,
) (bool, error) {
hrw.SortHasherSliceByWeightValue(shards, weights, hrw.StringHash(addr.EncodeToString()))
for j := range shards {
select {
case <-ctx.Done():
return false, ctx.Err()
default:
}
if _, ok := shardsToEvacuate[shards[j].ID().String()]; ok {
continue
}
switch e.putToShard(ctx, shards[j].hashedShard, j, shards[j].pool, addr, object).status {
case putToShardSuccess:
res.objEvacuated.Add(1)
e.log.Debug(logs.EngineObjectIsMovedToAnotherShard,
zap.Stringer("from", sh.ID()),
zap.Stringer("to", shards[j].ID()),
zap.Stringer("addr", addr),
evacuationOperationLogField,
zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return true, nil
case putToShardExists, putToShardRemoved:
res.objSkipped.Add(1)
return true, nil
default:
continue
}
}
return false, nil
}
func (e *StorageEngine) GetEvacuationState(ctx context.Context) (*EvacuationState, error) {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
return e.evacuateLimiter.GetState(), nil
}
func (e *StorageEngine) EnqueRunningEvacuationStop(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
return e.evacuateLimiter.CancelIfRunning()
}