[#947] engine: Evacuate trees to remote nodes

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-02-06 17:34:32 +03:00
parent 728150d1d2
commit db67c21d55
8 changed files with 283 additions and 95 deletions

View file

@ -17,6 +17,7 @@ import (
tracingPkg "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/tracing"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"git.frostfs.info/TrueCloudLab/hrw"
@ -74,6 +75,7 @@ func (s EvacuateScope) TreesOnly() bool {
type EvacuateShardPrm struct {
ShardID []*shard.ID
ObjectsHandler func(context.Context, oid.Address, *objectSDK.Object) error
TreeHandler func(context.Context, cid.ID, string, pilorama.Forest) (string, error)
IgnoreErrors bool
Async bool
Scope EvacuateScope
@ -436,19 +438,44 @@ func (e *StorageEngine) evacuateTrees(ctx context.Context, sh *shard.Shard, tree
default:
}
success, _, err := e.tryEvacuateTreeLocal(ctx, sh, contTree, prm, shards, weights, shardsToEvacuate)
success, shardID, err := e.tryEvacuateTreeLocal(ctx, sh, contTree, prm, shards, weights, shardsToEvacuate)
if err != nil {
return err
}
if success {
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedLocal,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
zap.String("from_shard_id", sh.ID().String()), zap.String("to_shard_id", shardID),
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
res.trEvacuated.Add(1)
} else {
res.trFailed.Add(1)
continue
}
nodePK, err := e.evacuateTreeToOtherNode(ctx, sh, contTree, prm)
if err != nil {
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
zap.String("from_shard_id", sh.ID().String()), evacuationOperationLogField,
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return err
}
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedRemote,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("treeID", contTree.TreeID),
zap.String("from_shardID", sh.ID().String()), zap.String("to_node", nodePK),
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
res.trEvacuated.Add(1)
}
return nil
}
func (e *StorageEngine) evacuateTreeToOtherNode(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID, prm EvacuateShardPrm) (string, error) {
if prm.TreeHandler == nil {
return "", fmt.Errorf("failed to evacuate tree '%s' for container %s from shard %s: local evacuation failed, but no remote evacuation available", tree.TreeID, tree.CID, sh.ID())
}
return prm.TreeHandler(ctx, tree.CID, tree.TreeID, sh)
}
func (e *StorageEngine) tryEvacuateTreeLocal(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID,
prm EvacuateShardPrm, shards []pooledShard, weights []float64, shardsToEvacuate map[string]*shard.Shard,
) (bool, string, error) {
@ -579,6 +606,10 @@ func (e *StorageEngine) getActualShards(shardIDs []string, prm EvacuateShardPrm)
return nil, nil, errMustHaveTwoShards
}
if len(e.shards)-len(shardIDs) < 1 && prm.TreeHandler == nil && prm.Scope.WithTrees() {
return nil, nil, errMustHaveTwoShards
}
// We must have all shards, to have correct information about their
// indexes in a sorted slice and set appropriate marks in the metabase.
// Evacuated shard is skipped during put.