forked from TrueCloudLab/frostfs-node
[#1222] engine: Fix tree evacuation
Do not fail if a tree cannot be evacuated to another node.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
parent
2bac82cd6f
commit
3a797e4682
3 changed files with 30 additions and 16 deletions
|
@ -75,7 +75,7 @@ func (s EvacuateScope) TreesOnly() bool {
|
||||||
type EvacuateShardPrm struct {
|
type EvacuateShardPrm struct {
|
||||||
ShardID []*shard.ID
|
ShardID []*shard.ID
|
||||||
ObjectsHandler func(context.Context, oid.Address, *objectSDK.Object) (bool, error)
|
ObjectsHandler func(context.Context, oid.Address, *objectSDK.Object) (bool, error)
|
||||||
TreeHandler func(context.Context, cid.ID, string, pilorama.Forest) (string, error)
|
TreeHandler func(context.Context, cid.ID, string, pilorama.Forest) (bool, string, error)
|
||||||
IgnoreErrors bool
|
IgnoreErrors bool
|
||||||
Async bool
|
Async bool
|
||||||
Scope EvacuateScope
|
Scope EvacuateScope
|
||||||
|
@ -450,7 +450,7 @@ func (e *StorageEngine) evacuateTrees(ctx context.Context, sh *shard.Shard, tree
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
nodePK, err := e.evacuateTreeToOtherNode(ctx, sh, contTree, prm)
|
moved, nodePK, err := e.evacuateTreeToOtherNode(ctx, sh, contTree, prm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
|
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
|
||||||
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
|
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
|
||||||
|
@ -458,18 +458,32 @@ func (e *StorageEngine) evacuateTrees(ctx context.Context, sh *shard.Shard, tree
|
||||||
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedRemote,
|
if moved {
|
||||||
zap.String("cid", contTree.CID.EncodeToString()), zap.String("treeID", contTree.TreeID),
|
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedRemote,
|
||||||
zap.String("from_shardID", sh.ID().String()), zap.String("to_node", nodePK),
|
zap.String("cid", contTree.CID.EncodeToString()), zap.String("treeID", contTree.TreeID),
|
||||||
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
zap.String("from_shardID", sh.ID().String()), zap.String("to_node", nodePK),
|
||||||
res.trEvacuated.Add(1)
|
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
||||||
|
res.trEvacuated.Add(1)
|
||||||
|
} else if prm.IgnoreErrors {
|
||||||
|
res.trFailed.Add(1)
|
||||||
|
e.log.Warn(logs.EngineShardsEvacuationFailedToMoveTree,
|
||||||
|
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
|
||||||
|
zap.String("from_shard_id", sh.ID().String()), evacuationOperationLogField,
|
||||||
|
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
||||||
|
} else {
|
||||||
|
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
|
||||||
|
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
|
||||||
|
zap.String("from_shard_id", sh.ID().String()), evacuationOperationLogField,
|
||||||
|
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
|
||||||
|
return fmt.Errorf("no remote nodes available to replicate tree '%s' of container %s", contTree.TreeID, contTree.CID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *StorageEngine) evacuateTreeToOtherNode(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID, prm EvacuateShardPrm) (string, error) {
|
func (e *StorageEngine) evacuateTreeToOtherNode(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID, prm EvacuateShardPrm) (bool, string, error) {
|
||||||
if prm.TreeHandler == nil {
|
if prm.TreeHandler == nil {
|
||||||
return "", fmt.Errorf("failed to evacuate tree '%s' for container %s from shard %s: local evacuation failed, but no remote evacuation available", tree.TreeID, tree.CID, sh.ID())
|
return false, "", fmt.Errorf("failed to evacuate tree '%s' for container %s from shard %s: local evacuation failed, but no remote evacuation available", tree.TreeID, tree.CID, sh.ID())
|
||||||
}
|
}
|
||||||
|
|
||||||
return prm.TreeHandler(ctx, tree.CID, tree.TreeID, sh)
|
return prm.TreeHandler(ctx, tree.CID, tree.TreeID, sh)
|
||||||
|
|
|
@ -507,7 +507,7 @@ func TestEvacuateTreesRemote(t *testing.T) {
|
||||||
var prm EvacuateShardPrm
|
var prm EvacuateShardPrm
|
||||||
prm.ShardID = ids
|
prm.ShardID = ids
|
||||||
prm.Scope = EvacuateScopeTrees
|
prm.Scope = EvacuateScopeTrees
|
||||||
prm.TreeHandler = func(ctx context.Context, contID cid.ID, treeID string, f pilorama.Forest) (string, error) {
|
prm.TreeHandler = func(ctx context.Context, contID cid.ID, treeID string, f pilorama.Forest) (bool, string, error) {
|
||||||
key := contID.String() + treeID
|
key := contID.String() + treeID
|
||||||
var height uint64
|
var height uint64
|
||||||
for {
|
for {
|
||||||
|
@ -515,7 +515,7 @@ func TestEvacuateTreesRemote(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
if op.Time == 0 {
|
if op.Time == 0 {
|
||||||
return "", nil
|
return true, "", nil
|
||||||
}
|
}
|
||||||
evacuatedTreeOps[key] = append(evacuatedTreeOps[key], &op)
|
evacuatedTreeOps[key] = append(evacuatedTreeOps[key], &op)
|
||||||
height = op.Time + 1
|
height = op.Time + 1
|
||||||
|
|
|
@ -89,22 +89,22 @@ func (s *Server) replicateObject(ctx context.Context, addr oid.Address, obj *obj
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (string, error) {
|
func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (bool, string, error) {
|
||||||
nodes, err := s.getContainerNodes(contID)
|
nodes, err := s.getContainerNodes(contID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return false, "", err
|
||||||
}
|
}
|
||||||
if len(nodes) == 0 {
|
if len(nodes) == 0 {
|
||||||
return "", fmt.Errorf("no remote nodes available to replicate tree '%s' of container %s", treeID, contID)
|
return false, "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
err = s.replicateTreeToNode(ctx, forest, contID, treeID, node)
|
err = s.replicateTreeToNode(ctx, forest, contID, treeID, node)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return hex.EncodeToString(node.PublicKey()), nil
|
return true, hex.EncodeToString(node.PublicKey()), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "", err
|
return false, "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) replicateTreeToNode(ctx context.Context, forest pilorama.Forest, contID cid.ID, treeID string, node netmap.NodeInfo) error {
|
func (s *Server) replicateTreeToNode(ctx context.Context, forest pilorama.Forest, contID cid.ID, treeID string, node netmap.NodeInfo) error {
|
||||||
|
|
Loading…
Reference in a new issue