[#1222] engine: Fix tree evacuation
All checks were successful
DCO action / DCO (pull_request) Successful in 13m4s
Vulncheck / Vulncheck (pull_request) Successful in 15m32s
Build / Build Components (1.22) (pull_request) Successful in 16m30s
Build / Build Components (1.21) (pull_request) Successful in 16m37s
Tests and linters / gopls check (pull_request) Successful in 20m33s
Tests and linters / Staticcheck (pull_request) Successful in 21m17s
Tests and linters / Lint (pull_request) Successful in 21m37s
Tests and linters / Tests with -race (pull_request) Successful in 26m40s
Tests and linters / Tests (1.21) (pull_request) Successful in 26m54s
Tests and linters / Tests (1.22) (pull_request) Successful in 27m59s
Pre-commit hooks / Pre-commit (pull_request) Successful in 39m39s

Do not fail if it is unable to evacuate tree to other node.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-07-03 11:47:50 +03:00
parent 2bac82cd6f
commit 3a797e4682
3 changed files with 30 additions and 16 deletions

View file

@ -75,7 +75,7 @@ func (s EvacuateScope) TreesOnly() bool {
type EvacuateShardPrm struct {
ShardID []*shard.ID
ObjectsHandler func(context.Context, oid.Address, *objectSDK.Object) (bool, error)
TreeHandler func(context.Context, cid.ID, string, pilorama.Forest) (string, error)
TreeHandler func(context.Context, cid.ID, string, pilorama.Forest) (bool, string, error)
IgnoreErrors bool
Async bool
Scope EvacuateScope
@ -450,7 +450,7 @@ func (e *StorageEngine) evacuateTrees(ctx context.Context, sh *shard.Shard, tree
continue
}
nodePK, err := e.evacuateTreeToOtherNode(ctx, sh, contTree, prm)
moved, nodePK, err := e.evacuateTreeToOtherNode(ctx, sh, contTree, prm)
if err != nil {
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
@ -458,18 +458,32 @@ func (e *StorageEngine) evacuateTrees(ctx context.Context, sh *shard.Shard, tree
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return err
}
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedRemote,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("treeID", contTree.TreeID),
zap.String("from_shardID", sh.ID().String()), zap.String("to_node", nodePK),
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
res.trEvacuated.Add(1)
if moved {
e.log.Debug(logs.EngineShardsEvacuationTreeEvacuatedRemote,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("treeID", contTree.TreeID),
zap.String("from_shardID", sh.ID().String()), zap.String("to_node", nodePK),
evacuationOperationLogField, zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
res.trEvacuated.Add(1)
} else if prm.IgnoreErrors {
res.trFailed.Add(1)
e.log.Warn(logs.EngineShardsEvacuationFailedToMoveTree,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
zap.String("from_shard_id", sh.ID().String()), evacuationOperationLogField,
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
} else {
e.log.Error(logs.EngineShardsEvacuationFailedToMoveTree,
zap.String("cid", contTree.CID.EncodeToString()), zap.String("tree_id", contTree.TreeID),
zap.String("from_shard_id", sh.ID().String()), evacuationOperationLogField,
zap.Error(err), zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
return fmt.Errorf("no remote nodes available to replicate tree '%s' of container %s", contTree.TreeID, contTree.CID)
}
}
return nil
}
func (e *StorageEngine) evacuateTreeToOtherNode(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID, prm EvacuateShardPrm) (string, error) {
func (e *StorageEngine) evacuateTreeToOtherNode(ctx context.Context, sh *shard.Shard, tree pilorama.ContainerIDTreeID, prm EvacuateShardPrm) (bool, string, error) {
if prm.TreeHandler == nil {
return "", fmt.Errorf("failed to evacuate tree '%s' for container %s from shard %s: local evacuation failed, but no remote evacuation available", tree.TreeID, tree.CID, sh.ID())
return false, "", fmt.Errorf("failed to evacuate tree '%s' for container %s from shard %s: local evacuation failed, but no remote evacuation available", tree.TreeID, tree.CID, sh.ID())
}
return prm.TreeHandler(ctx, tree.CID, tree.TreeID, sh)

View file

@ -507,7 +507,7 @@ func TestEvacuateTreesRemote(t *testing.T) {
var prm EvacuateShardPrm
prm.ShardID = ids
prm.Scope = EvacuateScopeTrees
prm.TreeHandler = func(ctx context.Context, contID cid.ID, treeID string, f pilorama.Forest) (string, error) {
prm.TreeHandler = func(ctx context.Context, contID cid.ID, treeID string, f pilorama.Forest) (bool, string, error) {
key := contID.String() + treeID
var height uint64
for {
@ -515,7 +515,7 @@ func TestEvacuateTreesRemote(t *testing.T) {
require.NoError(t, err)
if op.Time == 0 {
return "", nil
return true, "", nil
}
evacuatedTreeOps[key] = append(evacuatedTreeOps[key], &op)
height = op.Time + 1

View file

@ -89,22 +89,22 @@ func (s *Server) replicateObject(ctx context.Context, addr oid.Address, obj *obj
return true, nil
}
func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (string, error) {
func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (bool, string, error) {
nodes, err := s.getContainerNodes(contID)
if err != nil {
return "", err
return false, "", err
}
if len(nodes) == 0 {
return "", fmt.Errorf("no remote nodes available to replicate tree '%s' of container %s", treeID, contID)
return false, "", nil
}
for _, node := range nodes {
err = s.replicateTreeToNode(ctx, forest, contID, treeID, node)
if err == nil {
return hex.EncodeToString(node.PublicKey()), nil
return true, hex.EncodeToString(node.PublicKey()), nil
}
}
return "", err
return false, "", err
}
func (s *Server) replicateTreeToNode(ctx context.Context, forest pilorama.Forest, contID cid.ID, treeID string, node netmap.NodeInfo) error {