diff --git a/cmd/frostfs-cli/modules/control/evacuate_shard.go b/cmd/frostfs-cli/modules/control/evacuate_shard.go deleted file mode 100644 index 1e48c1df..00000000 --- a/cmd/frostfs-cli/modules/control/evacuate_shard.go +++ /dev/null @@ -1,56 +0,0 @@ -package control - -import ( - "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-cli/internal/key" - commonCmd "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/internal/common" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control" - "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/api/rpc/client" - "github.com/spf13/cobra" -) - -const ignoreErrorsFlag = "no-errors" - -var evacuateShardCmd = &cobra.Command{ - Use: "evacuate", - Short: "Evacuate objects from shard", - Long: "Evacuate objects from shard to other shards", - Run: evacuateShard, - Deprecated: "use frostfs-cli control shards evacuation start", -} - -func evacuateShard(cmd *cobra.Command, _ []string) { - pk := key.Get(cmd) - - req := &control.EvacuateShardRequest{Body: new(control.EvacuateShardRequest_Body)} - req.Body.Shard_ID = getShardIDList(cmd) - req.Body.IgnoreErrors, _ = cmd.Flags().GetBool(ignoreErrorsFlag) - - signRequest(cmd, pk, req) - - cli := getClient(cmd, pk) - - var resp *control.EvacuateShardResponse - var err error - err = cli.ExecRaw(func(client *client.Client) error { - resp, err = control.EvacuateShard(client, req) - return err - }) - commonCmd.ExitOnErr(cmd, "rpc error: %w", err) - - cmd.Printf("Objects moved: %d\n", resp.GetBody().GetCount()) - - verifyResponse(cmd, resp.GetSignature(), resp.GetBody()) - - cmd.Println("Shard has successfully been evacuated.") -} - -func initControlEvacuateShardCmd() { - initControlFlags(evacuateShardCmd) - - flags := evacuateShardCmd.Flags() - flags.StringSlice(shardIDFlag, nil, "List of shard IDs in base58 encoding") - flags.Bool(shardAllFlag, false, "Process all shards") - flags.Bool(ignoreErrorsFlag, false, "Skip invalid/unreadable objects") - - evacuateShardCmd.MarkFlagsMutuallyExclusive(shardIDFlag, shardAllFlag) -} diff --git a/cmd/frostfs-cli/modules/control/evacuation.go b/cmd/frostfs-cli/modules/control/evacuation.go index 73700e56..8032bf09 100644 --- a/cmd/frostfs-cli/modules/control/evacuation.go +++ b/cmd/frostfs-cli/modules/control/evacuation.go @@ -17,10 +17,11 @@ import ( ) const ( - awaitFlag = "await" - noProgressFlag = "no-progress" - scopeFlag = "scope" - repOneOnlyFlag = "rep-one-only" + awaitFlag = "await" + noProgressFlag = "no-progress" + scopeFlag = "scope" + repOneOnlyFlag = "rep-one-only" + ignoreErrorsFlag = "no-errors" containerWorkerCountFlag = "container-worker-count" objectWorkerCountFlag = "object-worker-count" diff --git a/cmd/frostfs-cli/modules/control/shards.go b/cmd/frostfs-cli/modules/control/shards.go index 329cb910..3483f5d6 100644 --- a/cmd/frostfs-cli/modules/control/shards.go +++ b/cmd/frostfs-cli/modules/control/shards.go @@ -13,7 +13,6 @@ var shardsCmd = &cobra.Command{ func initControlShardsCmd() { shardsCmd.AddCommand(listShardsCmd) shardsCmd.AddCommand(setShardModeCmd) - shardsCmd.AddCommand(evacuateShardCmd) shardsCmd.AddCommand(evacuationShardCmd) shardsCmd.AddCommand(flushCacheCmd) shardsCmd.AddCommand(doctorCmd) @@ -23,7 +22,6 @@ func initControlShardsCmd() { initControlShardsListCmd() initControlSetShardModeCmd() - initControlEvacuateShardCmd() initControlEvacuationShardCmd() initControlFlushCacheCmd() initControlDoctorCmd() diff --git a/pkg/services/control/rpc.go b/pkg/services/control/rpc.go index 514061db..6982d780 100644 --- a/pkg/services/control/rpc.go +++ b/pkg/services/control/rpc.go @@ -15,7 +15,6 @@ const ( rpcListShards = "ListShards" rpcSetShardMode = "SetShardMode" rpcSynchronizeTree = "SynchronizeTree" - rpcEvacuateShard = "EvacuateShard" rpcStartShardEvacuation = "StartShardEvacuation" rpcGetShardEvacuationStatus = "GetShardEvacuationStatus" rpcResetShardEvacuationStatus = "ResetShardEvacuationStatus" @@ -162,19 +161,6 @@ func SynchronizeTree(cli *client.Client, req *SynchronizeTreeRequest, opts ...cl return wResp.message, nil } -// EvacuateShard executes ControlService.EvacuateShard RPC. -func EvacuateShard(cli *client.Client, req *EvacuateShardRequest, opts ...client.CallOption) (*EvacuateShardResponse, error) { - wResp := newResponseWrapper[EvacuateShardResponse]() - wReq := &requestWrapper{m: req} - - err := client.SendUnary(cli, common.CallMethodInfoUnary(serviceName, rpcEvacuateShard), wReq, wResp, opts...) - if err != nil { - return nil, err - } - - return wResp.message, nil -} - // StartShardEvacuation executes ControlService.StartShardEvacuation RPC. func StartShardEvacuation(cli *client.Client, req *StartShardEvacuationRequest, opts ...client.CallOption) (*StartShardEvacuationResponse, error) { wResp := newResponseWrapper[StartShardEvacuationResponse]() diff --git a/pkg/services/control/server/evacuate.go b/pkg/services/control/server/evacuate.go deleted file mode 100644 index ae341337..00000000 --- a/pkg/services/control/server/evacuate.go +++ /dev/null @@ -1,188 +0,0 @@ -package control - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/hex" - "errors" - "fmt" - - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/engine" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control/server/ctrlmessage" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/replicator" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree" - cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" - "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap" - objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" - oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "github.com/nspcc-dev/neo-go/pkg/crypto/keys" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" -) - -var errFailedToBuildListOfContainerNodes = errors.New("can't build a list of container nodes") - -func (s *Server) EvacuateShard(ctx context.Context, req *control.EvacuateShardRequest) (*control.EvacuateShardResponse, error) { - err := s.isValidRequest(req) - if err != nil { - return nil, status.Error(codes.PermissionDenied, err.Error()) - } - - prm := engine.EvacuateShardPrm{ - ShardID: s.getShardIDList(req.GetBody().GetShard_ID()), - IgnoreErrors: req.GetBody().GetIgnoreErrors(), - ObjectsHandler: s.replicateObject, - Scope: engine.EvacuateScopeObjects, - } - - res, err := s.s.Evacuate(ctx, prm) - if err != nil { - return nil, status.Error(codes.Internal, err.Error()) - } - - resp := &control.EvacuateShardResponse{ - Body: &control.EvacuateShardResponse_Body{ - Count: uint32(res.ObjectsEvacuated()), - }, - } - - err = ctrlmessage.Sign(s.key, resp) - if err != nil { - return nil, status.Error(codes.Internal, err.Error()) - } - return resp, nil -} - -func (s *Server) replicateObject(ctx context.Context, addr oid.Address, obj *objectSDK.Object) (bool, error) { - cid, ok := obj.ContainerID() - if !ok { - // Return nil to prevent situations where a shard can't be evacuated - // because of a single bad/corrupted object. - return false, nil - } - - nodes, err := s.getContainerNodes(cid) - if err != nil { - return false, err - } - - if len(nodes) == 0 { - return false, nil - } - - var res replicatorResult - task := replicator.Task{ - NumCopies: 1, - Addr: addr, - Obj: obj, - Nodes: nodes, - } - s.replicator.HandleReplicationTask(ctx, task, &res) - - if res.count == 0 { - return false, errors.New("object was not replicated") - } - return true, nil -} - -func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (bool, string, error) { - nodes, err := s.getContainerNodes(contID) - if err != nil { - return false, "", err - } - if len(nodes) == 0 { - return false, "", nil - } - - for _, node := range nodes { - err = s.replicateTreeToNode(ctx, forest, contID, treeID, node) - if err == nil { - return true, hex.EncodeToString(node.PublicKey()), nil - } - } - return false, "", err -} - -func (s *Server) replicateTreeToNode(ctx context.Context, forest pilorama.Forest, contID cid.ID, treeID string, node netmap.NodeInfo) error { - rawCID := make([]byte, sha256.Size) - contID.Encode(rawCID) - - var height uint64 - for { - op, err := forest.TreeGetOpLog(ctx, contID, treeID, height) - if err != nil { - return err - } - - if op.Time == 0 { - return nil - } - - req := &tree.ApplyRequest{ - Body: &tree.ApplyRequest_Body{ - ContainerId: rawCID, - TreeId: treeID, - Operation: &tree.LogMove{ - ParentId: op.Parent, - Meta: op.Meta.Bytes(), - ChildId: op.Child, - }, - }, - } - - err = tree.SignMessage(req, s.key) - if err != nil { - return fmt.Errorf("can't message apply request: %w", err) - } - - err = s.treeService.ReplicateTreeOp(ctx, node, req) - if err != nil { - return err - } - - height = op.Time + 1 - } -} - -func (s *Server) getContainerNodes(contID cid.ID) ([]netmap.NodeInfo, error) { - nm, err := s.netMapSrc.GetNetMap(0) - if err != nil { - return nil, err - } - - c, err := s.cnrSrc.Get(contID) - if err != nil { - return nil, err - } - - binCnr := make([]byte, sha256.Size) - contID.Encode(binCnr) - - ns, err := nm.ContainerNodes(c.Value.PlacementPolicy(), binCnr) - if err != nil { - return nil, errFailedToBuildListOfContainerNodes - } - - nodes := placement.FlattenNodes(ns) - bs := (*keys.PublicKey)(&s.key.PublicKey).Bytes() - for i := 0; i < len(nodes); i++ { // don't use range, slice mutates in body - if bytes.Equal(nodes[i].PublicKey(), bs) { - copy(nodes[i:], nodes[i+1:]) - nodes = nodes[:len(nodes)-1] - } - } - return nodes, nil -} - -type replicatorResult struct { - count int -} - -// SubmitSuccessfulReplication implements the replicator.TaskResult interface. -func (r *replicatorResult) SubmitSuccessfulReplication(_ netmap.NodeInfo) { - r.count++ -} diff --git a/pkg/services/control/server/evacuate_async.go b/pkg/services/control/server/evacuate_async.go index 146ac7e1..04465eb2 100644 --- a/pkg/services/control/server/evacuate_async.go +++ b/pkg/services/control/server/evacuate_async.go @@ -1,17 +1,32 @@ package control import ( + "bytes" "context" + "crypto/sha256" + "encoding/hex" "errors" + "fmt" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/engine" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control/server/ctrlmessage" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/replicator" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree" + cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap" + objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/nspcc-dev/neo-go/pkg/crypto/keys" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) +var errFailedToBuildListOfContainerNodes = errors.New("can't build a list of container nodes") + func (s *Server) StartShardEvacuation(ctx context.Context, req *control.StartShardEvacuationRequest) (*control.StartShardEvacuationResponse, error) { err := s.isValidRequest(req) if err != nil { @@ -34,8 +49,7 @@ func (s *Server) StartShardEvacuation(ctx context.Context, req *control.StartSha RepOneOnly: req.GetBody().GetRepOneOnly(), } - _, err = s.s.Evacuate(ctx, prm) - if err != nil { + if err = s.s.Evacuate(ctx, prm); err != nil { var logicalErr logicerr.Logical if errors.As(err, &logicalErr) { return nil, status.Error(codes.Aborted, err.Error()) @@ -135,3 +149,133 @@ func (s *Server) ResetShardEvacuationStatus(ctx context.Context, req *control.Re } return resp, nil } + +func (s *Server) replicateObject(ctx context.Context, addr oid.Address, obj *objectSDK.Object) (bool, error) { + cid, ok := obj.ContainerID() + if !ok { + // Return nil to prevent situations where a shard can't be evacuated + // because of a single bad/corrupted object. + return false, nil + } + + nodes, err := s.getContainerNodes(cid) + if err != nil { + return false, err + } + + if len(nodes) == 0 { + return false, nil + } + + var res replicatorResult + task := replicator.Task{ + NumCopies: 1, + Addr: addr, + Obj: obj, + Nodes: nodes, + } + s.replicator.HandleReplicationTask(ctx, task, &res) + + if res.count == 0 { + return false, errors.New("object was not replicated") + } + return true, nil +} + +func (s *Server) replicateTree(ctx context.Context, contID cid.ID, treeID string, forest pilorama.Forest) (bool, string, error) { + nodes, err := s.getContainerNodes(contID) + if err != nil { + return false, "", err + } + if len(nodes) == 0 { + return false, "", nil + } + + for _, node := range nodes { + err = s.replicateTreeToNode(ctx, forest, contID, treeID, node) + if err == nil { + return true, hex.EncodeToString(node.PublicKey()), nil + } + } + return false, "", err +} + +func (s *Server) replicateTreeToNode(ctx context.Context, forest pilorama.Forest, contID cid.ID, treeID string, node netmap.NodeInfo) error { + rawCID := make([]byte, sha256.Size) + contID.Encode(rawCID) + + var height uint64 + for { + op, err := forest.TreeGetOpLog(ctx, contID, treeID, height) + if err != nil { + return err + } + + if op.Time == 0 { + return nil + } + + req := &tree.ApplyRequest{ + Body: &tree.ApplyRequest_Body{ + ContainerId: rawCID, + TreeId: treeID, + Operation: &tree.LogMove{ + ParentId: op.Parent, + Meta: op.Meta.Bytes(), + ChildId: op.Child, + }, + }, + } + + err = tree.SignMessage(req, s.key) + if err != nil { + return fmt.Errorf("can't message apply request: %w", err) + } + + err = s.treeService.ReplicateTreeOp(ctx, node, req) + if err != nil { + return err + } + + height = op.Time + 1 + } +} + +func (s *Server) getContainerNodes(contID cid.ID) ([]netmap.NodeInfo, error) { + nm, err := s.netMapSrc.GetNetMap(0) + if err != nil { + return nil, err + } + + c, err := s.cnrSrc.Get(contID) + if err != nil { + return nil, err + } + + binCnr := make([]byte, sha256.Size) + contID.Encode(binCnr) + + ns, err := nm.ContainerNodes(c.Value.PlacementPolicy(), binCnr) + if err != nil { + return nil, errFailedToBuildListOfContainerNodes + } + + nodes := placement.FlattenNodes(ns) + bs := (*keys.PublicKey)(&s.key.PublicKey).Bytes() + for i := 0; i < len(nodes); i++ { // don't use range, slice mutates in body + if bytes.Equal(nodes[i].PublicKey(), bs) { + copy(nodes[i:], nodes[i+1:]) + nodes = nodes[:len(nodes)-1] + } + } + return nodes, nil +} + +type replicatorResult struct { + count int +} + +// SubmitSuccessfulReplication implements the replicator.TaskResult interface. +func (r *replicatorResult) SubmitSuccessfulReplication(_ netmap.NodeInfo) { + r.count++ +} diff --git a/pkg/services/control/service.proto b/pkg/services/control/service.proto index ae1939e1..97ecf9a8 100644 --- a/pkg/services/control/service.proto +++ b/pkg/services/control/service.proto @@ -30,11 +30,6 @@ service ControlService { // Synchronizes all log operations for the specified tree. rpc SynchronizeTree(SynchronizeTreeRequest) returns (SynchronizeTreeResponse); - // EvacuateShard moves all data from one shard to the others. - // Deprecated: Use - // StartShardEvacuation/GetShardEvacuationStatus/StopShardEvacuation - rpc EvacuateShard(EvacuateShardRequest) returns (EvacuateShardResponse); - // StartShardEvacuation starts moving all data from one shard to the others. rpc StartShardEvacuation(StartShardEvacuationRequest) returns (StartShardEvacuationResponse); diff --git a/pkg/services/control/service_grpc.pb.go b/pkg/services/control/service_grpc.pb.go index f5cfefa8..987e08c5 100644 Binary files a/pkg/services/control/service_grpc.pb.go and b/pkg/services/control/service_grpc.pb.go differ