From 9ba48c582dd803c8252a7e8b63f77eed5054acf6 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 30 Jan 2024 18:18:58 +0300 Subject: [PATCH] [#917] engine: Allow to detach shards Signed-off-by: Dmitrii Stepanov --- .../modules/control/detach_shards.go | 49 ++++++++++ cmd/frostfs-cli/modules/control/shards.go | 2 + .../modules/control/shards_set_mode.go | 10 +- pkg/local_object_storage/engine/shards.go | 97 +++++++++++++++++++ .../engine/shards_test.go | 20 ++++ pkg/services/control/rpc.go | 20 ++++ pkg/services/control/server/detach_shards.go | 37 +++++++ 7 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 cmd/frostfs-cli/modules/control/detach_shards.go create mode 100644 pkg/services/control/server/detach_shards.go diff --git a/cmd/frostfs-cli/modules/control/detach_shards.go b/cmd/frostfs-cli/modules/control/detach_shards.go new file mode 100644 index 00000000..5e5b60c3 --- /dev/null +++ b/cmd/frostfs-cli/modules/control/detach_shards.go @@ -0,0 +1,49 @@ +package control + +import ( + rawclient "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/rpc/client" + "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-cli/internal/key" + commonCmd "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/internal/common" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control" + "github.com/spf13/cobra" +) + +var shardsDetachCmd = &cobra.Command{ + Use: "detach", + Short: "Detach and close the shards", + Long: "Detach and close the shards", + Run: shardsDetach, +} + +func shardsDetach(cmd *cobra.Command, _ []string) { + pk := key.Get(cmd) + + req := &control.DetachShardsRequest{ + Body: &control.DetachShardsRequest_Body{ + Shard_ID: getShardIDListFromIDFlag(cmd, false), + }, + } + + signRequest(cmd, pk, req) + + cli := getClient(cmd, pk) + + var resp *control.DetachShardsResponse + var err error + err = cli.ExecRaw(func(client *rawclient.Client) error { + resp, err = control.DetachShards(client, req) + return err + }) + commonCmd.ExitOnErr(cmd, "rpc error: %w", err) + + verifyResponse(cmd, resp.GetSignature(), resp.GetBody()) + + cmd.Println("Shard mode update request successfully sent.") +} + +func initControlShardsDetachCmd() { + initControlFlags(shardsDetachCmd) + + flags := shardsDetachCmd.Flags() + flags.StringSlice(shardIDFlag, nil, "List of shard IDs in base58 encoding") +} diff --git a/cmd/frostfs-cli/modules/control/shards.go b/cmd/frostfs-cli/modules/control/shards.go index 6d3ef420..d8198c42 100644 --- a/cmd/frostfs-cli/modules/control/shards.go +++ b/cmd/frostfs-cli/modules/control/shards.go @@ -18,6 +18,7 @@ func initControlShardsCmd() { shardsCmd.AddCommand(flushCacheCmd) shardsCmd.AddCommand(doctorCmd) shardsCmd.AddCommand(writecacheShardCmd) + shardsCmd.AddCommand(shardsDetachCmd) initControlShardsListCmd() initControlSetShardModeCmd() @@ -26,4 +27,5 @@ func initControlShardsCmd() { initControlFlushCacheCmd() initControlDoctorCmd() initControlShardsWritecacheCmd() + initControlShardsDetachCmd() } diff --git a/cmd/frostfs-cli/modules/control/shards_set_mode.go b/cmd/frostfs-cli/modules/control/shards_set_mode.go index 78f76896..1c87b405 100644 --- a/cmd/frostfs-cli/modules/control/shards_set_mode.go +++ b/cmd/frostfs-cli/modules/control/shards_set_mode.go @@ -145,9 +145,17 @@ func getShardIDList(cmd *cobra.Command) [][]byte { return nil } + return getShardIDListFromIDFlag(cmd, true) +} + +func getShardIDListFromIDFlag(cmd *cobra.Command, withAllFlag bool) [][]byte { sidList, _ := cmd.Flags().GetStringSlice(shardIDFlag) if len(sidList) == 0 { - commonCmd.ExitOnErr(cmd, "", fmt.Errorf("either --%s or --%s flag must be provided", shardIDFlag, shardAllFlag)) + if withAllFlag { + commonCmd.ExitOnErr(cmd, "", fmt.Errorf("either --%s or --%s flag must be provided", shardIDFlag, shardAllFlag)) + } else { + commonCmd.ExitOnErr(cmd, "", fmt.Errorf("--%s flag value must be provided", shardIDFlag)) + } } // We can sort the ID list and perform this check without additional allocations, diff --git a/pkg/local_object_storage/engine/shards.go b/pkg/local_object_storage/engine/shards.go index bd25dde5..83fa4f5c 100644 --- a/pkg/local_object_storage/engine/shards.go +++ b/pkg/local_object_storage/engine/shards.go @@ -2,7 +2,9 @@ package engine import ( "context" + "errors" "fmt" + "sync" "sync/atomic" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" @@ -14,6 +16,7 @@ import ( "github.com/google/uuid" "github.com/panjf2000/ants/v2" "go.uber.org/zap" + "golang.org/x/sync/errgroup" ) var errShardNotFound = logicerr.New("shard not found") @@ -344,6 +347,100 @@ func (e *StorageEngine) HandleNewEpoch(ctx context.Context, epoch uint64) { } } +func (e *StorageEngine) DetachShards(ids []*shard.ID) error { + if len(ids) == 0 { + return logicerr.New("ids must be non-empty") + } + + deletedShards, err := e.deleteShards(ids) + if err != nil { + return err + } + + return e.closeShards(deletedShards) +} + +// closeShards closes deleted shards. Tries to close all shards. +// Returns single error with joined shard errors. +func (e *StorageEngine) closeShards(deletedShards []hashedShard) error { + var multiErr error + var multiErrGuard sync.Mutex + var eg errgroup.Group + for _, sh := range deletedShards { + sh := sh + eg.Go(func() error { + err := sh.SetMode(mode.Disabled) + if err != nil { + e.log.Error(logs.EngineCouldNotChangeShardModeToDisabled, + zap.Stringer("id", sh.ID()), + zap.Error(err), + ) + multiErrGuard.Lock() + multiErr = errors.Join(multiErr, fmt.Errorf("could not change shard (id:%s) mode to disabled: %w", sh.ID(), err)) + multiErrGuard.Unlock() + } + + err = sh.Close() + if err != nil { + e.log.Error(logs.EngineCouldNotCloseRemovedShard, + zap.Stringer("id", sh.ID()), + zap.Error(err), + ) + multiErrGuard.Lock() + multiErr = errors.Join(multiErr, fmt.Errorf("could not close removed shard (id:%s): %w", sh.ID(), err)) + multiErrGuard.Unlock() + } + return nil + }) + } + if err := eg.Wait(); err != nil { + return err + } + return multiErr +} + +// deleteShards deletes shards with specified ids from engine shard list +// and releases all engine resources associated with shards. +// Returns deleted shards or error if some shard could not be deleted. +func (e *StorageEngine) deleteShards(ids []*shard.ID) ([]hashedShard, error) { + ss := make([]hashedShard, 0, len(ids)) + + e.mtx.Lock() + defer e.mtx.Unlock() + + for _, id := range ids { + idStr := id.String() + sh, found := e.shards[idStr] + if !found { + return nil, errShardNotFound + } + ss = append(ss, sh) + } + + if len(ss) == len(e.shards) { + return nil, logicerr.New("could not delete all the shards") + } + + for _, sh := range ss { + idStr := sh.ID().String() + + sh.DeleteShardMetrics() + + delete(e.shards, idStr) + + pool, ok := e.shardPools[idStr] + if ok { + pool.Release() + delete(e.shardPools, idStr) + } + + e.log.Info(logs.EngineShardHasBeenRemoved, + zap.String("id", idStr)) + } + + return ss, nil +} + func (s hashedShard) Hash() uint64 { return s.hash } diff --git a/pkg/local_object_storage/engine/shards_test.go b/pkg/local_object_storage/engine/shards_test.go index f2896d55..3bb602ce 100644 --- a/pkg/local_object_storage/engine/shards_test.go +++ b/pkg/local_object_storage/engine/shards_test.go @@ -4,6 +4,8 @@ import ( "context" "testing" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "github.com/stretchr/testify/require" ) @@ -42,3 +44,21 @@ func TestRemoveShard(t *testing.T) { require.True(t, ok != removed) } } + +func TestDisableShards(t *testing.T) { + t.Parallel() + + const numOfShards = 2 + + te := testNewEngine(t).setShardsNum(t, numOfShards) + e, ids := te.engine, te.shardIDs + defer func() { require.NoError(t, e.Close(context.Background())) }() + + require.ErrorAs(t, e.DetachShards(ids), new(logicerr.Logical)) + require.ErrorAs(t, e.DetachShards(nil), new(logicerr.Logical)) + require.ErrorAs(t, e.DetachShards([]*shard.ID{}), new(logicerr.Logical)) + + require.NoError(t, e.DetachShards([]*shard.ID{ids[0]})) + + require.Equal(t, 1, len(e.shards)) +} diff --git a/pkg/services/control/rpc.go b/pkg/services/control/rpc.go index 24b20f87..877bb63c 100644 --- a/pkg/services/control/rpc.go +++ b/pkg/services/control/rpc.go @@ -26,6 +26,7 @@ const ( rpcRemoveChainLocalOverride = "RemoveChainLocalOverride" rpcSealWriteCache = "SealWriteCache" rpcListTargetsLocalOverrides = "ListTargetsLocalOverrides" + rpcDetachShards = "DetachShards" ) // HealthCheck executes ControlService.HealthCheck RPC. @@ -292,3 +293,22 @@ func SealWriteCache(cli *client.Client, req *SealWriteCacheRequest, opts ...clie return wResp.message, nil } + +// DetachShards executes ControlService.DetachShards RPC. +func DetachShards( + cli *client.Client, + req *DetachShardsRequest, + opts ...client.CallOption, +) (*DetachShardsResponse, error) { + wResp := newResponseWrapper[DetachShardsResponse]() + + wReq := &requestWrapper{ + m: req, + } + err := client.SendUnary(cli, common.CallMethodInfoUnary(serviceName, rpcDetachShards), wReq, wResp, opts...) + if err != nil { + return nil, err + } + + return wResp.message, nil +} diff --git a/pkg/services/control/server/detach_shards.go b/pkg/services/control/server/detach_shards.go new file mode 100644 index 00000000..c8bea97b --- /dev/null +++ b/pkg/services/control/server/detach_shards.go @@ -0,0 +1,37 @@ +package control + +import ( + "context" + "errors" + + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +func (s *Server) DetachShards(_ context.Context, req *control.DetachShardsRequest) (*control.DetachShardsResponse, error) { + err := s.isValidRequest(req) + if err != nil { + return nil, status.Error(codes.PermissionDenied, err.Error()) + } + + shardIDs := s.getShardIDList(req.GetBody().GetShard_ID()) + + if err := s.s.DetachShards(shardIDs); err != nil { + if errors.As(err, new(logicerr.Logical)) { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + return nil, status.Error(codes.Internal, err.Error()) + } + + resp := &control.DetachShardsResponse{ + Body: &control.DetachShardsResponse_Body{}, + } + + if err = SignMessage(s.key, resp); err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + return resp, nil +}