forked from TrueCloudLab/frostfs-node
[#917] engine: Allow to detach shards
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
parent
4358d3c423
commit
9ba48c582d
7 changed files with 234 additions and 1 deletions
49
cmd/frostfs-cli/modules/control/detach_shards.go
Normal file
49
cmd/frostfs-cli/modules/control/detach_shards.go
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
package control
|
||||||
|
|
||||||
|
import (
|
||||||
|
rawclient "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/rpc/client"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-cli/internal/key"
|
||||||
|
commonCmd "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/internal/common"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
var shardsDetachCmd = &cobra.Command{
|
||||||
|
Use: "detach",
|
||||||
|
Short: "Detach and close the shards",
|
||||||
|
Long: "Detach and close the shards",
|
||||||
|
Run: shardsDetach,
|
||||||
|
}
|
||||||
|
|
||||||
|
func shardsDetach(cmd *cobra.Command, _ []string) {
|
||||||
|
pk := key.Get(cmd)
|
||||||
|
|
||||||
|
req := &control.DetachShardsRequest{
|
||||||
|
Body: &control.DetachShardsRequest_Body{
|
||||||
|
Shard_ID: getShardIDListFromIDFlag(cmd, false),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
signRequest(cmd, pk, req)
|
||||||
|
|
||||||
|
cli := getClient(cmd, pk)
|
||||||
|
|
||||||
|
var resp *control.DetachShardsResponse
|
||||||
|
var err error
|
||||||
|
err = cli.ExecRaw(func(client *rawclient.Client) error {
|
||||||
|
resp, err = control.DetachShards(client, req)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
commonCmd.ExitOnErr(cmd, "rpc error: %w", err)
|
||||||
|
|
||||||
|
verifyResponse(cmd, resp.GetSignature(), resp.GetBody())
|
||||||
|
|
||||||
|
cmd.Println("Shard mode update request successfully sent.")
|
||||||
|
}
|
||||||
|
|
||||||
|
func initControlShardsDetachCmd() {
|
||||||
|
initControlFlags(shardsDetachCmd)
|
||||||
|
|
||||||
|
flags := shardsDetachCmd.Flags()
|
||||||
|
flags.StringSlice(shardIDFlag, nil, "List of shard IDs in base58 encoding")
|
||||||
|
}
|
|
@ -18,6 +18,7 @@ func initControlShardsCmd() {
|
||||||
shardsCmd.AddCommand(flushCacheCmd)
|
shardsCmd.AddCommand(flushCacheCmd)
|
||||||
shardsCmd.AddCommand(doctorCmd)
|
shardsCmd.AddCommand(doctorCmd)
|
||||||
shardsCmd.AddCommand(writecacheShardCmd)
|
shardsCmd.AddCommand(writecacheShardCmd)
|
||||||
|
shardsCmd.AddCommand(shardsDetachCmd)
|
||||||
|
|
||||||
initControlShardsListCmd()
|
initControlShardsListCmd()
|
||||||
initControlSetShardModeCmd()
|
initControlSetShardModeCmd()
|
||||||
|
@ -26,4 +27,5 @@ func initControlShardsCmd() {
|
||||||
initControlFlushCacheCmd()
|
initControlFlushCacheCmd()
|
||||||
initControlDoctorCmd()
|
initControlDoctorCmd()
|
||||||
initControlShardsWritecacheCmd()
|
initControlShardsWritecacheCmd()
|
||||||
|
initControlShardsDetachCmd()
|
||||||
}
|
}
|
||||||
|
|
|
@ -145,9 +145,17 @@ func getShardIDList(cmd *cobra.Command) [][]byte {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return getShardIDListFromIDFlag(cmd, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getShardIDListFromIDFlag(cmd *cobra.Command, withAllFlag bool) [][]byte {
|
||||||
sidList, _ := cmd.Flags().GetStringSlice(shardIDFlag)
|
sidList, _ := cmd.Flags().GetStringSlice(shardIDFlag)
|
||||||
if len(sidList) == 0 {
|
if len(sidList) == 0 {
|
||||||
|
if withAllFlag {
|
||||||
commonCmd.ExitOnErr(cmd, "", fmt.Errorf("either --%s or --%s flag must be provided", shardIDFlag, shardAllFlag))
|
commonCmd.ExitOnErr(cmd, "", fmt.Errorf("either --%s or --%s flag must be provided", shardIDFlag, shardAllFlag))
|
||||||
|
} else {
|
||||||
|
commonCmd.ExitOnErr(cmd, "", fmt.Errorf("--%s flag value must be provided", shardIDFlag))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can sort the ID list and perform this check without additional allocations,
|
// We can sort the ID list and perform this check without additional allocations,
|
||||||
|
|
|
@ -2,7 +2,9 @@ package engine
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||||
|
@ -14,6 +16,7 @@ import (
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/panjf2000/ants/v2"
|
"github.com/panjf2000/ants/v2"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
var errShardNotFound = logicerr.New("shard not found")
|
var errShardNotFound = logicerr.New("shard not found")
|
||||||
|
@ -344,6 +347,100 @@ func (e *StorageEngine) HandleNewEpoch(ctx context.Context, epoch uint64) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *StorageEngine) DetachShards(ids []*shard.ID) error {
|
||||||
|
if len(ids) == 0 {
|
||||||
|
return logicerr.New("ids must be non-empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
deletedShards, err := e.deleteShards(ids)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return e.closeShards(deletedShards)
|
||||||
|
}
|
||||||
|
|
||||||
|
// closeShards closes deleted shards. Tries to close all shards.
|
||||||
|
// Returns single error with joined shard errors.
|
||||||
|
func (e *StorageEngine) closeShards(deletedShards []hashedShard) error {
|
||||||
|
var multiErr error
|
||||||
|
var multiErrGuard sync.Mutex
|
||||||
|
var eg errgroup.Group
|
||||||
|
for _, sh := range deletedShards {
|
||||||
|
sh := sh
|
||||||
|
eg.Go(func() error {
|
||||||
|
err := sh.SetMode(mode.Disabled)
|
||||||
|
if err != nil {
|
||||||
|
e.log.Error(logs.EngineCouldNotChangeShardModeToDisabled,
|
||||||
|
zap.Stringer("id", sh.ID()),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
multiErrGuard.Lock()
|
||||||
|
multiErr = errors.Join(multiErr, fmt.Errorf("could not change shard (id:%s) mode to disabled: %w", sh.ID(), err))
|
||||||
|
multiErrGuard.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
err = sh.Close()
|
||||||
|
if err != nil {
|
||||||
|
e.log.Error(logs.EngineCouldNotCloseRemovedShard,
|
||||||
|
zap.Stringer("id", sh.ID()),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
multiErrGuard.Lock()
|
||||||
|
multiErr = errors.Join(multiErr, fmt.Errorf("could not close removed shard (id:%s): %w", sh.ID(), err))
|
||||||
|
multiErrGuard.Unlock()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if err := eg.Wait(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return multiErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteShards deletes shards with specified ids from engine shard list
|
||||||
|
// and releases all engine resources associated with shards.
|
||||||
|
// Returns deleted shards or error if some shard could not be deleted.
|
||||||
|
func (e *StorageEngine) deleteShards(ids []*shard.ID) ([]hashedShard, error) {
|
||||||
|
ss := make([]hashedShard, 0, len(ids))
|
||||||
|
|
||||||
|
e.mtx.Lock()
|
||||||
|
defer e.mtx.Unlock()
|
||||||
|
|
||||||
|
for _, id := range ids {
|
||||||
|
idStr := id.String()
|
||||||
|
sh, found := e.shards[idStr]
|
||||||
|
if !found {
|
||||||
|
return nil, errShardNotFound
|
||||||
|
}
|
||||||
|
ss = append(ss, sh)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(ss) == len(e.shards) {
|
||||||
|
return nil, logicerr.New("could not delete all the shards")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sh := range ss {
|
||||||
|
idStr := sh.ID().String()
|
||||||
|
|
||||||
|
sh.DeleteShardMetrics()
|
||||||
|
|
||||||
|
delete(e.shards, idStr)
|
||||||
|
|
||||||
|
pool, ok := e.shardPools[idStr]
|
||||||
|
if ok {
|
||||||
|
pool.Release()
|
||||||
|
delete(e.shardPools, idStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
e.log.Info(logs.EngineShardHasBeenRemoved,
|
||||||
|
zap.String("id", idStr))
|
||||||
|
}
|
||||||
|
|
||||||
|
return ss, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s hashedShard) Hash() uint64 {
|
func (s hashedShard) Hash() uint64 {
|
||||||
return s.hash
|
return s.hash
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,8 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -42,3 +44,21 @@ func TestRemoveShard(t *testing.T) {
|
||||||
require.True(t, ok != removed)
|
require.True(t, ok != removed)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDisableShards(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
const numOfShards = 2
|
||||||
|
|
||||||
|
te := testNewEngine(t).setShardsNum(t, numOfShards)
|
||||||
|
e, ids := te.engine, te.shardIDs
|
||||||
|
defer func() { require.NoError(t, e.Close(context.Background())) }()
|
||||||
|
|
||||||
|
require.ErrorAs(t, e.DetachShards(ids), new(logicerr.Logical))
|
||||||
|
require.ErrorAs(t, e.DetachShards(nil), new(logicerr.Logical))
|
||||||
|
require.ErrorAs(t, e.DetachShards([]*shard.ID{}), new(logicerr.Logical))
|
||||||
|
|
||||||
|
require.NoError(t, e.DetachShards([]*shard.ID{ids[0]}))
|
||||||
|
|
||||||
|
require.Equal(t, 1, len(e.shards))
|
||||||
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ const (
|
||||||
rpcRemoveChainLocalOverride = "RemoveChainLocalOverride"
|
rpcRemoveChainLocalOverride = "RemoveChainLocalOverride"
|
||||||
rpcSealWriteCache = "SealWriteCache"
|
rpcSealWriteCache = "SealWriteCache"
|
||||||
rpcListTargetsLocalOverrides = "ListTargetsLocalOverrides"
|
rpcListTargetsLocalOverrides = "ListTargetsLocalOverrides"
|
||||||
|
rpcDetachShards = "DetachShards"
|
||||||
)
|
)
|
||||||
|
|
||||||
// HealthCheck executes ControlService.HealthCheck RPC.
|
// HealthCheck executes ControlService.HealthCheck RPC.
|
||||||
|
@ -292,3 +293,22 @@ func SealWriteCache(cli *client.Client, req *SealWriteCacheRequest, opts ...clie
|
||||||
|
|
||||||
return wResp.message, nil
|
return wResp.message, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DetachShards executes ControlService.DetachShards RPC.
|
||||||
|
func DetachShards(
|
||||||
|
cli *client.Client,
|
||||||
|
req *DetachShardsRequest,
|
||||||
|
opts ...client.CallOption,
|
||||||
|
) (*DetachShardsResponse, error) {
|
||||||
|
wResp := newResponseWrapper[DetachShardsResponse]()
|
||||||
|
|
||||||
|
wReq := &requestWrapper{
|
||||||
|
m: req,
|
||||||
|
}
|
||||||
|
err := client.SendUnary(cli, common.CallMethodInfoUnary(serviceName, rpcDetachShards), wReq, wResp, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return wResp.message, nil
|
||||||
|
}
|
||||||
|
|
37
pkg/services/control/server/detach_shards.go
Normal file
37
pkg/services/control/server/detach_shards.go
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
package control
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control"
|
||||||
|
"google.golang.org/grpc/codes"
|
||||||
|
"google.golang.org/grpc/status"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (s *Server) DetachShards(_ context.Context, req *control.DetachShardsRequest) (*control.DetachShardsResponse, error) {
|
||||||
|
err := s.isValidRequest(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, status.Error(codes.PermissionDenied, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
shardIDs := s.getShardIDList(req.GetBody().GetShard_ID())
|
||||||
|
|
||||||
|
if err := s.s.DetachShards(shardIDs); err != nil {
|
||||||
|
if errors.As(err, new(logicerr.Logical)) {
|
||||||
|
return nil, status.Error(codes.InvalidArgument, err.Error())
|
||||||
|
}
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := &control.DetachShardsResponse{
|
||||||
|
Body: &control.DetachShardsResponse_Body{},
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = SignMessage(s.key, resp); err != nil {
|
||||||
|
return nil, status.Error(codes.Internal, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
Loading…
Reference in a new issue