forked from TrueCloudLab/frostfs-node
[#2164] network/cache: Do not reconnect to failed clients immediately
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
parent
f3caf6acfe
commit
6f5edac730
5 changed files with 120 additions and 21 deletions
117
pkg/network/cache/multi.go
vendored
117
pkg/network/cache/multi.go
vendored
|
@ -5,6 +5,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
rawclient "github.com/TrueCloudLab/frostfs-api-go/v2/rpc/client"
|
rawclient "github.com/TrueCloudLab/frostfs-api-go/v2/rpc/client"
|
||||||
clientcore "github.com/TrueCloudLab/frostfs-node/pkg/core/client"
|
clientcore "github.com/TrueCloudLab/frostfs-node/pkg/core/client"
|
||||||
|
@ -12,23 +13,34 @@ import (
|
||||||
"github.com/TrueCloudLab/frostfs-sdk-go/client"
|
"github.com/TrueCloudLab/frostfs-sdk-go/client"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type singleClient struct {
|
||||||
|
sync.RWMutex
|
||||||
|
client clientcore.Client
|
||||||
|
lastAttempt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
type multiClient struct {
|
type multiClient struct {
|
||||||
mtx sync.RWMutex
|
mtx sync.RWMutex
|
||||||
|
|
||||||
clients map[string]clientcore.Client
|
clients map[string]*singleClient
|
||||||
|
|
||||||
// addrMtx protects addr field. Should not be taken before the mtx.
|
// addrMtx protects addr field. Should not be taken before the mtx.
|
||||||
addrMtx sync.RWMutex
|
addrMtx sync.RWMutex
|
||||||
addr network.AddressGroup
|
addr network.AddressGroup
|
||||||
|
|
||||||
opts ClientCacheOpts
|
opts ClientCacheOpts
|
||||||
|
|
||||||
|
reconnectInterval time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// defaultReconnectInterval is the reconnectInterval that newMultiClient
// assigns to every multiClient it creates.
const defaultReconnectInterval = time.Second * 30
|
||||||
|
|
||||||
func newMultiClient(addr network.AddressGroup, opts ClientCacheOpts) *multiClient {
|
func newMultiClient(addr network.AddressGroup, opts ClientCacheOpts) *multiClient {
|
||||||
return &multiClient{
|
return &multiClient{
|
||||||
clients: make(map[string]clientcore.Client),
|
clients: make(map[string]*singleClient),
|
||||||
addr: addr,
|
addr: addr,
|
||||||
opts: opts,
|
opts: opts,
|
||||||
|
reconnectInterval: defaultReconnectInterval,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,6 +122,8 @@ loop:
|
||||||
x.addrMtx.Unlock()
|
x.addrMtx.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// errRecentlyFailed is returned instead of a client when that client's last
// failure is more recent than the configured reconnect interval.
var errRecentlyFailed = errors.New("client has recently failed, skipping")
|
||||||
|
|
||||||
func (x *multiClient) iterateClients(ctx context.Context, f func(clientcore.Client) error) error {
|
func (x *multiClient) iterateClients(ctx context.Context, f func(clientcore.Client) error) error {
|
||||||
var firstErr error
|
var firstErr error
|
||||||
|
|
||||||
|
@ -134,16 +148,45 @@ func (x *multiClient) iterateClients(ctx context.Context, f func(clientcore.Clie
|
||||||
|
|
||||||
success := err == nil || errors.Is(err, context.Canceled)
|
success := err == nil || errors.Is(err, context.Canceled)
|
||||||
|
|
||||||
if success || firstErr == nil {
|
if success || firstErr == nil || errors.Is(firstErr, errRecentlyFailed) {
|
||||||
firstErr = err
|
firstErr = err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
x.ReportError(err)
|
||||||
|
}
|
||||||
|
|
||||||
return success
|
return success
|
||||||
})
|
})
|
||||||
|
|
||||||
return firstErr
|
return firstErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (x *multiClient) ReportError(err error) {
|
||||||
|
if errors.Is(err, errRecentlyFailed) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dropping all clients here is not necessary, we do this
|
||||||
|
// because `multiClient` doesn't yet provide convenient interface
|
||||||
|
// for reporting individual errors for streaming operations.
|
||||||
|
x.mtx.RLock()
|
||||||
|
for _, sc := range x.clients {
|
||||||
|
sc.invalidate()
|
||||||
|
}
|
||||||
|
x.mtx.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *singleClient) invalidate() {
|
||||||
|
s.Lock()
|
||||||
|
if s.client != nil {
|
||||||
|
_ = s.client.Close()
|
||||||
|
}
|
||||||
|
s.client = nil
|
||||||
|
s.lastAttempt = time.Now()
|
||||||
|
s.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
func (x *multiClient) ObjectPutInit(ctx context.Context, p client.PrmObjectPutInit) (res *client.ObjectWriter, err error) {
|
func (x *multiClient) ObjectPutInit(ctx context.Context, p client.PrmObjectPutInit) (res *client.ObjectWriter, err error) {
|
||||||
err = x.iterateClients(ctx, func(c clientcore.Client) error {
|
err = x.iterateClients(ctx, func(c clientcore.Client) error {
|
||||||
res, err = c.ObjectPutInit(ctx, p)
|
res, err = c.ObjectPutInit(ctx, p)
|
||||||
|
@ -243,7 +286,9 @@ func (x *multiClient) Close() error {
|
||||||
|
|
||||||
{
|
{
|
||||||
for _, c := range x.clients {
|
for _, c := range x.clients {
|
||||||
_ = c.Close()
|
if c.client != nil {
|
||||||
|
_ = c.client.Close()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,7 +302,12 @@ func (x *multiClient) RawForAddress(addr network.Address, f func(client *rawclie
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return c.ExecRaw(f)
|
|
||||||
|
err = c.ExecRaw(f)
|
||||||
|
if err != nil {
|
||||||
|
x.ReportError(err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *multiClient) client(addr network.Address) (clientcore.Client, error) {
|
func (x *multiClient) client(addr network.Address) (clientcore.Client, error) {
|
||||||
|
@ -268,20 +318,45 @@ func (x *multiClient) client(addr network.Address) (clientcore.Client, error) {
|
||||||
x.mtx.RUnlock()
|
x.mtx.RUnlock()
|
||||||
|
|
||||||
if cached {
|
if cached {
|
||||||
return c, nil
|
c.RLock()
|
||||||
}
|
if c.client != nil {
|
||||||
|
cl := c.client
|
||||||
x.mtx.Lock()
|
c.RUnlock()
|
||||||
defer x.mtx.Unlock()
|
return cl, nil
|
||||||
|
|
||||||
c, cached = x.clients[strAddr]
|
|
||||||
if !cached {
|
|
||||||
var err error
|
|
||||||
c, err = x.createForAddress(addr)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
x.clients[strAddr] = c
|
if x.reconnectInterval != 0 && time.Since(c.lastAttempt) < x.reconnectInterval {
|
||||||
|
c.RUnlock()
|
||||||
|
return nil, errRecentlyFailed
|
||||||
|
}
|
||||||
|
c.RUnlock()
|
||||||
|
} else {
|
||||||
|
var ok bool
|
||||||
|
x.mtx.Lock()
|
||||||
|
c, ok = x.clients[strAddr]
|
||||||
|
if !ok {
|
||||||
|
c = new(singleClient)
|
||||||
|
x.clients[strAddr] = c
|
||||||
|
}
|
||||||
|
x.mtx.Unlock()
|
||||||
}
|
}
|
||||||
return c, nil
|
|
||||||
|
c.Lock()
|
||||||
|
defer c.Unlock()
|
||||||
|
|
||||||
|
if c.client != nil {
|
||||||
|
return c.client, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if x.reconnectInterval != 0 && time.Since(c.lastAttempt) < x.reconnectInterval {
|
||||||
|
return nil, errRecentlyFailed
|
||||||
|
}
|
||||||
|
|
||||||
|
cl, err := x.createForAddress(addr)
|
||||||
|
if err != nil {
|
||||||
|
c.lastAttempt = time.Now()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
c.client = cl
|
||||||
|
return cl, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
objectSvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object"
|
objectSvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object"
|
||||||
getsvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object/get"
|
getsvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object/get"
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal"
|
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal"
|
||||||
|
internalclient "github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal/client"
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/util"
|
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/util"
|
||||||
apistatus "github.com/TrueCloudLab/frostfs-sdk-go/client/status"
|
apistatus "github.com/TrueCloudLab/frostfs-sdk-go/client/status"
|
||||||
frostfscrypto "github.com/TrueCloudLab/frostfs-sdk-go/crypto"
|
frostfscrypto "github.com/TrueCloudLab/frostfs-sdk-go/crypto"
|
||||||
|
@ -126,6 +127,7 @@ func (s *Service) toPrm(req *objectV2.GetRequest, stream objectSvc.GetObjectStre
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internalclient.ReportError(c, err)
|
||||||
return nil, fmt.Errorf("reading the response failed: %w", err)
|
return nil, fmt.Errorf("reading the response failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,6 +290,7 @@ func (s *Service) toRangePrm(req *objectV2.GetRangeRequest, stream objectSvc.Get
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internalclient.ReportError(c, err)
|
||||||
return nil, fmt.Errorf("reading the response failed: %w", err)
|
return nil, fmt.Errorf("reading the response failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -170,6 +170,8 @@ func GetObject(prm GetObjectPrm) (*GetObjectRes, error) {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// pull out an error from status
|
// pull out an error from status
|
||||||
err = apistatus.ErrFromStatus(res.Status())
|
err = apistatus.ErrFromStatus(res.Status())
|
||||||
|
} else {
|
||||||
|
ReportError(prm.cli, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("read object header: %w", err)
|
return nil, fmt.Errorf("read object header: %w", err)
|
||||||
|
@ -439,6 +441,8 @@ func PutObject(prm PutObjectPrm) (*PutObjectRes, error) {
|
||||||
cliRes, err := w.Close()
|
cliRes, err := w.Close()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
err = apistatus.ErrFromStatus(cliRes.Status())
|
err = apistatus.ErrFromStatus(cliRes.Status())
|
||||||
|
} else {
|
||||||
|
ReportError(prm.cli, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
14
pkg/services/object/internal/client/error.go
Normal file
14
pkg/services/object/internal/client/error.go
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
package internal
|
||||||
|
|
||||||
|
import clientcore "github.com/TrueCloudLab/frostfs-node/pkg/core/client"
|
||||||
|
|
||||||
|
// errorReporter is implemented by clients that want to be told about errors
// observed while they were in use.
type errorReporter interface {
	// ReportError hands an observed error over to the client.
	ReportError(err error)
}
|
||||||
|
|
||||||
|
// ReportError drops client connection if possible.
|
||||||
|
func ReportError(c clientcore.Client, err error) {
|
||||||
|
if ce, ok := c.(errorReporter); ok {
|
||||||
|
ce.ReportError(err)
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/core/client"
|
"github.com/TrueCloudLab/frostfs-node/pkg/core/client"
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/network"
|
"github.com/TrueCloudLab/frostfs-node/pkg/network"
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal"
|
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal"
|
||||||
|
internalclient "github.com/TrueCloudLab/frostfs-node/pkg/services/object/internal/client"
|
||||||
putsvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object/put"
|
putsvc "github.com/TrueCloudLab/frostfs-node/pkg/services/object/put"
|
||||||
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/util"
|
"github.com/TrueCloudLab/frostfs-node/pkg/services/object/util"
|
||||||
)
|
)
|
||||||
|
@ -153,12 +154,14 @@ func (s *streamer) relayRequest(info client.NodeInfo, c client.MultiAddressClien
|
||||||
// send init part
|
// send init part
|
||||||
err = stream.Write(s.init)
|
err = stream.Write(s.init)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
internalclient.ReportError(c, err)
|
||||||
err = fmt.Errorf("sending the initial message to stream failed: %w", err)
|
err = fmt.Errorf("sending the initial message to stream failed: %w", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range s.chunks {
|
for i := range s.chunks {
|
||||||
if err = stream.Write(s.chunks[i]); err != nil {
|
if err = stream.Write(s.chunks[i]); err != nil {
|
||||||
|
internalclient.ReportError(c, err)
|
||||||
err = fmt.Errorf("sending the chunk %d failed: %w", i, err)
|
err = fmt.Errorf("sending the chunk %d failed: %w", i, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue