forked from TrueCloudLab/frostfs-s3-gw
[#291] server auto re-binding
Signed-off-by: Pavel Pogodaev <p.pogodaev@yadro.com>
This commit is contained in:
parent
94bd1dfe28
commit
bfcde09f07
7 changed files with 147 additions and 27 deletions
|
@ -26,6 +26,7 @@ This document outlines major changes between releases.
|
||||||
- Support `policy` contract (#259)
|
- Support `policy` contract (#259)
|
||||||
- Support `proxy` contract (#287)
|
- Support `proxy` contract (#287)
|
||||||
- Authmate: support custom attributes (#292)
|
- Authmate: support custom attributes (#292)
|
||||||
|
- Add new `reconnect_interval` config param (#291)
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- Generalise config param `use_default_xmlns_for_complete_multipart` to `use_default_xmlns` so that use default xmlns for all requests (#221)
|
- Generalise config param `use_default_xmlns_for_complete_multipart` to `use_default_xmlns` so that use default xmlns for all requests (#221)
|
||||||
|
|
139
cmd/s3-gw/app.go
139
cmd/s3-gw/app.go
|
@ -71,7 +71,9 @@ type (
|
||||||
|
|
||||||
policyStorage *policy.Storage
|
policyStorage *policy.Storage
|
||||||
|
|
||||||
servers []Server
|
servers []Server
|
||||||
|
unbindServers []ServerInfo
|
||||||
|
mu sync.RWMutex
|
||||||
|
|
||||||
controlAPI *grpc.Server
|
controlAPI *grpc.Server
|
||||||
|
|
||||||
|
@ -88,6 +90,7 @@ type (
|
||||||
logLevel zap.AtomicLevel
|
logLevel zap.AtomicLevel
|
||||||
maxClient maxClientsConfig
|
maxClient maxClientsConfig
|
||||||
defaultMaxAge int
|
defaultMaxAge int
|
||||||
|
reconnectInterval time.Duration
|
||||||
notificatorEnabled bool
|
notificatorEnabled bool
|
||||||
resolveZoneList []string
|
resolveZoneList []string
|
||||||
isResolveListAllow bool // True if ResolveZoneList contains allowed zones
|
isResolveListAllow bool // True if ResolveZoneList contains allowed zones
|
||||||
|
@ -205,6 +208,7 @@ func newAppSettings(log *Logger, v *viper.Viper, key *keys.PrivateKey) *appSetti
|
||||||
logLevel: log.lvl,
|
logLevel: log.lvl,
|
||||||
maxClient: newMaxClients(v),
|
maxClient: newMaxClients(v),
|
||||||
defaultMaxAge: fetchDefaultMaxAge(v, log.logger),
|
defaultMaxAge: fetchDefaultMaxAge(v, log.logger),
|
||||||
|
reconnectInterval: fetchReconnectInterval(v),
|
||||||
notificatorEnabled: v.GetBool(cfgEnableNATS),
|
notificatorEnabled: v.GetBool(cfgEnableNATS),
|
||||||
frostfsidValidation: v.GetBool(cfgFrostfsIDValidationEnabled),
|
frostfsidValidation: v.GetBool(cfgFrostfsIDValidationEnabled),
|
||||||
}
|
}
|
||||||
|
@ -699,17 +703,23 @@ func (a *App) Serve(ctx context.Context) {
|
||||||
|
|
||||||
a.startServices()
|
a.startServices()
|
||||||
|
|
||||||
for i := range a.servers {
|
servs := a.getServers()
|
||||||
go func(i int) {
|
|
||||||
a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address()))
|
|
||||||
|
|
||||||
if err := srv.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed {
|
for i := range servs {
|
||||||
a.metrics.MarkUnhealthy(a.servers[i].Address())
|
go func(i int) {
|
||||||
|
a.log.Info(logs.StartingServer, zap.String("address", servs[i].Address()))
|
||||||
|
|
||||||
|
if err := srv.Serve(servs[i].Listener()); err != nil && err != http.ErrServerClosed {
|
||||||
|
a.metrics.MarkUnhealthy(servs[i].Address())
|
||||||
a.log.Fatal(logs.ListenAndServe, zap.Error(err))
|
a.log.Fatal(logs.ListenAndServe, zap.Error(err))
|
||||||
}
|
}
|
||||||
}(i)
|
}(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(a.unbindServers) != 0 {
|
||||||
|
a.scheduleReconnect(ctx, srv)
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
address := a.cfg.GetString(cfgControlGRPCEndpoint)
|
address := a.cfg.GetString(cfgControlGRPCEndpoint)
|
||||||
a.log.Info(logs.StartingControlAPI, zap.String("address", address))
|
a.log.Info(logs.StartingControlAPI, zap.String("address", address))
|
||||||
|
@ -826,7 +836,7 @@ func (a *App) startServices() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) initServers(ctx context.Context) {
|
func (a *App) initServers(ctx context.Context) {
|
||||||
serversInfo := fetchServers(a.cfg)
|
serversInfo := fetchServers(a.cfg, a.log)
|
||||||
|
|
||||||
a.servers = make([]Server, 0, len(serversInfo))
|
a.servers = make([]Server, 0, len(serversInfo))
|
||||||
for _, serverInfo := range serversInfo {
|
for _, serverInfo := range serversInfo {
|
||||||
|
@ -836,6 +846,7 @@ func (a *App) initServers(ctx context.Context) {
|
||||||
}
|
}
|
||||||
srv, err := newServer(ctx, serverInfo)
|
srv, err := newServer(ctx, serverInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
a.unbindServers = append(a.unbindServers, serverInfo)
|
||||||
a.metrics.MarkUnhealthy(serverInfo.Address)
|
a.metrics.MarkUnhealthy(serverInfo.Address)
|
||||||
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
|
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
|
||||||
continue
|
continue
|
||||||
|
@ -852,21 +863,24 @@ func (a *App) initServers(ctx context.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) updateServers() error {
|
func (a *App) updateServers() error {
|
||||||
serversInfo := fetchServers(a.cfg)
|
serversInfo := fetchServers(a.cfg, a.log)
|
||||||
|
|
||||||
|
a.mu.Lock()
|
||||||
|
defer a.mu.Unlock()
|
||||||
|
|
||||||
var found bool
|
var found bool
|
||||||
for _, serverInfo := range serversInfo {
|
for _, serverInfo := range serversInfo {
|
||||||
index := a.serverIndex(serverInfo.Address)
|
ser := a.getServer(serverInfo.Address)
|
||||||
if index == -1 {
|
if ser != nil {
|
||||||
continue
|
if serverInfo.TLS.Enabled {
|
||||||
}
|
if err := ser.UpdateCert(serverInfo.TLS.CertFile, serverInfo.TLS.KeyFile); err != nil {
|
||||||
|
return fmt.Errorf("failed to update tls certs: %w", err)
|
||||||
if serverInfo.TLS.Enabled {
|
}
|
||||||
if err := a.servers[index].UpdateCert(serverInfo.TLS.CertFile, serverInfo.TLS.KeyFile); err != nil {
|
found = true
|
||||||
return fmt.Errorf("failed to update tls certs: %w", err)
|
|
||||||
}
|
}
|
||||||
|
} else if unbind := a.updateUnbindServerInfo(serverInfo); unbind {
|
||||||
|
found = true
|
||||||
}
|
}
|
||||||
found = true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if !found {
|
if !found {
|
||||||
|
@ -876,15 +890,6 @@ func (a *App) updateServers() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) serverIndex(address string) int {
|
|
||||||
for i := range a.servers {
|
|
||||||
if a.servers[i].Address() == address {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *App) stopServices() {
|
func (a *App) stopServices() {
|
||||||
ctx, cancel := shutdownContext()
|
ctx, cancel := shutdownContext()
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
@ -959,6 +964,31 @@ func (a *App) initHandler() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) getServer(address string) Server {
|
||||||
|
for i := range a.servers {
|
||||||
|
if a.servers[i].Address() == address {
|
||||||
|
return a.servers[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) updateUnbindServerInfo(info ServerInfo) bool {
|
||||||
|
for i := range a.unbindServers {
|
||||||
|
if a.unbindServers[i].Address == info.Address {
|
||||||
|
a.unbindServers[i] = info
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) getServers() []Server {
|
||||||
|
a.mu.RLock()
|
||||||
|
defer a.mu.RUnlock()
|
||||||
|
return a.servers
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) setRuntimeParameters() {
|
func (a *App) setRuntimeParameters() {
|
||||||
if len(os.Getenv("GOMEMLIMIT")) != 0 {
|
if len(os.Getenv("GOMEMLIMIT")) != 0 {
|
||||||
// default limit < yaml limit < app env limit < GOMEMLIMIT
|
// default limit < yaml limit < app env limit < GOMEMLIMIT
|
||||||
|
@ -974,3 +1004,60 @@ func (a *App) setRuntimeParameters() {
|
||||||
zap.Int64("old_value", previous))
|
zap.Int64("old_value", previous))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) scheduleReconnect(ctx context.Context, srv *http.Server) {
|
||||||
|
go func() {
|
||||||
|
t := time.NewTicker(a.settings.reconnectInterval)
|
||||||
|
defer t.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-t.C:
|
||||||
|
if a.tryReconnect(ctx, srv) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Reset(a.settings.reconnectInterval)
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) tryReconnect(ctx context.Context, sr *http.Server) bool {
|
||||||
|
a.mu.Lock()
|
||||||
|
defer a.mu.Unlock()
|
||||||
|
|
||||||
|
a.log.Info(logs.ServerReconnecting)
|
||||||
|
var failedServers []ServerInfo
|
||||||
|
|
||||||
|
for _, serverInfo := range a.unbindServers {
|
||||||
|
fields := []zap.Field{
|
||||||
|
zap.String("address", serverInfo.Address), zap.Bool("tls enabled", serverInfo.TLS.Enabled),
|
||||||
|
zap.String("tls cert", serverInfo.TLS.CertFile), zap.String("tls key", serverInfo.TLS.KeyFile),
|
||||||
|
}
|
||||||
|
|
||||||
|
srv, err := newServer(ctx, serverInfo)
|
||||||
|
if err != nil {
|
||||||
|
a.log.Warn(logs.ServerReconnectFailed, zap.Error(err))
|
||||||
|
failedServers = append(failedServers, serverInfo)
|
||||||
|
a.metrics.MarkUnhealthy(serverInfo.Address)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
a.log.Info(logs.StartingServer, zap.String("address", srv.Address()))
|
||||||
|
a.metrics.MarkHealthy(serverInfo.Address)
|
||||||
|
if err = sr.Serve(srv.Listener()); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||||
|
a.log.Warn(logs.ListenAndServe, zap.Error(err))
|
||||||
|
a.metrics.MarkUnhealthy(serverInfo.Address)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
a.servers = append(a.servers, srv)
|
||||||
|
a.log.Info(logs.ServerReconnectedSuccessfully, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
a.unbindServers = failedServers
|
||||||
|
|
||||||
|
return len(a.unbindServers) == 0
|
||||||
|
}
|
||||||
|
|
|
@ -59,6 +59,8 @@ const (
|
||||||
defaultConstraintName = "default"
|
defaultConstraintName = "default"
|
||||||
|
|
||||||
defaultNamespace = ""
|
defaultNamespace = ""
|
||||||
|
|
||||||
|
defaultReconnectInterval = time.Minute
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -222,6 +224,9 @@ const ( // Settings.
|
||||||
// Proxy.
|
// Proxy.
|
||||||
cfgProxyContract = "proxy.contract"
|
cfgProxyContract = "proxy.contract"
|
||||||
|
|
||||||
|
// Server.
|
||||||
|
cfgReconnectInterval = "reconnect_interval"
|
||||||
|
|
||||||
// envPrefix is an environment variables prefix used for configuration.
|
// envPrefix is an environment variables prefix used for configuration.
|
||||||
envPrefix = "S3_GW"
|
envPrefix = "S3_GW"
|
||||||
)
|
)
|
||||||
|
@ -244,6 +249,15 @@ func fetchConnectTimeout(cfg *viper.Viper) time.Duration {
|
||||||
return connTimeout
|
return connTimeout
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fetchReconnectInterval(cfg *viper.Viper) time.Duration {
|
||||||
|
reconnect := cfg.GetDuration(cfgReconnectInterval)
|
||||||
|
if reconnect <= 0 {
|
||||||
|
reconnect = defaultReconnectInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
return reconnect
|
||||||
|
}
|
||||||
|
|
||||||
func fetchStreamTimeout(cfg *viper.Viper) time.Duration {
|
func fetchStreamTimeout(cfg *viper.Viper) time.Duration {
|
||||||
streamTimeout := cfg.GetDuration(cfgStreamTimeout)
|
streamTimeout := cfg.GetDuration(cfgStreamTimeout)
|
||||||
if streamTimeout <= 0 {
|
if streamTimeout <= 0 {
|
||||||
|
@ -611,8 +625,9 @@ func fetchPeers(l *zap.Logger, v *viper.Viper) []pool.NodeParam {
|
||||||
return nodes
|
return nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchServers(v *viper.Viper) []ServerInfo {
|
func fetchServers(v *viper.Viper, log *zap.Logger) []ServerInfo {
|
||||||
var servers []ServerInfo
|
var servers []ServerInfo
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
|
||||||
for i := 0; ; i++ {
|
for i := 0; ; i++ {
|
||||||
key := cfgServer + "." + strconv.Itoa(i) + "."
|
key := cfgServer + "." + strconv.Itoa(i) + "."
|
||||||
|
@ -627,6 +642,11 @@ func fetchServers(v *viper.Viper) []ServerInfo {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, ok := seen[serverInfo.Address]; ok {
|
||||||
|
log.Warn(logs.WarnDuplicateAddress, zap.String("address", serverInfo.Address))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[serverInfo.Address] = struct{}{}
|
||||||
servers = append(servers, serverInfo)
|
servers = append(servers, serverInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,9 @@ S3_GW_SERVER_1_TLS_ENABLED=true
|
||||||
S3_GW_SERVER_1_TLS_CERT_FILE=/path/to/tls/cert
|
S3_GW_SERVER_1_TLS_CERT_FILE=/path/to/tls/cert
|
||||||
S3_GW_SERVER_1_TLS_KEY_FILE=/path/to/tls/key
|
S3_GW_SERVER_1_TLS_KEY_FILE=/path/to/tls/key
|
||||||
|
|
||||||
|
# How often to reconnect to the servers
|
||||||
|
S3_GW_RECONNECT_INTERVAL: 1m
|
||||||
|
|
||||||
# Control API
|
# Control API
|
||||||
# List of hex-encoded public keys that have rights to use the Control Service
|
# List of hex-encoded public keys that have rights to use the Control Service
|
||||||
S3_GW_CONTROL_AUTHORIZED_KEYS=035839e45d472a3b7769a2a1bd7d54c4ccd4943c3b40f547870e83a8fcbfb3ce11 028f42cfcb74499d7b15b35d9bff260a1c8d27de4f446a627406a382d8961486d6
|
S3_GW_CONTROL_AUTHORIZED_KEYS=035839e45d472a3b7769a2a1bd7d54c4ccd4943c3b40f547870e83a8fcbfb3ce11 028f42cfcb74499d7b15b35d9bff260a1c8d27de4f446a627406a382d8961486d6
|
||||||
|
|
|
@ -25,6 +25,8 @@ peers:
|
||||||
priority: 2
|
priority: 2
|
||||||
weight: 0.9
|
weight: 0.9
|
||||||
|
|
||||||
|
reconnect_interval: 1m
|
||||||
|
|
||||||
server:
|
server:
|
||||||
- address: 0.0.0.0:8080
|
- address: 0.0.0.0:8080
|
||||||
tls:
|
tls:
|
||||||
|
|
|
@ -218,6 +218,8 @@ max_clients_deadline: 30s
|
||||||
allowed_access_key_id_prefixes:
|
allowed_access_key_id_prefixes:
|
||||||
- Ck9BHsgKcnwfCTUSFm6pxhoNS4cBqgN2NQ8zVgPjqZDX
|
- Ck9BHsgKcnwfCTUSFm6pxhoNS4cBqgN2NQ8zVgPjqZDX
|
||||||
- 3stjWenX15YwYzczMr88gy3CQr4NYFBQ8P7keGzH5QFn
|
- 3stjWenX15YwYzczMr88gy3CQr4NYFBQ8P7keGzH5QFn
|
||||||
|
|
||||||
|
reconnect_interval: 1m
|
||||||
```
|
```
|
||||||
|
|
||||||
| Parameter | Type | SIGHUP reload | Default value | Description |
|
| Parameter | Type | SIGHUP reload | Default value | Description |
|
||||||
|
@ -233,6 +235,7 @@ allowed_access_key_id_prefixes:
|
||||||
| `max_clients_count` | `int` | no | `100` | Limits for processing of clients' requests. |
|
| `max_clients_count` | `int` | no | `100` | Limits for processing of clients' requests. |
|
||||||
| `max_clients_deadline` | `duration` | no | `30s` | Deadline after which the gate sends error `RequestTimeout` to a client. |
|
| `max_clients_deadline` | `duration` | no | `30s` | Deadline after which the gate sends error `RequestTimeout` to a client. |
|
||||||
| `allowed_access_key_id_prefixes` | `[]string` | no | | List of allowed `AccessKeyID` prefixes which S3 GW serve. If the parameter is omitted, all `AccessKeyID` will be accepted. |
|
| `allowed_access_key_id_prefixes` | `[]string` | no | | List of allowed `AccessKeyID` prefixes which S3 GW serve. If the parameter is omitted, all `AccessKeyID` will be accepted. |
|
||||||
|
| `reconnect_interval` | `duration` | no | `1m` | Listeners reconnection interval. |
|
||||||
|
|
||||||
### `wallet` section
|
### `wallet` section
|
||||||
|
|
||||||
|
|
|
@ -135,6 +135,9 @@ const (
|
||||||
ControlAPIGetPolicy = "get policy request"
|
ControlAPIGetPolicy = "get policy request"
|
||||||
ControlAPIListPolicies = "list policies request"
|
ControlAPIListPolicies = "list policies request"
|
||||||
PolicyValidationFailed = "policy validation failed"
|
PolicyValidationFailed = "policy validation failed"
|
||||||
|
ServerReconnecting = "reconnecting server..."
|
||||||
|
ServerReconnectedSuccessfully = "server reconnected successfully"
|
||||||
|
ServerReconnectFailed = "failed to reconnect server"
|
||||||
ParseTreeNode = "parse tree node"
|
ParseTreeNode = "parse tree node"
|
||||||
FailedToGetRealObjectSize = "failed to get real object size"
|
FailedToGetRealObjectSize = "failed to get real object size"
|
||||||
CouldntDeleteObjectFromStorageContinueDeleting = "couldn't delete object from storage, continue deleting from tree"
|
CouldntDeleteObjectFromStorageContinueDeleting = "couldn't delete object from storage, continue deleting from tree"
|
||||||
|
@ -149,4 +152,5 @@ const (
|
||||||
InvalidBucketObjectLockEnabledHeader = "invalid X-Amz-Bucket-Object-Lock-Enabled header"
|
InvalidBucketObjectLockEnabledHeader = "invalid X-Amz-Bucket-Object-Lock-Enabled header"
|
||||||
InvalidTreeKV = "invalid tree service meta KV"
|
InvalidTreeKV = "invalid tree service meta KV"
|
||||||
FailedToWriteResponse = "failed to write response"
|
FailedToWriteResponse = "failed to write response"
|
||||||
|
WarnDuplicateAddress = "duplicate address"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue