From 87f668128148dc17462dda48cda8bde59a9a452f Mon Sep 17 00:00:00 2001 From: Denis Kirillov Date: Fri, 29 Jul 2022 09:26:11 +0300 Subject: [PATCH] [#633] Add config param for pool error threshold Signed-off-by: Denis Kirillov --- cmd/s3-gw/app.go | 6 ++++++ cmd/s3-gw/app_settings.go | 8 +++++++- config/config.env | 2 ++ config/config.yaml | 3 +++ docs/configuration.md | 2 ++ 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cmd/s3-gw/app.go b/cmd/s3-gw/app.go index 24f95310..4a43b884 100644 --- a/cmd/s3-gw/app.go +++ b/cmd/s3-gw/app.go @@ -74,6 +74,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App { maxClientsCount = defaultMaxClientsCount maxClientsDeadline = defaultMaxClientsDeadline + poolErrorThreshold = defaultPoolErrorThreshold ) if v := v.GetDuration(cfgConnectTimeout); v > 0 { @@ -96,6 +97,10 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App { reBalance = v } + if v := v.GetUint32(cfgPoolErrorThreshold); v > 0 { + poolErrorThreshold = v + } + password := wallet.GetPassword(v, cfgWalletPassphrase) if key, err = wallet.GetKeyFromPath(v.GetString(cfgWallet), v.GetString(cfgAddress), password); err != nil { l.Fatal("could not load NeoFS private key", zap.Error(err)) @@ -114,6 +119,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App { prmPool.SetKey(&key.PrivateKey) prmPool.SetNodeDialTimeout(conTimeout) prmPool.SetHealthcheckTimeout(hckTimeout) + prmPool.SetErrorThreshold(poolErrorThreshold) prmPool.SetClientRebalanceInterval(reBalance) for _, peer := range fetchPeers(l, v) { prmPool.AddNode(peer) diff --git a/cmd/s3-gw/app_settings.go b/cmd/s3-gw/app_settings.go index ef59fc5d..cdd5633f 100644 --- a/cmd/s3-gw/app_settings.go +++ b/cmd/s3-gw/app_settings.go @@ -23,6 +23,8 @@ const ( defaultConnectTimeout = 10 * time.Second defaultShutdownTimeout = 15 * time.Second + defaultPoolErrorThreshold uint32 = 100 + defaultMaxClientsCount = 100 defaultMaxClientsDeadline = time.Second * 30 ) 
@@ -40,10 +42,11 @@ const ( // Settings. cfgTLSKeyFile = "tls.key_file" cfgTLSCertFile = "tls.cert_file" - // Timeouts. + // Pool config. cfgConnectTimeout = "connect_timeout" cfgHealthcheckTimeout = "healthcheck_timeout" cfgRebalanceInterval = "rebalance_interval" + cfgPoolErrorThreshold = "pool_error_threshold" // Caching. cfgObjectsCacheLifetime = "cache.objects.lifetime" @@ -211,6 +214,9 @@ func newSettings() *viper.Viper { // logger: v.SetDefault(cfgLoggerLevel, "debug") + // pool: + v.SetDefault(cfgPoolErrorThreshold, defaultPoolErrorThreshold) + v.SetDefault(cfgPProfAddress, "localhost:8085") v.SetDefault(cfgPrometheusAddress, "localhost:8086") diff --git a/config/config.env b/config/config.env index 27425ba6..88f8ecb8 100644 --- a/config/config.env +++ b/config/config.env @@ -54,6 +54,8 @@ S3_GW_CONNECT_TIMEOUT=10s S3_GW_HEALTHCHECK_TIMEOUT=15s # Interval to check node health S3_GW_REBALANCE_INTERVAL=60s +# The number of errors on a connection after which the node is considered unhealthy +S3_GW_POOL_ERROR_THRESHOLD=100 # Limits for processing of clients' requests S3_GW_MAX_CLIENTS_COUNT=100 diff --git a/config/config.yaml b/config/config.yaml index b5956c7d..d645dad0 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -59,6 +59,9 @@ connect_timeout: 10s healthcheck_timeout: 15s # Interval to check node health rebalance_interval: 60s +# The number of errors on a connection after which the node is considered unhealthy +pool_error_threshold: 100 + # Limits for processing of clients' requests max_clients_count: 100 diff --git a/docs/configuration.md b/docs/configuration.md index 6d6d169f..d5b6c455 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -149,6 +149,7 @@ resolve_order: connect_timeout: 10s healthcheck_timeout: 15s rebalance_interval: 60s +pool_error_threshold: 100 max_clients_count: 100 max_clients_deadline: 30s @@ -165,6 +166,7 @@ default_policy: REP 3 | `connect_timeout` | `duration` | `10s` | Timeout to connect to a node. 
| | `healthcheck_timeout` | `duration` | `15s` | Timeout to check node health during rebalance. | | `rebalance_interval` | `duration` | `60s` | Interval to check node health. | +| `pool_error_threshold` | `uint32` | `100` | The number of errors on a connection after which the node is considered unhealthy. | | `max_clients_count` | `int` | `100` | Limits for processing of clients' requests. | | `max_clients_deadline` | `duration` | `30s` | Deadline after which the gate sends error `RequestTimeout` to a client. | | `default_policy` | `string` | `REP 3` | Default policy of placing containers in NeoFS. If a user sends a request `CreateBucket` and doesn't define policy for placing of a container in NeoFS, the S3 Gateway will put the container with default policy. |