From 127fd63e67ec4efc7906f0287919578d85517fd3 Mon Sep 17 00:00:00 2001 From: Alex Vanin Date: Wed, 5 Mar 2025 18:44:11 +0300 Subject: [PATCH 1/2] [#654] Add circuit breaker configuration in tree pool Circuit breaker prevents port starvation when some storage nodes are up but unsynced. See more details in: https://git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pulls/339 Signed-off-by: Alex Vanin --- cmd/s3-gw/app.go | 2 ++ cmd/s3-gw/app_settings.go | 19 +++++++++++++------ config/config.env | 4 ++++ config/config.yaml | 4 ++++ docs/configuration.md | 4 ++++ go.mod | 2 +- go.sum | 4 ++-- 7 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cmd/s3-gw/app.go b/cmd/s3-gw/app.go index 807c453b..2ae4db64 100644 --- a/cmd/s3-gw/app.go +++ b/cmd/s3-gw/app.go @@ -863,6 +863,8 @@ func (a *App) initPools(ctx context.Context) { prmTree.SetLogger(a.log.With(logs.TagField(logs.TagDatapath))) prmTree.SetMaxRequestAttempts(a.config().GetInt(cfgTreePoolMaxAttempts)) + prmTree.SetCircuitBreakerThreshold(a.config().GetInt(cfgPoolCbThreshold)) + prmTree.SetCircuitBreakerDuration(a.config().GetDuration(cfgPoolCbBreakDuration)) interceptors := []grpc.DialOption{ grpc.WithUnaryInterceptor(grpctracing.NewUnaryClientInteceptor()), diff --git a/cmd/s3-gw/app_settings.go b/cmd/s3-gw/app_settings.go index 3c02c554..6afdb086 100644 --- a/cmd/s3-gw/app_settings.go +++ b/cmd/s3-gw/app_settings.go @@ -36,6 +36,9 @@ const ( defaultStreamTimeout = 10 * time.Second defaultShutdownTimeout = 15 * time.Second + defaultCbThreshold = 10 + defaultCbBreakDuration = 10 * time.Second + defaultLoggerSamplerInterval = 1 * time.Second defaultGracefulCloseOnSwitchTimeout = 10 * time.Second @@ -120,12 +123,14 @@ const ( cfgTLSCertFile = "tls.cert_file" // Pool config. 
- cfgConnectTimeout = "connect_timeout" - cfgStreamTimeout = "stream_timeout" - cfgTreeStreamTimeout = "tree_stream_timeout" - cfgHealthcheckTimeout = "healthcheck_timeout" - cfgRebalanceInterval = "rebalance_interval" - cfgPoolErrorThreshold = "pool_error_threshold" + cfgConnectTimeout = "connect_timeout" + cfgStreamTimeout = "stream_timeout" + cfgTreeStreamTimeout = "tree_stream_timeout" + cfgHealthcheckTimeout = "healthcheck_timeout" + cfgRebalanceInterval = "rebalance_interval" + cfgPoolErrorThreshold = "pool_error_threshold" + cfgPoolCbThreshold = "pool_cb_threshold" + cfgPoolCbBreakDuration = "pool_cb_break_duration" // Caching. cfgObjectsCacheLifetime = "cache.objects.lifetime" @@ -1126,6 +1131,8 @@ func setDefaults(v *viper.Viper, flags *pflag.FlagSet) { // pool: v.SetDefault(cfgPoolErrorThreshold, defaultPoolErrorThreshold) v.SetDefault(cfgStreamTimeout, defaultStreamTimeout) + v.SetDefault(cfgPoolCbThreshold, defaultCbThreshold) + v.SetDefault(cfgPoolCbBreakDuration, defaultCbBreakDuration) v.SetDefault(cfgPProfAddress, "localhost:8085") v.SetDefault(cfgPrometheusAddress, "localhost:8086") diff --git a/config/config.env b/config/config.env index 53171673..9b538f0c 100644 --- a/config/config.env +++ b/config/config.env @@ -95,6 +95,10 @@ S3_GW_HEALTHCHECK_TIMEOUT=15s S3_GW_REBALANCE_INTERVAL=60s # The number of errors on connection after which node is considered as unhealthy S3_GW_POOL_ERROR_THRESHOLD=100 +# The number of init errors before tree service circuit breaker is closed +S3_GW_POOL_CB_THRESHOLD=10 +# Duration when circuit breaker blocks all tree service inits to remote node +S3_GW_POOL_CB_BREAK_DURATION=10s # Limits for processing of clients' requests S3_GW_MAX_CLIENTS_COUNT=100 diff --git a/config/config.yaml b/config/config.yaml index 28e83654..cad714c4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -116,6 +116,10 @@ healthcheck_timeout: 15s rebalance_interval: 60s # The number of errors on connection after which node is 
considered as unhealthy pool_error_threshold: 100 +# The number of init errors before tree service circuit breaker is closed +pool_cb_threshold: 10 +# Duration when circuit breaker blocks all tree service inits to remote node +pool_cb_break_duration: 10s # Limits for processing of clients' requests diff --git a/docs/configuration.md b/docs/configuration.md index 5c923b0c..9f04038f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -219,6 +219,8 @@ tree_stream_timeout: 10s healthcheck_timeout: 15s rebalance_interval: 60s pool_error_threshold: 100 +pool_cb_threshold: 10 +pool_cb_break_duration: 10s max_clients_count: 100 max_clients_deadline: 30s @@ -243,6 +245,8 @@ source_ip_header: "Source-Ip" | `healthcheck_timeout` | `duration` | no | `15s` | Timeout to check node health during rebalance. | | `rebalance_interval` | `duration` | no | `60s` | Interval to check node health. | | `pool_error_threshold` | `uint32` | no | `100` | The number of errors on connection after which node is considered as unhealthy. | +| `pool_cb_threshold` | `int` | no | `10` | The number of init errors before tree service circuit breaker is closed. | +| `pool_cb_break_duration` | `duration` | no | `10s` | Duration when circuit breaker blocks all tree service inits to remote node. | | `max_clients_count` | `int` | no | `100` | Limits for processing of clients' requests. | | `max_clients_deadline` | `duration` | no | `30s` | Deadline after which the gate sends error `RequestTimeout` to a client. | | `allowed_access_key_id_prefixes` | `[]string` | no | | List of allowed `AccessKeyID` prefixes which S3 GW serve. If the parameter is omitted, all `AccessKeyID` will be accepted. 
| diff --git a/go.mod b/go.mod index cfcdb834..da4d521d 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( git.frostfs.info/TrueCloudLab/frostfs-contract v0.20.1-0.20241022094040-5f956751d48b git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75821121 git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3 - git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7 + git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4 git.frostfs.info/TrueCloudLab/multinet v0.0.0-20241015075604-6cb0d80e0972 git.frostfs.info/TrueCloudLab/policy-engine v0.0.0-20240822104152-a3bc3099bd5b git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02 diff --git a/go.sum b/go.sum index 67a78962..58b3c12c 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75 git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75821121/go.mod h1:kbwB4v2o6RyOfCo9kEFeUDZIX3LKhmS0yXPrtvzkQ1g= git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3 h1:QnAt5b2R6+hQthMOIn5ECfLAlVD8IAE5JRm1NCCOmuE= git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3/go.mod h1:PCijYq4oa8vKtIEcUX6jRiszI6XAW+nBwU+T1kB4d1U= -git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7 h1:T7r38zZ/aT1xTp+AxhizfukW10Rq3WQ5/m3moLGVnSk= -git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7/go.mod h1:aQpPWfG8oyfJ2X+FenPTJpSRWZjwcP5/RAtkW+/VEX8= +git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4 h1:DWMwf08GhGE9Q2g3p8Kyjl0DxPuxY7WmtkkVf4iBiCo= +git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4/go.mod h1:aQpPWfG8oyfJ2X+FenPTJpSRWZjwcP5/RAtkW+/VEX8= git.frostfs.info/TrueCloudLab/hrw v1.2.1 h1:ccBRK21rFvY5R1WotI6LNoPlizk7qSvdfD8lNIRudVc= git.frostfs.info/TrueCloudLab/hrw 
v1.2.1/go.mod h1:C1Ygde2n843yTZEQ0FP69jYiuaYV0kriLvP4zm8JuvM= git.frostfs.info/TrueCloudLab/multinet v0.0.0-20241015075604-6cb0d80e0972 h1:/960fWeyn2AFHwQUwDsWB3sbP6lTEnFnMzLMM6tx6N8= -- 2.45.3 From a3b5b879ad6d5766f65ff089757b937ad853e6a5 Mon Sep 17 00:00:00 2001 From: Alex Vanin Date: Wed, 5 Mar 2025 18:45:49 +0300 Subject: [PATCH 2/2] [#655] Port release v0.32.12 changelog Signed-off-by: Alex Vanin --- CHANGELOG.md | 8 +++++++- VERSION | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45ccc5f2..f9d8c8b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ This document outlines major changes between releases. ## [Unreleased] +## [0.32.12] - 2025-03-06 + +### Fixed +- Reduced number of dial calls to available unhealthy tree services (#654) + ## [0.32.11] - 2025-02-28 ### Fixed @@ -464,4 +469,5 @@ To see CHANGELOG for older versions, refer to https://github.com/nspcc-dev/neofs [0.32.9]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.8...v0.32.9 [0.32.10]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.9...v0.32.10 [0.32.11]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.10...v0.32.11 -[Unreleased]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.11...master \ No newline at end of file +[0.32.12]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.11...v0.32.12 +[Unreleased]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.12...master \ No newline at end of file diff --git a/VERSION b/VERSION index a897d288..90488f7d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v0.32.11 +v0.32.12 -- 2.45.3