WIP: Port SDK Go update to prevent socket starvation #655

Draft
alexvanin wants to merge 2 commits from alexvanin/frostfs-s3-gw:port/socket-starvation into master
9 changed files with 38 additions and 11 deletions

View file

@ -4,6 +4,11 @@ This document outlines major changes between releases.
## [Unreleased]
## [0.32.12] - 2025-03-06
### Fixed
- Reduced number of dial calls to available unhealthy tree services (#654)
## [0.32.11] - 2025-02-28
### Fixed
@ -464,4 +469,5 @@ To see CHANGELOG for older versions, refer to https://github.com/nspcc-dev/neofs
[0.32.9]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.8...v0.32.9
[0.32.10]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.9...v0.32.10
[0.32.11]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.10...v0.32.11
[Unreleased]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.11...master
[0.32.12]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.11...v0.32.12
[Unreleased]: https://git.frostfs.info/TrueCloudLab/frostfs-s3-gw/compare/v0.32.12...master

View file

@ -1 +1 @@
v0.32.11
v0.32.12

View file

@ -863,6 +863,8 @@ func (a *App) initPools(ctx context.Context) {
prmTree.SetLogger(a.log.With(logs.TagField(logs.TagDatapath)))
prmTree.SetMaxRequestAttempts(a.config().GetInt(cfgTreePoolMaxAttempts))
prmTree.SetCircuitBreakerThreshold(a.config().GetInt(cfgPoolCbThreshold))
prmTree.SetCircuitBreakerDuration(a.config().GetDuration(cfgPoolCbBreakDuration))
interceptors := []grpc.DialOption{
grpc.WithUnaryInterceptor(grpctracing.NewUnaryClientInteceptor()),

View file

@ -36,6 +36,9 @@ const (
defaultStreamTimeout = 10 * time.Second
defaultShutdownTimeout = 15 * time.Second
defaultCbThreshold = 10
defaultCbBreakDuration = 10 * time.Second
defaultLoggerSamplerInterval = 1 * time.Second
defaultGracefulCloseOnSwitchTimeout = 10 * time.Second
@ -126,6 +129,8 @@ const (
cfgHealthcheckTimeout = "healthcheck_timeout"
cfgRebalanceInterval = "rebalance_interval"
cfgPoolErrorThreshold = "pool_error_threshold"
cfgPoolCbThreshold = "pool_cb_threshold"
cfgPoolCbBreakDuration = "pool_cb_break_duration"
// Caching.
cfgObjectsCacheLifetime = "cache.objects.lifetime"
@ -1126,6 +1131,8 @@ func setDefaults(v *viper.Viper, flags *pflag.FlagSet) {
// pool:
v.SetDefault(cfgPoolErrorThreshold, defaultPoolErrorThreshold)
v.SetDefault(cfgStreamTimeout, defaultStreamTimeout)
v.SetDefault(cfgPoolCbThreshold, defaultCbThreshold)
v.SetDefault(cfgPoolCbBreakDuration, defaultCbBreakDuration)
v.SetDefault(cfgPProfAddress, "localhost:8085")
v.SetDefault(cfgPrometheusAddress, "localhost:8086")

View file

@ -95,6 +95,10 @@ S3_GW_HEALTHCHECK_TIMEOUT=15s
S3_GW_REBALANCE_INTERVAL=60s
# The number of errors on connection after which node is considered as unhealthy
S3_GW_POOL_ERROR_THRESHOLD=100
# The number of init errors after which the tree service circuit breaker trips (opens)
S3_GW_POOL_CB_THRESHOLD=10
# Duration when circuit breaker blocks all tree service inits to remote node
S3_GW_POOL_CB_BREAK_DURATION=10s
# Limits for processing of clients' requests
S3_GW_MAX_CLIENTS_COUNT=100

View file

@ -116,6 +116,10 @@ healthcheck_timeout: 15s
rebalance_interval: 60s
# The number of errors on connection after which node is considered as unhealthy
pool_error_threshold: 100
# The number of init errors after which the tree service circuit breaker trips (opens)
pool_cb_threshold: 10
# Duration when circuit breaker blocks all tree service inits to remote node
pool_cb_break_duration: 10s
# Limits for processing of clients' requests

View file

@ -219,6 +219,8 @@ tree_stream_timeout: 10s
healthcheck_timeout: 15s
rebalance_interval: 60s
pool_error_threshold: 100
pool_cb_threshold: 10
pool_cb_break_duration: 10s
max_clients_count: 100
max_clients_deadline: 30s
@ -243,6 +245,8 @@ source_ip_header: "Source-Ip"
| `healthcheck_timeout` | `duration` | no | `15s` | Timeout to check node health during rebalance. |
| `rebalance_interval` | `duration` | no | `60s` | Interval to check node health. |
| `pool_error_threshold` | `uint32` | no | `100` | The number of errors on connection after which node is considered as unhealthy. |
| `pool_cb_threshold` | `int` | no | `10` | The number of init errors after which the tree service circuit breaker trips (opens). |
| `pool_cb_break_duration` | `duration` | no | `10s` | Duration for which the circuit breaker blocks all tree service inits to the remote node. |
| `max_clients_count` | `int` | no | `100` | Limits for processing of clients' requests. |
| `max_clients_deadline` | `duration` | no | `30s` | Deadline after which the gate sends error `RequestTimeout` to a client. |
| `allowed_access_key_id_prefixes` | `[]string` | no | | List of allowed `AccessKeyID` prefixes which S3 GW serve. If the parameter is omitted, all `AccessKeyID` will be accepted. |

2
go.mod
View file

@ -6,7 +6,7 @@ require (
git.frostfs.info/TrueCloudLab/frostfs-contract v0.20.1-0.20241022094040-5f956751d48b
git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75821121
git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4
git.frostfs.info/TrueCloudLab/multinet v0.0.0-20241015075604-6cb0d80e0972
git.frostfs.info/TrueCloudLab/policy-engine v0.0.0-20240822104152-a3bc3099bd5b
git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02

4
go.sum
View file

@ -44,8 +44,8 @@ git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75
git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20241125133852-37bd75821121/go.mod h1:kbwB4v2o6RyOfCo9kEFeUDZIX3LKhmS0yXPrtvzkQ1g=
git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3 h1:QnAt5b2R6+hQthMOIn5ECfLAlVD8IAE5JRm1NCCOmuE=
git.frostfs.info/TrueCloudLab/frostfs-qos v0.0.0-20250227072915-25102d1e1aa3/go.mod h1:PCijYq4oa8vKtIEcUX6jRiszI6XAW+nBwU+T1kB4d1U=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7 h1:T7r38zZ/aT1xTp+AxhizfukW10Rq3WQ5/m3moLGVnSk=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250228093256-2b8329e026c7/go.mod h1:aQpPWfG8oyfJ2X+FenPTJpSRWZjwcP5/RAtkW+/VEX8=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4 h1:DWMwf08GhGE9Q2g3p8Kyjl0DxPuxY7WmtkkVf4iBiCo=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20250305114045-7a37613988a4/go.mod h1:aQpPWfG8oyfJ2X+FenPTJpSRWZjwcP5/RAtkW+/VEX8=
git.frostfs.info/TrueCloudLab/hrw v1.2.1 h1:ccBRK21rFvY5R1WotI6LNoPlizk7qSvdfD8lNIRudVc=
git.frostfs.info/TrueCloudLab/hrw v1.2.1/go.mod h1:C1Ygde2n843yTZEQ0FP69jYiuaYV0kriLvP4zm8JuvM=
git.frostfs.info/TrueCloudLab/multinet v0.0.0-20241015075604-6cb0d80e0972 h1:/960fWeyn2AFHwQUwDsWB3sbP6lTEnFnMzLMM6tx6N8=