From 32c77f3a2376acad9affbd901f8e451be4348c16 Mon Sep 17 00:00:00 2001
From: Dmitrii Stepanov <d.stepanov@yadro.com>
Date: Tue, 25 Jul 2023 10:07:38 +0300
Subject: [PATCH] [#537] node: Add runtime.soft_memory_limit config parameter

This parameter allows setting a soft memory limit for the Go GC.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
---
 cmd/frostfs-node/config.go                    |  4 +++
 cmd/frostfs-node/config/runtime/config.go     | 23 ++++++++++++++
 .../config/runtime/config_test.go             | 30 +++++++++++++++++++
 cmd/frostfs-node/main.go                      |  1 +
 cmd/frostfs-node/runtime.go                   | 26 ++++++++++++++++
 config/example/node.env                       |  2 ++
 config/example/node.json                      |  3 ++
 config/example/node.yaml                      |  3 ++
 docs/storage-node-configuration.md            | 13 ++++++++
 internal/logs/logs.go                         |  2 ++
 10 files changed, 107 insertions(+)
 create mode 100644 cmd/frostfs-node/config/runtime/config.go
 create mode 100644 cmd/frostfs-node/config/runtime/config_test.go
 create mode 100644 cmd/frostfs-node/runtime.go

diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go
index c088d2f07..283cf501a 100644
--- a/cmd/frostfs-node/config.go
+++ b/cmd/frostfs-node/config.go
@@ -1055,6 +1055,10 @@ func (c *cfg) reloadConfig(ctx context.Context) {
 	}
 
 	components = append(components, dCmp{"logger", logPrm.Reload})
+	components = append(components, dCmp{"runtime", func() error {
+		setRuntimeParameters(c)
+		return nil
+	}})
 	components = append(components, dCmp{"tracing", func() error {
 		updated, err := tracing.Setup(ctx, *tracingconfig.ToTracingConfig(c.appCfg))
 		if updated {
diff --git a/cmd/frostfs-node/config/runtime/config.go b/cmd/frostfs-node/config/runtime/config.go
new file mode 100644
index 000000000..ad6cce43b
--- /dev/null
+++ b/cmd/frostfs-node/config/runtime/config.go
@@ -0,0 +1,23 @@
+package runtime
+
+import (
+	"math"
+
+	"git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config"
+)
+
+const (
+	subsection         = "runtime"
+	memoryLimitDefault = math.MaxInt64
+)
+
+// GCMemoryLimitBytes returns the value of "soft_memory_limit" config parameter from "runtime" section.
+// Zero, unset, or values that do not fit into int64 yield math.MaxInt64, i.e. no limit.
+func GCMemoryLimitBytes(c *config.Config) int64 {
+	l := config.SizeInBytesSafe(c.Sub(subsection), "soft_memory_limit")
+	// Presumably l is unsigned: a size above math.MaxInt64 converts to a negative, unusable limit.
+	if v := int64(l); v > 0 {
+		return v
+	}
+	return memoryLimitDefault
+}
diff --git a/cmd/frostfs-node/config/runtime/config_test.go b/cmd/frostfs-node/config/runtime/config_test.go
new file mode 100644
index 000000000..1bfa42ad8
--- /dev/null
+++ b/cmd/frostfs-node/config/runtime/config_test.go
@@ -0,0 +1,30 @@
+package runtime
+
+import (
+	"math"
+	"testing"
+
+	"git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config"
+	configtest "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/test"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGCMemoryLimit(t *testing.T) {
+	// 1gb, the value every example config sets for runtime.soft_memory_limit.
+	const (
+		examplePath  = "../../../../config/example/node"
+		exampleLimit = int64(1 << 30)
+	)
+
+	assertExampleLimit := func(c *config.Config) {
+		require.Equal(t, exampleLimit, GCMemoryLimitBytes(c))
+	}
+
+	t.Run("defaults", func(t *testing.T) {
+		require.Equal(t, int64(math.MaxInt64), GCMemoryLimitBytes(configtest.EmptyConfig()))
+	})
+	configtest.ForEachFileType(examplePath, assertExampleLimit)
+	t.Run("ENV", func(t *testing.T) {
+		configtest.ForEnvFileType(t, examplePath, assertExampleLimit)
+	})
+}
diff --git a/cmd/frostfs-node/main.go b/cmd/frostfs-node/main.go
index 425cf25a0..bf872da03 100644
--- a/cmd/frostfs-node/main.go
+++ b/cmd/frostfs-node/main.go
@@ -84,6 +84,7 @@ func initApp(ctx context.Context, c *cfg) {
 		c.wg.Done()
 	}()
 
+	setRuntimeParameters(c)
 	metrics, _ := metricsComponent(c)
 	initAndLog(c, "profiler", initProfilerService)
 	initAndLog(c, metrics.name, metrics.init)
diff --git a/cmd/frostfs-node/runtime.go b/cmd/frostfs-node/runtime.go
new file mode 100644
index 000000000..d858ba490
--- /dev/null
+++ b/cmd/frostfs-node/runtime.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+	"os"
+	"runtime/debug"
+
+	"git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/runtime"
+	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
+	"go.uber.org/zap"
+)
+
+// setRuntimeParameters applies the configured soft memory limit unless GOMEMLIMIT is set.
+func setRuntimeParameters(c *cfg) {
+	if os.Getenv("GOMEMLIMIT") != "" {
+		// Precedence: default limit < yaml limit < app env limit < GOMEMLIMIT.
+		c.log.Warn(logs.RuntimeSoftMemoryDefinedWithGOMEMLIMIT)
+		return
+	}
+
+	limit := runtime.GCMemoryLimitBytes(c.appCfg)
+	old := debug.SetMemoryLimit(limit)
+	if old == limit {
+		return
+	}
+	c.log.Info(logs.RuntimeSoftMemoryLimitUpdated, zap.Int64("new_value", limit), zap.Int64("old_value", old))
+}
diff --git a/config/example/node.env b/config/example/node.env
index 089021767..3abb744be 100644
--- a/config/example/node.env
+++ b/config/example/node.env
@@ -188,3 +188,5 @@ FROSTFS_STORAGE_SHARD_1_GC_REMOVER_SLEEP_INTERVAL=5m
 FROSTFS_TRACING_ENABLED=true
 FROSTFS_TRACING_ENDPOINT="localhost"
 FROSTFS_TRACING_EXPORTER="otlp_grpc"
+
+FROSTFS_RUNTIME_SOFT_MEMORY_LIMIT=1073741824
diff --git a/config/example/node.json b/config/example/node.json
index e4b85bc81..6c98903f1 100644
--- a/config/example/node.json
+++ b/config/example/node.json
@@ -245,5 +245,8 @@
     "enabled": true,
     "endpoint": "localhost:9090",
     "exporter": "otlp_grpc"
+  },
+  "runtime": {
+    "soft_memory_limit": 1073741824
   }
 }
diff --git a/config/example/node.yaml b/config/example/node.yaml
index 897f4e15b..0ef5fea7f 100644
--- a/config/example/node.yaml
+++ b/config/example/node.yaml
@@ -217,3 +217,6 @@ tracing:
   enabled: true
   exporter: "otlp_grpc"
   endpoint: "localhost"
+
+runtime:
+  soft_memory_limit: 1gb
diff --git a/docs/storage-node-configuration.md b/docs/storage-node-configuration.md
index 4469b1e10..439edf598 100644
--- a/docs/storage-node-configuration.md
+++ b/docs/storage-node-configuration.md
@@ -24,6 +24,7 @@ There are some custom types used for brevity:
 | `policer`    | [Policer service configuration](#policer-section)       |
 | `replicator` | [Replicator service configuration](#replicator-section) |
 | `storage`    | [Storage engine configuration](#storage-section)        |
+| `runtime`    | [Runtime configuration](#runtime-section)               |
 
 
 # `control` section
@@ -426,3 +427,15 @@ object:
 | `delete.tombstone_lifetime` | `int` | `5`           | Tombstone lifetime for removed objects in epochs.                                              |
 | `put.pool_size_remote`      | `int` | `10`          | Max pool size for performing remote `PUT` operations. Used by Policer and Replicator services. |
 | `put.pool_size_local`       | `int` | `10`          | Max pool size for performing local `PUT` operations. Used by Policer and Replicator services.  |
+
+# `runtime` section
+Contains runtime parameters.
+
+```yaml
+runtime:
+  soft_memory_limit: 1GB
+```
+
+| Parameter           | Type   | Default value | Description                                                              |
+|---------------------|--------|---------------|--------------------------------------------------------------------------|
+| `soft_memory_limit` | `size` | `0`           | Soft memory limit for the runtime. Zero or no value stands for no limit. If the `GOMEMLIMIT` environment variable is set, the value from the configuration file will be ignored. |
diff --git a/internal/logs/logs.go b/internal/logs/logs.go
index a2ff8dcb9..a400187cc 100644
--- a/internal/logs/logs.go
+++ b/internal/logs/logs.go
@@ -493,4 +493,6 @@ const (
 	FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap                   = "the node is under maintenance, skip initial bootstrap"
 	EngineCouldNotChangeShardModeToDisabled                                 = "could not change shard mode to disabled"
 	NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap              = "the node is already in candidate list with online state, skip initial bootstrap"
+	RuntimeSoftMemoryLimitUpdated                                           = "soft runtime memory limit value updated"
+	RuntimeSoftMemoryDefinedWithGOMEMLIMIT                                  = "soft runtime memory defined with GOMEMLIMIT environment variable, config value skipped"
 )
-- 
2.45.3