diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go
index 8a7317231..6580de157 100644
--- a/cmd/frostfs-node/config.go
+++ b/cmd/frostfs-node/config.go
@@ -105,7 +105,10 @@ type applicationConfiguration struct {
}
type shardCfg struct {
- compress bool
+ compress bool
+ estimateCompressibility bool
+ estimateCompressibilityThreshold float64
+
smallSizeObjectLimit uint64
uncompressableContentType []string
refillMetabase bool
@@ -217,6 +220,8 @@ func (a *applicationConfiguration) updateShardConfig(c *config.Config, oldConfig
newConfig.refillMetabase = oldConfig.RefillMetabase()
newConfig.mode = oldConfig.Mode()
newConfig.compress = oldConfig.Compress()
+ newConfig.estimateCompressibility = oldConfig.EstimateCompressibility()
+ newConfig.estimateCompressibilityThreshold = oldConfig.EstimateCompressibilityThreshold()
newConfig.uncompressableContentType = oldConfig.UncompressableContentTypes()
newConfig.smallSizeObjectLimit = oldConfig.SmallSizeLimit()
@@ -830,6 +835,8 @@ func (c *cfg) getShardOpts(shCfg shardCfg) shardOptsWithID {
blobstoreOpts := []blobstor.Option{
blobstor.WithCompressObjects(shCfg.compress),
blobstor.WithUncompressableContentTypes(shCfg.uncompressableContentType),
+ blobstor.WithCompressibilityEstimate(shCfg.estimateCompressibility),
+ blobstor.WithCompressibilityEstimateThreshold(shCfg.estimateCompressibilityThreshold),
blobstor.WithStorages(ss),
blobstor.WithLogger(c.log),
}
diff --git a/cmd/frostfs-node/config/cast.go b/cmd/frostfs-node/config/cast.go
index 9036c3ab0..c99d33569 100644
--- a/cmd/frostfs-node/config/cast.go
+++ b/cmd/frostfs-node/config/cast.go
@@ -223,3 +223,15 @@ func parseSizeInBytes(sizeStr string) uint64 {
size := cast.ToFloat64(sizeStr)
return safeMul(size, multiplier)
}
+
+// FloatOrDefault reads a configuration value from c by name and casts it to float64.
+//
+// Returns defaultValue if the value is nil or cannot be cast. nil is checked
+// explicitly because cast.ToFloat64E converts nil to 0 without an error.
+func FloatOrDefault(c *Config, name string, defaultValue float64) float64 {
+	raw := c.Value(name)
+	if v, err := cast.ToFloat64E(raw); raw != nil && err == nil {
+		return v
+	}
+	return defaultValue
+}
diff --git a/cmd/frostfs-node/config/engine/config_test.go b/cmd/frostfs-node/config/engine/config_test.go
index 4077b1744..6b7c268ce 100644
--- a/cmd/frostfs-node/config/engine/config_test.go
+++ b/cmd/frostfs-node/config/engine/config_test.go
@@ -84,6 +84,8 @@ func TestEngineSection(t *testing.T) {
require.Equal(t, true, sc.Compress())
require.Equal(t, []string{"audio/*", "video/*"}, sc.UncompressableContentTypes())
+ require.Equal(t, true, sc.EstimateCompressibility())
+ require.Equal(t, float64(0.7), sc.EstimateCompressibilityThreshold())
require.EqualValues(t, 102400, sc.SmallSizeLimit())
require.Equal(t, 2, len(ss))
diff --git a/cmd/frostfs-node/config/engine/shard/config.go b/cmd/frostfs-node/config/engine/shard/config.go
index 1dc32fb86..16100c3a7 100644
--- a/cmd/frostfs-node/config/engine/shard/config.go
+++ b/cmd/frostfs-node/config/engine/shard/config.go
@@ -16,8 +16,11 @@ import (
// which provides access to Shard configurations.
type Config config.Config
-// SmallSizeLimitDefault is a default limit of small objects payload in bytes.
-const SmallSizeLimitDefault = 1 << 20
+const (
+ // SmallSizeLimitDefault is a default limit of small objects payload in bytes.
+ SmallSizeLimitDefault = 1 << 20
+ EstimateCompressibilityThresholdDefault = 0.1 // fallback returned by EstimateCompressibilityThreshold
+)
// From wraps config section into Config.
func From(c *config.Config) *Config {
@@ -43,6 +46,30 @@ func (x *Config) UncompressableContentTypes() []string {
"compression_exclude_content_types")
}
+// EstimateCompressibility returns the value of "estimate_compressibility" config parameter.
+//
+// Returns false if the value is not a valid bool.
+func (x *Config) EstimateCompressibility() bool {
+ return config.BoolSafe(
+ (*config.Config)(x),
+ "compression_estimate_compressibility",
+ )
+}
+
+// EstimateCompressibilityThreshold returns the value of the
+// "compression_estimate_compressibility_threshold" config parameter, or
+// EstimateCompressibilityThresholdDefault if config.FloatOrDefault falls back
+// to it or the value is not within [0.0; 1.0] (NaN included).
+func (x *Config) EstimateCompressibilityThreshold() float64 {
+	v := config.FloatOrDefault((*config.Config)(x),
+		"compression_estimate_compressibility_threshold", EstimateCompressibilityThresholdDefault)
+	// Negated range check: comparisons with NaN are always false, so the
+	// form `v < 0.0 || v > 1.0` would let a NaN threshold through.
+	if !(v >= 0.0 && v <= 1.0) {
+		return EstimateCompressibilityThresholdDefault
+	}
+	return v
+}
+
// SmallSizeLimit returns the value of "small_object_size" config parameter.
//
// Returns SmallSizeLimitDefault if the value is not a positive number.
diff --git a/config/example/node.env b/config/example/node.env
index fde65173b..dda740cf1 100644
--- a/config/example/node.env
+++ b/config/example/node.env
@@ -113,6 +113,8 @@ FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_DELAY=10ms
### Blobstor config
FROSTFS_STORAGE_SHARD_0_COMPRESS=true
FROSTFS_STORAGE_SHARD_0_COMPRESSION_EXCLUDE_CONTENT_TYPES="audio/* video/*"
+FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY=true
+FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY_THRESHOLD=0.7
FROSTFS_STORAGE_SHARD_0_SMALL_OBJECT_SIZE=102400
### Blobovnicza config
FROSTFS_STORAGE_SHARD_0_BLOBSTOR_0_PATH=tmp/0/blob/blobovnicza
diff --git a/config/example/node.json b/config/example/node.json
index e8455ee55..1038d5e5c 100644
--- a/config/example/node.json
+++ b/config/example/node.json
@@ -160,6 +160,8 @@
"compression_exclude_content_types": [
"audio/*", "video/*"
],
+ "compression_estimate_compressibility": true,
+ "compression_estimate_compressibility_threshold": 0.7,
"small_object_size": 102400,
"blobstor": [
{
diff --git a/config/example/node.yaml b/config/example/node.yaml
index 2ca1b426c..8b2046e95 100644
--- a/config/example/node.yaml
+++ b/config/example/node.yaml
@@ -178,6 +178,8 @@ storage:
compression_exclude_content_types:
- audio/*
- video/*
+ compression_estimate_compressibility: true
+ compression_estimate_compressibility_threshold: 0.7
blobstor:
- type: blobovnicza
diff --git a/docs/storage-node-configuration.md b/docs/storage-node-configuration.md
index 2e2d04088..5e9f3caf7 100644
--- a/docs/storage-node-configuration.md
+++ b/docs/storage-node-configuration.md
@@ -179,15 +179,17 @@ The following table describes configuration for each shard.
| Parameter | Type | Default value | Description |
|-------------------------------------|---------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `compress` | `bool` | `false` | Flag to enable compression. |
-| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). |
-| `mode` | `string` | `read-write` | Shard Mode.
Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` |
-| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. |
-| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. |
-| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. |
-| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. |
-| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. |
-| `gc` | [GC config](#gc-subsection) | | GC configuration. |
+| `compress` | `bool` | `false` | Flag to enable compression. |
+| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). |
+| `compression_estimate_compressibility` | `bool` | `false` | If `true`, then normalized compressibility estimation is used to decide whether to compress data or not. |
+| `compression_estimate_compressibility_threshold` | `float` | `0.1` | Normalized compressibility estimate threshold: data is compressed if the estimate is greater than or equal to this value. |
+| `mode` | `string` | `read-write` | Shard Mode.
Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` |
+| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. |
+| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. |
+| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. |
+| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. |
+| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. |
+| `gc` | [GC config](#gc-subsection) | | GC configuration. |
### `blobstor` subsection
diff --git a/pkg/local_object_storage/blobstor/blobstor.go b/pkg/local_object_storage/blobstor/blobstor.go
index d2a2338a3..bc9ab2b99 100644
--- a/pkg/local_object_storage/blobstor/blobstor.go
+++ b/pkg/local_object_storage/blobstor/blobstor.go
@@ -107,6 +107,27 @@ func WithCompressObjects(comp bool) Option {
}
}
+// WithCompressibilityEstimate returns an option that toggles the use of a
+// normalized compressibility estimate to decide whether to compress data
+// or not.
+//
+// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5
+func WithCompressibilityEstimate(v bool) Option {
+	return func(conf *cfg) {
+		conf.compression.UseCompressEstimation = v
+	}
+}
+
+// WithCompressibilityEstimateThreshold returns an option that sets the
+// threshold for the normalized compressibility estimate.
+//
+// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5
+func WithCompressibilityEstimateThreshold(threshold float64) Option {
+	return func(conf *cfg) {
+		conf.compression.CompressEstimationThreshold = threshold
+	}
+}
+
// WithUncompressableContentTypes returns option to disable decompression
// for specific content types as seen by object.AttributeContentType attribute.
func WithUncompressableContentTypes(values []string) Option {
diff --git a/pkg/local_object_storage/blobstor/compression/bench_test.go b/pkg/local_object_storage/blobstor/compression/bench_test.go
index 6e05366cf..986912985 100644
--- a/pkg/local_object_storage/blobstor/compression/bench_test.go
+++ b/pkg/local_object_storage/blobstor/compression/bench_test.go
@@ -3,8 +3,10 @@ package compression
import (
"crypto/rand"
"fmt"
+ "log"
"testing"
+ "github.com/klauspost/compress"
"github.com/stretchr/testify/require"
)
@@ -47,3 +49,50 @@ func notSoRandomSlice(size, blockSize int) []byte {
}
return data
}
+
+// BenchmarkCompressionRealVSEstimate compares compress.Estimate with a real
+// EncodeAll pass of the Config encoder over random payloads of 1 KiB..32 MiB
+// (sizes double). Only the measured call is timed; setup is excluded.
+func BenchmarkCompressionRealVSEstimate(b *testing.B) {
+	var total float64 // sink: keeps the compiler from eliminating measured calls
+	maxSize := 60 * 1024 * 1024
+	b.Run("estimate", func(b *testing.B) {
+		b.StopTimer() // ResetTimer would leave the timer running during setup
+		c := &Config{Enabled: true}
+		require.NoError(b, c.Init())
+
+		for i := 0; i < b.N; i++ { // scale work with b.N so ns/op is meaningful
+			for size := 1024; size <= maxSize; size *= 2 {
+				data := make([]byte, size)
+				_, err := rand.Reader.Read(data)
+				require.NoError(b, err)
+
+				b.StartTimer()
+				total += compress.Estimate(data)
+				b.StopTimer()
+			}
+		}
+	})
+
+	b.Run("compress", func(b *testing.B) {
+		b.StopTimer() // ResetTimer would leave the timer running during setup
+		c := &Config{Enabled: true}
+		require.NoError(b, c.Init())
+
+		for i := 0; i < b.N; i++ { // scale work with b.N so ns/op is meaningful
+			for size := 1024; size <= maxSize; size *= 2 {
+				data := make([]byte, size)
+				_, err := rand.Reader.Read(data)
+				require.NoError(b, err)
+
+				b.StartTimer()
+				encodedCap := c.encoder.MaxEncodedSize(len(data))
+				compressed := c.encoder.EncodeAll(data, make([]byte, 0, encodedCap))
+				total += float64(len(compressed)) / float64(len(data))
+				b.StopTimer()
+			}
+		}
+	})
+
+	log.Println(total)
+}
diff --git a/pkg/local_object_storage/blobstor/compression/compress.go b/pkg/local_object_storage/blobstor/compression/compress.go
index 270c2b18d..85ab47692 100644
--- a/pkg/local_object_storage/blobstor/compression/compress.go
+++ b/pkg/local_object_storage/blobstor/compression/compress.go
@@ -5,6 +5,7 @@ import (
"strings"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
+ "github.com/klauspost/compress"
"github.com/klauspost/compress/zstd"
)
@@ -13,6 +14,9 @@ type Config struct {
Enabled bool
UncompressableContentTypes []string
+ UseCompressEstimation bool
+ CompressEstimationThreshold float64
+
encoder *zstd.Encoder
decoder *zstd.Decoder
}
@@ -82,6 +86,17 @@ func (c *Config) Compress(data []byte) []byte {
if c == nil || !c.Enabled {
return data
}
+ if c.UseCompressEstimation {
+ estimated := compress.Estimate(data)
+ if estimated >= c.CompressEstimationThreshold {
+ return c.compress(data)
+ }
+ return data
+ }
+ return c.compress(data)
+}
+
+func (c *Config) compress(data []byte) []byte {
maxSize := c.encoder.MaxEncodedSize(len(data))
compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
if len(data) < len(compressed) {