From c80b46fad351e4860c743614d32fa5e81ca8ecce Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 31 Oct 2023 14:45:22 +0300 Subject: [PATCH] [#754] blobstor: Estimate compressability Now it is possible to enable compressability estimation. If data is likely uncompressable, it should reduce CPU time and memory. Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-node/config.go | 9 +++- cmd/frostfs-node/config/cast.go | 12 +++++ cmd/frostfs-node/config/engine/config_test.go | 2 + .../config/engine/shard/config.go | 31 +++++++++++- config/example/node.env | 2 + config/example/node.json | 2 + config/example/node.yaml | 2 + docs/storage-node-configuration.md | 20 ++++---- pkg/local_object_storage/blobstor/blobstor.go | 21 ++++++++ .../blobstor/compression/bench_test.go | 49 +++++++++++++++++++ .../blobstor/compression/compress.go | 15 ++++++ 11 files changed, 153 insertions(+), 12 deletions(-) diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index 8a7317231..6580de157 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -105,7 +105,10 @@ type applicationConfiguration struct { } type shardCfg struct { - compress bool + compress bool + estimateCompressibility bool + estimateCompressibilityThreshold float64 + smallSizeObjectLimit uint64 uncompressableContentType []string refillMetabase bool @@ -217,6 +220,8 @@ func (a *applicationConfiguration) updateShardConfig(c *config.Config, oldConfig newConfig.refillMetabase = oldConfig.RefillMetabase() newConfig.mode = oldConfig.Mode() newConfig.compress = oldConfig.Compress() + newConfig.estimateCompressibility = oldConfig.EstimateCompressibility() + newConfig.estimateCompressibilityThreshold = oldConfig.EstimateCompressibilityThreshold() newConfig.uncompressableContentType = oldConfig.UncompressableContentTypes() newConfig.smallSizeObjectLimit = oldConfig.SmallSizeLimit() @@ -830,6 +835,8 @@ func (c *cfg) getShardOpts(shCfg shardCfg) shardOptsWithID { blobstoreOpts := 
[]blobstor.Option{ blobstor.WithCompressObjects(shCfg.compress), blobstor.WithUncompressableContentTypes(shCfg.uncompressableContentType), + blobstor.WithCompressibilityEstimate(shCfg.estimateCompressibility), + blobstor.WithCompressibilityEstimateThreshold(shCfg.estimateCompressibilityThreshold), blobstor.WithStorages(ss), blobstor.WithLogger(c.log), } diff --git a/cmd/frostfs-node/config/cast.go b/cmd/frostfs-node/config/cast.go index 9036c3ab0..c99d33569 100644 --- a/cmd/frostfs-node/config/cast.go +++ b/cmd/frostfs-node/config/cast.go @@ -223,3 +223,15 @@ func parseSizeInBytes(sizeStr string) uint64 { size := cast.ToFloat64(sizeStr) return safeMul(size, multiplier) } + +// FloatOrDefault reads a configuration value +// from c by name and casts it to float64. +// +// Returns defaultValue if the value can not be casted. +func FloatOrDefault(c *Config, name string, defaultValue float64) float64 { + v, err := cast.ToFloat64E(c.Value(name)) + if err != nil { + return defaultValue + } + return v +} diff --git a/cmd/frostfs-node/config/engine/config_test.go b/cmd/frostfs-node/config/engine/config_test.go index 4077b1744..6b7c268ce 100644 --- a/cmd/frostfs-node/config/engine/config_test.go +++ b/cmd/frostfs-node/config/engine/config_test.go @@ -84,6 +84,8 @@ func TestEngineSection(t *testing.T) { require.Equal(t, true, sc.Compress()) require.Equal(t, []string{"audio/*", "video/*"}, sc.UncompressableContentTypes()) + require.Equal(t, true, sc.EstimateCompressibility()) + require.Equal(t, float64(0.7), sc.EstimateCompressibilityThreshold()) require.EqualValues(t, 102400, sc.SmallSizeLimit()) require.Equal(t, 2, len(ss)) diff --git a/cmd/frostfs-node/config/engine/shard/config.go b/cmd/frostfs-node/config/engine/shard/config.go index 1dc32fb86..16100c3a7 100644 --- a/cmd/frostfs-node/config/engine/shard/config.go +++ b/cmd/frostfs-node/config/engine/shard/config.go @@ -16,8 +16,11 @@ import ( // which provides access to Shard configurations. 
type Config config.Config -// SmallSizeLimitDefault is a default limit of small objects payload in bytes. -const SmallSizeLimitDefault = 1 << 20 +const ( + // SmallSizeLimitDefault is a default limit of small objects payload in bytes. + SmallSizeLimitDefault = 1 << 20 + EstimateCompressibilityThresholdDefault = 0.1 +) // From wraps config section into Config. func From(c *config.Config) *Config { @@ -43,6 +46,30 @@ func (x *Config) UncompressableContentTypes() []string { "compression_exclude_content_types") } +// EstimateCompressibility returns the value of "compression_estimate_compressibility" config parameter. +// +// Returns false if the value is not a valid bool. +func (x *Config) EstimateCompressibility() bool { + return config.BoolSafe( + (*config.Config)(x), + "compression_estimate_compressibility", + ) +} + +// EstimateCompressibilityThreshold returns the value of "compression_estimate_compressibility_threshold" config parameter. +// +// Returns EstimateCompressibilityThresholdDefault if the value is not defined, not a valid float or not in range [0.0; 1.0]. +func (x *Config) EstimateCompressibilityThreshold() float64 { + v := config.FloatOrDefault( + (*config.Config)(x), + "compression_estimate_compressibility_threshold", + EstimateCompressibilityThresholdDefault) + if v < 0.0 || v > 1.0 { + return EstimateCompressibilityThresholdDefault + } + return v +} + // SmallSizeLimit returns the value of "small_object_size" config parameter. // // Returns SmallSizeLimitDefault if the value is not a positive number.
diff --git a/config/example/node.env b/config/example/node.env index fde65173b..dda740cf1 100644 --- a/config/example/node.env +++ b/config/example/node.env @@ -113,6 +113,8 @@ FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_DELAY=10ms ### Blobstor config FROSTFS_STORAGE_SHARD_0_COMPRESS=true FROSTFS_STORAGE_SHARD_0_COMPRESSION_EXCLUDE_CONTENT_TYPES="audio/* video/*" +FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY=true +FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY_THRESHOLD=0.7 FROSTFS_STORAGE_SHARD_0_SMALL_OBJECT_SIZE=102400 ### Blobovnicza config FROSTFS_STORAGE_SHARD_0_BLOBSTOR_0_PATH=tmp/0/blob/blobovnicza diff --git a/config/example/node.json b/config/example/node.json index e8455ee55..1038d5e5c 100644 --- a/config/example/node.json +++ b/config/example/node.json @@ -160,6 +160,8 @@ "compression_exclude_content_types": [ "audio/*", "video/*" ], + "compression_estimate_compressibility": true, + "compression_estimate_compressibility_threshold": 0.7, "small_object_size": 102400, "blobstor": [ { diff --git a/config/example/node.yaml b/config/example/node.yaml index 2ca1b426c..8b2046e95 100644 --- a/config/example/node.yaml +++ b/config/example/node.yaml @@ -178,6 +178,8 @@ storage: compression_exclude_content_types: - audio/* - video/* + compression_estimate_compressibility: true + compression_estimate_compressibility_threshold: 0.7 blobstor: - type: blobovnicza diff --git a/docs/storage-node-configuration.md b/docs/storage-node-configuration.md index 2e2d04088..5e9f3caf7 100644 --- a/docs/storage-node-configuration.md +++ b/docs/storage-node-configuration.md @@ -179,15 +179,17 @@ The following table describes configuration for each shard. 
| Parameter | Type | Default value | Description | |-------------------------------------|---------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `compress` | `bool` | `false` | Flag to enable compression. | -| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). | -| `mode` | `string` | `read-write` | Shard Mode.
Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` | -| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. | -| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. | -| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. | -| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. | -| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. | -| `gc` | [GC config](#gc-subsection) | | GC configuration. | +| `compress` | `bool` | `false` | Flag to enable compression. | +| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). | +| `compression_estimate_compressibility` | `bool` | `false` | If `true`, then a normalized compressibility estimate is used to decide whether to compress data or not. | +| `compression_estimate_compressibility_threshold` | `float` | `0.1` | Normalized compressibility estimate threshold: data will be compressed if the estimate is greater than or equal to this value. | +| `mode` | `string` | `read-write` | Shard Mode.
Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` | +| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. | +| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. | +| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. | +| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. | +| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. | +| `gc` | [GC config](#gc-subsection) | | GC configuration. | ### `blobstor` subsection diff --git a/pkg/local_object_storage/blobstor/blobstor.go b/pkg/local_object_storage/blobstor/blobstor.go index d2a2338a3..bc9ab2b99 100644 --- a/pkg/local_object_storage/blobstor/blobstor.go +++ b/pkg/local_object_storage/blobstor/blobstor.go @@ -107,6 +107,27 @@ func WithCompressObjects(comp bool) Option { } } +// WithCompressibilityEstimate returns an option to use +// normalized compressibility estimate to decide whether to compress +// data or not. +// +// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5 +func WithCompressibilityEstimate(v bool) Option { + return func(c *cfg) { + c.compression.UseCompressEstimation = v + } +} + +// WithCompressibilityEstimateThreshold returns an option to set +// normalized compressibility estimate threshold. +// +// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5 +func WithCompressibilityEstimateThreshold(threshold float64) Option { + return func(c *cfg) { + c.compression.CompressEstimationThreshold = threshold + } +} + // WithUncompressableContentTypes returns option to disable decompression // for specific content types as seen by object.AttributeContentType attribute.
func WithUncompressableContentTypes(values []string) Option { diff --git a/pkg/local_object_storage/blobstor/compression/bench_test.go b/pkg/local_object_storage/blobstor/compression/bench_test.go index 6e05366cf..986912985 100644 --- a/pkg/local_object_storage/blobstor/compression/bench_test.go +++ b/pkg/local_object_storage/blobstor/compression/bench_test.go @@ -3,8 +3,10 @@ package compression import ( "crypto/rand" "fmt" + "log" "testing" + "github.com/klauspost/compress" "github.com/stretchr/testify/require" ) @@ -47,3 +49,50 @@ func notSoRandomSlice(size, blockSize int) []byte { } return data } + +func BenchmarkCompressionRealVSEstimate(b *testing.B) { + var total float64 // to prevent from compiler optimizations + maxSize := 60 * 1024 * 1024 + b.Run("estimate", func(b *testing.B) { + b.ResetTimer() + + c := &Config{ + Enabled: true, + } + require.NoError(b, c.Init()) + + for size := 1024; size <= maxSize; size *= 2 { + data := make([]byte, size) + _, err := rand.Reader.Read(data) + require.NoError(b, err) + + b.StartTimer() + estimation := compress.Estimate(data) + total += estimation + b.StopTimer() + } + }) + + b.Run("compress", func(b *testing.B) { + b.ResetTimer() + + c := &Config{ + Enabled: true, + } + require.NoError(b, c.Init()) + + for size := 1024; size <= maxSize; size *= 2 { + data := make([]byte, size) + _, err := rand.Reader.Read(data) + require.NoError(b, err) + + b.StartTimer() + maxSize := c.encoder.MaxEncodedSize(len(data)) + compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize)) + total += float64(len(compressed)) / float64(len(data)) + b.StopTimer() + } + }) + + log.Println(total) +} diff --git a/pkg/local_object_storage/blobstor/compression/compress.go b/pkg/local_object_storage/blobstor/compression/compress.go index 270c2b18d..85ab47692 100644 --- a/pkg/local_object_storage/blobstor/compression/compress.go +++ b/pkg/local_object_storage/blobstor/compression/compress.go @@ -5,6 +5,7 @@ import ( "strings" objectSDK 
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + "github.com/klauspost/compress" "github.com/klauspost/compress/zstd" ) @@ -13,6 +14,9 @@ type Config struct { Enabled bool UncompressableContentTypes []string + UseCompressEstimation bool + CompressEstimationThreshold float64 + encoder *zstd.Encoder decoder *zstd.Decoder } @@ -82,6 +86,17 @@ func (c *Config) Compress(data []byte) []byte { if c == nil || !c.Enabled { return data } + if c.UseCompressEstimation { + estimated := compress.Estimate(data) + if estimated >= c.CompressEstimationThreshold { + return c.compress(data) + } + return data + } + return c.compress(data) +} + +func (c *Config) compress(data []byte) []byte { maxSize := c.encoder.MaxEncodedSize(len(data)) compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize)) if len(data) < len(compressed) {