forked from TrueCloudLab/frostfs-node
[#754] blobstor: Estimate compressibility
Now it is possible to enable compressibility estimation. If data is likely incompressible, skipping compression should reduce CPU time and memory usage. Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
parent
05b508f79a
commit
c80b46fad3
11 changed files with 153 additions and 12 deletions
|
@ -105,7 +105,10 @@ type applicationConfiguration struct {
|
|||
}
|
||||
|
||||
type shardCfg struct {
|
||||
compress bool
|
||||
compress bool
|
||||
estimateCompressibility bool
|
||||
estimateCompressibilityThreshold float64
|
||||
|
||||
smallSizeObjectLimit uint64
|
||||
uncompressableContentType []string
|
||||
refillMetabase bool
|
||||
|
@ -217,6 +220,8 @@ func (a *applicationConfiguration) updateShardConfig(c *config.Config, oldConfig
|
|||
newConfig.refillMetabase = oldConfig.RefillMetabase()
|
||||
newConfig.mode = oldConfig.Mode()
|
||||
newConfig.compress = oldConfig.Compress()
|
||||
newConfig.estimateCompressibility = oldConfig.EstimateCompressibility()
|
||||
newConfig.estimateCompressibilityThreshold = oldConfig.EstimateCompressibilityThreshold()
|
||||
newConfig.uncompressableContentType = oldConfig.UncompressableContentTypes()
|
||||
newConfig.smallSizeObjectLimit = oldConfig.SmallSizeLimit()
|
||||
|
||||
|
@ -830,6 +835,8 @@ func (c *cfg) getShardOpts(shCfg shardCfg) shardOptsWithID {
|
|||
blobstoreOpts := []blobstor.Option{
|
||||
blobstor.WithCompressObjects(shCfg.compress),
|
||||
blobstor.WithUncompressableContentTypes(shCfg.uncompressableContentType),
|
||||
blobstor.WithCompressibilityEstimate(shCfg.estimateCompressibility),
|
||||
blobstor.WithCompressibilityEstimateThreshold(shCfg.estimateCompressibilityThreshold),
|
||||
blobstor.WithStorages(ss),
|
||||
blobstor.WithLogger(c.log),
|
||||
}
|
||||
|
|
|
@ -223,3 +223,15 @@ func parseSizeInBytes(sizeStr string) uint64 {
|
|||
size := cast.ToFloat64(sizeStr)
|
||||
return safeMul(size, multiplier)
|
||||
}
|
||||
|
||||
// FloatOrDefault reads a configuration value
|
||||
// from c by name and casts it to float64.
|
||||
//
|
||||
// Returns defaultValue if the value can not be casted.
|
||||
func FloatOrDefault(c *Config, name string, defaultValue float64) float64 {
|
||||
v, err := cast.ToFloat64E(c.Value(name))
|
||||
if err != nil {
|
||||
return defaultValue
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
|
|
@ -84,6 +84,8 @@ func TestEngineSection(t *testing.T) {
|
|||
|
||||
require.Equal(t, true, sc.Compress())
|
||||
require.Equal(t, []string{"audio/*", "video/*"}, sc.UncompressableContentTypes())
|
||||
require.Equal(t, true, sc.EstimateCompressibility())
|
||||
require.Equal(t, float64(0.7), sc.EstimateCompressibilityThreshold())
|
||||
require.EqualValues(t, 102400, sc.SmallSizeLimit())
|
||||
|
||||
require.Equal(t, 2, len(ss))
|
||||
|
|
|
@ -16,8 +16,11 @@ import (
|
|||
// which provides access to Shard configurations.
|
||||
type Config config.Config
|
||||
|
||||
// SmallSizeLimitDefault is a default limit of small objects payload in bytes.
|
||||
const SmallSizeLimitDefault = 1 << 20
|
||||
const (
|
||||
// SmallSizeLimitDefault is a default limit of small objects payload in bytes.
|
||||
SmallSizeLimitDefault = 1 << 20
|
||||
// EstimateCompressibilityThresholdDefault is the compressibility estimate
// threshold returned by EstimateCompressibilityThreshold when the configured
// value can not be used.
EstimateCompressibilityThresholdDefault = 0.1
|
||||
)
|
||||
|
||||
// From wraps config section into Config.
|
||||
func From(c *config.Config) *Config {
|
||||
|
@ -43,6 +46,30 @@ func (x *Config) UncompressableContentTypes() []string {
|
|||
"compression_exclude_content_types")
|
||||
}
|
||||
|
||||
// EstimateCompressibility returns the value of "estimate_compressibility" config parameter.
|
||||
//
|
||||
// Returns false if the value is not a valid bool.
|
||||
func (x *Config) EstimateCompressibility() bool {
|
||||
return config.BoolSafe(
|
||||
(*config.Config)(x),
|
||||
"compression_estimate_compressibility",
|
||||
)
|
||||
}
|
||||
|
||||
// EstimateCompressibilityThreshold returns the value of "estimate_compressibility_threshold" config parameter.
|
||||
//
|
||||
// Returns EstimateCompressibilityThresholdDefault if the value is not defined, not valid float or not in range [0.0; 1.0].
|
||||
func (x *Config) EstimateCompressibilityThreshold() float64 {
|
||||
v := config.FloatOrDefault(
|
||||
(*config.Config)(x),
|
||||
"compression_estimate_compressibility_threshold",
|
||||
EstimateCompressibilityThresholdDefault)
|
||||
if v < 0.0 || v > 1.0 {
|
||||
return EstimateCompressibilityThresholdDefault
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// SmallSizeLimit returns the value of "small_object_size" config parameter.
|
||||
//
|
||||
// Returns SmallSizeLimitDefault if the value is not a positive number.
|
||||
|
|
|
@ -113,6 +113,8 @@ FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_DELAY=10ms
|
|||
### Blobstor config
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESS=true
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESSION_EXCLUDE_CONTENT_TYPES="audio/* video/*"
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY=true
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESSION_ESTIMATE_COMPRESSIBILITY_THRESHOLD=0.7
|
||||
FROSTFS_STORAGE_SHARD_0_SMALL_OBJECT_SIZE=102400
|
||||
### Blobovnicza config
|
||||
FROSTFS_STORAGE_SHARD_0_BLOBSTOR_0_PATH=tmp/0/blob/blobovnicza
|
||||
|
|
|
@ -160,6 +160,8 @@
|
|||
"compression_exclude_content_types": [
|
||||
"audio/*", "video/*"
|
||||
],
|
||||
"compression_estimate_compressibility": true,
|
||||
"compression_estimate_compressibility_threshold": 0.7,
|
||||
"small_object_size": 102400,
|
||||
"blobstor": [
|
||||
{
|
||||
|
|
|
@ -178,6 +178,8 @@ storage:
|
|||
compression_exclude_content_types:
|
||||
- audio/*
|
||||
- video/*
|
||||
compression_estimate_compressibility: true
|
||||
compression_estimate_compressibility_threshold: 0.7
|
||||
|
||||
blobstor:
|
||||
- type: blobovnicza
|
||||
|
|
|
@ -179,15 +179,17 @@ The following table describes configuration for each shard.
|
|||
|
||||
| Parameter | Type | Default value | Description |
|
||||
|-------------------------------------|---------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `compress` | `bool` | `false` | Flag to enable compression. |
|
||||
| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). |
|
||||
| `mode` | `string` | `read-write` | Shard Mode.<br/>Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` |
|
||||
| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. |
|
||||
| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. |
|
||||
| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. |
|
||||
| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. |
|
||||
| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. |
|
||||
| `gc` | [GC config](#gc-subsection) | | GC configuration. |
|
||||
| `compress` | `bool` | `false` | Flag to enable compression. |
|
||||
| `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). |
|
||||
| `compression_estimate_compressibility` | `bool` | `false` | If `true`, a normalized compressibility estimate is used to decide whether to compress data. |
|
||||
| `compression_estimate_compressibility_threshold` | `float` | `0.1` | Normalized compressibility estimate threshold: data is compressed if the estimate is greater than or equal to this value. |
|
||||
| `mode` | `string` | `read-write` | Shard Mode.<br/>Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` |
|
||||
| `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. |
|
||||
| `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. |
|
||||
| `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. |
|
||||
| `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. |
|
||||
| `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. |
|
||||
| `gc` | [GC config](#gc-subsection) | | GC configuration. |
|
||||
|
||||
### `blobstor` subsection
|
||||
|
||||
|
|
|
@ -107,6 +107,27 @@ func WithCompressObjects(comp bool) Option {
|
|||
}
|
||||
}
|
||||
|
||||
// WithCompressibilityEstimate returns an option to use
|
||||
// normilized compressibility estimate to decide compress
|
||||
// data or not.
|
||||
//
|
||||
// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5
|
||||
func WithCompressibilityEstimate(v bool) Option {
|
||||
return func(c *cfg) {
|
||||
c.compression.UseCompressEstimation = v
|
||||
}
|
||||
}
|
||||
|
||||
// WithCompressibilityEstimateThreshold returns an option to set
|
||||
// normilized compressibility estimate threshold.
|
||||
//
|
||||
// See https://github.com/klauspost/compress/blob/v1.17.2/compressible.go#L5
|
||||
func WithCompressibilityEstimateThreshold(threshold float64) Option {
|
||||
return func(c *cfg) {
|
||||
c.compression.CompressEstimationThreshold = threshold
|
||||
}
|
||||
}
|
||||
|
||||
// WithUncompressableContentTypes returns option to disable compression
|
||||
// for specific content types as seen by object.AttributeContentType attribute.
|
||||
func WithUncompressableContentTypes(values []string) Option {
|
||||
|
|
|
@ -3,8 +3,10 @@ package compression
|
|||
import (
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"log"
|
||||
"testing"
|
||||
|
||||
"github.com/klauspost/compress"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
|
@ -47,3 +49,50 @@ func notSoRandomSlice(size, blockSize int) []byte {
|
|||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func BenchmarkCompressionRealVSEstimate(b *testing.B) {
|
||||
var total float64 // to prevent from compiler optimizations
|
||||
maxSize := 60 * 1024 * 1024
|
||||
b.Run("estimate", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
|
||||
c := &Config{
|
||||
Enabled: true,
|
||||
}
|
||||
require.NoError(b, c.Init())
|
||||
|
||||
for size := 1024; size <= maxSize; size *= 2 {
|
||||
data := make([]byte, size)
|
||||
_, err := rand.Reader.Read(data)
|
||||
require.NoError(b, err)
|
||||
|
||||
b.StartTimer()
|
||||
estimation := compress.Estimate(data)
|
||||
total += estimation
|
||||
b.StopTimer()
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("compress", func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
|
||||
c := &Config{
|
||||
Enabled: true,
|
||||
}
|
||||
require.NoError(b, c.Init())
|
||||
|
||||
for size := 1024; size <= maxSize; size *= 2 {
|
||||
data := make([]byte, size)
|
||||
_, err := rand.Reader.Read(data)
|
||||
require.NoError(b, err)
|
||||
|
||||
b.StartTimer()
|
||||
maxSize := c.encoder.MaxEncodedSize(len(data))
|
||||
compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
|
||||
total += float64(len(compressed)) / float64(len(data))
|
||||
b.StopTimer()
|
||||
}
|
||||
})
|
||||
|
||||
log.Println(total)
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"strings"
|
||||
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
"github.com/klauspost/compress"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
|
@ -13,6 +14,9 @@ type Config struct {
|
|||
Enabled bool
|
||||
UncompressableContentTypes []string
|
||||
|
||||
UseCompressEstimation bool
|
||||
CompressEstimationThreshold float64
|
||||
|
||||
encoder *zstd.Encoder
|
||||
decoder *zstd.Decoder
|
||||
}
|
||||
|
@ -82,6 +86,17 @@ func (c *Config) Compress(data []byte) []byte {
|
|||
if c == nil || !c.Enabled {
|
||||
return data
|
||||
}
|
||||
if c.UseCompressEstimation {
|
||||
estimated := compress.Estimate(data)
|
||||
if estimated >= c.CompressEstimationThreshold {
|
||||
return c.compress(data)
|
||||
}
|
||||
return data
|
||||
}
|
||||
return c.compress(data)
|
||||
}
|
||||
|
||||
func (c *Config) compress(data []byte) []byte {
|
||||
maxSize := c.encoder.MaxEncodedSize(len(data))
|
||||
compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
|
||||
if len(data) < len(compressed) {
|
||||
|
|
Loading…
Reference in a new issue