[#754] blobstor: Estimate compressibility

Compressibility estimation can now be enabled.
When data is likely incompressible, compression is skipped, which should reduce CPU time and memory usage.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2023-10-31 14:45:22 +03:00
parent 05b508f79a
commit c80b46fad3
11 changed files with 153 additions and 12 deletions

View file

@ -3,8 +3,10 @@ package compression
import (
"crypto/rand"
"fmt"
"log"
"testing"
"github.com/klauspost/compress"
"github.com/stretchr/testify/require"
)
@ -47,3 +49,50 @@ func notSoRandomSlice(size, blockSize int) []byte {
}
return data
}
func BenchmarkCompressionRealVSEstimate(b *testing.B) {
var total float64 // to prevent from compiler optimizations
maxSize := 60 * 1024 * 1024
b.Run("estimate", func(b *testing.B) {
b.ResetTimer()
c := &Config{
Enabled: true,
}
require.NoError(b, c.Init())
for size := 1024; size <= maxSize; size *= 2 {
data := make([]byte, size)
_, err := rand.Reader.Read(data)
require.NoError(b, err)
b.StartTimer()
estimation := compress.Estimate(data)
total += estimation
b.StopTimer()
}
})
b.Run("compress", func(b *testing.B) {
b.ResetTimer()
c := &Config{
Enabled: true,
}
require.NoError(b, c.Init())
for size := 1024; size <= maxSize; size *= 2 {
data := make([]byte, size)
_, err := rand.Reader.Read(data)
require.NoError(b, err)
b.StartTimer()
maxSize := c.encoder.MaxEncodedSize(len(data))
compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
total += float64(len(compressed)) / float64(len(data))
b.StopTimer()
}
})
log.Println(total)
}

View file

@ -5,6 +5,7 @@ import (
"strings"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
"github.com/klauspost/compress"
"github.com/klauspost/compress/zstd"
)
@ -13,6 +14,9 @@ type Config struct {
// Enabled turns compression on; Compress returns its input unchanged when it is false.
Enabled bool
// NOTE(review): presumably content types that should be stored without
// compression — the usage is not visible in this hunk, confirm.
UncompressableContentTypes []string
// UseCompressEstimation makes Compress call compress.Estimate on the data
// before deciding whether to compress it.
UseCompressEstimation bool
// CompressEstimationThreshold is the minimum compress.Estimate result at which
// Compress still compresses; data scoring below it is returned as is.
CompressEstimationThreshold float64
// encoder is the zstd encoder used to compress data.
encoder *zstd.Encoder
// NOTE(review): presumably the zstd decoder for the reverse operation — the
// usage is not visible in this hunk, confirm.
decoder *zstd.Decoder
}
@ -82,6 +86,17 @@ func (c *Config) Compress(data []byte) []byte {
// A nil or disabled config means no compression: hand the input back as is.
if c == nil || !c.Enabled {
return data
}
if c.UseCompressEstimation {
// Estimate compressibility first and only run the encoder when the
// estimate reaches the configured threshold; otherwise the data is
// returned unchanged, skipping the compression work.
estimated := compress.Estimate(data)
if estimated >= c.CompressEstimationThreshold {
return c.compress(data)
}
return data
}
// Estimation is off: always attempt compression.
return c.compress(data)
}
func (c *Config) compress(data []byte) []byte {
maxSize := c.encoder.MaxEncodedSize(len(data))
compressed := c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
if len(data) < len(compressed) {