frostfs-node/pkg/local_object_storage/blobstor/compression/compress.go
Evgenii Stratonikov 0272218eb9 [#2184] compression: Properly calculate upper bound
If the data is not compressible allocating `len(data)` will lead to a
slice reallocation. For a compressible data the results for small size
are flaky and we allocate a bit more. However, it feels right to use a
provided function if we need to pick any size at all.

```
name                                                           old time/op    new time/op    delta
Compression/size=128/zeroed_slice-8                              2.23µs ±12%    2.06µs ± 6%   -7.35%  (p=0.009 n=10+10)
Compression/size=128/not_so_random_slice_(block_=_123)-8         19.0µs ±10%    15.8µs ±16%  -17.09%  (p=0.000 n=9+10)
Compression/size=128/random_slice-8                              17.6µs ±15%    16.1µs ±16%     ~     (p=0.075 n=10+10)
Compression/size=1024/zeroed_slice-8                             3.05µs ±11%    2.84µs ±10%     ~     (p=0.089 n=10+10)
Compression/size=1024/not_so_random_slice_(block_=_123)-8        18.1µs ± 6%    18.2µs ±12%     ~     (p=0.971 n=10+10)
Compression/size=1024/random_slice-8                             48.6µs ± 6%    45.6µs ± 5%   -6.07%  (p=0.006 n=10+9)
Compression/size=32768/zeroed_slice-8                            26.8µs ± 3%    28.7µs ± 8%   +7.23%  (p=0.001 n=10+10)
Compression/size=32768/not_so_random_slice_(block_=_123)-8       44.3µs ± 8%    43.7µs ±13%     ~     (p=0.762 n=8+10)
Compression/size=32768/random_slice-8                            97.3µs ±32%    68.9µs ±15%  -29.13%  (p=0.000 n=10+10)
Compression/size=33554432/zeroed_slice-8                         29.8ms ± 9%    30.3ms ±17%     ~     (p=1.000 n=9+9)
Compression/size=33554432/not_so_random_slice_(block_=_123)-8    33.1ms ±14%    30.3ms ±11%   -8.61%  (p=0.043 n=10+10)
Compression/size=33554432/random_slice-8                         41.7ms ± 3%    30.1ms ± 8%  -27.72%  (p=0.000 n=9+10)

name                                                           old alloc/op   new alloc/op   delta
Compression/size=128/zeroed_slice-8                                128B ± 0%      144B ± 0%  +12.50%  (p=0.000 n=10+10)
Compression/size=128/not_so_random_slice_(block_=_123)-8           384B ± 0%      144B ± 0%  -62.50%  (p=0.000 n=10+10)
Compression/size=128/random_slice-8                                384B ± 0%      144B ± 0%  -62.50%  (p=0.000 n=10+10)
Compression/size=1024/zeroed_slice-8                             1.02kB ± 0%    1.15kB ± 0%  +12.50%  (p=0.000 n=10+10)
Compression/size=1024/not_so_random_slice_(block_=_123)-8        1.02kB ± 0%    1.15kB ± 0%  +12.50%  (p=0.000 n=10+10)
Compression/size=1024/random_slice-8                             2.56kB ± 0%    1.15kB ± 0%  -55.00%  (p=0.000 n=10+10)
Compression/size=32768/zeroed_slice-8                            32.8kB ± 0%    41.0kB ± 0%  +25.00%  (p=0.000 n=10+10)
Compression/size=32768/not_so_random_slice_(block_=_123)-8       32.8kB ± 0%    41.0kB ± 0%  +25.00%  (p=0.000 n=10+10)
Compression/size=32768/random_slice-8                            81.9kB ± 0%    41.0kB ± 0%  -50.00%  (p=0.000 n=10+10)
Compression/size=33554432/zeroed_slice-8                         33.6MB ± 0%    33.6MB ± 0%   +0.02%  (p=0.000 n=9+9)
Compression/size=33554432/not_so_random_slice_(block_=_123)-8    33.6MB ± 0%    33.6MB ± 0%   +0.02%  (p=0.000 n=8+10)
Compression/size=33554432/random_slice-8                         75.5MB ± 0%    33.6MB ± 0%  -55.55%  (p=0.000 n=10+10)

name                                                           old allocs/op  new allocs/op  delta
Compression/size=128/zeroed_slice-8                                1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=128/not_so_random_slice_(block_=_123)-8           2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Compression/size=128/random_slice-8                                2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Compression/size=1024/zeroed_slice-8                               1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=1024/not_so_random_slice_(block_=_123)-8          1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=1024/random_slice-8                               2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Compression/size=32768/zeroed_slice-8                              1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=32768/not_so_random_slice_(block_=_123)-8         1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=32768/random_slice-8                              2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Compression/size=33554432/zeroed_slice-8                           1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=33554432/not_so_random_slice_(block_=_123)-8      1.00 ± 0%      1.00 ± 0%     ~     (all equal)
Compression/size=33554432/random_slice-8                           2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2022-12-30 11:07:35 +03:00

103 lines
2.5 KiB
Go

package compression
import (
"bytes"
"strings"
objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
"github.com/klauspost/compress/zstd"
)
// Config represents common compression-related configuration.
type Config struct {
Enabled bool
UncompressableContentTypes []string
encoder *zstd.Encoder
decoder *zstd.Decoder
}
// zstdFrameMagic contains first 4 bytes of any compressed object
// https://github.com/klauspost/compress/blob/master/zstd/framedec.go#L58 .
var zstdFrameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
// Init initializes compression routines.
func (c *Config) Init() error {
var err error
if c.Enabled {
c.encoder, err = zstd.NewWriter(nil)
if err != nil {
return err
}
}
c.decoder, err = zstd.NewReader(nil)
if err != nil {
return err
}
return nil
}
// NeedsCompression returns true if the object should be compressed.
// For an object to be compressed 2 conditions must hold:
// 1. Compression is enabled in settings.
// 2. Object MIME Content-Type is allowed for compression.
func (c *Config) NeedsCompression(obj *objectSDK.Object) bool {
if !c.Enabled || len(c.UncompressableContentTypes) == 0 {
return c.Enabled
}
for _, attr := range obj.Attributes() {
if attr.Key() == objectSDK.AttributeContentType {
for _, value := range c.UncompressableContentTypes {
match := false
switch {
case len(value) > 0 && value[len(value)-1] == '*':
match = strings.HasPrefix(attr.Value(), value[:len(value)-1])
case len(value) > 0 && value[0] == '*':
match = strings.HasSuffix(attr.Value(), value[1:])
default:
match = attr.Value() == value
}
if match {
return false
}
}
}
}
return c.Enabled
}
// Decompress decompresses data if it starts with the magic
// and returns data untouched otherwise.
func (c *Config) Decompress(data []byte) ([]byte, error) {
if len(data) < 4 || !bytes.Equal(data[:4], zstdFrameMagic) {
return data, nil
}
return c.decoder.DecodeAll(data, nil)
}
// Compress compresses data if compression is enabled
// and returns data untouched otherwise.
func (c *Config) Compress(data []byte) []byte {
if c == nil || !c.Enabled {
return data
}
maxSize := c.encoder.MaxEncodedSize(len(data))
return c.encoder.EncodeAll(data, make([]byte, 0, maxSize))
}
// Close closes encoder and decoder, returns any error occurred.
func (c *Config) Close() error {
var err error
if c.encoder != nil {
err = c.encoder.Close()
}
if c.decoder != nil {
c.decoder.Close()
}
return err
}