[#188] transformer: Allow to provide size hint

For big objects with known size we can optimize allocation patterns
by providing size hint. As with any hint, it does not affect transformer
functionality: slices with capacity > MaxSize are never allocated.

```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
                                                │     out     │
                                                │   sec/op    │
Transformer/small/no_size_hint-8                  65.44µ ± 3%
Transformer/small/no_size_hint,_with_buffer-8     64.24µ ± 5%
Transformer/small/with_size_hint,_with_buffer-8   58.70µ ± 5%
Transformer/big/no_size_hint-8                    367.8m ± 3%
Transformer/big/no_size_hint,_with_buffer-8       562.7m ± 0%
Transformer/big/with_size_hint,_with_buffer-8     385.6m ± 7%
geomean                                           5.197m

                                                │     out      │
                                                │     B/op     │
Transformer/small/no_size_hint-8                  13.40Ki ± 0%
Transformer/small/no_size_hint,_with_buffer-8     13.40Ki ± 0%
Transformer/small/with_size_hint,_with_buffer-8   13.39Ki ± 0%
Transformer/big/no_size_hint-8                    288.0Mi ± 0%
Transformer/big/no_size_hint,_with_buffer-8       1.390Gi ± 0%
Transformer/big/with_size_hint,_with_buffer-8     288.0Mi ± 0%
geomean                                           2.533Mi

                                                │    out     │
                                                │ allocs/op  │
Transformer/small/no_size_hint-8                  92.00 ± 0%
Transformer/small/no_size_hint,_with_buffer-8     92.00 ± 0%
Transformer/small/with_size_hint,_with_buffer-8   92.00 ± 0%
Transformer/big/no_size_hint-8                    546.5 ± 0%
Transformer/big/no_size_hint,_with_buffer-8       607.5 ± 0%
Transformer/big/with_size_hint,_with_buffer-8     545.5 ± 0%
geomean                                           228.1
```

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
Evgenii Stratonikov 2023-10-26 18:13:54 +03:00 committed by Evgenii Stratonikov
parent a02c0bfac8
commit 665e5807bc
4 changed files with 134 additions and 11 deletions

View file

@ -18,8 +18,8 @@ func TestChannelTarget(t *testing.T) {
tt := new(testTarget) tt := new(testTarget)
ct := NewChannelTarget(ch) ct := NewChannelTarget(ch)
chTarget, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return ct }) chTarget, _ := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return ct })
testTarget, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return tt }) testTarget, _ := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return tt })
ver := version.Current() ver := version.Current()
cnr := cidtest.ID() cnr := cidtest.ID()

View file

@ -0,0 +1,72 @@
package transformer
import (
"context"
"crypto/rand"
"math"
"testing"
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user"
"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
"github.com/stretchr/testify/require"
)
func TestTransformerSizeHintCorrectness(t *testing.T) {
const (
maxSize = 100
payloadSize = maxSize*2 + maxSize/2
)
pk, err := keys.NewPrivateKey()
require.NoError(t, err)
p := Params{
Key: &pk.PrivateKey,
NetworkState: dummyEpochSource(123),
MaxSize: maxSize,
WithoutHomomorphicHash: true,
}
cnr := cidtest.ID()
hdr := newObject(cnr)
var owner user.ID
user.IDFromKey(&owner, pk.PrivateKey.PublicKey)
hdr.SetOwnerID(&owner)
expected := make([]byte, payloadSize)
_, _ = rand.Read(expected)
t.Run("default", func(t *testing.T) {
p.SizeHint = 0
testPayloadEqual(t, p, hdr, expected)
})
t.Run("size hint is perfect", func(t *testing.T) {
p.SizeHint = payloadSize
testPayloadEqual(t, p, hdr, expected)
})
t.Run("size hint < payload size", func(t *testing.T) {
p.SizeHint = payloadSize / 2
testPayloadEqual(t, p, hdr, expected)
})
t.Run("size hint > payload size", func(t *testing.T) {
p.SizeHint = math.MaxUint64
testPayloadEqual(t, p, hdr, expected)
})
}
func testPayloadEqual(t *testing.T, p Params, hdr *objectSDK.Object, expected []byte) {
tt := new(testTarget)
p.NextTargetInit = func() ObjectWriter { return tt }
target := NewPayloadSizeLimiter(p)
writeObject(t, context.Background(), target, hdr, expected)
var actual []byte
for i := range tt.objects {
actual = append(actual, tt.objects[i].Payload()...)
}
require.Equal(t, expected, actual)
}

View file

@ -40,6 +40,11 @@ type Params struct {
NetworkState EpochSource NetworkState EpochSource
MaxSize uint64 MaxSize uint64
WithoutHomomorphicHash bool WithoutHomomorphicHash bool
// SizeHint is a hint for the total payload size to be processed.
// It is used primarily to optimize allocations and doesn't affect
// functionality. Primary usecases are providing file size when putting an object
// with the frostfs-cli or using Content-Length header in gateways.
SizeHint uint64
} }
// NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length // NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length
@ -121,7 +126,18 @@ func (s *payloadSizeLimiter) initializeCurrent() {
s.nextTarget = s.NextTargetInit() s.nextTarget = s.NextTargetInit()
s.writtenCurrent = 0 s.writtenCurrent = 0
s.initPayloadHashers() s.initPayloadHashers()
s.payload = make([]byte, 0)
var payloadSize uint64
// Check whether SizeHint is valid.
if remaining := s.SizeHint - s.written; remaining <= s.SizeHint {
if remaining >= s.MaxSize {
payloadSize = s.MaxSize
} else {
payloadSize = remaining % s.MaxSize
}
}
s.payload = make([]byte, 0, payloadSize)
} }
func (s *payloadSizeLimiter) initPayloadHashers() { func (s *payloadSizeLimiter) initPayloadHashers() {

View file

@ -20,7 +20,7 @@ func TestTransformer(t *testing.T) {
tt := new(testTarget) tt := new(testTarget)
target, pk := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return tt }) target, pk := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return tt })
cnr := cidtest.ID() cnr := cidtest.ID()
hdr := newObject(cnr) hdr := newObject(cnr)
@ -114,15 +114,37 @@ func writeObject(t *testing.T, ctx context.Context, target ChunkedObjectWriter,
func BenchmarkTransformer(b *testing.B) { func BenchmarkTransformer(b *testing.B) {
hdr := newObject(cidtest.ID()) hdr := newObject(cidtest.ID())
const (
// bufferSize is taken from https://git.frostfs.info/TrueCloudLab/frostfs-sdk-go/src/commit/670619d2426fee233a37efe21a0471989b16a4fc/pool/pool.go#L1825
bufferSize = 3 * 1024 * 1024
smallSize = 8 * 1024
bigSize = 64 * 1024 * 1024 * 9 / 2 // 4.5 parts
)
b.Run("small", func(b *testing.B) { b.Run("small", func(b *testing.B) {
benchmarkTransformer(b, hdr, 8*1024) b.Run("no size hint", func(b *testing.B) {
benchmarkTransformer(b, hdr, smallSize, 0, 0)
})
b.Run("no size hint, with buffer", func(b *testing.B) {
benchmarkTransformer(b, hdr, smallSize, 0, bufferSize)
})
b.Run("with size hint, with buffer", func(b *testing.B) {
benchmarkTransformer(b, hdr, smallSize, smallSize, bufferSize)
})
}) })
b.Run("big", func(b *testing.B) { b.Run("big", func(b *testing.B) {
benchmarkTransformer(b, hdr, 64*1024*1024*9/2) b.Run("no size hint", func(b *testing.B) {
benchmarkTransformer(b, hdr, bigSize, 0, 0)
})
b.Run("no size hint, with buffer", func(b *testing.B) {
benchmarkTransformer(b, hdr, bigSize, 0, bufferSize)
})
b.Run("with size hint, with buffer", func(b *testing.B) {
benchmarkTransformer(b, hdr, bigSize, bigSize, bufferSize)
})
}) })
} }
func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize int) { func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize, sizeHint, bufferSize int) {
const maxSize = 64 * 1024 * 1024 const maxSize = 64 * 1024 * 1024
payload := make([]byte, payloadSize) payload := make([]byte, payloadSize)
@ -131,20 +153,32 @@ func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize in
b.ReportAllocs() b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
f, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return benchTarget{} }) f, _ := newPayloadSizeLimiter(maxSize, uint64(sizeHint), func() ObjectWriter { return benchTarget{} })
if err := f.WriteHeader(ctx, header); err != nil { if err := f.WriteHeader(ctx, header); err != nil {
b.Fatalf("write header: %v", err) b.Fatalf("write header: %v", err)
} }
if bufferSize == 0 {
if _, err := f.Write(ctx, payload); err != nil { if _, err := f.Write(ctx, payload); err != nil {
b.Fatalf("write: %v", err) b.Fatalf("write: %v", err)
} }
} else {
j := 0
for ; j+bufferSize < payloadSize; j += bufferSize {
if _, err := f.Write(ctx, payload[j:j+bufferSize]); err != nil {
b.Fatalf("write: %v", err)
}
}
if _, err := f.Write(ctx, payload[j:payloadSize]); err != nil {
b.Fatalf("write: %v", err)
}
}
if _, err := f.Close(ctx); err != nil { if _, err := f.Close(ctx); err != nil {
b.Fatalf("close: %v", err) b.Fatalf("close: %v", err)
} }
} }
} }
func newPayloadSizeLimiter(maxSize uint64, nextTarget TargetInitializer) (ChunkedObjectWriter, *keys.PrivateKey) { func newPayloadSizeLimiter(maxSize uint64, sizeHint uint64, nextTarget TargetInitializer) (ChunkedObjectWriter, *keys.PrivateKey) {
p, err := keys.NewPrivateKey() p, err := keys.NewPrivateKey()
if err != nil { if err != nil {
panic(err) panic(err)
@ -155,6 +189,7 @@ func newPayloadSizeLimiter(maxSize uint64, nextTarget TargetInitializer) (Chunke
NextTargetInit: nextTarget, NextTargetInit: nextTarget,
NetworkState: dummyEpochSource(123), NetworkState: dummyEpochSource(123),
MaxSize: maxSize, MaxSize: maxSize,
SizeHint: sizeHint,
WithoutHomomorphicHash: true, WithoutHomomorphicHash: true,
}), p }), p
} }