From 665e5807bcdbce6f5219d4f2cad43487af423a93 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 26 Oct 2023 18:13:54 +0300 Subject: [PATCH] [#188] transformer: Allow to provide size hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For big objects with known size we can optimize allocation patterns by providing size hint. As with any hint, it does not affect transformer functionality: slices with capacity > MaxSize are never allocated. ``` goos: linux goarch: amd64 pkg: git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz │ out │ │ sec/op │ Transformer/small/no_size_hint-8 65.44µ ± 3% Transformer/small/no_size_hint,_with_buffer-8 64.24µ ± 5% Transformer/small/with_size_hint,_with_buffer-8 58.70µ ± 5% Transformer/big/no_size_hint-8 367.8m ± 3% Transformer/big/no_size_hint,_with_buffer-8 562.7m ± 0% Transformer/big/with_size_hint,_with_buffer-8 385.6m ± 7% geomean 5.197m │ out │ │ B/op │ Transformer/small/no_size_hint-8 13.40Ki ± 0% Transformer/small/no_size_hint,_with_buffer-8 13.40Ki ± 0% Transformer/small/with_size_hint,_with_buffer-8 13.39Ki ± 0% Transformer/big/no_size_hint-8 288.0Mi ± 0% Transformer/big/no_size_hint,_with_buffer-8 1.390Gi ± 0% Transformer/big/with_size_hint,_with_buffer-8 288.0Mi ± 0% geomean 2.533Mi │ out │ │ allocs/op │ Transformer/small/no_size_hint-8 92.00 ± 0% Transformer/small/no_size_hint,_with_buffer-8 92.00 ± 0% Transformer/small/with_size_hint,_with_buffer-8 92.00 ± 0% Transformer/big/no_size_hint-8 546.5 ± 0% Transformer/big/no_size_hint,_with_buffer-8 607.5 ± 0% Transformer/big/with_size_hint,_with_buffer-8 545.5 ± 0% geomean 228.1 ``` Signed-off-by: Evgenii Stratonikov --- object/transformer/channel_test.go | 4 +- object/transformer/size_hint_test.go | 72 ++++++++++++++++++++++++++ object/transformer/transformer.go | 18 ++++++- object/transformer/transformer_test.go | 51 +++++++++++++++--- 4 files changed, 134 insertions(+), 11 deletions(-) create mode 100644 object/transformer/size_hint_test.go diff --git a/object/transformer/channel_test.go b/object/transformer/channel_test.go index 1bb074b..7a31c7b 100644 --- a/object/transformer/channel_test.go +++ b/object/transformer/channel_test.go @@ -18,8 +18,8 @@ func TestChannelTarget(t *testing.T) { tt := new(testTarget) ct := NewChannelTarget(ch) - chTarget, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return ct }) - testTarget, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return tt }) + chTarget, _ := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return ct }) + testTarget, _ := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return tt }) ver := version.Current() cnr := cidtest.ID() diff --git a/object/transformer/size_hint_test.go b/object/transformer/size_hint_test.go new file mode 100644 index 0000000..b0965b1 --- /dev/null +++ b/object/transformer/size_hint_test.go @@ -0,0 +1,72 @@ +package transformer + +import ( + "context" + "crypto/rand" + "math" + "testing" + + cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user" + "github.com/nspcc-dev/neo-go/pkg/crypto/keys" + "github.com/stretchr/testify/require" +) + +func TestTransformerSizeHintCorrectness(t *testing.T) { + const ( + maxSize = 100 + payloadSize = maxSize*2 + maxSize/2 + ) + + pk, err := keys.NewPrivateKey() + require.NoError(t, err) + + p := Params{ + Key: &pk.PrivateKey, + NetworkState: dummyEpochSource(123), + MaxSize: maxSize, + WithoutHomomorphicHash: true, + } + + cnr := cidtest.ID() + hdr := newObject(cnr) + + var owner user.ID + user.IDFromKey(&owner, pk.PrivateKey.PublicKey) + hdr.SetOwnerID(&owner) + + expected := make([]byte, payloadSize) + _, _ = rand.Read(expected) + + t.Run("default", func(t *testing.T) { + p.SizeHint = 0 + testPayloadEqual(t, p, hdr, expected) + }) + t.Run("size hint is perfect", func(t *testing.T) { + p.SizeHint = payloadSize + testPayloadEqual(t, p, hdr, expected) + }) + t.Run("size hint < payload size", func(t *testing.T) { + p.SizeHint = payloadSize / 2 + testPayloadEqual(t, p, hdr, expected) + }) + t.Run("size hint > payload size", func(t *testing.T) { + p.SizeHint = math.MaxUint64 + testPayloadEqual(t, p, hdr, expected) + }) +} + +func testPayloadEqual(t *testing.T, p Params, hdr *objectSDK.Object, expected []byte) { + tt := new(testTarget) + + p.NextTargetInit = func() ObjectWriter { return tt } + target := NewPayloadSizeLimiter(p) + + writeObject(t, context.Background(), target, hdr, expected) + var actual []byte + for i := range tt.objects { + actual = append(actual, tt.objects[i].Payload()...) + } + require.Equal(t, expected, actual) +} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index 6fea8dd..4b27de8 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -40,6 +40,11 @@ type Params struct { NetworkState EpochSource MaxSize uint64 WithoutHomomorphicHash bool + // SizeHint is a hint for the total payload size to be processed. + // It is used primarily to optimize allocations and doesn't affect + // functionality. Primary usecases are providing file size when putting an object + // with the frostfs-cli or using Content-Length header in gateways. + SizeHint uint64 } // NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length @@ -121,7 +126,18 @@ func (s *payloadSizeLimiter) initializeCurrent() { s.nextTarget = s.NextTargetInit() s.writtenCurrent = 0 s.initPayloadHashers() - s.payload = make([]byte, 0) + + var payloadSize uint64 + + // Check whether SizeHint is valid. + if remaining := s.SizeHint - s.written; remaining <= s.SizeHint { + if remaining >= s.MaxSize { + payloadSize = s.MaxSize + } else { + payloadSize = remaining % s.MaxSize + } + } + s.payload = make([]byte, 0, payloadSize) } func (s *payloadSizeLimiter) initPayloadHashers() { diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index cbfb11a..f1a3280 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -20,7 +20,7 @@ func TestTransformer(t *testing.T) { tt := new(testTarget) - target, pk := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return tt }) + target, pk := newPayloadSizeLimiter(maxSize, 0, func() ObjectWriter { return tt }) cnr := cidtest.ID() hdr := newObject(cnr) @@ -114,15 +114,37 @@ func writeObject(t *testing.T, ctx context.Context, target ChunkedObjectWriter, func BenchmarkTransformer(b *testing.B) { hdr := newObject(cidtest.ID()) + const ( + // bufferSize is taken from https://git.frostfs.info/TrueCloudLab/frostfs-sdk-go/src/commit/670619d2426fee233a37efe21a0471989b16a4fc/pool/pool.go#L1825 + bufferSize = 3 * 1024 * 1024 + smallSize = 8 * 1024 + bigSize = 64 * 1024 * 1024 * 9 / 2 // 4.5 parts + ) b.Run("small", func(b *testing.B) { - benchmarkTransformer(b, hdr, 8*1024) + b.Run("no size hint", func(b *testing.B) { + benchmarkTransformer(b, hdr, smallSize, 0, 0) + }) + b.Run("no size hint, with buffer", func(b *testing.B) { + benchmarkTransformer(b, hdr, smallSize, 0, bufferSize) + }) + b.Run("with size hint, with buffer", func(b *testing.B) { + benchmarkTransformer(b, hdr, smallSize, smallSize, bufferSize) + }) }) b.Run("big", func(b *testing.B) { - benchmarkTransformer(b, hdr, 64*1024*1024*9/2) + b.Run("no size hint", func(b *testing.B) { + benchmarkTransformer(b, hdr, bigSize, 0, 0) + }) + b.Run("no size hint, with buffer", func(b *testing.B) { + benchmarkTransformer(b, hdr, bigSize, 0, bufferSize) + }) + b.Run("with size hint, with buffer", func(b *testing.B) { + benchmarkTransformer(b, hdr, bigSize, bigSize, bufferSize) + }) }) } -func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize int) { +func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize, sizeHint, bufferSize int) { const maxSize = 64 * 1024 * 1024 payload := make([]byte, payloadSize) @@ -131,12 +153,24 @@ func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize in b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - f, _ := newPayloadSizeLimiter(maxSize, func() ObjectWriter { return benchTarget{} }) + f, _ := newPayloadSizeLimiter(maxSize, uint64(sizeHint), func() ObjectWriter { return benchTarget{} }) if err := f.WriteHeader(ctx, header); err != nil { b.Fatalf("write header: %v", err) } - if _, err := f.Write(ctx, payload); err != nil { - b.Fatalf("write: %v", err) + if bufferSize == 0 { + if _, err := f.Write(ctx, payload); err != nil { + b.Fatalf("write: %v", err) + } + } else { + j := 0 + for ; j+bufferSize < payloadSize; j += bufferSize { + if _, err := f.Write(ctx, payload[j:j+bufferSize]); err != nil { + b.Fatalf("write: %v", err) + } + } + if _, err := f.Write(ctx, payload[j:payloadSize]); err != nil { + b.Fatalf("write: %v", err) + } } if _, err := f.Close(ctx); err != nil { b.Fatalf("close: %v", err) @@ -144,7 +178,7 @@ func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize in } } -func newPayloadSizeLimiter(maxSize uint64, nextTarget TargetInitializer) (ChunkedObjectWriter, *keys.PrivateKey) { +func newPayloadSizeLimiter(maxSize uint64, sizeHint uint64, nextTarget TargetInitializer) (ChunkedObjectWriter, *keys.PrivateKey) { p, err := keys.NewPrivateKey() if err != nil { panic(err) @@ -155,6 +189,7 @@ func newPayloadSizeLimiter(maxSize uint64, nextTarget TargetInitializer) (Chunke NextTargetInit: nextTarget, NetworkState: dummyEpochSource(123), MaxSize: maxSize, + SizeHint: sizeHint, WithoutHomomorphicHash: true, }), p }