From c2ea143d00252fe01c36f7eaf740bf6e753e1f92 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 17 Feb 2023 12:52:46 +0300 Subject: [PATCH 01/12] [#19] object: Move transformer implementation from node Signed-off-by: Evgenii Stratonikov --- object/transformer/fmt.go | 116 ++++++++++++ object/transformer/transformer.go | 294 ++++++++++++++++++++++++++++++ object/transformer/types.go | 111 +++++++++++ 3 files changed, 521 insertions(+) create mode 100644 object/transformer/fmt.go create mode 100644 object/transformer/transformer.go create mode 100644 object/transformer/types.go diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go new file mode 100644 index 0000000..b0ca5c9 --- /dev/null +++ b/object/transformer/fmt.go @@ -0,0 +1,116 @@ +package transformer + +import ( + "crypto/ecdsa" + "fmt" + + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/frostfs-sdk-go/session" + "github.com/TrueCloudLab/frostfs-sdk-go/version" +) + +type formatter struct { + prm *FormatterParams + + obj *object.Object + + sz uint64 +} + +type EpochSource interface { + CurrentEpoch() uint64 +} + +// FormatterParams groups NewFormatTarget parameters. +type FormatterParams struct { + Key *ecdsa.PrivateKey + + NextTarget ObjectTarget + + SessionToken *session.Object + + NetworkState EpochSource +} + +// NewFormatTarget returns ObjectTarget instance that finalizes object structure +// and writes it to the next target. +// +// Chunks must be written before the WriteHeader call. +// +// Object changes: +// - sets version to current SDK version; +// - sets payload size to the total length of all written chunks; +// - sets session token; +// - sets number of creation epoch; +// - calculates and sets verification fields (ID, Signature). +func NewFormatTarget(p *FormatterParams) ObjectTarget { + return &formatter{ + prm: p, + } +} + +func (f *formatter) WriteHeader(obj *object.Object) error { + f.obj = obj + + return nil +} + +func (f *formatter) Write(p []byte) (n int, err error) { + n, err = f.prm.NextTarget.Write(p) + + f.sz += uint64(n) + + return +} + +func (f *formatter) Close() (*AccessIdentifiers, error) { + curEpoch := f.prm.NetworkState.CurrentEpoch() + ver := version.Current() + + f.obj.SetVersion(&ver) + f.obj.SetPayloadSize(f.sz) + f.obj.SetSessionToken(f.prm.SessionToken) + f.obj.SetCreationEpoch(curEpoch) + + var ( + parID *oid.ID + parHdr *object.Object + ) + + if par := f.obj.Parent(); par != nil && par.Signature() == nil { + rawPar := object.NewFromV2(par.ToV2()) + + rawPar.SetSessionToken(f.prm.SessionToken) + rawPar.SetCreationEpoch(curEpoch) + + if err := object.SetIDWithSignature(*f.prm.Key, rawPar); err != nil { + return nil, fmt.Errorf("could not finalize parent object: %w", err) + } + + id, _ := rawPar.ID() + parID = &id + parHdr = rawPar + + f.obj.SetParent(parHdr) + } + + if err := object.SetIDWithSignature(*f.prm.Key, f.obj); err != nil { + return nil, fmt.Errorf("could not finalize object: %w", err) + } + + if err := f.prm.NextTarget.WriteHeader(f.obj); err != nil { + return nil, fmt.Errorf("could not write header to next target: %w", err) + } + + if _, err := f.prm.NextTarget.Close(); err != nil { + return nil, fmt.Errorf("could not close next target: %w", err) + } + + id, _ := f.obj.ID() + + return new(AccessIdentifiers). + WithSelfID(id). + WithParentID(parID). 
+ WithParent(parHdr), nil +} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go new file mode 100644 index 0000000..cfd7098 --- /dev/null +++ b/object/transformer/transformer.go @@ -0,0 +1,294 @@ +package transformer + +import ( + "crypto/sha256" + "fmt" + "hash" + "io" + + "github.com/TrueCloudLab/frostfs-sdk-go/checksum" + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/tzhash/tz" +) + +type payloadSizeLimiter struct { + maxSize, written uint64 + + withoutHomomorphicHash bool + + targetInit func() ObjectTarget + + target ObjectTarget + + current, parent *object.Object + + currentHashers, parentHashers []*payloadChecksumHasher + + previous []oid.ID + + chunkWriter io.Writer + + splitID *object.SplitID + + parAttrs []object.Attribute +} + +type payloadChecksumHasher struct { + hasher hash.Hash + + checksumWriter func([]byte) +} + +// NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length +// of the writing object and writes generated objects to targets from initializer. +// +// Calculates and adds homomorphic hash to resulting objects only if withoutHomomorphicHash +// is false. +// +// Objects w/ payload size less or equal than max size remain untouched. +func NewPayloadSizeLimiter(maxSize uint64, withoutHomomorphicHash bool, targetInit TargetInitializer) ObjectTarget { + return &payloadSizeLimiter{ + maxSize: maxSize, + withoutHomomorphicHash: withoutHomomorphicHash, + targetInit: targetInit, + splitID: object.NewSplitID(), + } +} + +func (s *payloadSizeLimiter) WriteHeader(hdr *object.Object) error { + s.current = fromObject(hdr) + + s.initialize() + + return nil +} + +func (s *payloadSizeLimiter) Write(p []byte) (int, error) { + if err := s.writeChunk(p); err != nil { + return 0, err + } + + return len(p), nil +} + +func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) { + return s.release(true) +} + +func (s *payloadSizeLimiter) initialize() { + // if it is an object after the 1st + if ln := len(s.previous); ln > 0 { + // initialize parent object once (after 1st object) + if ln == 1 { + s.detachParent() + } + + // set previous object to the last previous identifier + s.current.SetPreviousID(s.previous[ln-1]) + } + + s.initializeCurrent() +} + +func fromObject(obj *object.Object) *object.Object { + cnr, _ := obj.ContainerID() + + res := object.New() + res.SetContainerID(cnr) + res.SetOwnerID(obj.OwnerID()) + res.SetAttributes(obj.Attributes()...) + res.SetType(obj.Type()) + + // obj.SetSplitID creates splitHeader but we don't need to do it in case + // of small objects, so we should make nil check. + if obj.SplitID() != nil { + res.SetSplitID(obj.SplitID()) + } + + return res +} + +func (s *payloadSizeLimiter) initializeCurrent() { + // initialize current object target + s.target = s.targetInit() + + // create payload hashers + s.currentHashers = payloadHashersForObject(s.current, s.withoutHomomorphicHash) + + // compose multi-writer from target and all payload hashers + ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) + + ws = append(ws, s.target) + + for i := range s.currentHashers { + ws = append(ws, s.currentHashers[i].hasher) + } + + for i := range s.parentHashers { + ws = append(ws, s.parentHashers[i].hasher) + } + + s.chunkWriter = io.MultiWriter(ws...) 
+} + +func payloadHashersForObject(obj *object.Object, withoutHomomorphicHash bool) []*payloadChecksumHasher { + hashers := make([]*payloadChecksumHasher, 0, 2) + + hashers = append(hashers, &payloadChecksumHasher{ + hasher: sha256.New(), + checksumWriter: func(binChecksum []byte) { + if ln := len(binChecksum); ln != sha256.Size { + panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", sha256.Size, ln)) + } + + csSHA := [sha256.Size]byte{} + copy(csSHA[:], binChecksum) + + var cs checksum.Checksum + cs.SetSHA256(csSHA) + + obj.SetPayloadChecksum(cs) + }, + }) + + if !withoutHomomorphicHash { + hashers = append(hashers, &payloadChecksumHasher{ + hasher: tz.New(), + checksumWriter: func(binChecksum []byte) { + if ln := len(binChecksum); ln != tz.Size { + panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", tz.Size, ln)) + } + + csTZ := [tz.Size]byte{} + copy(csTZ[:], binChecksum) + + var cs checksum.Checksum + cs.SetTillichZemor(csTZ) + + obj.SetPayloadHomomorphicHash(cs) + }, + }) + } + + return hashers +} + +func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) { + // Arg finalize is true only when called from Close method. + // We finalize parent and generate linking objects only if it is more + // than 1 object in split-chain. + withParent := finalize && len(s.previous) > 0 + + if withParent { + writeHashes(s.parentHashers) + s.parent.SetPayloadSize(s.written) + s.current.SetParent(s.parent) + } + + // release current object + writeHashes(s.currentHashers) + + // release current, get its id + if err := s.target.WriteHeader(s.current); err != nil { + return nil, fmt.Errorf("could not write header: %w", err) + } + + ids, err := s.target.Close() + if err != nil { + return nil, fmt.Errorf("could not close target: %w", err) + } + + // save identifier of the released object + s.previous = append(s.previous, ids.SelfID()) + + if withParent { + // generate and release linking object + s.initializeLinking(ids.Parent()) + s.initializeCurrent() + + if _, err := s.release(false); err != nil { + return nil, fmt.Errorf("could not release linking object: %w", err) + } + } + + return ids, nil +} + +func writeHashes(hashers []*payloadChecksumHasher) { + for i := range hashers { + hashers[i].checksumWriter(hashers[i].hasher.Sum(nil)) + } +} + +func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { + s.current = fromObject(s.current) + s.current.SetParent(parHdr) + s.current.SetChildren(s.previous...) + s.current.SetSplitID(s.splitID) +} + +func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { + // statement is true if the previous write of bytes reached exactly the boundary. 
+ if s.written > 0 && s.written%s.maxSize == 0 { + if s.written == s.maxSize { + s.prepareFirstChild() + } + + // we need to release current object + if _, err := s.release(false); err != nil { + return fmt.Errorf("could not release object: %w", err) + } + + // initialize another object + s.initialize() + } + + var ( + ln = uint64(len(chunk)) + cut = ln + leftToEdge = s.maxSize - s.written%s.maxSize + ) + + // write bytes no further than the boundary of the current object + if ln > leftToEdge { + cut = leftToEdge + } + + if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil { + return fmt.Errorf("could not write chunk to target: %w", err) + } + + // increase written bytes counter + s.written += cut + + // if there are more bytes in buffer we call method again to start filling another object + if ln > leftToEdge { + return s.writeChunk(chunk[cut:]) + } + + return nil +} + +func (s *payloadSizeLimiter) prepareFirstChild() { + // initialize split header with split ID on first object in chain + s.current.InitRelations() + s.current.SetSplitID(s.splitID) + + // cut source attributes + s.parAttrs = s.current.Attributes() + s.current.SetAttributes() + + // attributes will be added to parent in detachParent +} + +func (s *payloadSizeLimiter) detachParent() { + s.parent = s.current + s.current = fromObject(s.parent) + s.parent.ResetRelations() + s.parent.SetSignature(nil) + s.parentHashers = s.currentHashers + + // return source attributes + s.parent.SetAttributes(s.parAttrs...) +} diff --git a/object/transformer/types.go b/object/transformer/types.go new file mode 100644 index 0000000..72e6a74 --- /dev/null +++ b/object/transformer/types.go @@ -0,0 +1,111 @@ +package transformer + +import ( + "io" + + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" +) + +// AccessIdentifiers represents group of the object identifiers +// that are returned after writing the object. +// Consists of the ID of the stored object and the ID of the parent object. +type AccessIdentifiers struct { + par *oid.ID + + self oid.ID + + parHdr *object.Object +} + +// ObjectTarget is an interface of the object writer. +type ObjectTarget interface { + // WriteHeader writes object header w/ payload part. + // The payload of the object may be incomplete. + // + // Must be called exactly once. Control remains with the caller. + // Missing a call or re-calling can lead to undefined behavior + // that depends on the implementation. + // + // Must not be called after Close call. + WriteHeader(*object.Object) error + + // Write writes object payload chunk. + // + // Can be called multiple times. + // + // Must not be called after Close call. + io.Writer + + // Close is used to finish object writing. + // + // Close must return access identifiers of the object + // that has been written. + // + // Must be called no more than once. Control remains with the caller. + // Re-calling can lead to undefined behavior + // that depends on the implementation. + Close() (*AccessIdentifiers, error) +} + +// TargetInitializer represents ObjectTarget constructor. +type TargetInitializer func() ObjectTarget + +// SelfID returns identifier of the written object. +func (a AccessIdentifiers) SelfID() oid.ID { + return a.self +} + +// WithSelfID returns AccessIdentifiers with passed self identifier. 
+func (a *AccessIdentifiers) WithSelfID(v oid.ID) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.self = v + + return res +} + +// ParentID return identifier of the parent of the written object. +func (a *AccessIdentifiers) ParentID() *oid.ID { + if a != nil { + return a.par + } + + return nil +} + +// WithParentID returns AccessIdentifiers with passed parent identifier. +func (a *AccessIdentifiers) WithParentID(v *oid.ID) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.par = v + + return res +} + +// Parent return identifier of the parent of the written object. +func (a *AccessIdentifiers) Parent() *object.Object { + if a != nil { + return a.parHdr + } + + return nil +} + +// WithParent returns AccessIdentifiers with passed parent identifier. +func (a *AccessIdentifiers) WithParent(v *object.Object) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.parHdr = v + + return res +} -- 2.45.2 From 1e1c836310705f69ddd78fc0002d3aa4307d1f35 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Sat, 18 Feb 2023 10:13:14 +0300 Subject: [PATCH 02/12] [#19] transformer: Cover with unit-tests Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer_test.go | 164 +++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 object/transformer/transformer_test.go diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go new file mode 100644 index 0000000..fe35fe3 --- /dev/null +++ b/object/transformer/transformer_test.go @@ -0,0 +1,164 @@ +package transformer + +import ( + "crypto/rand" + "testing" + + cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/frostfs-sdk-go/version" + "github.com/nspcc-dev/neo-go/pkg/crypto/keys" + "github.com/stretchr/testify/require" +) + +func TestTransformer(t *testing.T) { + const maxSize = 100 + + tt := new(testTarget) + + target, _ := newPayloadSizeLimiter(maxSize, tt) + + ver := version.Current() + cnr := cidtest.ID() + hdr := objectSDK.New() + hdr.SetContainerID(cnr) + hdr.SetType(objectSDK.TypeRegular) + hdr.SetVersion(&ver) + require.NoError(t, target.WriteHeader(hdr)) + + expectedPayload := make([]byte, maxSize*2+maxSize/2) + _, _ = rand.Read(expectedPayload) + + _, err := target.Write(expectedPayload) + require.NoError(t, err) + + ids, err := target.Close() + require.NoError(t, err) + + require.Equal(t, 4, len(tt.objects)) // 3 parts + linking object + + var actualPayload []byte + for i := range tt.objects { + childCnr, ok := tt.objects[i].ContainerID() + require.True(t, ok) + require.Equal(t, cnr, childCnr) + require.Equal(t, objectSDK.TypeRegular, tt.objects[i].Type()) + + payload := tt.objects[i].Payload() + require.EqualValues(t, tt.objects[i].PayloadSize(), len(payload)) + actualPayload = append(actualPayload, payload...) 
+ + switch i { + case 0, 1: + require.EqualValues(t, maxSize, len(payload)) + case 2: + require.EqualValues(t, maxSize/2, len(payload)) + case 3: + parID, ok := tt.objects[i].ParentID() + require.True(t, ok) + require.Equal(t, ids.ParentID(), &parID) + } + } + require.Equal(t, expectedPayload, actualPayload) +} + +func BenchmarkTransformer(b *testing.B) { + ver := version.Current() + cnr := cidtest.ID() + hdr := objectSDK.New() + hdr.SetContainerID(cnr) + hdr.SetType(objectSDK.TypeRegular) + hdr.SetVersion(&ver) + + b.Run("small", func(b *testing.B) { + benchmarkTransformer(b, hdr, 8*1024) + }) + b.Run("big", func(b *testing.B) { + benchmarkTransformer(b, hdr, 64*1024*1024*9/2) + }) +} + +func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize int) { + const maxSize = 64 * 1024 * 1024 + + payload := make([]byte, payloadSize) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + f, _ := newPayloadSizeLimiter(maxSize, benchTarget{}) + if err := f.WriteHeader(header); err != nil { + b.Fatalf("write header: %v", err) + } + if _, err := f.Write(payload); err != nil { + b.Fatalf("write: %v", err) + } + if _, err := f.Close(); err != nil { + b.Fatalf("close: %v", err) + } + } +} + +func newPayloadSizeLimiter(maxSize uint64, nextTarget ObjectTarget) (ObjectTarget, *keys.PrivateKey) { + p, err := keys.NewPrivateKey() + if err != nil { + panic(err) + } + + return NewPayloadSizeLimiter(maxSize, true, func() ObjectTarget { + return NewFormatTarget(&FormatterParams{ + Key: &p.PrivateKey, + NextTarget: nextTarget, + NetworkState: dummyEpochSource(123), + }) + }), p +} + +type dummyEpochSource uint64 + +func (s dummyEpochSource) CurrentEpoch() uint64 { + return uint64(s) +} + +type benchTarget struct{} + +func (benchTarget) WriteHeader(object *objectSDK.Object) error { + return nil +} + +func (benchTarget) Write(p []byte) (n int, err error) { + return len(p), nil +} + +func (benchTarget) Close() (*AccessIdentifiers, error) { + return nil, nil +} + +type testTarget struct { + current *objectSDK.Object + payload []byte + objects []*objectSDK.Object +} + +func (tt *testTarget) WriteHeader(object *objectSDK.Object) error { + tt.current = object + return nil +} + +func (tt *testTarget) Write(p []byte) (n int, err error) { + tt.payload = append(tt.payload, p...) + return len(p), nil +} + +func (tt *testTarget) Close() (*AccessIdentifiers, error) { + tt.current.SetPayload(tt.payload) + // We need to marshal, because current implementation reuses written object. + data, _ := tt.current.Marshal() + obj := objectSDK.New() + _ = obj.Unmarshal(data) + + tt.objects = append(tt.objects, obj) + tt.current = nil + tt.payload = nil + return nil, nil // AccessIdentifiers should not be used. +} -- 2.45.2 From 2af5889a053ffdaafed8517ceab86cfb6458ac95 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 17 Feb 2023 13:00:27 +0300 Subject: [PATCH 03/12] [#19] transformer: Simplify `AccessIdentifiers` Signed-off-by: Evgenii Stratonikov --- object/transformer/fmt.go | 9 ++-- object/transformer/transformer.go | 4 +- object/transformer/transformer_test.go | 2 +- object/transformer/types.go | 67 ++------------------------ 4 files changed, 11 insertions(+), 71 deletions(-) diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go index b0ca5c9..31357d7 100644 --- a/object/transformer/fmt.go +++ b/object/transformer/fmt.go @@ -109,8 +109,9 @@ func (f *formatter) Close() (*AccessIdentifiers, error) { id, _ := f.obj.ID() - return new(AccessIdentifiers). - WithSelfID(id). 
- WithParentID(parID). - WithParent(parHdr), nil + return &AccessIdentifiers{ + ParentID: parID, + SelfID: id, + ParentHeader: parHdr, + }, nil } diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index cfd7098..d3b5b8d 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -200,11 +200,11 @@ func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) } // save identifier of the released object - s.previous = append(s.previous, ids.SelfID()) + s.previous = append(s.previous, ids.SelfID) if withParent { // generate and release linking object - s.initializeLinking(ids.Parent()) + s.initializeLinking(ids.ParentHeader) s.initializeCurrent() if _, err := s.release(false); err != nil { diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index fe35fe3..8fcd680 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -56,7 +56,7 @@ func TestTransformer(t *testing.T) { case 3: parID, ok := tt.objects[i].ParentID() require.True(t, ok) - require.Equal(t, ids.ParentID(), &parID) + require.Equal(t, ids.ParentID, &parID) } } require.Equal(t, expectedPayload, actualPayload) diff --git a/object/transformer/types.go b/object/transformer/types.go index 72e6a74..0525eb0 100644 --- a/object/transformer/types.go +++ b/object/transformer/types.go @@ -11,11 +11,9 @@ import ( // that are returned after writing the object. // Consists of the ID of the stored object and the ID of the parent object. type AccessIdentifiers struct { - par *oid.ID - - self oid.ID - - parHdr *object.Object + ParentID *oid.ID + SelfID oid.ID + ParentHeader *object.Object } // ObjectTarget is an interface of the object writer. @@ -50,62 +48,3 @@ type ObjectTarget interface { // TargetInitializer represents ObjectTarget constructor. type TargetInitializer func() ObjectTarget - -// SelfID returns identifier of the written object. -func (a AccessIdentifiers) SelfID() oid.ID { - return a.self -} - -// WithSelfID returns AccessIdentifiers with passed self identifier. -func (a *AccessIdentifiers) WithSelfID(v oid.ID) *AccessIdentifiers { - res := a - if res == nil { - res = new(AccessIdentifiers) - } - - res.self = v - - return res -} - -// ParentID return identifier of the parent of the written object. -func (a *AccessIdentifiers) ParentID() *oid.ID { - if a != nil { - return a.par - } - - return nil -} - -// WithParentID returns AccessIdentifiers with passed parent identifier. -func (a *AccessIdentifiers) WithParentID(v *oid.ID) *AccessIdentifiers { - res := a - if res == nil { - res = new(AccessIdentifiers) - } - - res.par = v - - return res -} - -// Parent return identifier of the parent of the written object. -func (a *AccessIdentifiers) Parent() *object.Object { - if a != nil { - return a.parHdr - } - - return nil -} - -// WithParent returns AccessIdentifiers with passed parent identifier. 
-func (a *AccessIdentifiers) WithParent(v *object.Object) *AccessIdentifiers { - res := a - if res == nil { - res = new(AccessIdentifiers) - } - - res.parHdr = v - - return res -} -- 2.45.2 From fb807e80ce0e7652cca85f42835cbcd246714378 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Sat, 18 Feb 2023 10:36:20 +0300 Subject: [PATCH 04/12] [#19] transformer: Make `writeChunk` non-recursive Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer.go | 69 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index d3b5b8d..8c819fa 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -229,45 +229,46 @@ func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { } func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { - // statement is true if the previous write of bytes reached exactly the boundary. - if s.written > 0 && s.written%s.maxSize == 0 { - if s.written == s.maxSize { - s.prepareFirstChild() + for { + // statement is true if the previous write of bytes reached exactly the boundary. + if s.written > 0 && s.written%s.maxSize == 0 { + if s.written == s.maxSize { + s.prepareFirstChild() + } + + // we need to release current object + if _, err := s.release(false); err != nil { + return fmt.Errorf("could not release object: %w", err) + } + + // initialize another object + s.initialize() } - // we need to release current object - if _, err := s.release(false); err != nil { - return fmt.Errorf("could not release object: %w", err) + var ( + ln = uint64(len(chunk)) + cut = ln + leftToEdge = s.maxSize - s.written%s.maxSize + ) + + // write bytes no further than the boundary of the current object + if ln > leftToEdge { + cut = leftToEdge } - // initialize another object - s.initialize() + if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil { + return fmt.Errorf("could not write chunk to target: %w", err) + } + + // increase written bytes counter + s.written += cut + + if cut == ln { + return nil + } + // if there are more bytes in buffer we call method again to start filling another object + chunk = chunk[cut:] } - - var ( - ln = uint64(len(chunk)) - cut = ln - leftToEdge = s.maxSize - s.written%s.maxSize - ) - - // write bytes no further than the boundary of the current object - if ln > leftToEdge { - cut = leftToEdge - } - - if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil { - return fmt.Errorf("could not write chunk to target: %w", err) - } - - // increase written bytes counter - s.written += cut - - // if there are more bytes in buffer we call method again to start filling another object - if ln > leftToEdge { - return s.writeChunk(chunk[cut:]) - } - - return nil } func (s *payloadSizeLimiter) prepareFirstChild() { -- 2.45.2 From 51bad6dfdbfde150cdb857c975c537a59582e4fe Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Sat, 18 Feb 2023 10:43:34 +0300 Subject: [PATCH 05/12] [#19] transformer: Merge formatter and payload splitter Signed-off-by: Evgenii Stratonikov --- object/transformer/fmt.go | 112 ------------------------- object/transformer/transformer.go | 95 +++++++++++++++------ object/transformer/transformer_test.go | 12 +-- 3 files changed, 76 insertions(+), 143 deletions(-) diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go index 31357d7..193b8f4 100644 --- a/object/transformer/fmt.go +++ b/object/transformer/fmt.go @@ -1,117 +1,5 @@ package transformer -import ( - 
"crypto/ecdsa" - "fmt" - - "github.com/TrueCloudLab/frostfs-sdk-go/object" - oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" - "github.com/TrueCloudLab/frostfs-sdk-go/session" - "github.com/TrueCloudLab/frostfs-sdk-go/version" -) - -type formatter struct { - prm *FormatterParams - - obj *object.Object - - sz uint64 -} - type EpochSource interface { CurrentEpoch() uint64 } - -// FormatterParams groups NewFormatTarget parameters. -type FormatterParams struct { - Key *ecdsa.PrivateKey - - NextTarget ObjectTarget - - SessionToken *session.Object - - NetworkState EpochSource -} - -// NewFormatTarget returns ObjectTarget instance that finalizes object structure -// and writes it to the next target. -// -// Chunks must be written before the WriteHeader call. -// -// Object changes: -// - sets version to current SDK version; -// - sets payload size to the total length of all written chunks; -// - sets session token; -// - sets number of creation epoch; -// - calculates and sets verification fields (ID, Signature). -func NewFormatTarget(p *FormatterParams) ObjectTarget { - return &formatter{ - prm: p, - } -} - -func (f *formatter) WriteHeader(obj *object.Object) error { - f.obj = obj - - return nil -} - -func (f *formatter) Write(p []byte) (n int, err error) { - n, err = f.prm.NextTarget.Write(p) - - f.sz += uint64(n) - - return -} - -func (f *formatter) Close() (*AccessIdentifiers, error) { - curEpoch := f.prm.NetworkState.CurrentEpoch() - ver := version.Current() - - f.obj.SetVersion(&ver) - f.obj.SetPayloadSize(f.sz) - f.obj.SetSessionToken(f.prm.SessionToken) - f.obj.SetCreationEpoch(curEpoch) - - var ( - parID *oid.ID - parHdr *object.Object - ) - - if par := f.obj.Parent(); par != nil && par.Signature() == nil { - rawPar := object.NewFromV2(par.ToV2()) - - rawPar.SetSessionToken(f.prm.SessionToken) - rawPar.SetCreationEpoch(curEpoch) - - if err := object.SetIDWithSignature(*f.prm.Key, rawPar); err != nil { - return nil, fmt.Errorf("could not finalize parent object: %w", err) - } - - id, _ := rawPar.ID() - parID = &id - parHdr = rawPar - - f.obj.SetParent(parHdr) - } - - if err := object.SetIDWithSignature(*f.prm.Key, f.obj); err != nil { - return nil, fmt.Errorf("could not finalize object: %w", err) - } - - if err := f.prm.NextTarget.WriteHeader(f.obj); err != nil { - return nil, fmt.Errorf("could not write header to next target: %w", err) - } - - if _, err := f.prm.NextTarget.Close(); err != nil { - return nil, fmt.Errorf("could not close next target: %w", err) - } - - id, _ := f.obj.ID() - - return &AccessIdentifiers{ - ParentID: parID, - SelfID: id, - ParentHeader: parHdr, - }, nil -} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index 8c819fa..55ad9ff 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -1,6 +1,7 @@ package transformer import ( + "crypto/ecdsa" "crypto/sha256" "fmt" "hash" @@ -9,17 +10,15 @@ import ( "github.com/TrueCloudLab/frostfs-sdk-go/checksum" "github.com/TrueCloudLab/frostfs-sdk-go/object" oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/frostfs-sdk-go/session" + "github.com/TrueCloudLab/frostfs-sdk-go/version" "github.com/TrueCloudLab/tzhash/tz" ) type payloadSizeLimiter struct { - maxSize, written uint64 + Params - withoutHomomorphicHash bool - - targetInit func() ObjectTarget - - target ObjectTarget + written, writtenCurrent uint64 current, parent *object.Object @@ -40,6 +39,15 @@ type payloadChecksumHasher struct { checksumWriter func([]byte) } 
+type Params struct { + Key *ecdsa.PrivateKey + NextTarget ObjectTarget + SessionToken *session.Object + NetworkState EpochSource + MaxSize uint64 + WithoutHomomorphicHash bool +} + // NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length // of the writing object and writes generated objects to targets from initializer. // @@ -47,12 +55,10 @@ type payloadChecksumHasher struct { // is false. // // Objects w/ payload size less or equal than max size remain untouched. -func NewPayloadSizeLimiter(maxSize uint64, withoutHomomorphicHash bool, targetInit TargetInitializer) ObjectTarget { +func NewPayloadSizeLimiter(p Params) ObjectTarget { return &payloadSizeLimiter{ - maxSize: maxSize, - withoutHomomorphicHash: withoutHomomorphicHash, - targetInit: targetInit, - splitID: object.NewSplitID(), + Params: p, + splitID: object.NewSplitID(), } } @@ -110,16 +116,14 @@ func fromObject(obj *object.Object) *object.Object { } func (s *payloadSizeLimiter) initializeCurrent() { - // initialize current object target - s.target = s.targetInit() - // create payload hashers - s.currentHashers = payloadHashersForObject(s.current, s.withoutHomomorphicHash) + s.writtenCurrent = 0 + s.currentHashers = payloadHashersForObject(s.current, s.WithoutHomomorphicHash) // compose multi-writer from target and all payload hashers ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) - ws = append(ws, s.target) + ws = append(ws, s.NextTarget) for i := range s.currentHashers { ws = append(ws, s.currentHashers[i].hasher) @@ -189,14 +193,54 @@ func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) // release current object writeHashes(s.currentHashers) - // release current, get its id - if err := s.target.WriteHeader(s.current); err != nil { - return nil, fmt.Errorf("could not write header: %w", err) + curEpoch := s.NetworkState.CurrentEpoch() + ver := version.Current() + + s.current.SetVersion(&ver) + s.current.SetPayloadSize(s.writtenCurrent) + s.current.SetSessionToken(s.SessionToken) + s.current.SetCreationEpoch(curEpoch) + + var ( + parID *oid.ID + parHdr *object.Object + ) + + if par := s.current.Parent(); par != nil && par.Signature() == nil { + rawPar := object.NewFromV2(par.ToV2()) + + rawPar.SetSessionToken(s.SessionToken) + rawPar.SetCreationEpoch(curEpoch) + + if err := object.SetIDWithSignature(*s.Key, rawPar); err != nil { + return nil, fmt.Errorf("could not finalize parent object: %w", err) + } + + id, _ := rawPar.ID() + parID = &id + parHdr = rawPar + + s.current.SetParent(parHdr) } - ids, err := s.target.Close() - if err != nil { - return nil, fmt.Errorf("could not close target: %w", err) + if err := object.SetIDWithSignature(*s.Key, s.current); err != nil { + return nil, fmt.Errorf("could not finalize object: %w", err) + } + + if err := s.NextTarget.WriteHeader(s.current); err != nil { + return nil, fmt.Errorf("could not write header to next target: %w", err) + } + + if _, err := s.NextTarget.Close(); err != nil { + return nil, fmt.Errorf("could not close next target: %w", err) + } + + id, _ := s.current.ID() + + ids := &AccessIdentifiers{ + ParentID: parID, + SelfID: id, + ParentHeader: parHdr, } // save identifier of the released object @@ -231,8 +275,8 @@ func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { for { // statement is true if the previous write of bytes reached exactly the boundary. 
- if s.written > 0 && s.written%s.maxSize == 0 { - if s.written == s.maxSize { + if s.written > 0 && s.written%s.MaxSize == 0 { + if s.written == s.MaxSize { s.prepareFirstChild() } @@ -248,7 +292,7 @@ func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { var ( ln = uint64(len(chunk)) cut = ln - leftToEdge = s.maxSize - s.written%s.maxSize + leftToEdge = s.MaxSize - s.written%s.MaxSize ) // write bytes no further than the boundary of the current object @@ -261,6 +305,7 @@ func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { } // increase written bytes counter + s.writtenCurrent += cut s.written += cut if cut == ln { diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index 8fcd680..545945d 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -105,12 +105,12 @@ func newPayloadSizeLimiter(maxSize uint64, nextTarget ObjectTarget) (ObjectTarge panic(err) } - return NewPayloadSizeLimiter(maxSize, true, func() ObjectTarget { - return NewFormatTarget(&FormatterParams{ - Key: &p.PrivateKey, - NextTarget: nextTarget, - NetworkState: dummyEpochSource(123), - }) + return NewPayloadSizeLimiter(Params{ + Key: &p.PrivateKey, + NextTarget: nextTarget, + NetworkState: dummyEpochSource(123), + MaxSize: maxSize, + WithoutHomomorphicHash: true, }), p } -- 2.45.2 From 4917a718555dec1850bed2724f23bc593437c6d3 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 23 Feb 2023 16:56:38 +0300 Subject: [PATCH 06/12] [#19] transformer: Move `EpochSource` to other types Signed-off-by: Evgenii Stratonikov --- object/transformer/fmt.go | 5 ----- object/transformer/types.go | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 object/transformer/fmt.go diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go deleted file mode 100644 index 193b8f4..0000000 --- a/object/transformer/fmt.go +++ /dev/null @@ -1,5 +0,0 @@ -package transformer - -type EpochSource interface { - CurrentEpoch() uint64 -} diff --git a/object/transformer/types.go b/object/transformer/types.go index 0525eb0..f030e26 100644 --- a/object/transformer/types.go +++ b/object/transformer/types.go @@ -16,6 +16,11 @@ type AccessIdentifiers struct { ParentHeader *object.Object } +// EpochSource is a source for the current epoch. +type EpochSource interface { + CurrentEpoch() uint64 +} + // ObjectTarget is an interface of the object writer. type ObjectTarget interface { // WriteHeader writes object header w/ payload part. 
-- 2.45.2 From 43987aae7ac399b1181ae2fbbd791c099ecfa6b2 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 23 Feb 2023 17:13:13 +0300 Subject: [PATCH 07/12] [#19] transformer/test: Add helper functions Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer_test.go | 35 +++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index 545945d..f1f6722 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -4,6 +4,7 @@ import ( "crypto/rand" "testing" + cid "github.com/TrueCloudLab/frostfs-sdk-go/container/id" cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" "github.com/TrueCloudLab/frostfs-sdk-go/version" @@ -18,23 +19,13 @@ func TestTransformer(t *testing.T) { target, _ := newPayloadSizeLimiter(maxSize, tt) - ver := version.Current() cnr := cidtest.ID() - hdr := objectSDK.New() - hdr.SetContainerID(cnr) - hdr.SetType(objectSDK.TypeRegular) - hdr.SetVersion(&ver) - require.NoError(t, target.WriteHeader(hdr)) + hdr := newObject(cnr) expectedPayload := make([]byte, maxSize*2+maxSize/2) _, _ = rand.Read(expectedPayload) - _, err := target.Write(expectedPayload) - require.NoError(t, err) - - ids, err := target.Close() - require.NoError(t, err) - + ids := writeObject(t, target, hdr, expectedPayload) require.Equal(t, 4, len(tt.objects)) // 3 parts + linking object var actualPayload []byte @@ -62,13 +53,29 @@ func TestTransformer(t *testing.T) { require.Equal(t, expectedPayload, actualPayload) } -func BenchmarkTransformer(b *testing.B) { +func newObject(cnr cid.ID) *objectSDK.Object { ver := version.Current() - cnr := cidtest.ID() hdr := objectSDK.New() hdr.SetContainerID(cnr) hdr.SetType(objectSDK.TypeRegular) hdr.SetVersion(&ver) + return hdr +} + +func writeObject(t *testing.T, target ObjectTarget, header *objectSDK.Object, payload []byte) *AccessIdentifiers { + require.NoError(t, target.WriteHeader(header)) + + _, err := target.Write(payload) + require.NoError(t, err) + + ids, err := target.Close() + require.NoError(t, err) + + return ids +} + +func BenchmarkTransformer(b *testing.B) { + hdr := newObject(cidtest.ID()) b.Run("small", func(b *testing.B) { benchmarkTransformer(b, hdr, 8*1024) -- 2.45.2 From f102456c746b9af6e25a2fbad79017229fe51566 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 24 Feb 2023 16:21:56 +0300 Subject: [PATCH 08/12] [#19] transformer/test: Check owner ID and payload hash for parts Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer_test.go | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index f1f6722..2b4ee31 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -2,11 +2,13 @@ package transformer import ( "crypto/rand" + "crypto/sha256" "testing" cid "github.com/TrueCloudLab/frostfs-sdk-go/container/id" cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/frostfs-sdk-go/user" "github.com/TrueCloudLab/frostfs-sdk-go/version" "github.com/nspcc-dev/neo-go/pkg/crypto/keys" "github.com/stretchr/testify/require" @@ -17,11 +19,15 @@ func TestTransformer(t *testing.T) { tt := new(testTarget) - target, _ := 
newPayloadSizeLimiter(maxSize, tt) + target, pk := newPayloadSizeLimiter(maxSize, tt) cnr := cidtest.ID() hdr := newObject(cnr) + var owner user.ID + user.IDFromKey(&owner, pk.PrivateKey.PublicKey) + hdr.SetOwnerID(&owner) + expectedPayload := make([]byte, maxSize*2+maxSize/2) _, _ = rand.Read(expectedPayload) @@ -34,11 +40,20 @@ func TestTransformer(t *testing.T) { require.True(t, ok) require.Equal(t, cnr, childCnr) require.Equal(t, objectSDK.TypeRegular, tt.objects[i].Type()) + require.Equal(t, &owner, tt.objects[i].OwnerID()) payload := tt.objects[i].Payload() require.EqualValues(t, tt.objects[i].PayloadSize(), len(payload)) actualPayload = append(actualPayload, payload...) + if len(payload) != 0 { + cs, ok := tt.objects[i].PayloadChecksum() + require.True(t, ok) + + h := sha256.Sum256(payload) + require.Equal(t, h[:], cs.Value()) + } + switch i { case 0, 1: require.EqualValues(t, maxSize, len(payload)) @@ -51,6 +66,14 @@ func TestTransformer(t *testing.T) { } } require.Equal(t, expectedPayload, actualPayload) + + t.Run("parent checksum", func(t *testing.T) { + cs, ok := ids.ParentHeader.PayloadChecksum() + require.True(t, ok) + + h := sha256.Sum256(expectedPayload) + require.Equal(t, h[:], cs.Value()) + }) } func newObject(cnr cid.ID) *objectSDK.Object { -- 2.45.2 From bf7e56c64d96072f420a6204236869d6269bffe5 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 24 Feb 2023 16:31:12 +0300 Subject: [PATCH 09/12] [#19] transformer: Do not reuse memory of sent objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slower, but more correct. ``` name old time/op new time/op delta Transformer/small-8 72.4µs ± 8% 74.8µs ±11% ~ (p=0.278 n=9+10) Transformer/big-8 1.31s ± 8% 1.38s ±11% +5.50% (p=0.035 n=10+10) name old alloc/op new alloc/op delta Transformer/small-8 7.39kB ± 0% 7.69kB ± 0% +4.04% (p=0.000 n=10+10) Transformer/big-8 46.9kB ± 0% 49.2kB ± 0% +4.87% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Transformer/small-8 94.6 ± 1% 102.0 ± 0% +7.82% (p=0.000 n=10+9) Transformer/big-8 560 ± 0% 620 ± 1% +10.66% (p=0.000 n=10+10) ``` Signed-off-by: Evgenii Stratonikov --- object/transformer/hasher.go | 38 ++++++++++++++ object/transformer/transformer.go | 72 ++++++-------------------- object/transformer/transformer_test.go | 7 +-- 3 files changed, 56 insertions(+), 61 deletions(-) create mode 100644 object/transformer/hasher.go diff --git a/object/transformer/hasher.go b/object/transformer/hasher.go new file mode 100644 index 0000000..4482a79 --- /dev/null +++ b/object/transformer/hasher.go @@ -0,0 +1,38 @@ +package transformer + +import ( + "crypto/sha256" + "hash" + + "github.com/TrueCloudLab/frostfs-sdk-go/checksum" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/tzhash/tz" +) + +type payloadChecksumHasher struct { + hasher hash.Hash + typ checksum.Type +} + +func (h payloadChecksumHasher) writeChecksum(obj *objectSDK.Object) { + switch h.typ { + case checksum.SHA256: + csSHA := [sha256.Size]byte{} + h.hasher.Sum(csSHA[:0]) + + var cs checksum.Checksum + cs.SetSHA256(csSHA) + + obj.SetPayloadChecksum(cs) + case checksum.TZ: + csTZ := [tz.Size]byte{} + h.hasher.Sum(csTZ[:0]) + + var cs checksum.Checksum + cs.SetTillichZemor(csTZ) + + obj.SetPayloadHomomorphicHash(cs) + default: + panic("unreachable") + } +} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index 55ad9ff..86b0a39 100644 --- a/object/transformer/transformer.go +++ 
b/object/transformer/transformer.go @@ -4,7 +4,6 @@ import ( "crypto/ecdsa" "crypto/sha256" "fmt" - "hash" "io" "github.com/TrueCloudLab/frostfs-sdk-go/checksum" @@ -33,12 +32,6 @@ type payloadSizeLimiter struct { parAttrs []object.Attribute } -type payloadChecksumHasher struct { - hasher hash.Hash - - checksumWriter func([]byte) -} - type Params struct { Key *ecdsa.PrivateKey NextTarget ObjectTarget @@ -83,11 +76,17 @@ func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) { } func (s *payloadSizeLimiter) initialize() { + s.current = fromObject(s.current) + // if it is an object after the 1st if ln := len(s.previous); ln > 0 { // initialize parent object once (after 1st object) if ln == 1 { - s.detachParent() + s.parent = fromObject(s.current) + s.parentHashers = s.currentHashers + + // return source attributes + s.parent.SetAttributes(s.parAttrs...) } // set previous object to the last previous identifier @@ -118,7 +117,7 @@ func fromObject(obj *object.Object) *object.Object { func (s *payloadSizeLimiter) initializeCurrent() { // create payload hashers s.writtenCurrent = 0 - s.currentHashers = payloadHashersForObject(s.current, s.WithoutHomomorphicHash) + s.currentHashers = payloadHashersForObject(s.WithoutHomomorphicHash) // compose multi-writer from target and all payload hashers ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) @@ -136,42 +135,18 @@ func (s *payloadSizeLimiter) initializeCurrent() { s.chunkWriter = io.MultiWriter(ws...) } -func payloadHashersForObject(obj *object.Object, withoutHomomorphicHash bool) []*payloadChecksumHasher { +func payloadHashersForObject(withoutHomomorphicHash bool) []*payloadChecksumHasher { hashers := make([]*payloadChecksumHasher, 0, 2) hashers = append(hashers, &payloadChecksumHasher{ hasher: sha256.New(), - checksumWriter: func(binChecksum []byte) { - if ln := len(binChecksum); ln != sha256.Size { - panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", sha256.Size, ln)) - } - - csSHA := [sha256.Size]byte{} - copy(csSHA[:], binChecksum) - - var cs checksum.Checksum - cs.SetSHA256(csSHA) - - obj.SetPayloadChecksum(cs) - }, + typ: checksum.SHA256, }) if !withoutHomomorphicHash { hashers = append(hashers, &payloadChecksumHasher{ hasher: tz.New(), - checksumWriter: func(binChecksum []byte) { - if ln := len(binChecksum); ln != tz.Size { - panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", tz.Size, ln)) - } - - csTZ := [tz.Size]byte{} - copy(csTZ[:], binChecksum) - - var cs checksum.Checksum - cs.SetTillichZemor(csTZ) - - obj.SetPayloadHomomorphicHash(cs) - }, + typ: checksum.TZ, }) } @@ -185,13 +160,17 @@ func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) withParent := finalize && len(s.previous) > 0 if withParent { - writeHashes(s.parentHashers) + for i := range s.parentHashers { + s.parentHashers[i].writeChecksum(s.parent) + } s.parent.SetPayloadSize(s.written) s.current.SetParent(s.parent) } // release current object - writeHashes(s.currentHashers) + for i := range s.currentHashers { + s.currentHashers[i].writeChecksum(s.current) + } curEpoch := s.NetworkState.CurrentEpoch() ver := version.Current() @@ -259,12 +238,6 @@ func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) return ids, nil } -func writeHashes(hashers []*payloadChecksumHasher) { - for i := range hashers { - hashers[i].checksumWriter(hashers[i].hasher.Sum(nil)) - } -} - func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { s.current = fromObject(s.current) 
s.current.SetParent(parHdr) @@ -327,14 +300,3 @@ func (s *payloadSizeLimiter) prepareFirstChild() { // attributes will be added to parent in detachParent } - -func (s *payloadSizeLimiter) detachParent() { - s.parent = s.current - s.current = fromObject(s.parent) - s.parent.ResetRelations() - s.parent.SetSignature(nil) - s.parentHashers = s.currentHashers - - // return source attributes - s.parent.SetAttributes(s.parAttrs...) -} diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go index 2b4ee31..fbc8e45 100644 --- a/object/transformer/transformer_test.go +++ b/object/transformer/transformer_test.go @@ -182,12 +182,7 @@ func (tt *testTarget) Write(p []byte) (n int, err error) { func (tt *testTarget) Close() (*AccessIdentifiers, error) { tt.current.SetPayload(tt.payload) - // We need to marshal, because current implementation reuses written object. - data, _ := tt.current.Marshal() - obj := objectSDK.New() - _ = obj.Unmarshal(data) - - tt.objects = append(tt.objects, obj) + tt.objects = append(tt.objects, tt.current) tt.current = nil tt.payload = nil return nil, nil // AccessIdentifiers should not be used. -- 2.45.2 From 51fedb5d3b5c886f944cbab8d29b9aee3a23e0c0 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 23 Feb 2023 17:13:27 +0300 Subject: [PATCH 10/12] [#19] transformer: Add a target which sends parts to a channel Signed-off-by: Evgenii Stratonikov --- object/transformer/channel.go | 44 ++++++++++++++++++++++++ object/transformer/channel_test.go | 55 ++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 object/transformer/channel.go create mode 100644 object/transformer/channel_test.go diff --git a/object/transformer/channel.go b/object/transformer/channel.go new file mode 100644 index 0000000..707de09 --- /dev/null +++ b/object/transformer/channel.go @@ -0,0 +1,44 @@ +package transformer + +import ( + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/nspcc-dev/neo-go/pkg/util/slice" +) + +type chanTarget struct { + header *objectSDK.Object + payload []byte + ch chan<- *objectSDK.Object +} + +// NewChannelTarget returns ObjectTarget which writes +// object parts to a provided channel. +func NewChannelTarget(ch chan<- *objectSDK.Object) ObjectTarget { + return &chanTarget{ + ch: ch, + } +} + +// WriteHeader implements the ObjectTarget interface. +func (c *chanTarget) WriteHeader(object *objectSDK.Object) error { + c.header = object + return nil +} + +// Write implements the ObjectTarget interface. +func (c *chanTarget) Write(p []byte) (n int, err error) { + c.payload = append(c.payload, p...) + return len(p), nil +} + +// Close implements the ObjectTarget interface. 
+func (c *chanTarget) Close() (*AccessIdentifiers, error) { + if len(c.payload) != 0 { + c.header.SetPayload(slice.Copy(c.payload)) + } + c.ch <- c.header + + c.header = nil + c.payload = nil + return new(AccessIdentifiers), nil +} diff --git a/object/transformer/channel_test.go b/object/transformer/channel_test.go new file mode 100644 index 0000000..99aef23 --- /dev/null +++ b/object/transformer/channel_test.go @@ -0,0 +1,55 @@ +package transformer + +import ( + "crypto/rand" + "testing" + + cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/frostfs-sdk-go/version" + "github.com/stretchr/testify/require" +) + +func TestChannelTarget(t *testing.T) { + const maxSize = 100 + + ch := make(chan *objectSDK.Object, 10) + tt := new(testTarget) + + chTarget, _ := newPayloadSizeLimiter(maxSize, NewChannelTarget(ch)) + testTarget, _ := newPayloadSizeLimiter(maxSize, tt) + + ver := version.Current() + cnr := cidtest.ID() + hdr := objectSDK.New() + hdr.SetContainerID(cnr) + hdr.SetType(objectSDK.TypeRegular) + hdr.SetVersion(&ver) + + payload := make([]byte, maxSize*2+maxSize/2) + _, _ = rand.Read(payload) + + expectedIDs := writeObject(t, testTarget, hdr, payload) + actualIDs := writeObject(t, chTarget, hdr, payload) + _ = expectedIDs + _ = actualIDs + //require.Equal(t, expectedIDs, actualIDs) + + for i := range tt.objects { + select { + case obj := <-ch: + // Because of the split ID objects can be different. + // However, payload and attributes must be the same. + require.Equal(t, tt.objects[i].Payload(), obj.Payload()) + require.Equal(t, tt.objects[i].Attributes(), obj.Attributes()) + default: + require.FailNow(t, "received less parts than expected") + } + } + + select { + case <-ch: + require.FailNow(t, "received more parts than expected") + default: + } +} -- 2.45.2 From fe394bc0f694f1958995a524bed3b76a90da3dd7 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 24 Feb 2023 17:01:38 +0300 Subject: [PATCH 11/12] [#19] transformer: Optimize payload hashers a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` name old time/op new time/op delta Transformer/small-8 74.8µs ±11% 73.7µs ±15% ~ (p=0.529 n=10+10) Transformer/big-8 1.38s ±11% 1.36s ± 4% ~ (p=0.796 n=10+10) name old alloc/op new alloc/op delta Transformer/small-8 7.69kB ± 0% 7.67kB ± 0% -0.21% (p=0.000 n=10+10) Transformer/big-8 49.2kB ± 0% 49.0kB ± 0% -0.48% (p=0.004 n=10+10) name old allocs/op new allocs/op delta Transformer/small-8 102 ± 0% 101 ± 0% -0.98% (p=0.000 n=9+10) Transformer/big-8 620 ± 1% 609 ± 0% -1.66% (p=0.000 n=10+10) ``` Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index 86b0a39..613bc5e 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -21,7 +21,7 @@ type payloadSizeLimiter struct { current, parent *object.Object - currentHashers, parentHashers []*payloadChecksumHasher + currentHashers, parentHashers []payloadChecksumHasher previous []oid.ID @@ -83,7 +83,7 @@ func (s *payloadSizeLimiter) initialize() { // initialize parent object once (after 1st object) if ln == 1 { s.parent = fromObject(s.current) - s.parentHashers = s.currentHashers + s.parentHashers = append(s.parentHashers[:0], s.currentHashers...) 
// return source attributes s.parent.SetAttributes(s.parAttrs...) @@ -117,7 +117,7 @@ func fromObject(obj *object.Object) *object.Object { func (s *payloadSizeLimiter) initializeCurrent() { // create payload hashers s.writtenCurrent = 0 - s.currentHashers = payloadHashersForObject(s.WithoutHomomorphicHash) + s.initPayloadHashers() // compose multi-writer from target and all payload hashers ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) @@ -135,22 +135,18 @@ func (s *payloadSizeLimiter) initializeCurrent() { s.chunkWriter = io.MultiWriter(ws...) } -func payloadHashersForObject(withoutHomomorphicHash bool) []*payloadChecksumHasher { - hashers := make([]*payloadChecksumHasher, 0, 2) - - hashers = append(hashers, &payloadChecksumHasher{ +func (s *payloadSizeLimiter) initPayloadHashers() { + s.currentHashers = append(s.currentHashers[:0], payloadChecksumHasher{ hasher: sha256.New(), typ: checksum.SHA256, }) - if !withoutHomomorphicHash { - hashers = append(hashers, &payloadChecksumHasher{ + if !s.WithoutHomomorphicHash { + s.currentHashers = append(s.currentHashers, payloadChecksumHasher{ hasher: tz.New(), typ: checksum.TZ, }) } - - return hashers } func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) { -- 2.45.2 From e7ee4e1f24b5c6d6bd8f9fcd89de18ce1764b924 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 24 Feb 2023 17:09:34 +0300 Subject: [PATCH 12/12] [#19] transformer: Do not allocate intermeate slice for hashers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` name old time/op new time/op delta Transformer/small-8 73.7µs ±15% 72.4µs ±16% ~ (p=0.604 n=10+9) Transformer/big-8 1.36s ± 4% 1.36s ± 8% ~ (p=0.579 n=10+10) name old alloc/op new alloc/op delta Transformer/small-8 7.67kB ± 0% 7.57kB ± 0% -1.36% (p=0.000 n=10+10) Transformer/big-8 49.0kB ± 0% 48.3kB ± 0% -1.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Transformer/small-8 101 ± 0% 98 ± 0% -2.97% (p=0.000 n=10+10) Transformer/big-8 609 ± 0% 591 ± 1% -3.00% (p=0.000 n=10+9) ``` Signed-off-by: Evgenii Stratonikov --- object/transformer/transformer.go | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go index 613bc5e..1c323a7 100644 --- a/object/transformer/transformer.go +++ b/object/transformer/transformer.go @@ -4,7 +4,6 @@ import ( "crypto/ecdsa" "crypto/sha256" "fmt" - "io" "github.com/TrueCloudLab/frostfs-sdk-go/checksum" "github.com/TrueCloudLab/frostfs-sdk-go/object" @@ -25,8 +24,6 @@ type payloadSizeLimiter struct { previous []oid.ID - chunkWriter io.Writer - splitID *object.SplitID parAttrs []object.Attribute @@ -118,21 +115,6 @@ func (s *payloadSizeLimiter) initializeCurrent() { // create payload hashers s.writtenCurrent = 0 s.initPayloadHashers() - - // compose multi-writer from target and all payload hashers - ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) - - ws = append(ws, s.NextTarget) - - for i := range s.currentHashers { - ws = append(ws, s.currentHashers[i].hasher) - } - - for i := range s.parentHashers { - ws = append(ws, s.parentHashers[i].hasher) - } - - s.chunkWriter = io.MultiWriter(ws...) 
} func (s *payloadSizeLimiter) initPayloadHashers() { @@ -269,7 +251,7 @@ func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { cut = leftToEdge } - if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil { + if err := s.writeHashes(chunk[:cut]); err != nil { return fmt.Errorf("could not write chunk to target: %w", err) } @@ -285,6 +267,24 @@ func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { } } +func (s *payloadSizeLimiter) writeHashes(chunk []byte) error { + _, err := s.NextTarget.Write(chunk) + if err != nil { + return err + } + + // The `Write` method of `hash.Hash` never returns an error. + for i := range s.currentHashers { + _, _ = s.currentHashers[i].hasher.Write(chunk) + } + + for i := range s.parentHashers { + _, _ = s.parentHashers[i].hasher.Write(chunk) + } + + return nil +} + func (s *payloadSizeLimiter) prepareFirstChild() { // initialize split header with split ID on first object in chain s.current.InitRelations() -- 2.45.2
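
For reference, a minimal usage sketch of the transformer package as it looks to a caller once the whole series is applied: NewPayloadSizeLimiter splits the payload into parts of at most MaxSize bytes, signs each part with the supplied key, and hands finished parts to the next target, here NewChannelTarget from PATCH 10. The import path, the fixedEpoch helper, the P-256 key and the concrete sizes are assumptions made for illustration only, not something taken from the patches themselves.

```go
package main

import (
	"crypto/ecdsa"
	"crypto/elliptic"
	"crypto/rand"
	"fmt"

	cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test"
	objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
	"github.com/TrueCloudLab/frostfs-sdk-go/object/transformer"
)

// fixedEpoch is a trivial EpochSource used only for this sketch.
type fixedEpoch uint64

func (e fixedEpoch) CurrentEpoch() uint64 { return uint64(e) }

func main() {
	// Key used to sign every produced part (assumed P-256, as in the tests above).
	key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		panic(err)
	}

	// Finished parts are delivered to this channel by the channel target.
	parts := make(chan *objectSDK.Object, 8)

	tgt := transformer.NewPayloadSizeLimiter(transformer.Params{
		Key:          key,
		NextTarget:   transformer.NewChannelTarget(parts),
		NetworkState: fixedEpoch(13),
		MaxSize:      100, // tiny limit so that a 250-byte payload is split
	})

	hdr := objectSDK.New()
	hdr.SetContainerID(cidtest.ID())
	hdr.SetType(objectSDK.TypeRegular)

	// Header first, then payload, then Close: the limiter finalizes and signs
	// each part and, because a split happens here, also emits a linking object.
	if err := tgt.WriteHeader(hdr); err != nil {
		panic(err)
	}
	if _, err := tgt.Write(make([]byte, 250)); err != nil {
		panic(err)
	}
	ids, err := tgt.Close()
	if err != nil {
		panic(err)
	}
	close(parts)

	fmt.Println("last part ID:", ids.SelfID)
	if ids.ParentID != nil {
		fmt.Println("root (parent) object ID:", *ids.ParentID)
	}
	for obj := range parts {
		fmt.Println("part payload size:", obj.PayloadSize())
	}
}
```

With MaxSize of 100 and a 250-byte payload the channel receives four objects (two full parts, one 50-byte part and the linking object); ids.SelfID refers to the last data part, while ids.ParentID and ids.ParentHeader describe the signed root object, matching the behaviour exercised in TestTransformer above.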