diff --git a/object/transformer/channel.go b/object/transformer/channel.go new file mode 100644 index 0000000..707de09 --- /dev/null +++ b/object/transformer/channel.go @@ -0,0 +1,44 @@ +package transformer + +import ( + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/nspcc-dev/neo-go/pkg/util/slice" +) + +type chanTarget struct { + header *objectSDK.Object + payload []byte + ch chan<- *objectSDK.Object +} + +// NewChannelTarget returns an ObjectTarget which sends every +// completed object (header with its payload) to the provided channel. +func NewChannelTarget(ch chan<- *objectSDK.Object) ObjectTarget { + return &chanTarget{ + ch: ch, + } +} + +// WriteHeader implements the ObjectTarget interface by storing the header until Close is called. +func (c *chanTarget) WriteHeader(object *objectSDK.Object) error { + c.header = object + return nil +} + +// Write implements the ObjectTarget interface by appending p to the buffered payload. +func (c *chanTarget) Write(p []byte) (n int, err error) { + c.payload = append(c.payload, p...) + return len(p), nil +} + +// Close implements the ObjectTarget interface: it attaches a copy of the buffered payload (if any) to the stored header, sends the header to the channel and resets the target. NOTE(review): the stored header is nil unless WriteHeader was called before.
+func (c *chanTarget) Close() (*AccessIdentifiers, error) { + if len(c.payload) != 0 { + c.header.SetPayload(slice.Copy(c.payload)) + } + c.ch <- c.header + + c.header = nil + c.payload = nil + return new(AccessIdentifiers), nil +} diff --git a/object/transformer/channel_test.go b/object/transformer/channel_test.go new file mode 100644 index 0000000..99aef23 --- /dev/null +++ b/object/transformer/channel_test.go @@ -0,0 +1,55 @@ +package transformer + +import ( + "crypto/rand" + "testing" + + cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/frostfs-sdk-go/version" + "github.com/stretchr/testify/require" +) + +func TestChannelTarget(t *testing.T) { + const maxSize = 100 + + ch := make(chan *objectSDK.Object, 10) + tt := new(testTarget) + + chTarget, _ := newPayloadSizeLimiter(maxSize, NewChannelTarget(ch)) + testTarget, _ := newPayloadSizeLimiter(maxSize, tt) + + ver := version.Current() + cnr := cidtest.ID() + hdr := objectSDK.New() + hdr.SetContainerID(cnr) + hdr.SetType(objectSDK.TypeRegular) + hdr.SetVersion(&ver) + + payload := make([]byte, maxSize*2+maxSize/2) + _, _ = rand.Read(payload) + + expectedIDs := writeObject(t, testTarget, hdr, payload) + actualIDs := writeObject(t, chTarget, hdr, payload) + _ = expectedIDs + _ = actualIDs + // NOTE(review): require.Equal(t, expectedIDs, actualIDs) is disabled; presumably the IDs differ because each limiter gets its own key and split ID. + + for i := range tt.objects { + select { + case obj := <-ch: + // Because of the split ID, the objects can be different. + // However, payload and attributes must be the same.
+ require.Equal(t, tt.objects[i].Payload(), obj.Payload()) + require.Equal(t, tt.objects[i].Attributes(), obj.Attributes()) + default: + require.FailNow(t, "received less parts than expected") + } + } + + select { + case <-ch: + require.FailNow(t, "received more parts than expected") + default: + } +} diff --git a/object/transformer/hasher.go b/object/transformer/hasher.go new file mode 100644 index 0000000..4482a79 --- /dev/null +++ b/object/transformer/hasher.go @@ -0,0 +1,38 @@ +package transformer + +import ( + "crypto/sha256" + "hash" + + "github.com/TrueCloudLab/frostfs-sdk-go/checksum" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/tzhash/tz" +) + +type payloadChecksumHasher struct { + hasher hash.Hash + typ checksum.Type +} + +func (h payloadChecksumHasher) writeChecksum(obj *objectSDK.Object) { + switch h.typ { + case checksum.SHA256: + csSHA := [sha256.Size]byte{} + h.hasher.Sum(csSHA[:0]) + + var cs checksum.Checksum + cs.SetSHA256(csSHA) + + obj.SetPayloadChecksum(cs) + case checksum.TZ: + csTZ := [tz.Size]byte{} + h.hasher.Sum(csTZ[:0]) + + var cs checksum.Checksum + cs.SetTillichZemor(csTZ) + + obj.SetPayloadHomomorphicHash(cs) + default: + panic("unreachable") + } +} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go new file mode 100644 index 0000000..1c323a7 --- /dev/null +++ b/object/transformer/transformer.go @@ -0,0 +1,298 @@ +package transformer + +import ( + "crypto/ecdsa" + "crypto/sha256" + "fmt" + + "github.com/TrueCloudLab/frostfs-sdk-go/checksum" + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/frostfs-sdk-go/session" + "github.com/TrueCloudLab/frostfs-sdk-go/version" + "github.com/TrueCloudLab/tzhash/tz" +) + +type payloadSizeLimiter struct { + Params + + written, writtenCurrent uint64 + + current, parent *object.Object + + currentHashers, parentHashers []payloadChecksumHasher + +
+ previous []oid.ID + + splitID *object.SplitID + + parAttrs []object.Attribute +} + +type Params struct { + Key *ecdsa.PrivateKey + NextTarget ObjectTarget + SessionToken *session.Object + NetworkState EpochSource + MaxSize uint64 + WithoutHomomorphicHash bool +} + +// NewPayloadSizeLimiter returns an ObjectTarget instance that restricts the payload length +// of the written object and writes the generated objects to p.NextTarget. +// +// Calculates and adds the homomorphic hash to resulting objects only if p.WithoutHomomorphicHash +// is false. +// +// Objects w/ payload size less or equal than p.MaxSize are written as a single object. NOTE(review): a Write call issued after exactly p.MaxSize bytes, even an empty one, starts a new part. +func NewPayloadSizeLimiter(p Params) ObjectTarget { + return &payloadSizeLimiter{ + Params: p, + splitID: object.NewSplitID(), + } +} + +func (s *payloadSizeLimiter) WriteHeader(hdr *object.Object) error { + s.current = fromObject(hdr) + + s.initialize() + + return nil +} + +func (s *payloadSizeLimiter) Write(p []byte) (int, error) { + if err := s.writeChunk(p); err != nil { + return 0, err // reports 0 even if a part of p was already processed + } + + return len(p), nil +} + +func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) { + return s.release(true) +} + +func (s *payloadSizeLimiter) initialize() { + s.current = fromObject(s.current) + + // true for every object after the 1st one + if ln := len(s.previous); ln > 0 { + // initialize the parent object once (right after the 1st object is released) + if ln == 1 { + s.parent = fromObject(s.current) + s.parentHashers = append(s.parentHashers[:0], s.currentHashers...) // the parent reuses the hasher instances that already consumed the 1st object payload + + // restore the source attributes cut in prepareFirstChild + s.parent.SetAttributes(s.parAttrs...) + } + + // link the current object to the most recently released one + s.current.SetPreviousID(s.previous[ln-1]) + } + + s.initializeCurrent() +} + +func fromObject(obj *object.Object) *object.Object { + cnr, _ := obj.ContainerID() + + res := object.New() + res.SetContainerID(cnr) + res.SetOwnerID(obj.OwnerID()) + res.SetAttributes(obj.Attributes()...)
+ res.SetType(obj.Type()) + + // SetSplitID creates the split header, which is not needed for small + // objects, so the split ID is copied only when it is set. + if obj.SplitID() != nil { + res.SetSplitID(obj.SplitID()) + } + + return res +} + +func (s *payloadSizeLimiter) initializeCurrent() { + // reset the byte counter of the current object and create fresh payload hashers + s.writtenCurrent = 0 + s.initPayloadHashers() +} + +func (s *payloadSizeLimiter) initPayloadHashers() { + s.currentHashers = append(s.currentHashers[:0], payloadChecksumHasher{ + hasher: sha256.New(), + typ: checksum.SHA256, + }) + + if !s.WithoutHomomorphicHash { + s.currentHashers = append(s.currentHashers, payloadChecksumHasher{ + hasher: tz.New(), + typ: checksum.TZ, + }) + } +} + +func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) { + // Arg finalize is true only when called from the Close method. + // We finalize the parent and generate the linking object only if there is more + // than 1 object in the split chain. + withParent := finalize && len(s.previous) > 0 + + if withParent { + for i := range s.parentHashers { + s.parentHashers[i].writeChecksum(s.parent) + } + s.parent.SetPayloadSize(s.written) + s.current.SetParent(s.parent) + } + + // release the current object: write its checksums and fill in the header + for i := range s.currentHashers { + s.currentHashers[i].writeChecksum(s.current) + } + + curEpoch := s.NetworkState.CurrentEpoch() + ver := version.Current() + + s.current.SetVersion(&ver) + s.current.SetPayloadSize(s.writtenCurrent) + s.current.SetSessionToken(s.SessionToken) + s.current.SetCreationEpoch(curEpoch) + + var ( + parID *oid.ID + parHdr *object.Object + ) + + if par := s.current.Parent(); par != nil && par.Signature() == nil { // the parent is not signed yet: finalize and sign it + rawPar := object.NewFromV2(par.ToV2()) + + rawPar.SetSessionToken(s.SessionToken) + rawPar.SetCreationEpoch(curEpoch) + + if err := object.SetIDWithSignature(*s.Key, rawPar); err != nil { + return nil, fmt.Errorf("could not finalize parent object: %w", err) + } + + id, _ := rawPar.ID() + parID = &id + parHdr = rawPar + +
+ s.current.SetParent(parHdr) + } + + if err := object.SetIDWithSignature(*s.Key, s.current); err != nil { + return nil, fmt.Errorf("could not finalize object: %w", err) + } + + if err := s.NextTarget.WriteHeader(s.current); err != nil { + return nil, fmt.Errorf("could not write header to next target: %w", err) + } + + if _, err := s.NextTarget.Close(); err != nil { + return nil, fmt.Errorf("could not close next target: %w", err) + } + + id, _ := s.current.ID() + + ids := &AccessIdentifiers{ + ParentID: parID, + SelfID: id, + ParentHeader: parHdr, + } + + // save the identifier of the released object + s.previous = append(s.previous, ids.SelfID) + + if withParent { + // generate and release the linking object + s.initializeLinking(ids.ParentHeader) + s.initializeCurrent() + + if _, err := s.release(false); err != nil { + return nil, fmt.Errorf("could not release linking object: %w", err) + } + } + + return ids, nil // identifiers of the last payload part, not of the linking object +} + +func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { + s.current = fromObject(s.current) + s.current.SetParent(parHdr) + s.current.SetChildren(s.previous...) + s.current.SetSplitID(s.splitID) +} + +func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { + for { + // the condition is true if the previous write of bytes reached exactly the boundary.
+ if s.written > 0 && s.written%s.MaxSize == 0 { + if s.written == s.MaxSize { + s.prepareFirstChild() + } + + // we need to release the current object + if _, err := s.release(false); err != nil { + return fmt.Errorf("could not release object: %w", err) + } + + // initialize the next object + s.initialize() + } + + var ( + ln = uint64(len(chunk)) + cut = ln + leftToEdge = s.MaxSize - s.written%s.MaxSize // NOTE(review): division by zero if MaxSize is 0 + ) + + // write bytes no further than the boundary of the current object + if ln > leftToEdge { + cut = leftToEdge + } + + if err := s.writeHashes(chunk[:cut]); err != nil { + return fmt.Errorf("could not write chunk to target: %w", err) + } + + // increase the written bytes counters + s.writtenCurrent += cut + s.written += cut + + if cut == ln { + return nil + } + // if there are more bytes in the buffer we loop again to start filling another object + chunk = chunk[cut:] + } +} + +func (s *payloadSizeLimiter) writeHashes(chunk []byte) error { + _, err := s.NextTarget.Write(chunk) // forward the chunk to the next target before hashing it + if err != nil { + return err + } + + // The `Write` method of `hash.Hash` never returns an error.
+ for i := range s.currentHashers { + _, _ = s.currentHashers[i].hasher.Write(chunk) + } + + for i := range s.parentHashers { + _, _ = s.parentHashers[i].hasher.Write(chunk) + } + + return nil +} + +func (s *payloadSizeLimiter) prepareFirstChild() { + // initialize the split header with the split ID on the first object in the chain + s.current.InitRelations() + s.current.SetSplitID(s.splitID) + + // cut the source attributes; they are kept in parAttrs for the parent object + s.parAttrs = s.current.Attributes() + s.current.SetAttributes() + + // the attributes are restored on the parent object in initialize +} diff --git a/object/transformer/transformer_test.go b/object/transformer/transformer_test.go new file mode 100644 index 0000000..fbc8e45 --- /dev/null +++ b/object/transformer/transformer_test.go @@ -0,0 +1,189 @@ +package transformer + +import ( + "crypto/rand" + "crypto/sha256" + "testing" + + cid "github.com/TrueCloudLab/frostfs-sdk-go/container/id" + cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object" + "github.com/TrueCloudLab/frostfs-sdk-go/user" + "github.com/TrueCloudLab/frostfs-sdk-go/version" + "github.com/nspcc-dev/neo-go/pkg/crypto/keys" + "github.com/stretchr/testify/require" +) + +func TestTransformer(t *testing.T) { + const maxSize = 100 + + tt := new(testTarget) + + target, pk := newPayloadSizeLimiter(maxSize, tt) + + cnr := cidtest.ID() + hdr := newObject(cnr) + + var owner user.ID + user.IDFromKey(&owner, pk.PrivateKey.PublicKey) + hdr.SetOwnerID(&owner) + + expectedPayload := make([]byte, maxSize*2+maxSize/2) + _, _ = rand.Read(expectedPayload) + + ids := writeObject(t, target, hdr, expectedPayload) + require.Equal(t, 4, len(tt.objects)) // 3 parts + linking object + + var actualPayload []byte + for i := range tt.objects { + childCnr, ok := tt.objects[i].ContainerID() + require.True(t, ok) + require.Equal(t, cnr, childCnr) + require.Equal(t, objectSDK.TypeRegular, tt.objects[i].Type()) + require.Equal(t, &owner, tt.objects[i].OwnerID()) + +
+ payload := tt.objects[i].Payload() + require.EqualValues(t, tt.objects[i].PayloadSize(), len(payload)) + actualPayload = append(actualPayload, payload...) + + if len(payload) != 0 { + cs, ok := tt.objects[i].PayloadChecksum() + require.True(t, ok) + + h := sha256.Sum256(payload) + require.Equal(t, h[:], cs.Value()) + } + + switch i { + case 0, 1: + require.EqualValues(t, maxSize, len(payload)) + case 2: + require.EqualValues(t, maxSize/2, len(payload)) + case 3: + parID, ok := tt.objects[i].ParentID() + require.True(t, ok) + require.Equal(t, ids.ParentID, &parID) + } + } + require.Equal(t, expectedPayload, actualPayload) + + t.Run("parent checksum", func(t *testing.T) { + cs, ok := ids.ParentHeader.PayloadChecksum() + require.True(t, ok) + + h := sha256.Sum256(expectedPayload) + require.Equal(t, h[:], cs.Value()) + }) +} + +func newObject(cnr cid.ID) *objectSDK.Object { + ver := version.Current() + hdr := objectSDK.New() + hdr.SetContainerID(cnr) + hdr.SetType(objectSDK.TypeRegular) + hdr.SetVersion(&ver) + return hdr +} + +func writeObject(t *testing.T, target ObjectTarget, header *objectSDK.Object, payload []byte) *AccessIdentifiers { + require.NoError(t, target.WriteHeader(header)) + + _, err := target.Write(payload) + require.NoError(t, err) + + ids, err := target.Close() + require.NoError(t, err) + + return ids +} + +func BenchmarkTransformer(b *testing.B) { + hdr := newObject(cidtest.ID()) + + b.Run("small", func(b *testing.B) { + benchmarkTransformer(b, hdr, 8*1024) + }) + b.Run("big", func(b *testing.B) { + benchmarkTransformer(b, hdr, 64*1024*1024*9/2) + }) +} + +func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize int) { + const maxSize = 64 * 1024 * 1024 + + payload := make([]byte, payloadSize) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + f, _ := newPayloadSizeLimiter(maxSize, benchTarget{}) + if err := f.WriteHeader(header); err != nil { + b.Fatalf("write header: %v", err) + } + if _, err :=
+ f.Write(payload); err != nil { + b.Fatalf("write: %v", err) + } + if _, err := f.Close(); err != nil { + b.Fatalf("close: %v", err) + } + } +} + +func newPayloadSizeLimiter(maxSize uint64, nextTarget ObjectTarget) (ObjectTarget, *keys.PrivateKey) { + p, err := keys.NewPrivateKey() + if err != nil { + panic(err) + } + + return NewPayloadSizeLimiter(Params{ + Key: &p.PrivateKey, + NextTarget: nextTarget, + NetworkState: dummyEpochSource(123), + MaxSize: maxSize, + WithoutHomomorphicHash: true, + }), p +} + +type dummyEpochSource uint64 + +func (s dummyEpochSource) CurrentEpoch() uint64 { + return uint64(s) +} + +type benchTarget struct{} + +func (benchTarget) WriteHeader(object *objectSDK.Object) error { + return nil +} + +func (benchTarget) Write(p []byte) (n int, err error) { + return len(p), nil +} + +func (benchTarget) Close() (*AccessIdentifiers, error) { + return nil, nil +} + +type testTarget struct { + current *objectSDK.Object + payload []byte + objects []*objectSDK.Object +} + +func (tt *testTarget) WriteHeader(object *objectSDK.Object) error { + tt.current = object + return nil +} + +func (tt *testTarget) Write(p []byte) (n int, err error) { + tt.payload = append(tt.payload, p...) + return len(p), nil +} + +func (tt *testTarget) Close() (*AccessIdentifiers, error) { + tt.current.SetPayload(tt.payload) + tt.objects = append(tt.objects, tt.current) + tt.current = nil + tt.payload = nil + return nil, nil // AccessIdentifiers should not be used. +} diff --git a/object/transformer/types.go b/object/transformer/types.go new file mode 100644 index 0000000..f030e26 --- /dev/null +++ b/object/transformer/types.go @@ -0,0 +1,55 @@ +package transformer + +import ( + "io" + + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" +) + +// AccessIdentifiers represents a group of the object identifiers +// that are returned after writing the object.
+// Consists of the ID of the stored object, the ID of the parent object and the parent object header. +type AccessIdentifiers struct { + ParentID *oid.ID + SelfID oid.ID + ParentHeader *object.Object +} + +// EpochSource is a source for the current epoch. +type EpochSource interface { + CurrentEpoch() uint64 +} + +// ObjectTarget is an interface of the object writer. +type ObjectTarget interface { + // WriteHeader writes the object header with a payload part. + // The payload of the object may be incomplete. + // + // Must be called exactly once. Control remains with the caller. + // Missing a call or re-calling can lead to undefined behavior + // that depends on the implementation. + // + // Must not be called after the Close call. + WriteHeader(*object.Object) error + + // Write writes an object payload chunk. + // + // Can be called multiple times. + // + // Must not be called after the Close call. + io.Writer + + // Close is used to finish object writing. + // + // Close must return access identifiers of the object + // that has been written. + // + // Must be called no more than once. Control remains with the caller. + // Re-calling can lead to undefined behavior + // that depends on the implementation. + Close() (*AccessIdentifiers, error) +} + +// TargetInitializer represents an ObjectTarget constructor. +type TargetInitializer func() ObjectTarget