From ac8442bf9968451817bf82ccc3499489e2a8b9e3 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Fri, 17 Feb 2023 12:52:46 +0300 Subject: [PATCH] [#19] object: Move transformer implementation from node Signed-off-by: Evgenii Stratonikov --- object/transformer/fmt.go | 116 ++++++++++++ object/transformer/transformer.go | 294 ++++++++++++++++++++++++++++++ object/transformer/types.go | 111 +++++++++++ 3 files changed, 521 insertions(+) create mode 100644 object/transformer/fmt.go create mode 100644 object/transformer/transformer.go create mode 100644 object/transformer/types.go diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go new file mode 100644 index 00000000..b0ca5c9b --- /dev/null +++ b/object/transformer/fmt.go @@ -0,0 +1,116 @@ +package transformer + +import ( + "crypto/ecdsa" + "fmt" + + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/frostfs-sdk-go/session" + "github.com/TrueCloudLab/frostfs-sdk-go/version" +) + +type formatter struct { + prm *FormatterParams + + obj *object.Object + + sz uint64 +} + +type EpochSource interface { + CurrentEpoch() uint64 +} + +// FormatterParams groups NewFormatTarget parameters. +type FormatterParams struct { + Key *ecdsa.PrivateKey + + NextTarget ObjectTarget + + SessionToken *session.Object + + NetworkState EpochSource +} + +// NewFormatTarget returns an ObjectTarget instance that finalizes object structure +// and writes it to the next target. +// +// Chunks are written to the next target before its WriteHeader call, which happens on Close. +// +// Object changes: +// - sets version to current SDK version; +// - sets payload size to the total length of all written chunks; +// - sets session token; +// - sets creation epoch to the current epoch of the network state; +// - calculates and sets verification fields (ID, Signature). 
+func NewFormatTarget(p *FormatterParams) ObjectTarget { + return &formatter{ + prm: p, + } +} + +func (f *formatter) WriteHeader(obj *object.Object) error { + f.obj = obj + + return nil +} + +func (f *formatter) Write(p []byte) (n int, err error) { + n, err = f.prm.NextTarget.Write(p) + + f.sz += uint64(n) + + return +} + +func (f *formatter) Close() (*AccessIdentifiers, error) { + curEpoch := f.prm.NetworkState.CurrentEpoch() + ver := version.Current() + + f.obj.SetVersion(&ver) + f.obj.SetPayloadSize(f.sz) + f.obj.SetSessionToken(f.prm.SessionToken) + f.obj.SetCreationEpoch(curEpoch) + + var ( + parID *oid.ID + parHdr *object.Object + ) + + if par := f.obj.Parent(); par != nil && par.Signature() == nil { + rawPar := object.NewFromV2(par.ToV2()) + + rawPar.SetSessionToken(f.prm.SessionToken) + rawPar.SetCreationEpoch(curEpoch) + + if err := object.SetIDWithSignature(*f.prm.Key, rawPar); err != nil { + return nil, fmt.Errorf("could not finalize parent object: %w", err) + } + + id, _ := rawPar.ID() + parID = &id + parHdr = rawPar + + f.obj.SetParent(parHdr) + } + + if err := object.SetIDWithSignature(*f.prm.Key, f.obj); err != nil { + return nil, fmt.Errorf("could not finalize object: %w", err) + } + + if err := f.prm.NextTarget.WriteHeader(f.obj); err != nil { + return nil, fmt.Errorf("could not write header to next target: %w", err) + } + + if _, err := f.prm.NextTarget.Close(); err != nil { + return nil, fmt.Errorf("could not close next target: %w", err) + } + + id, _ := f.obj.ID() + + return new(AccessIdentifiers). + WithSelfID(id). + WithParentID(parID). 
+ WithParent(parHdr), nil +} diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go new file mode 100644 index 00000000..cfd70980 --- /dev/null +++ b/object/transformer/transformer.go @@ -0,0 +1,294 @@ +package transformer + +import ( + "crypto/sha256" + "fmt" + "hash" + "io" + + "github.com/TrueCloudLab/frostfs-sdk-go/checksum" + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" + "github.com/TrueCloudLab/tzhash/tz" +) + +type payloadSizeLimiter struct { + maxSize, written uint64 + + withoutHomomorphicHash bool + + targetInit func() ObjectTarget + + target ObjectTarget + + current, parent *object.Object + + currentHashers, parentHashers []*payloadChecksumHasher + + previous []oid.ID + + chunkWriter io.Writer + + splitID *object.SplitID + + parAttrs []object.Attribute +} + +type payloadChecksumHasher struct { + hasher hash.Hash + + checksumWriter func([]byte) +} + +// NewPayloadSizeLimiter returns an ObjectTarget instance that restricts payload length +// of the object being written and writes generated objects to targets from initializer. +// +// Calculates and adds homomorphic hash to resulting objects only if withoutHomomorphicHash +// is false. +// +// Objects w/ payload size less than or equal to max size are not split. 
+func NewPayloadSizeLimiter(maxSize uint64, withoutHomomorphicHash bool, targetInit TargetInitializer) ObjectTarget { + return &payloadSizeLimiter{ + maxSize: maxSize, + withoutHomomorphicHash: withoutHomomorphicHash, + targetInit: targetInit, + splitID: object.NewSplitID(), + } +} + +func (s *payloadSizeLimiter) WriteHeader(hdr *object.Object) error { + s.current = fromObject(hdr) + + s.initialize() + + return nil +} + +func (s *payloadSizeLimiter) Write(p []byte) (int, error) { + if err := s.writeChunk(p); err != nil { + return 0, err + } + + return len(p), nil +} + +func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) { + return s.release(true) +} + +func (s *payloadSizeLimiter) initialize() { + // if it is an object after the 1st + if ln := len(s.previous); ln > 0 { + // initialize parent object once (after 1st object) + if ln == 1 { + s.detachParent() + } + + // set previous object to the last previous identifier + s.current.SetPreviousID(s.previous[ln-1]) + } + + s.initializeCurrent() +} + +func fromObject(obj *object.Object) *object.Object { + cnr, _ := obj.ContainerID() + + res := object.New() + res.SetContainerID(cnr) + res.SetOwnerID(obj.OwnerID()) + res.SetAttributes(obj.Attributes()...) + res.SetType(obj.Type()) + + // SetSplitID creates a split header, which is not needed in case + // of small objects, so it is called only for a non-nil split ID. 
+ if obj.SplitID() != nil { + res.SetSplitID(obj.SplitID()) + } + + return res +} + +func (s *payloadSizeLimiter) initializeCurrent() { + // initialize current object target + s.target = s.targetInit() + + // create payload hashers + s.currentHashers = payloadHashersForObject(s.current, s.withoutHomomorphicHash) + + // compose multi-writer from target and all payload hashers + ws := make([]io.Writer, 0, 1+len(s.currentHashers)+len(s.parentHashers)) + + ws = append(ws, s.target) + + for i := range s.currentHashers { + ws = append(ws, s.currentHashers[i].hasher) + } + + for i := range s.parentHashers { + ws = append(ws, s.parentHashers[i].hasher) + } + + s.chunkWriter = io.MultiWriter(ws...) +} + +func payloadHashersForObject(obj *object.Object, withoutHomomorphicHash bool) []*payloadChecksumHasher { + hashers := make([]*payloadChecksumHasher, 0, 2) + + hashers = append(hashers, &payloadChecksumHasher{ + hasher: sha256.New(), + checksumWriter: func(binChecksum []byte) { + if ln := len(binChecksum); ln != sha256.Size { + panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", sha256.Size, ln)) + } + + csSHA := [sha256.Size]byte{} + copy(csSHA[:], binChecksum) + + var cs checksum.Checksum + cs.SetSHA256(csSHA) + + obj.SetPayloadChecksum(cs) + }, + }) + + if !withoutHomomorphicHash { + hashers = append(hashers, &payloadChecksumHasher{ + hasher: tz.New(), + checksumWriter: func(binChecksum []byte) { + if ln := len(binChecksum); ln != tz.Size { + panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", tz.Size, ln)) + } + + csTZ := [tz.Size]byte{} + copy(csTZ[:], binChecksum) + + var cs checksum.Checksum + cs.SetTillichZemor(csTZ) + + obj.SetPayloadHomomorphicHash(cs) + }, + }) + } + + return hashers +} + +func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) { + // Arg finalize is true only when called from Close method. + // We finalize parent and generate linking objects only if there is more + // than 1 object in split-chain. 
+ withParent := finalize && len(s.previous) > 0 + + if withParent { + writeHashes(s.parentHashers) + s.parent.SetPayloadSize(s.written) + s.current.SetParent(s.parent) + } + + // release current object + writeHashes(s.currentHashers) + + // release current, get its id + if err := s.target.WriteHeader(s.current); err != nil { + return nil, fmt.Errorf("could not write header: %w", err) + } + + ids, err := s.target.Close() + if err != nil { + return nil, fmt.Errorf("could not close target: %w", err) + } + + // save identifier of the released object + s.previous = append(s.previous, ids.SelfID()) + + if withParent { + // generate and release linking object + s.initializeLinking(ids.Parent()) + s.initializeCurrent() + + if _, err := s.release(false); err != nil { + return nil, fmt.Errorf("could not release linking object: %w", err) + } + } + + return ids, nil +} + +func writeHashes(hashers []*payloadChecksumHasher) { + for i := range hashers { + hashers[i].checksumWriter(hashers[i].hasher.Sum(nil)) + } +} + +func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) { + s.current = fromObject(s.current) + s.current.SetParent(parHdr) + s.current.SetChildren(s.previous...) + s.current.SetSplitID(s.splitID) +} + +func (s *payloadSizeLimiter) writeChunk(chunk []byte) error { + // statement is true if the previous write of bytes reached exactly the boundary. 
+ if s.written > 0 && s.written%s.maxSize == 0 { + if s.written == s.maxSize { + s.prepareFirstChild() + } + + // we need to release current object + if _, err := s.release(false); err != nil { + return fmt.Errorf("could not release object: %w", err) + } + + // initialize another object + s.initialize() + } + + var ( + ln = uint64(len(chunk)) + cut = ln + leftToEdge = s.maxSize - s.written%s.maxSize + ) + + // write bytes no further than the boundary of the current object + if ln > leftToEdge { + cut = leftToEdge + } + + if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil { + return fmt.Errorf("could not write chunk to target: %w", err) + } + + // increase written bytes counter + s.written += cut + + // if there are more bytes in buffer we call method again to start filling another object + if ln > leftToEdge { + return s.writeChunk(chunk[cut:]) + } + + return nil +} + +func (s *payloadSizeLimiter) prepareFirstChild() { + // initialize split header with split ID on first object in chain + s.current.InitRelations() + s.current.SetSplitID(s.splitID) + + // cut source attributes + s.parAttrs = s.current.Attributes() + s.current.SetAttributes() + + // attributes will be added to parent in detachParent +} + +func (s *payloadSizeLimiter) detachParent() { + s.parent = s.current + s.current = fromObject(s.parent) + s.parent.ResetRelations() + s.parent.SetSignature(nil) + s.parentHashers = s.currentHashers + + // return source attributes + s.parent.SetAttributes(s.parAttrs...) +} diff --git a/object/transformer/types.go b/object/transformer/types.go new file mode 100644 index 00000000..72e6a742 --- /dev/null +++ b/object/transformer/types.go @@ -0,0 +1,111 @@ +package transformer + +import ( + "io" + + "github.com/TrueCloudLab/frostfs-sdk-go/object" + oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id" +) + +// AccessIdentifiers represents a group of the object identifiers +// that are returned after writing the object. 
+// Consists of the ID of the stored object, the ID of the parent object and the parent object header. +type AccessIdentifiers struct { + par *oid.ID + + self oid.ID + + parHdr *object.Object +} + +// ObjectTarget is an interface of the object writer. +type ObjectTarget interface { + // WriteHeader writes object header w/ payload part. + // The payload of the object may be incomplete. + // + // Must be called exactly once. Control remains with the caller. + // Missing a call or re-calling can lead to undefined behavior + // that depends on the implementation. + // + // Must not be called after Close call. + WriteHeader(*object.Object) error + + // Write writes object payload chunk. + // + // Can be called multiple times. + // + // Must not be called after Close call. + io.Writer + + // Close is used to finish object writing. + // + // Close must return access identifiers of the object + // that has been written. + // + // Must be called no more than once. Control remains with the caller. + // Re-calling can lead to undefined behavior + // that depends on the implementation. + Close() (*AccessIdentifiers, error) +} + +// TargetInitializer represents ObjectTarget constructor. +type TargetInitializer func() ObjectTarget + +// SelfID returns identifier of the written object. +func (a AccessIdentifiers) SelfID() oid.ID { + return a.self +} + +// WithSelfID returns AccessIdentifiers with passed self identifier. +func (a *AccessIdentifiers) WithSelfID(v oid.ID) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.self = v + + return res +} + +// ParentID returns identifier of the parent of the written object. +func (a *AccessIdentifiers) ParentID() *oid.ID { + if a != nil { + return a.par + } + + return nil +} + +// WithParentID returns AccessIdentifiers with passed parent identifier. 
+func (a *AccessIdentifiers) WithParentID(v *oid.ID) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.par = v + + return res +} + +// Parent returns the header of the parent of the written object. +func (a *AccessIdentifiers) Parent() *object.Object { + if a != nil { + return a.parHdr + } + + return nil +} + +// WithParent returns AccessIdentifiers with passed parent object header. +func (a *AccessIdentifiers) WithParent(v *object.Object) *AccessIdentifiers { + res := a + if res == nil { + res = new(AccessIdentifiers) + } + + res.parHdr = v + + return res +}