From ac8442bf9968451817bf82ccc3499489e2a8b9e3 Mon Sep 17 00:00:00 2001
From: Evgenii Stratonikov <e.stratonikov@yadro.com>
Date: Fri, 17 Feb 2023 12:52:46 +0300
Subject: [PATCH] [#19] object: Move transformer implementation from node

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
---
 object/transformer/fmt.go         | 116 ++++++++++++
 object/transformer/transformer.go | 294 ++++++++++++++++++++++++++++++
 object/transformer/types.go       | 111 +++++++++++
 3 files changed, 521 insertions(+)
 create mode 100644 object/transformer/fmt.go
 create mode 100644 object/transformer/transformer.go
 create mode 100644 object/transformer/types.go

diff --git a/object/transformer/fmt.go b/object/transformer/fmt.go
new file mode 100644
index 00000000..b0ca5c9b
--- /dev/null
+++ b/object/transformer/fmt.go
@@ -0,0 +1,116 @@
+package transformer
+
+import (
+	"crypto/ecdsa"
+	"fmt"
+
+	"github.com/TrueCloudLab/frostfs-sdk-go/object"
+	oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id"
+	"github.com/TrueCloudLab/frostfs-sdk-go/session"
+	"github.com/TrueCloudLab/frostfs-sdk-go/version"
+)
+
+type formatter struct {
+	prm *FormatterParams // parameters passed to NewFormatTarget; the pointer is stored, not copied
+	// obj is the header remembered by WriteHeader; Close finalizes and forwards it.
+	obj *object.Object
+	// sz is the running sum of the byte counts returned by NextTarget.Write.
+	sz uint64
+}
+
+type EpochSource interface {
+	CurrentEpoch() uint64 // formatter.Close stores this value as the object's creation epoch
+}
+
+// FormatterParams groups NewFormatTarget parameters.
+type FormatterParams struct {
+	Key *ecdsa.PrivateKey // signs the object and, when it is finalized here, its parent header
+	// NextTarget receives the payload chunks first and the finalized header on Close.
+	NextTarget ObjectTarget
+	// SessionToken is set on the object (and on a finalized parent) as is, without a nil check.
+	SessionToken *session.Object
+	// NetworkState supplies the epoch recorded as the creation epoch.
+	NetworkState EpochSource
+}
+
+// NewFormatTarget creates an ObjectTarget that completes the object header
+// on Close and hands the result over to p.NextTarget.
+//
+// Payload chunks reach the next target before its WriteHeader call.
+//
+// Header fields filled in on Close:
+// - version (current SDK version);
+// - payload size (sum of the byte counts accepted by the next target);
+// - session token;
+// - creation epoch;
+// - verification fields (ID, Signature).
+func NewFormatTarget(p *FormatterParams) ObjectTarget {
+	f := new(formatter)
+	f.prm = p
+	return f
+}
+
+func (f *formatter) WriteHeader(obj *object.Object) error {
+	f.obj = obj // only remembered here; Close finalizes it and passes it on
+
+	return nil
+}
+
+func (f *formatter) Write(p []byte) (n int, err error) {
+	// Forward the chunk untouched and count what the next target accepted.
+	written, werr := f.prm.NextTarget.Write(p)
+	f.sz += uint64(written)
+
+	return written, werr
+}
+
+// Close finalizes the header stored by WriteHeader, writes it to the next target and closes
+// that target. Parent ID and header are returned only if the parent was finalized here.
+func (f *formatter) Close() (*AccessIdentifiers, error) {
+	curEpoch := f.prm.NetworkState.CurrentEpoch()
+	ver := version.Current()
+	// Fill in the header fields that are only known at this point.
+	f.obj.SetVersion(&ver)
+	f.obj.SetPayloadSize(f.sz)
+	f.obj.SetSessionToken(f.prm.SessionToken)
+	f.obj.SetCreationEpoch(curEpoch)
+
+	var (
+		parID  *oid.ID
+		parHdr *object.Object
+	)
+	// A parent that carries no signature yet is finalized here as well.
+	if par := f.obj.Parent(); par != nil && par.Signature() == nil {
+		rawPar := object.NewFromV2(par.ToV2())
+
+		rawPar.SetSessionToken(f.prm.SessionToken)
+		rawPar.SetCreationEpoch(curEpoch)
+
+		if err := object.SetIDWithSignature(*f.prm.Key, rawPar); err != nil {
+			return nil, fmt.Errorf("could not finalize parent object: %w", err)
+		}
+		// The "ID is set" flag is dropped: presumably always true right after SetIDWithSignature.
+		id, _ := rawPar.ID()
+		parID = &id
+		parHdr = rawPar
+		f.obj.SetParent(parHdr)
+	}
+
+	if err := object.SetIDWithSignature(*f.prm.Key, f.obj); err != nil {
+		return nil, fmt.Errorf("could not finalize object: %w", err)
+	}
+	// Payload chunks were already forwarded by Write, so the header goes last.
+	if err := f.prm.NextTarget.WriteHeader(f.obj); err != nil {
+		return nil, fmt.Errorf("could not write header to next target: %w", err)
+	}
+	// Identifiers reported by the next target are discarded.
+	if _, err := f.prm.NextTarget.Close(); err != nil {
+		return nil, fmt.Errorf("could not close next target: %w", err)
+	}
+
+	id, _ := f.obj.ID()
+	return new(AccessIdentifiers).
+		WithSelfID(id).
+		WithParentID(parID).
+		WithParent(parHdr), nil
+}
diff --git a/object/transformer/transformer.go b/object/transformer/transformer.go
new file mode 100644
index 00000000..cfd70980
--- /dev/null
+++ b/object/transformer/transformer.go
@@ -0,0 +1,294 @@
+package transformer
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"hash"
+	"io"
+
+	"github.com/TrueCloudLab/frostfs-sdk-go/checksum"
+	"github.com/TrueCloudLab/frostfs-sdk-go/object"
+	oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id"
+	"github.com/TrueCloudLab/tzhash/tz"
+)
+
+type payloadSizeLimiter struct {
+	maxSize, written uint64 // per-object payload limit; total payload bytes written so far
+	// withoutHomomorphicHash disables the Tillich-Zemor hasher when true.
+	withoutHomomorphicHash bool
+	// targetInit creates a fresh target for every produced object.
+	targetInit func() ObjectTarget
+	// target is the target of the object currently being written.
+	target ObjectTarget
+	// current is the object being filled; parent is set by detachParent once the payload is split.
+	current, parent *object.Object
+	// Hashers fed with the payload of the current object and with the whole payload (parent).
+	currentHashers, parentHashers []*payloadChecksumHasher
+	// previous lists the IDs of the already released objects, in release order.
+	previous []oid.ID
+	// chunkWriter duplicates every chunk to target and to all active hashers.
+	chunkWriter io.Writer
+	// splitID is assigned to the first child and to the linking object.
+	splitID *object.SplitID
+	// parAttrs holds the source attributes cut from the first child until detachParent restores them.
+	parAttrs []object.Attribute
+}
+
+type payloadChecksumHasher struct {
+	hasher hash.Hash // receives the payload bytes through the chunk writer
+	// checksumWriter stores the final digest in the header of the object it was created for.
+	checksumWriter func([]byte)
+}
+
+// NewPayloadSizeLimiter creates an ObjectTarget that caps the payload length of
+// every produced object at maxSize (expected to be non-zero) and writes the
+// resulting objects to targets obtained from targetInit.
+//
+// The homomorphic hash is calculated and set on the resulting objects unless
+// withoutHomomorphicHash is true.
+// Objects whose payload does not exceed maxSize are not split.
+func NewPayloadSizeLimiter(maxSize uint64, withoutHomomorphicHash bool, targetInit TargetInitializer) ObjectTarget {
+	s := new(payloadSizeLimiter)
+	s.maxSize = maxSize
+	s.withoutHomomorphicHash = withoutHomomorphicHash
+	s.targetInit = targetInit
+	s.splitID = object.NewSplitID()
+	return s
+}
+
+func (s *payloadSizeLimiter) WriteHeader(hdr *object.Object) error {
+	s.current = fromObject(hdr) // fresh object carrying over selected header fields of hdr
+	// Prepare the target, the hashers and the chunk writer for the first object.
+	s.initialize()
+
+	return nil
+}
+
+func (s *payloadSizeLimiter) Write(p []byte) (int, error) {
+	if err := s.writeChunk(p); err != nil {
+		return 0, err
+	}
+	// writeChunk consumed the whole buffer, possibly spreading it over several objects.
+	return len(p), nil
+}
+
+func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) {
+	return s.release(true) // true: also finalize the parent and emit the linking object if the payload was split
+}
+
+func (s *payloadSizeLimiter) initialize() {
+	released := len(s.previous)
+	// exactly one released object means the split has just started:
+	// the parent object is initialized once, right after the 1st object
+	if released == 1 {
+		s.detachParent()
+	}
+	// every object after the 1st refers to the last released identifier
+	if released > 0 {
+		s.current.SetPreviousID(s.previous[released-1])
+	}
+
+	s.initializeCurrent()
+}
+
+func fromObject(obj *object.Object) *object.Object {
+	res := object.New()
+	// the presence flag of the container ID is not checked
+	cnr, _ := obj.ContainerID()
+	res.SetContainerID(cnr)
+	res.SetOwnerID(obj.OwnerID())
+	res.SetAttributes(obj.Attributes()...)
+	res.SetType(obj.Type())
+
+	// SetSplitID creates a split header, which is not needed for
+	// small objects, hence the nil check.
+	if splitID := obj.SplitID(); splitID != nil {
+		res.SetSplitID(splitID)
+	}
+
+	return res
+}
+
+func (s *payloadSizeLimiter) initializeCurrent() {
+	// every object gets its own target and its own set of payload hashers
+	s.target = s.targetInit()
+	s.currentHashers = payloadHashersForObject(s.current, s.withoutHomomorphicHash)
+
+	// the chunk writer duplicates each chunk to the target first, then to
+	// the hashers of the current object, then to the hashers of the parent
+	sinks := make([]io.Writer, 1, 1+len(s.currentHashers)+len(s.parentHashers))
+	sinks[0] = s.target
+
+	// hashers of the current object
+	for _, h := range s.currentHashers {
+		sinks = append(sinks, h.hasher)
+	}
+
+	// hashers of the parent; the slice stays nil until detachParent sets it
+	for _, h := range s.parentHashers {
+		sinks = append(sinks, h.hasher)
+	}
+
+	s.chunkWriter = io.MultiWriter(sinks...)
+}
+
+// payloadHashersForObject builds the hashers whose digests are stored in obj:
+// SHA256 always, Tillich-Zemor only if withoutHomomorphicHash is false.
+func payloadHashersForObject(obj *object.Object, withoutHomomorphicHash bool) []*payloadChecksumHasher {
+	shaHasher := &payloadChecksumHasher{
+		hasher: sha256.New(),
+		checksumWriter: func(digest []byte) {
+			if got := len(digest); got != sha256.Size {
+				panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", sha256.Size, got))
+			}
+
+			var (
+				fixed [sha256.Size]byte
+				cs    checksum.Checksum
+			)
+			copy(fixed[:], digest)
+			cs.SetSHA256(fixed)
+			obj.SetPayloadChecksum(cs)
+		},
+	}
+	if withoutHomomorphicHash {
+		return []*payloadChecksumHasher{shaHasher}
+	}
+
+	tzHasher := &payloadChecksumHasher{
+		hasher: tz.New(),
+		checksumWriter: func(digest []byte) {
+			if got := len(digest); got != tz.Size {
+				panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", tz.Size, got))
+			}
+
+			var (
+				fixed [tz.Size]byte
+				cs    checksum.Checksum
+			)
+			copy(fixed[:], digest)
+			cs.SetTillichZemor(fixed)
+			obj.SetPayloadHomomorphicHash(cs)
+		},
+	}
+	return []*payloadChecksumHasher{shaHasher, tzHasher}
+}
+
+// release stores the checksums in the current object, writes its header to the
+// target and closes the target. The returned identifiers are those of that object.
+func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) {
+	// Arg finalize is true only when called from Close method.
+	// The parent is finalized and the linking object is generated only if
+	// at least one object of the split chain has already been released.
+	withParent := finalize && len(s.previous) > 0
+	// The last child carries the parent header with the checksums and size of the whole payload.
+	if withParent {
+		writeHashes(s.parentHashers)
+		s.parent.SetPayloadSize(s.written)
+		s.current.SetParent(s.parent)
+	}
+	// store the payload checksums of the current object in its header
+	writeHashes(s.currentHashers)
+
+	// release current, get its id
+	if err := s.target.WriteHeader(s.current); err != nil {
+		return nil, fmt.Errorf("could not write header: %w", err)
+	}
+
+	ids, err := s.target.Close()
+	if err != nil {
+		return nil, fmt.Errorf("could not close target: %w", err)
+	}
+
+	// save identifier of the released object
+	s.previous = append(s.previous, ids.SelfID())
+
+	if withParent {
+		// generate and release linking object; ids.Parent() is the parent header reported by the target
+		s.initializeLinking(ids.Parent())
+		s.initializeCurrent()
+		// finalize is false here, so the recursion ends after the linking object
+		if _, err := s.release(false); err != nil {
+			return nil, fmt.Errorf("could not release linking object: %w", err)
+		}
+	}
+	return ids, nil
+}
+
+func writeHashes(hashers []*payloadChecksumHasher) {
+	for _, h := range hashers { // Sum(nil) yields the digest of everything hashed so far
+		h.checksumWriter(h.hasher.Sum(nil))
+	}
+}
+
+func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) {
+	s.current = fromObject(s.current) // the linking object replaces the last child as current
+	s.current.SetParent(parHdr)
+	s.current.SetChildren(s.previous...) // all released objects, in release order
+	s.current.SetSplitID(s.splitID)
+}
+
+func (s *payloadSizeLimiter) writeChunk(chunk []byte) error {
+	// statement is true if the previous write of bytes reached exactly the boundary.
+	if s.written > 0 && s.written%s.maxSize == 0 {
+		if s.written == s.maxSize {
+			s.prepareFirstChild()
+		}
+
+		// we need to release current object
+		if _, err := s.release(false); err != nil {
+			return fmt.Errorf("could not release object: %w", err)
+		}
+
+		// initialize another object
+		s.initialize()
+	}
+
+	var (
+		ln         = uint64(len(chunk))
+		cut        = ln
+		leftToEdge = s.maxSize - s.written%s.maxSize
+	)
+
+	// write bytes no further than the boundary of the current object
+	if ln > leftToEdge {
+		cut = leftToEdge
+	}
+
+	if _, err := s.chunkWriter.Write(chunk[:cut]); err != nil {
+		return fmt.Errorf("could not write chunk to target: %w", err)
+	}
+
+	// increase written bytes counter
+	s.written += cut
+
+	// if there are more bytes in buffer we call method again to start filling another object
+	if ln > leftToEdge {
+		return s.writeChunk(chunk[cut:])
+	}
+
+	return nil
+}
+
+// prepareFirstChild turns the current object into the first child of a split chain.
+// writeChunk calls it on the first call after exactly maxSize bytes have been written.
+func (s *payloadSizeLimiter) prepareFirstChild() {
+	// initialize split header with split ID on first object in chain
+	s.current.InitRelations()
+	s.current.SetSplitID(s.splitID)
+	// cut source attributes: they are kept aside in parAttrs
+	s.parAttrs = s.current.Attributes()
+	s.current.SetAttributes()
+	// attributes will be added to parent in detachParent
+}
+
+// detachParent makes the released first object the parent and starts a new current object from it.
+func (s *payloadSizeLimiter) detachParent() {
+	s.parent = s.current
+	s.current = fromObject(s.parent) // taken before ResetRelations, so fromObject still sees the split ID
+	s.parent.ResetRelations()
+	s.parent.SetSignature(nil)
+	s.parentHashers = s.currentHashers // initializeCurrent keeps feeding these with every later chunk
+	// return source attributes
+	s.parent.SetAttributes(s.parAttrs...)
+}
diff --git a/object/transformer/types.go b/object/transformer/types.go
new file mode 100644
index 00000000..72e6a742
--- /dev/null
+++ b/object/transformer/types.go
@@ -0,0 +1,111 @@
+package transformer
+
+import (
+	"io"
+
+	"github.com/TrueCloudLab/frostfs-sdk-go/object"
+	oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id"
+)
+
+// AccessIdentifiers represents group of the object identifiers
+// that are returned after writing the object.
+// Consists of the ID of the stored object, the ID of its parent and the parent header.
+type AccessIdentifiers struct {
+	par *oid.ID // ID of the parent object; may be nil
+
+	self oid.ID
+	// parHdr is the header of the parent object; may be nil.
+	parHdr *object.Object
+}
+
+// ObjectTarget is an interface of the object writer.
+type ObjectTarget interface {
+	// WriteHeader writes the object header with a payload part.
+	// The payload of the object may be incomplete.
+	//
+	// Must be called exactly once. Control remains with the caller.
+	// Missing a call or re-calling can lead to undefined behavior
+	// that depends on the implementation.
+	//
+	// Must not be called after the Close call.
+	WriteHeader(*object.Object) error
+
+	// Write writes a chunk of the object payload.
+	//
+	// Can be called multiple times.
+	//
+	// Must not be called after the Close call.
+	io.Writer
+
+	// Close is used to finish object writing.
+	//
+	// Close must return the access identifiers of the object
+	// that has been written.
+	//
+	// Must be called no more than once. Control remains with the caller.
+	// Re-calling can lead to undefined behavior
+	// that depends on the implementation.
+	Close() (*AccessIdentifiers, error)
+}
+
+// TargetInitializer is an ObjectTarget constructor; the size limiter calls it once per produced object.
+type TargetInitializer func() ObjectTarget
+
+// SelfID returns identifier of the written object. It has a value receiver, so unlike ParentID and Parent it panics when called through a nil pointer.
+func (a AccessIdentifiers) SelfID() oid.ID {
+	return a.self
+}
+
+// WithSelfID stores v as the self identifier and returns the receiver.
+// A nil receiver is replaced with a newly allocated AccessIdentifiers.
+func (a *AccessIdentifiers) WithSelfID(v oid.ID) *AccessIdentifiers {
+	if a == nil {
+		a = new(AccessIdentifiers)
+	}
+
+	a.self = v
+
+	return a
+}
+
+// ParentID returns identifier of the parent of the written object.
+// It is safe to call on a nil receiver; the result is nil then.
+func (a *AccessIdentifiers) ParentID() *oid.ID {
+	if a == nil {
+		return nil
+	}
+	return a.par
+}
+
+// WithParentID stores v as the parent identifier and returns the receiver.
+// A nil receiver is replaced with a newly allocated AccessIdentifiers.
+func (a *AccessIdentifiers) WithParentID(v *oid.ID) *AccessIdentifiers {
+	if a == nil {
+		a = new(AccessIdentifiers)
+	}
+
+	a.par = v
+
+	return a
+}
+
+// Parent returns the header of the parent of the written object.
+// It is safe to call on a nil receiver; the result is nil then.
+func (a *AccessIdentifiers) Parent() *object.Object {
+	if a == nil {
+		return nil
+	}
+	return a.parHdr
+}
+
+// WithParent stores v as the parent header and returns the receiver.
+// A nil receiver is replaced with a newly allocated AccessIdentifiers.
+func (a *AccessIdentifiers) WithParent(v *object.Object) *AccessIdentifiers {
+	if a == nil {
+		a = new(AccessIdentifiers)
+	}
+
+	a.parHdr = v
+
+	return a
+}