2020-09-16 14:46:31 +00:00
|
|
|
package transformer
|
|
|
|
|
|
|
|
import (
|
2023-04-03 11:23:53 +00:00
|
|
|
"context"
|
2020-09-16 14:46:31 +00:00
|
|
|
"crypto/sha256"
|
|
|
|
"fmt"
|
|
|
|
"hash"
|
|
|
|
|
2023-03-07 13:38:26 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/checksum"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
|
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
|
|
|
"git.frostfs.info/TrueCloudLab/tzhash/tz"
|
2020-09-16 14:46:31 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type payloadSizeLimiter struct {
|
|
|
|
maxSize, written uint64
|
|
|
|
|
2022-04-29 18:44:00 +00:00
|
|
|
withoutHomomorphicHash bool
|
|
|
|
|
2020-09-16 14:46:31 +00:00
|
|
|
targetInit func() ObjectTarget
|
|
|
|
|
|
|
|
target ObjectTarget
|
|
|
|
|
2022-03-03 14:19:05 +00:00
|
|
|
current, parent *object.Object
|
2020-09-16 14:46:31 +00:00
|
|
|
|
|
|
|
currentHashers, parentHashers []*payloadChecksumHasher
|
|
|
|
|
2022-05-31 17:00:41 +00:00
|
|
|
previous []oid.ID
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
chunkWriter writer
|
2020-11-24 18:14:32 +00:00
|
|
|
|
2022-03-03 14:19:05 +00:00
|
|
|
splitID *object.SplitID
|
2020-12-02 08:29:51 +00:00
|
|
|
|
2022-03-15 12:11:35 +00:00
|
|
|
parAttrs []object.Attribute
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// payloadChecksumHasher couples a running payload hash with the callback that
// stores the resulting checksum.
type payloadChecksumHasher struct {
	// hasher accumulates the payload bytes.
	hasher hash.Hash

	// checksumWriter is called with the hasher's sum (see writeHashes).
	checksumWriter func([]byte)
}
|
|
|
|
|
|
|
|
// NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length
|
|
|
|
// of the writing object and writes generated objects to targets from initializer.
|
|
|
|
//
|
2022-04-29 18:44:00 +00:00
|
|
|
// Calculates and adds homomorphic hash to resulting objects only if withoutHomomorphicHash
|
|
|
|
// is false.
|
|
|
|
//
|
2020-09-16 14:46:31 +00:00
|
|
|
// Objects w/ payload size less or equal than max size remain untouched.
|
2022-04-29 18:44:00 +00:00
|
|
|
func NewPayloadSizeLimiter(maxSize uint64, withoutHomomorphicHash bool, targetInit TargetInitializer) ObjectTarget {
|
2020-09-16 14:46:31 +00:00
|
|
|
return &payloadSizeLimiter{
|
2022-04-29 18:44:00 +00:00
|
|
|
maxSize: maxSize,
|
|
|
|
withoutHomomorphicHash: withoutHomomorphicHash,
|
|
|
|
targetInit: targetInit,
|
|
|
|
splitID: object.NewSplitID(),
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-12 14:01:29 +00:00
|
|
|
func (s *payloadSizeLimiter) WriteHeader(_ context.Context, hdr *object.Object) error {
|
2020-09-16 14:46:31 +00:00
|
|
|
s.current = fromObject(hdr)
|
|
|
|
|
2020-09-29 09:21:16 +00:00
|
|
|
s.initialize()
|
|
|
|
|
2020-09-16 14:46:31 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (s *payloadSizeLimiter) Write(ctx context.Context, p []byte) (int, error) {
|
|
|
|
if err := s.writeChunk(ctx, p); err != nil {
|
2020-09-16 14:46:31 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return len(p), nil
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (s *payloadSizeLimiter) Close(ctx context.Context) (*AccessIdentifiers, error) {
|
|
|
|
return s.release(ctx, true)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) initialize() {
|
|
|
|
// if it is an object after the 1st
|
|
|
|
if ln := len(s.previous); ln > 0 {
|
|
|
|
// initialize parent object once (after 1st object)
|
|
|
|
if ln == 1 {
|
2020-12-02 08:29:51 +00:00
|
|
|
s.detachParent()
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// set previous object to the last previous identifier
|
2022-05-12 16:37:46 +00:00
|
|
|
s.current.SetPreviousID(s.previous[ln-1])
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
s.initializeCurrent()
|
|
|
|
}
|
|
|
|
|
2022-03-03 14:19:05 +00:00
|
|
|
func fromObject(obj *object.Object) *object.Object {
|
2022-05-12 16:37:46 +00:00
|
|
|
cnr, _ := obj.ContainerID()
|
|
|
|
|
2022-03-03 14:19:05 +00:00
|
|
|
res := object.New()
|
2022-05-12 16:37:46 +00:00
|
|
|
res.SetContainerID(cnr)
|
2020-11-16 09:43:52 +00:00
|
|
|
res.SetOwnerID(obj.OwnerID())
|
|
|
|
res.SetAttributes(obj.Attributes()...)
|
|
|
|
res.SetType(obj.Type())
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2020-11-24 18:14:32 +00:00
|
|
|
// obj.SetSplitID creates splitHeader but we don't need to do it in case
|
|
|
|
// of small objects, so we should make nil check.
|
|
|
|
if obj.SplitID() != nil {
|
|
|
|
res.SetSplitID(obj.SplitID())
|
|
|
|
}
|
|
|
|
|
2020-09-16 14:46:31 +00:00
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) initializeCurrent() {
|
|
|
|
// initialize current object target
|
|
|
|
s.target = s.targetInit()
|
|
|
|
|
|
|
|
// create payload hashers
|
2022-04-29 18:44:00 +00:00
|
|
|
s.currentHashers = payloadHashersForObject(s.current, s.withoutHomomorphicHash)
|
2020-09-24 12:22:59 +00:00
|
|
|
|
|
|
|
// compose multi-writer from target and all payload hashers
|
2023-04-03 11:23:53 +00:00
|
|
|
ws := make([]writer, 0, 1+len(s.currentHashers)+len(s.parentHashers))
|
2020-09-24 12:22:59 +00:00
|
|
|
|
|
|
|
ws = append(ws, s.target)
|
|
|
|
|
|
|
|
for i := range s.currentHashers {
|
2023-04-03 11:23:53 +00:00
|
|
|
ws = append(ws, newWriter(s.currentHashers[i].hasher))
|
2020-09-24 12:22:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for i := range s.parentHashers {
|
2023-04-03 11:23:53 +00:00
|
|
|
ws = append(ws, newWriter(s.parentHashers[i].hasher))
|
2020-09-24 12:22:59 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
s.chunkWriter = newMultiWriter(ws...)
|
2020-09-24 12:22:59 +00:00
|
|
|
}
|
|
|
|
|
2022-04-29 18:44:00 +00:00
|
|
|
func payloadHashersForObject(obj *object.Object, withoutHomomorphicHash bool) []*payloadChecksumHasher {
|
|
|
|
hashers := make([]*payloadChecksumHasher, 0, 2)
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2022-04-29 18:44:00 +00:00
|
|
|
hashers = append(hashers, &payloadChecksumHasher{
|
|
|
|
hasher: sha256.New(),
|
|
|
|
checksumWriter: func(binChecksum []byte) {
|
|
|
|
if ln := len(binChecksum); ln != sha256.Size {
|
|
|
|
panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", sha256.Size, ln))
|
|
|
|
}
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2022-04-29 18:44:00 +00:00
|
|
|
csSHA := [sha256.Size]byte{}
|
|
|
|
copy(csSHA[:], binChecksum)
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2022-04-29 18:44:00 +00:00
|
|
|
var cs checksum.Checksum
|
|
|
|
cs.SetSHA256(csSHA)
|
|
|
|
|
|
|
|
obj.SetPayloadChecksum(cs)
|
2020-09-16 14:46:31 +00:00
|
|
|
},
|
2022-04-29 18:44:00 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
if !withoutHomomorphicHash {
|
|
|
|
hashers = append(hashers, &payloadChecksumHasher{
|
2020-09-16 14:46:31 +00:00
|
|
|
hasher: tz.New(),
|
2022-05-11 16:35:01 +00:00
|
|
|
checksumWriter: func(binChecksum []byte) {
|
|
|
|
if ln := len(binChecksum); ln != tz.Size {
|
2022-02-25 09:20:49 +00:00
|
|
|
panic(fmt.Sprintf("wrong checksum length: expected %d, has %d", tz.Size, ln))
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
2022-02-25 09:20:49 +00:00
|
|
|
csTZ := [tz.Size]byte{}
|
2022-05-11 16:35:01 +00:00
|
|
|
copy(csTZ[:], binChecksum)
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2022-05-11 16:35:01 +00:00
|
|
|
var cs checksum.Checksum
|
|
|
|
cs.SetTillichZemor(csTZ)
|
2020-09-16 14:46:31 +00:00
|
|
|
|
2022-05-11 16:35:01 +00:00
|
|
|
obj.SetPayloadHomomorphicHash(cs)
|
2020-09-16 14:46:31 +00:00
|
|
|
},
|
2022-04-29 18:44:00 +00:00
|
|
|
})
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
2022-04-29 18:44:00 +00:00
|
|
|
|
|
|
|
return hashers
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (s *payloadSizeLimiter) release(ctx context.Context, finalize bool) (*AccessIdentifiers, error) {
|
2022-10-17 12:37:01 +00:00
|
|
|
// Arg finalize is true only when called from Close method.
|
2020-09-16 14:46:31 +00:00
|
|
|
// We finalize parent and generate linking objects only if it is more
|
|
|
|
// than 1 object in split-chain.
|
2022-10-17 12:37:01 +00:00
|
|
|
withParent := finalize && len(s.previous) > 0
|
2020-09-16 14:46:31 +00:00
|
|
|
|
|
|
|
if withParent {
|
|
|
|
writeHashes(s.parentHashers)
|
|
|
|
s.parent.SetPayloadSize(s.written)
|
2022-03-03 14:19:05 +00:00
|
|
|
s.current.SetParent(s.parent)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// release current object
|
|
|
|
writeHashes(s.currentHashers)
|
|
|
|
|
|
|
|
// release current, get its id
|
2023-04-12 14:01:29 +00:00
|
|
|
if err := s.target.WriteHeader(ctx, s.current); err != nil {
|
2021-05-18 08:12:51 +00:00
|
|
|
return nil, fmt.Errorf("could not write header: %w", err)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
ids, err := s.target.Close(ctx)
|
2020-09-16 14:46:31 +00:00
|
|
|
if err != nil {
|
2021-05-18 08:12:51 +00:00
|
|
|
return nil, fmt.Errorf("could not close target: %w", err)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// save identifier of the released object
|
2022-05-31 17:00:41 +00:00
|
|
|
s.previous = append(s.previous, ids.SelfID())
|
2020-09-16 14:46:31 +00:00
|
|
|
|
|
|
|
if withParent {
|
|
|
|
// generate and release linking object
|
2020-12-18 10:39:44 +00:00
|
|
|
s.initializeLinking(ids.Parent())
|
2020-09-16 14:46:31 +00:00
|
|
|
s.initializeCurrent()
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
if _, err := s.release(ctx, false); err != nil {
|
2021-05-18 08:12:51 +00:00
|
|
|
return nil, fmt.Errorf("could not release linking object: %w", err)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func writeHashes(hashers []*payloadChecksumHasher) {
|
|
|
|
for i := range hashers {
|
|
|
|
hashers[i].checksumWriter(hashers[i].hasher.Sum(nil))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-03 14:19:05 +00:00
|
|
|
func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) {
|
2020-09-16 14:46:31 +00:00
|
|
|
s.current = fromObject(s.current)
|
2020-12-18 10:39:44 +00:00
|
|
|
s.current.SetParent(parHdr)
|
2020-09-16 14:46:31 +00:00
|
|
|
s.current.SetChildren(s.previous...)
|
2020-11-24 18:14:32 +00:00
|
|
|
s.current.SetSplitID(s.splitID)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (s *payloadSizeLimiter) writeChunk(ctx context.Context, chunk []byte) error {
|
2020-09-29 09:21:16 +00:00
|
|
|
// statement is true if the previous write of bytes reached exactly the boundary.
|
|
|
|
if s.written > 0 && s.written%s.maxSize == 0 {
|
2020-10-20 06:54:19 +00:00
|
|
|
if s.written == s.maxSize {
|
2020-12-02 08:29:51 +00:00
|
|
|
s.prepareFirstChild()
|
2020-10-20 06:54:19 +00:00
|
|
|
}
|
|
|
|
|
2020-09-29 09:21:16 +00:00
|
|
|
// we need to release current object
|
2023-04-03 11:23:53 +00:00
|
|
|
if _, err := s.release(ctx, false); err != nil {
|
2021-05-18 08:12:51 +00:00
|
|
|
return fmt.Errorf("could not release object: %w", err)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// initialize another object
|
|
|
|
s.initialize()
|
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
ln = uint64(len(chunk))
|
|
|
|
cut = ln
|
|
|
|
leftToEdge = s.maxSize - s.written%s.maxSize
|
|
|
|
)
|
|
|
|
|
|
|
|
// write bytes no further than the boundary of the current object
|
|
|
|
if ln > leftToEdge {
|
|
|
|
cut = leftToEdge
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
if _, err := s.chunkWriter.Write(ctx, chunk[:cut]); err != nil {
|
2021-05-18 08:12:51 +00:00
|
|
|
return fmt.Errorf("could not write chunk to target: %w", err)
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// increase written bytes counter
|
|
|
|
s.written += cut
|
|
|
|
|
|
|
|
// if there are more bytes in buffer we call method again to start filling another object
|
|
|
|
if ln > leftToEdge {
|
2023-04-03 11:23:53 +00:00
|
|
|
return s.writeChunk(ctx, chunk[cut:])
|
2020-09-16 14:46:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2020-12-02 08:29:51 +00:00
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) prepareFirstChild() {
|
|
|
|
// initialize split header with split ID on first object in chain
|
|
|
|
s.current.InitRelations()
|
|
|
|
s.current.SetSplitID(s.splitID)
|
|
|
|
|
|
|
|
// cut source attributes
|
|
|
|
s.parAttrs = s.current.Attributes()
|
|
|
|
s.current.SetAttributes()
|
|
|
|
|
|
|
|
// attributes will be added to parent in detachParent
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) detachParent() {
|
|
|
|
s.parent = s.current
|
|
|
|
s.current = fromObject(s.parent)
|
|
|
|
s.parent.ResetRelations()
|
2020-12-18 10:39:44 +00:00
|
|
|
s.parent.SetSignature(nil)
|
2020-12-02 08:29:51 +00:00
|
|
|
s.parentHashers = s.currentHashers
|
|
|
|
|
|
|
|
// return source attributes
|
|
|
|
s.parent.SetAttributes(s.parAttrs...)
|
|
|
|
}
|