2023-02-17 09:52:46 +00:00
|
|
|
package transformer
|
|
|
|
|
|
|
|
import (
|
2023-04-21 10:04:44 +00:00
|
|
|
"context"
|
2023-02-18 07:43:34 +00:00
|
|
|
"crypto/ecdsa"
|
2023-02-17 09:52:46 +00:00
|
|
|
"crypto/sha256"
|
|
|
|
"fmt"
|
|
|
|
|
2023-03-16 06:51:03 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/checksum"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
|
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/session"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/version"
|
|
|
|
"git.frostfs.info/TrueCloudLab/tzhash/tz"
|
2023-02-17 09:52:46 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type payloadSizeLimiter struct {
|
2023-02-18 07:43:34 +00:00
|
|
|
Params
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:43:34 +00:00
|
|
|
written, writtenCurrent uint64
|
2023-02-17 09:52:46 +00:00
|
|
|
|
|
|
|
current, parent *object.Object
|
2023-07-07 12:34:31 +00:00
|
|
|
payload []byte
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-24 14:01:38 +00:00
|
|
|
currentHashers, parentHashers []payloadChecksumHasher
|
2023-02-17 09:52:46 +00:00
|
|
|
|
|
|
|
previous []oid.ID
|
|
|
|
|
|
|
|
splitID *object.SplitID
|
|
|
|
|
|
|
|
parAttrs []object.Attribute
|
2023-05-03 08:18:08 +00:00
|
|
|
|
2023-07-07 12:34:31 +00:00
|
|
|
nextTarget ObjectWriter
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
2023-02-18 07:43:34 +00:00
|
|
|
type Params struct {
|
|
|
|
Key *ecdsa.PrivateKey
|
2023-07-07 12:34:31 +00:00
|
|
|
NextTargetInit TargetInitializer
|
2023-02-18 07:43:34 +00:00
|
|
|
SessionToken *session.Object
|
|
|
|
NetworkState EpochSource
|
|
|
|
MaxSize uint64
|
|
|
|
WithoutHomomorphicHash bool
|
[#188] transformer: Allow to provide size hint
For big objects with known size we can optimize allocation patterns
by providing size hint. As with any hint, it does not affect transformer
functionality: slices with capacity > MaxSize are never allocated.
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
│ out │
│ sec/op │
Transformer/small/no_size_hint-8 65.44µ ± 3%
Transformer/small/no_size_hint,_with_buffer-8 64.24µ ± 5%
Transformer/small/with_size_hint,_with_buffer-8 58.70µ ± 5%
Transformer/big/no_size_hint-8 367.8m ± 3%
Transformer/big/no_size_hint,_with_buffer-8 562.7m ± 0%
Transformer/big/with_size_hint,_with_buffer-8 385.6m ± 7%
geomean 5.197m
│ out │
│ B/op │
Transformer/small/no_size_hint-8 13.40Ki ± 0%
Transformer/small/no_size_hint,_with_buffer-8 13.40Ki ± 0%
Transformer/small/with_size_hint,_with_buffer-8 13.39Ki ± 0%
Transformer/big/no_size_hint-8 288.0Mi ± 0%
Transformer/big/no_size_hint,_with_buffer-8 1.390Gi ± 0%
Transformer/big/with_size_hint,_with_buffer-8 288.0Mi ± 0%
geomean 2.533Mi
│ out │
│ allocs/op │
Transformer/small/no_size_hint-8 92.00 ± 0%
Transformer/small/no_size_hint,_with_buffer-8 92.00 ± 0%
Transformer/small/with_size_hint,_with_buffer-8 92.00 ± 0%
Transformer/big/no_size_hint-8 546.5 ± 0%
Transformer/big/no_size_hint,_with_buffer-8 607.5 ± 0%
Transformer/big/with_size_hint,_with_buffer-8 545.5 ± 0%
geomean 228.1
```
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-10-26 15:13:54 +00:00
|
|
|
// SizeHint is a hint for the total payload size to be processed.
|
|
|
|
// It is used primarily to optimize allocations and doesn't affect
|
|
|
|
// functionality. Primary usecases are providing file size when putting an object
|
|
|
|
// with the frostfs-cli or using Content-Length header in gateways.
|
|
|
|
SizeHint uint64
|
2023-02-18 07:43:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-17 09:52:46 +00:00
|
|
|
// NewPayloadSizeLimiter returns ObjectTarget instance that restricts payload length
|
|
|
|
// of the writing object and writes generated objects to targets from initializer.
|
|
|
|
//
|
|
|
|
// Calculates and adds homomorphic hash to resulting objects only if withoutHomomorphicHash
|
|
|
|
// is false.
|
|
|
|
//
|
|
|
|
// Objects w/ payload size less or equal than max size remain untouched.
|
2023-07-07 12:34:31 +00:00
|
|
|
func NewPayloadSizeLimiter(p Params) ChunkedObjectWriter {
|
2023-02-17 09:52:46 +00:00
|
|
|
return &payloadSizeLimiter{
|
2023-02-18 07:43:34 +00:00
|
|
|
Params: p,
|
|
|
|
splitID: object.NewSplitID(),
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-21 10:04:44 +00:00
|
|
|
func (s *payloadSizeLimiter) WriteHeader(_ context.Context, hdr *object.Object) error {
|
2023-02-17 09:52:46 +00:00
|
|
|
s.current = fromObject(hdr)
|
|
|
|
|
|
|
|
s.initialize()
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-04-21 10:04:44 +00:00
|
|
|
func (s *payloadSizeLimiter) Write(ctx context.Context, p []byte) (int, error) {
|
|
|
|
if err := s.writeChunk(ctx, p); err != nil {
|
2023-02-17 09:52:46 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return len(p), nil
|
|
|
|
}
|
|
|
|
|
2023-04-21 10:04:44 +00:00
|
|
|
func (s *payloadSizeLimiter) Close(ctx context.Context) (*AccessIdentifiers, error) {
|
|
|
|
return s.release(ctx, true)
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) initialize() {
|
2023-02-24 13:31:12 +00:00
|
|
|
s.current = fromObject(s.current)
|
|
|
|
|
2023-02-17 09:52:46 +00:00
|
|
|
// if it is an object after the 1st
|
|
|
|
if ln := len(s.previous); ln > 0 {
|
|
|
|
// initialize parent object once (after 1st object)
|
|
|
|
if ln == 1 {
|
2023-06-23 10:34:42 +00:00
|
|
|
ver := version.Current()
|
2023-02-24 13:31:12 +00:00
|
|
|
s.parent = fromObject(s.current)
|
2023-05-03 08:09:12 +00:00
|
|
|
s.parent.ResetRelations()
|
|
|
|
s.parent.SetSignature(nil)
|
2023-02-24 13:31:12 +00:00
|
|
|
s.parent.SetAttributes(s.parAttrs...)
|
2023-06-23 10:34:42 +00:00
|
|
|
s.parent.SetVersion(&ver)
|
2023-05-03 08:09:12 +00:00
|
|
|
s.parentHashers = append(s.parentHashers[:0], s.currentHashers...)
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// set previous object to the last previous identifier
|
|
|
|
s.current.SetPreviousID(s.previous[ln-1])
|
|
|
|
}
|
|
|
|
|
|
|
|
s.initializeCurrent()
|
|
|
|
}
|
|
|
|
|
|
|
|
func fromObject(obj *object.Object) *object.Object {
|
|
|
|
cnr, _ := obj.ContainerID()
|
|
|
|
|
|
|
|
res := object.New()
|
|
|
|
res.SetContainerID(cnr)
|
|
|
|
res.SetOwnerID(obj.OwnerID())
|
|
|
|
res.SetAttributes(obj.Attributes()...)
|
|
|
|
res.SetType(obj.Type())
|
|
|
|
|
|
|
|
// obj.SetSplitID creates splitHeader but we don't need to do it in case
|
|
|
|
// of small objects, so we should make nil check.
|
|
|
|
if obj.SplitID() != nil {
|
|
|
|
res.SetSplitID(obj.SplitID())
|
|
|
|
}
|
|
|
|
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) initializeCurrent() {
|
2023-05-03 08:18:08 +00:00
|
|
|
s.nextTarget = s.NextTargetInit()
|
2023-02-18 07:43:34 +00:00
|
|
|
s.writtenCurrent = 0
|
2023-02-24 14:01:38 +00:00
|
|
|
s.initPayloadHashers()
|
[#188] transformer: Allow to provide size hint
For big objects with known size we can optimize allocation patterns
by providing size hint. As with any hint, it does not affect transformer
functionality: slices with capacity > MaxSize are never allocated.
```
goos: linux
goarch: amd64
pkg: git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer
cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz
│ out │
│ sec/op │
Transformer/small/no_size_hint-8 65.44µ ± 3%
Transformer/small/no_size_hint,_with_buffer-8 64.24µ ± 5%
Transformer/small/with_size_hint,_with_buffer-8 58.70µ ± 5%
Transformer/big/no_size_hint-8 367.8m ± 3%
Transformer/big/no_size_hint,_with_buffer-8 562.7m ± 0%
Transformer/big/with_size_hint,_with_buffer-8 385.6m ± 7%
geomean 5.197m
│ out │
│ B/op │
Transformer/small/no_size_hint-8 13.40Ki ± 0%
Transformer/small/no_size_hint,_with_buffer-8 13.40Ki ± 0%
Transformer/small/with_size_hint,_with_buffer-8 13.39Ki ± 0%
Transformer/big/no_size_hint-8 288.0Mi ± 0%
Transformer/big/no_size_hint,_with_buffer-8 1.390Gi ± 0%
Transformer/big/with_size_hint,_with_buffer-8 288.0Mi ± 0%
geomean 2.533Mi
│ out │
│ allocs/op │
Transformer/small/no_size_hint-8 92.00 ± 0%
Transformer/small/no_size_hint,_with_buffer-8 92.00 ± 0%
Transformer/small/with_size_hint,_with_buffer-8 92.00 ± 0%
Transformer/big/no_size_hint-8 546.5 ± 0%
Transformer/big/no_size_hint,_with_buffer-8 607.5 ± 0%
Transformer/big/with_size_hint,_with_buffer-8 545.5 ± 0%
geomean 228.1
```
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-10-26 15:13:54 +00:00
|
|
|
|
|
|
|
var payloadSize uint64
|
|
|
|
|
|
|
|
// Check whether SizeHint is valid.
|
|
|
|
if remaining := s.SizeHint - s.written; remaining <= s.SizeHint {
|
|
|
|
if remaining >= s.MaxSize {
|
|
|
|
payloadSize = s.MaxSize
|
|
|
|
} else {
|
|
|
|
payloadSize = remaining % s.MaxSize
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s.payload = make([]byte, 0, payloadSize)
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
2023-02-24 14:01:38 +00:00
|
|
|
func (s *payloadSizeLimiter) initPayloadHashers() {
|
|
|
|
s.currentHashers = append(s.currentHashers[:0], payloadChecksumHasher{
|
2023-02-17 09:52:46 +00:00
|
|
|
hasher: sha256.New(),
|
2023-02-24 13:31:12 +00:00
|
|
|
typ: checksum.SHA256,
|
2023-02-17 09:52:46 +00:00
|
|
|
})
|
|
|
|
|
2023-02-24 14:01:38 +00:00
|
|
|
if !s.WithoutHomomorphicHash {
|
|
|
|
s.currentHashers = append(s.currentHashers, payloadChecksumHasher{
|
2023-02-17 09:52:46 +00:00
|
|
|
hasher: tz.New(),
|
2023-02-24 13:31:12 +00:00
|
|
|
typ: checksum.TZ,
|
2023-02-17 09:52:46 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-21 10:04:44 +00:00
|
|
|
func (s *payloadSizeLimiter) release(ctx context.Context, finalize bool) (*AccessIdentifiers, error) {
|
2023-02-17 09:52:46 +00:00
|
|
|
// Arg finalize is true only when called from Close method.
|
|
|
|
// We finalize parent and generate linking objects only if it is more
|
|
|
|
// than 1 object in split-chain.
|
|
|
|
withParent := finalize && len(s.previous) > 0
|
|
|
|
|
|
|
|
if withParent {
|
2023-02-24 13:31:12 +00:00
|
|
|
for i := range s.parentHashers {
|
|
|
|
s.parentHashers[i].writeChecksum(s.parent)
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
s.parent.SetPayloadSize(s.written)
|
|
|
|
s.current.SetParent(s.parent)
|
|
|
|
}
|
|
|
|
|
|
|
|
// release current object
|
2023-02-24 13:31:12 +00:00
|
|
|
for i := range s.currentHashers {
|
|
|
|
s.currentHashers[i].writeChecksum(s.current)
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-04-21 12:30:24 +00:00
|
|
|
ids, err := s.fillHeader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("fillHeader: %w", err)
|
|
|
|
}
|
|
|
|
|
2023-07-07 12:34:31 +00:00
|
|
|
s.current.SetPayload(s.payload)
|
|
|
|
if err := s.nextTarget.WriteObject(ctx, s.current); err != nil {
|
|
|
|
return nil, fmt.Errorf("could not write to next target: %w", err)
|
2023-04-21 12:30:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// save identifier of the released object
|
|
|
|
s.previous = append(s.previous, ids.SelfID)
|
|
|
|
|
|
|
|
if withParent {
|
|
|
|
// generate and release linking object
|
|
|
|
s.initializeLinking(ids.ParentHeader)
|
|
|
|
s.initializeCurrent()
|
|
|
|
|
|
|
|
if _, err := s.release(ctx, false); err != nil {
|
|
|
|
return nil, fmt.Errorf("could not release linking object: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) fillHeader() (*AccessIdentifiers, error) {
|
2023-02-18 07:43:34 +00:00
|
|
|
curEpoch := s.NetworkState.CurrentEpoch()
|
|
|
|
ver := version.Current()
|
|
|
|
|
|
|
|
s.current.SetVersion(&ver)
|
|
|
|
s.current.SetPayloadSize(s.writtenCurrent)
|
|
|
|
s.current.SetSessionToken(s.SessionToken)
|
|
|
|
s.current.SetCreationEpoch(curEpoch)
|
|
|
|
|
|
|
|
var (
|
|
|
|
parID *oid.ID
|
|
|
|
parHdr *object.Object
|
|
|
|
)
|
|
|
|
|
|
|
|
if par := s.current.Parent(); par != nil && par.Signature() == nil {
|
|
|
|
rawPar := object.NewFromV2(par.ToV2())
|
|
|
|
|
|
|
|
rawPar.SetSessionToken(s.SessionToken)
|
|
|
|
rawPar.SetCreationEpoch(curEpoch)
|
|
|
|
|
|
|
|
if err := object.SetIDWithSignature(*s.Key, rawPar); err != nil {
|
|
|
|
return nil, fmt.Errorf("could not finalize parent object: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
id, _ := rawPar.ID()
|
|
|
|
parID = &id
|
|
|
|
parHdr = rawPar
|
|
|
|
|
|
|
|
s.current.SetParent(parHdr)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := object.SetIDWithSignature(*s.Key, s.current); err != nil {
|
|
|
|
return nil, fmt.Errorf("could not finalize object: %w", err)
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
2023-02-18 07:43:34 +00:00
|
|
|
id, _ := s.current.ID()
|
2023-04-21 12:30:24 +00:00
|
|
|
return &AccessIdentifiers{
|
2023-02-18 07:43:34 +00:00
|
|
|
ParentID: parID,
|
|
|
|
SelfID: id,
|
|
|
|
ParentHeader: parHdr,
|
2023-04-21 12:30:24 +00:00
|
|
|
}, nil
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) {
|
|
|
|
s.current = fromObject(s.current)
|
|
|
|
s.current.SetParent(parHdr)
|
|
|
|
s.current.SetChildren(s.previous...)
|
|
|
|
s.current.SetSplitID(s.splitID)
|
|
|
|
}
|
|
|
|
|
2023-04-21 10:04:44 +00:00
|
|
|
func (s *payloadSizeLimiter) writeChunk(ctx context.Context, chunk []byte) error {
|
2023-02-18 07:36:20 +00:00
|
|
|
for {
|
|
|
|
// statement is true if the previous write of bytes reached exactly the boundary.
|
2023-02-18 07:43:34 +00:00
|
|
|
if s.written > 0 && s.written%s.MaxSize == 0 {
|
|
|
|
if s.written == s.MaxSize {
|
2023-02-18 07:36:20 +00:00
|
|
|
s.prepareFirstChild()
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
// we need to release current object
|
2023-04-21 10:04:44 +00:00
|
|
|
if _, err := s.release(ctx, false); err != nil {
|
2023-02-18 07:36:20 +00:00
|
|
|
return fmt.Errorf("could not release object: %w", err)
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
// initialize another object
|
|
|
|
s.initialize()
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
var (
|
|
|
|
ln = uint64(len(chunk))
|
|
|
|
cut = ln
|
2023-02-18 07:43:34 +00:00
|
|
|
leftToEdge = s.MaxSize - s.written%s.MaxSize
|
2023-02-18 07:36:20 +00:00
|
|
|
)
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
// write bytes no further than the boundary of the current object
|
|
|
|
if ln > leftToEdge {
|
|
|
|
cut = leftToEdge
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-07-07 12:34:31 +00:00
|
|
|
if err := s.writeHashes(chunk[:cut]); err != nil {
|
2023-02-18 07:36:20 +00:00
|
|
|
return fmt.Errorf("could not write chunk to target: %w", err)
|
|
|
|
}
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
// increase written bytes counter
|
2023-02-18 07:43:34 +00:00
|
|
|
s.writtenCurrent += cut
|
2023-02-18 07:36:20 +00:00
|
|
|
s.written += cut
|
2023-02-17 09:52:46 +00:00
|
|
|
|
2023-02-18 07:36:20 +00:00
|
|
|
if cut == ln {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// if there are more bytes in buffer we call method again to start filling another object
|
|
|
|
chunk = chunk[cut:]
|
2023-02-17 09:52:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-07 12:34:31 +00:00
|
|
|
func (s *payloadSizeLimiter) writeHashes(chunk []byte) error {
|
|
|
|
s.payload = append(s.payload, chunk...)
|
2023-02-24 14:09:34 +00:00
|
|
|
|
|
|
|
// The `Write` method of `hash.Hash` never returns an error.
|
|
|
|
for i := range s.currentHashers {
|
|
|
|
_, _ = s.currentHashers[i].hasher.Write(chunk)
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range s.parentHashers {
|
|
|
|
_, _ = s.parentHashers[i].hasher.Write(chunk)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-02-17 09:52:46 +00:00
|
|
|
func (s *payloadSizeLimiter) prepareFirstChild() {
|
|
|
|
// initialize split header with split ID on first object in chain
|
|
|
|
s.current.InitRelations()
|
|
|
|
s.current.SetSplitID(s.splitID)
|
|
|
|
|
|
|
|
// cut source attributes
|
|
|
|
s.parAttrs = s.current.Attributes()
|
|
|
|
s.current.SetAttributes()
|
|
|
|
|
|
|
|
// attributes will be added to parent in detachParent
|
|
|
|
}
|