Move transformer implementation from frostfs-node #19

Merged
fyrchik merged 12 commits from fyrchik/move-transformer into master 2023-03-15 05:39:10 +00:00
6 changed files with 679 additions and 0 deletions

View file

@ -0,0 +1,44 @@
package transformer
import (
objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
"github.com/nspcc-dev/neo-go/pkg/util/slice"
)
// chanTarget is an ObjectTarget that keeps one object in memory and hands it
// over through a channel when the object is closed.
type chanTarget struct {
	// header is the object passed to the most recent WriteHeader call.
	header *objectSDK.Object
	// payload accumulates the bytes passed to Write since the last Close.
	payload []byte
	// ch receives the assembled object on Close.
	ch chan<- *objectSDK.Object
}

// NewChannelTarget returns an ObjectTarget that sends every written object
// to ch.
//
// The send happens inside Close, so Close blocks while ch cannot accept
// another value.
func NewChannelTarget(ch chan<- *objectSDK.Object) ObjectTarget {
	target := new(chanTarget)
	target.ch = ch
	return target
}

// WriteHeader implements the ObjectTarget interface.
// It only remembers object; nothing is sent until Close.
func (c *chanTarget) WriteHeader(object *objectSDK.Object) error {
	c.header = object
	return nil
}

// Write implements the ObjectTarget interface.
// The bytes of p are copied into an internal buffer.
func (c *chanTarget) Write(p []byte) (n int, err error) {
	n = len(p)
	c.payload = append(c.payload, p...)
	return n, nil
}

// Close implements the ObjectTarget interface.
//
// A non-empty buffered payload is copied into the remembered header, the
// header is sent to the channel and the target is reset. The returned
// identifiers are always empty.
func (c *chanTarget) Close() (*AccessIdentifiers, error) {
	obj := c.header
	if len(c.payload) > 0 {
		obj.SetPayload(slice.Copy(c.payload))
	}

	c.ch <- obj

	c.header, c.payload = nil, nil
	return &AccessIdentifiers{}, nil
}

View file

@ -0,0 +1,55 @@
package transformer
import (
"crypto/rand"
"testing"
cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test"
objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
"github.com/TrueCloudLab/frostfs-sdk-go/version"
"github.com/stretchr/testify/require"
)
// TestChannelTarget feeds the same header and payload through two size
// limiters, one backed by a channel target and one by the in-memory
// testTarget, and checks that both produce the same sequence of parts.
func TestChannelTarget(t *testing.T) {
	const maxSize = 100

	ch := make(chan *objectSDK.Object, 10)
	tt := new(testTarget)

	viaChannel, _ := newPayloadSizeLimiter(maxSize, NewChannelTarget(ch))
	viaMemory, _ := newPayloadSizeLimiter(maxSize, tt)

	ver := version.Current()
	cnr := cidtest.ID()

	hdr := objectSDK.New()
	hdr.SetContainerID(cnr)
	hdr.SetType(objectSDK.TypeRegular)
	hdr.SetVersion(&ver)

	payload := make([]byte, maxSize*2+maxSize/2)
	_, _ = rand.Read(payload)

	// The returned identifiers are deliberately not compared: the objects
	// produced by the two limiters can differ (see the note in the loop).
	writeObject(t, viaMemory, hdr, payload)
	writeObject(t, viaChannel, hdr, payload)

	for _, want := range tt.objects {
		select {
		case got := <-ch:
			// Because of the split ID objects can be different.
			// However, payload and attributes must be the same.
			require.Equal(t, want.Payload(), got.Payload())
			require.Equal(t, want.Attributes(), got.Attributes())
		default:
			require.FailNow(t, "received less parts than expected")
		}
	}

	// The channel must be drained by now.
	select {
	case <-ch:
		require.FailNow(t, "received more parts than expected")
	default:
	}
}

View file

@ -0,0 +1,38 @@
package transformer
import (
"crypto/sha256"
"hash"
"github.com/TrueCloudLab/frostfs-sdk-go/checksum"
objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
"github.com/TrueCloudLab/tzhash/tz"
)
type payloadChecksumHasher struct {
hasher hash.Hash
typ checksum.Type
}
func (h payloadChecksumHasher) writeChecksum(obj *objectSDK.Object) {
switch h.typ {
case checksum.SHA256:
csSHA := [sha256.Size]byte{}
h.hasher.Sum(csSHA[:0])
dstepanov-yadro commented 2023-02-28 05:46:16 +00:00 (Migrated from github.com)
Review

Too tricky i think.

Too tricky i think.
var cs checksum.Checksum
cs.SetSHA256(csSHA)
obj.SetPayloadChecksum(cs)
case checksum.TZ:
csTZ := [tz.Size]byte{}
h.hasher.Sum(csTZ[:0])
var cs checksum.Checksum
cs.SetTillichZemor(csTZ)
obj.SetPayloadHomomorphicHash(cs)
default:
panic("unreachable")
}
}

View file

@ -0,0 +1,298 @@
package transformer
import (
"crypto/ecdsa"
"crypto/sha256"
"fmt"
"github.com/TrueCloudLab/frostfs-sdk-go/checksum"
"github.com/TrueCloudLab/frostfs-sdk-go/object"
oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id"
"github.com/TrueCloudLab/frostfs-sdk-go/session"
"github.com/TrueCloudLab/frostfs-sdk-go/version"
"github.com/TrueCloudLab/tzhash/tz"
)
// payloadSizeLimiter is an ObjectTarget that cuts a payload stream into
// objects of at most MaxSize bytes and forwards each finished object to
// NextTarget.
type payloadSizeLimiter struct {
	Params
	// written counts all payload bytes accepted so far; writtenCurrent counts
	// only the bytes of the object currently being filled.
	written, writtenCurrent uint64
	// current is the object being filled; parent is the header of the whole
	// object and is created once the first part has been released.
	current, parent *object.Object
	// currentHashers hash the payload of the current object, parentHashers
	// hash the payload of the whole object.
	currentHashers, parentHashers []payloadChecksumHasher
	// previous holds identifiers of the objects released so far, in order.
	previous []oid.ID
	// splitID is set on the first part and on the linking object.
	splitID *object.SplitID
	// parAttrs keeps the attributes cut from the first part until they are
	// put on the parent header.
	parAttrs []object.Attribute
}
// Params groups the dependencies and settings of the payload size limiter.
type Params struct {
	// Key signs every produced object; it is dereferenced on release and
	// therefore must not be nil.
	Key *ecdsa.PrivateKey
	// NextTarget receives payload chunks and the finalized object headers.
	NextTarget ObjectTarget
	// SessionToken is set on every produced object.
	SessionToken *session.Object
	// NetworkState provides the creation epoch of produced objects.
	NetworkState EpochSource
	// MaxSize is the maximum payload size of a single produced object.
	// It is used as a divisor when writing and therefore must be non-zero.
	MaxSize uint64
	// WithoutHomomorphicHash disables calculation of the Tillich-Zemor
	// homomorphic hash.
	WithoutHomomorphicHash bool
}
// NewPayloadSizeLimiter returns an ObjectTarget that restricts the payload
// length of the written object to p.MaxSize and writes the generated objects
// to p.NextTarget.
//
// The homomorphic hash is calculated and added to the resulting objects only
// if p.WithoutHomomorphicHash is false.
//
// Objects whose payload does not exceed the maximum size are not split.
func NewPayloadSizeLimiter(p Params) ObjectTarget {
	limiter := &payloadSizeLimiter{Params: p}
	// One split ID is generated per limiter.
	limiter.splitID = object.NewSplitID()
	return limiter
}
// WriteHeader implements the ObjectTarget interface.
//
// It starts a new object from a reduced copy of hdr (see fromObject) and
// resets the per-object counter and hashers. It never fails.
func (s *payloadSizeLimiter) WriteHeader(hdr *object.Object) error {
	s.current = fromObject(hdr)
	s.initialize()
	return nil
}
// Write implements the ObjectTarget interface.
//
// On success the whole of p has been consumed. On failure the reported byte
// count is 0 even if a prefix of p was already processed.
func (s *payloadSizeLimiter) Write(p []byte) (int, error) {
	err := s.writeChunk(p)
	if err != nil {
		return 0, err
	}

	return len(p), nil
}
// Close implements the ObjectTarget interface.
//
// It releases the object being filled in finalizing mode; if other parts
// were released before, the parent header and a linking object are produced
// as well (see release).
func (s *payloadSizeLimiter) Close() (*AccessIdentifiers, error) {
	return s.release(true)
}
// initialize prepares s.current for receiving payload.
//
// The current object is replaced with a fresh copy of itself. For every
// object after the first one it is also linked to the previously released
// object, and right after the first release the parent header is created.
func (s *payloadSizeLimiter) initialize() {
	s.current = fromObject(s.current)

	if released := len(s.previous); released > 0 {
		// Exactly one released object means the split has just started:
		// set up the parent once.
		if released == 1 {
			parent := fromObject(s.current)
			// Give the source attributes (cut from the first part) to the parent.
			parent.SetAttributes(s.parAttrs...)
			s.parent = parent

			// The parent hashers take over the hashers of the first part, so
			// the bytes already hashed for it count towards the parent.
			s.parentHashers = append(s.parentHashers[:0], s.currentHashers...)
		}

		// Chain the new object to the most recently released one.
		s.current.SetPreviousID(s.previous[released-1])
	}

	s.initializeCurrent()
}
// fromObject returns a new object that carries over the container ID, owner
// ID, attributes, type and, if present, split ID of obj. Nothing else is
// copied.
func fromObject(obj *object.Object) *object.Object {
	res := object.New()

	cnr, _ := obj.ContainerID()
	res.SetContainerID(cnr)
	res.SetOwnerID(obj.OwnerID())
	res.SetAttributes(obj.Attributes()...)
	res.SetType(obj.Type())

	// SetSplitID creates a split header, which small (non-split) objects
	// must not get, so the ID is copied only when it is actually set.
	if splitID := obj.SplitID(); splitID != nil {
		res.SetSplitID(splitID)
	}

	return res
}
// initializeCurrent resets the per-object state: the byte counter of the
// current object and its payload hashers.
func (s *payloadSizeLimiter) initializeCurrent() {
	// create payload hashers
	s.writtenCurrent = 0
	s.initPayloadHashers()
}
// initPayloadHashers replaces the hashers of the current object with fresh
// ones: SHA256 always, Tillich-Zemor unless WithoutHomomorphicHash is set.
// The existing slice storage is reused.
func (s *payloadSizeLimiter) initPayloadHashers() {
	hashers := append(s.currentHashers[:0], payloadChecksumHasher{
		hasher: sha256.New(),
		typ:    checksum.SHA256,
	})

	if !s.WithoutHomomorphicHash {
		hashers = append(hashers, payloadChecksumHasher{
			hasher: tz.New(),
			typ:    checksum.TZ,
		})
	}

	s.currentHashers = hashers
}
// release finalizes the current object and passes its header to NextTarget.
//
// The object receives its payload checksums, payload size, the current SDK
// version, the configured session token and the current epoch, and is then
// signed with Key. When finalize is true and at least one object has already
// been released, the parent header is finalized and attached first, and a
// linking object is released afterwards through a nested release(false) call.
//
// The returned identifiers describe the object released by this call (not
// the linking object). ParentID and ParentHeader are set only when this call
// signed the parent header.
func (s *payloadSizeLimiter) release(finalize bool) (*AccessIdentifiers, error) {
	// Arg finalize is true only when called from Close method.
	// We finalize parent and generate linking objects only if it is more
	// than 1 object in split-chain.
	withParent := finalize && len(s.previous) > 0
	if withParent {
		// The parent hashers have seen the whole payload, s.written is its size.
		for i := range s.parentHashers {
			s.parentHashers[i].writeChecksum(s.parent)
		}
		s.parent.SetPayloadSize(s.written)
		s.current.SetParent(s.parent)
	}
	// release current object
	for i := range s.currentHashers {
		s.currentHashers[i].writeChecksum(s.current)
	}
	curEpoch := s.NetworkState.CurrentEpoch()
	ver := version.Current()
	s.current.SetVersion(&ver)
	s.current.SetPayloadSize(s.writtenCurrent)
	s.current.SetSessionToken(s.SessionToken)
	s.current.SetCreationEpoch(curEpoch)
	var (
		parID  *oid.ID
		parHdr *object.Object
	)
	// An attached parent without a signature has not been finalized yet:
	// sign a copy of it and attach the signed copy instead.
	if par := s.current.Parent(); par != nil && par.Signature() == nil {
		rawPar := object.NewFromV2(par.ToV2())
		rawPar.SetSessionToken(s.SessionToken)
		rawPar.SetCreationEpoch(curEpoch)
		if err := object.SetIDWithSignature(*s.Key, rawPar); err != nil {
			return nil, fmt.Errorf("could not finalize parent object: %w", err)
		}
		id, _ := rawPar.ID()
		parID = &id
		parHdr = rawPar
		s.current.SetParent(parHdr)
	}
	if err := object.SetIDWithSignature(*s.Key, s.current); err != nil {
		return nil, fmt.Errorf("could not finalize object: %w", err)
	}
	// The payload has already been streamed to NextTarget by writeHashes;
	// here the target only gets the finished header and is closed.
	if err := s.NextTarget.WriteHeader(s.current); err != nil {
		return nil, fmt.Errorf("could not write header to next target: %w", err)
	}
	if _, err := s.NextTarget.Close(); err != nil {
		return nil, fmt.Errorf("could not close next target: %w", err)
	}
	id, _ := s.current.ID()
	ids := &AccessIdentifiers{
		ParentID:     parID,
		SelfID:       id,
		ParentHeader: parHdr,
	}
	// save identifier of the released object
	s.previous = append(s.previous, ids.SelfID)
	if withParent {
		// generate and release linking object
		s.initializeLinking(ids.ParentHeader)
		s.initializeCurrent()
		// finalize is false here, so the nested call does not recurse further.
		if _, err := s.release(false); err != nil {
			return nil, fmt.Errorf("could not release linking object: %w", err)
		}
	}
	return ids, nil
}
// initializeLinking replaces the current object with a linking object: a
// fresh copy of the current object that references parHdr as its parent,
// lists every released object as a child and carries the split ID.
func (s *payloadSizeLimiter) initializeLinking(parHdr *object.Object) {
	link := fromObject(s.current)
	link.SetParent(parHdr)
	link.SetChildren(s.previous...)
	link.SetSplitID(s.splitID)

	s.current = link
}
// writeChunk consumes chunk, splitting it across objects so that no object
// receives more than MaxSize payload bytes.
//
// Whenever the previous write ended exactly on an object boundary and more
// data arrives, the filled object is released and a new one is started before
// any byte of the new data is written.
//
// An error is returned if MaxSize is zero, if the filled object cannot be
// released or if the chunk cannot be passed to the next target. An empty
// chunk is a no-op: it must not be treated as "more data" and open a new,
// empty part when the payload ends exactly on a boundary.
func (s *payloadSizeLimiter) writeChunk(chunk []byte) error {
	// MaxSize is used as a divisor below; report misconfiguration as an error
	// instead of panicking with a division by zero.
	if s.MaxSize == 0 {
		return fmt.Errorf("invalid max object size: %d", s.MaxSize)
	}
	if len(chunk) == 0 {
		return nil
	}

	for {
		// statement is true if the previous write of bytes reached exactly the boundary.
		if s.written > 0 && s.written%s.MaxSize == 0 {
			// The very first boundary turns the object into a split chain.
			if s.written == s.MaxSize {
				s.prepareFirstChild()
			}

			// we need to release current object
			if _, err := s.release(false); err != nil {
				return fmt.Errorf("could not release object: %w", err)
			}

			// initialize another object
			s.initialize()
		}

		var (
			ln         = uint64(len(chunk))
			cut        = ln
			leftToEdge = s.MaxSize - s.written%s.MaxSize
		)

		// write bytes no further than the boundary of the current object
		if ln > leftToEdge {
			cut = leftToEdge
		}

		if err := s.writeHashes(chunk[:cut]); err != nil {
			return fmt.Errorf("could not write chunk to target: %w", err)
		}

		// increase written bytes counter
		s.writtenCurrent += cut
		s.written += cut

		if cut == ln {
			return nil
		}

		// there are more bytes in the buffer: continue with the next object
		chunk = chunk[cut:]
	}
}
// writeHashes forwards chunk to NextTarget and, if that succeeds, feeds it
// to the hashers of the current object and of the parent. The error of the
// next target is returned as is.
func (s *payloadSizeLimiter) writeHashes(chunk []byte) error {
	if _, err := s.NextTarget.Write(chunk); err != nil {
		return err
	}

	// The `Write` method of `hash.Hash` never returns an error.
	for _, h := range s.currentHashers {
		_, _ = h.hasher.Write(chunk)
	}
	for _, h := range s.parentHashers {
		_, _ = h.hasher.Write(chunk)
	}

	return nil
}
// prepareFirstChild turns the current object into the first part of a split
// chain: it gets a split header with the split ID and loses its attributes.
func (s *payloadSizeLimiter) prepareFirstChild() {
	first := s.current

	// initialize split header with split ID on first object in chain
	first.InitRelations()
	first.SetSplitID(s.splitID)

	// Cut the source attributes; they are put on the parent header by
	// initialize once this part has been released.
	s.parAttrs = first.Attributes()
	first.SetAttributes()
}

View file

@ -0,0 +1,189 @@
package transformer
import (
"crypto/rand"
"crypto/sha256"
"testing"
cid "github.com/TrueCloudLab/frostfs-sdk-go/container/id"
cidtest "github.com/TrueCloudLab/frostfs-sdk-go/container/id/test"
objectSDK "github.com/TrueCloudLab/frostfs-sdk-go/object"
"github.com/TrueCloudLab/frostfs-sdk-go/user"
"github.com/TrueCloudLab/frostfs-sdk-go/version"
"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
"github.com/stretchr/testify/require"
)
// TestTransformer writes a payload of 2.5 x maxSize through the size limiter
// and checks the produced parts: their number, common header fields, payload
// sizes and checksums, the parent reference of the linking object and the
// checksum of the parent header.
func TestTransformer(t *testing.T) {
	const maxSize = 100

	tt := new(testTarget)
	target, pk := newPayloadSizeLimiter(maxSize, tt)

	cnr := cidtest.ID()
	hdr := newObject(cnr)

	var owner user.ID
	user.IDFromKey(&owner, pk.PrivateKey.PublicKey)
	hdr.SetOwnerID(&owner)

	expectedPayload := make([]byte, maxSize*2+maxSize/2)
	_, _ = rand.Read(expectedPayload)

	ids := writeObject(t, target, hdr, expectedPayload)
	require.Equal(t, 4, len(tt.objects)) // 3 parts + linking object

	var actualPayload []byte
	for i, obj := range tt.objects {
		childCnr, ok := obj.ContainerID()
		require.True(t, ok)
		require.Equal(t, cnr, childCnr)
		require.Equal(t, objectSDK.TypeRegular, obj.Type())
		require.Equal(t, &owner, obj.OwnerID())

		payload := obj.Payload()
		require.EqualValues(t, obj.PayloadSize(), len(payload))
		actualPayload = append(actualPayload, payload...)

		// Only objects that carry payload are checked for a matching checksum.
		if len(payload) != 0 {
			cs, ok := obj.PayloadChecksum()
			require.True(t, ok)

			sum := sha256.Sum256(payload)
			require.Equal(t, sum[:], cs.Value())
		}

		switch {
		case i < 2: // full-size parts
			require.EqualValues(t, maxSize, len(payload))
		case i == 2: // the remainder
			require.EqualValues(t, maxSize/2, len(payload))
		case i == 3: // linking object
			parID, ok := obj.ParentID()
			require.True(t, ok)
			require.Equal(t, ids.ParentID, &parID)
		}
	}
	require.Equal(t, expectedPayload, actualPayload)

	t.Run("parent checksum", func(t *testing.T) {
		cs, ok := ids.ParentHeader.PayloadChecksum()
		require.True(t, ok)

		sum := sha256.Sum256(expectedPayload)
		require.Equal(t, sum[:], cs.Value())
	})
}
// newObject returns a regular object header for container cnr with the
// current SDK version set.
func newObject(cnr cid.ID) *objectSDK.Object {
	hdr := objectSDK.New()
	hdr.SetContainerID(cnr)
	hdr.SetType(objectSDK.TypeRegular)

	ver := version.Current()
	hdr.SetVersion(&ver)

	return hdr
}
// writeObject writes header and payload to target, closes it and returns the
// identifiers reported by Close. Any error fails the test immediately.
func writeObject(t *testing.T, target ObjectTarget, header *objectSDK.Object, payload []byte) *AccessIdentifiers {
	// Report failures at the caller's line instead of inside this helper.
	t.Helper()

	require.NoError(t, target.WriteHeader(header))

	_, err := target.Write(payload)
	require.NoError(t, err)

	ids, err := target.Close()
	require.NoError(t, err)

	return ids
}
// BenchmarkTransformer measures the size limiter on a payload that fits into
// one object ("small") and on one that spans 4.5 objects ("big").
func BenchmarkTransformer(b *testing.B) {
	hdr := newObject(cidtest.ID())

	cases := []struct {
		name        string
		payloadSize int
	}{
		{name: "small", payloadSize: 8 * 1024},
		{name: "big", payloadSize: 64 * 1024 * 1024 * 9 / 2},
	}

	for _, bc := range cases {
		size := bc.payloadSize
		b.Run(bc.name, func(b *testing.B) {
			benchmarkTransformer(b, hdr, size)
		})
	}
}
// benchmarkTransformer writes a zero-filled payload of payloadSize bytes
// through a new size limiter (64 MiB limit, discarding target) on every
// iteration.
func benchmarkTransformer(b *testing.B, header *objectSDK.Object, payloadSize int) {
	const maxSize = 64 * 1024 * 1024

	payload := make([]byte, payloadSize)

	b.ReportAllocs()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		limiter, _ := newPayloadSizeLimiter(maxSize, benchTarget{})

		err := limiter.WriteHeader(header)
		if err != nil {
			b.Fatalf("write header: %v", err)
		}

		_, err = limiter.Write(payload)
		if err != nil {
			b.Fatalf("write: %v", err)
		}

		_, err = limiter.Close()
		if err != nil {
			b.Fatalf("close: %v", err)
		}
	}
}
// newPayloadSizeLimiter builds a size limiter for tests: it signs with a
// freshly generated key (returned alongside), reports epoch 123 and has the
// homomorphic hash disabled. It panics if the key cannot be generated.
func newPayloadSizeLimiter(maxSize uint64, nextTarget ObjectTarget) (ObjectTarget, *keys.PrivateKey) {
	key, err := keys.NewPrivateKey()
	if err != nil {
		panic(err)
	}

	params := Params{
		Key:                    &key.PrivateKey,
		NextTarget:             nextTarget,
		NetworkState:           dummyEpochSource(123),
		MaxSize:                maxSize,
		WithoutHomomorphicHash: true,
	}

	return NewPayloadSizeLimiter(params), key
}
// dummyEpochSource is an EpochSource that always reports its own value.
type dummyEpochSource uint64

// CurrentEpoch implements the EpochSource interface.
func (s dummyEpochSource) CurrentEpoch() uint64 {
	return uint64(s)
}
// benchTarget is an ObjectTarget that discards everything it is given.
type benchTarget struct{}

// WriteHeader implements the ObjectTarget interface; the header is ignored.
func (benchTarget) WriteHeader(*objectSDK.Object) error { return nil }

// Write implements the ObjectTarget interface; p is reported as fully written.
func (benchTarget) Write(p []byte) (int, error) { return len(p), nil }

// Close implements the ObjectTarget interface; no identifiers are returned.
func (benchTarget) Close() (*AccessIdentifiers, error) { return nil, nil }
// testTarget is an ObjectTarget that collects every closed object in memory
// so that tests can inspect them.
type testTarget struct {
	// current is the header passed to the latest WriteHeader call.
	current *objectSDK.Object
	// payload accumulates the bytes written since the last Close.
	payload []byte
	// objects lists the closed objects in the order they were closed.
	objects []*objectSDK.Object
}

// WriteHeader implements the ObjectTarget interface.
func (tt *testTarget) WriteHeader(object *objectSDK.Object) error {
	tt.current = object
	return nil
}

// Write implements the ObjectTarget interface.
func (tt *testTarget) Write(p []byte) (n int, err error) {
	n = len(p)
	tt.payload = append(tt.payload, p...)
	return n, nil
}

// Close implements the ObjectTarget interface.
// It attaches the accumulated payload to the current header, stores the
// object and resets the target for the next one.
func (tt *testTarget) Close() (*AccessIdentifiers, error) {
	obj := tt.current
	obj.SetPayload(tt.payload)
	tt.objects = append(tt.objects, obj)

	tt.current, tt.payload = nil, nil

	return nil, nil // AccessIdentifiers should not be used.
}

View file

@ -0,0 +1,55 @@
package transformer
import (
"io"
"github.com/TrueCloudLab/frostfs-sdk-go/object"
oid "github.com/TrueCloudLab/frostfs-sdk-go/object/id"
)
// AccessIdentifiers represents a group of object identifiers
// that are returned after writing an object.
// It consists of the ID of the stored object and, optionally, the ID and
// the header of its parent object.
type AccessIdentifiers struct {
	// ParentID is the ID of the parent object; nil if there is none.
	ParentID *oid.ID
	// SelfID is the ID of the written object itself.
	SelfID oid.ID
	// ParentHeader is the header of the parent object; nil if there is none.
	ParentHeader *object.Object
}
// EpochSource is a source of the current epoch.
type EpochSource interface {
	// CurrentEpoch returns the number of the current epoch.
	CurrentEpoch() uint64
}
// ObjectTarget is an interface of the object writer.
type ObjectTarget interface {
	// WriteHeader writes the object header, possibly together with a part
	// of the payload. The payload of the object may be incomplete.
	//
	// Must be called exactly once. Control remains with the caller.
	// Missing a call or re-calling can lead to undefined behavior
	// that depends on the implementation.
	//
	// Must not be called after Close call.
	WriteHeader(*object.Object) error
	// Write writes a chunk of the object payload.
	//
	// Can be called multiple times.
	//
	// Must not be called after Close call.
	io.Writer
	// Close is used to finish object writing.
	//
	// Close must return access identifiers of the object
	// that has been written.
	//
	// Must be called no more than once. Control remains with the caller.
	// Re-calling can lead to undefined behavior
	// that depends on the implementation.
	Close() (*AccessIdentifiers, error)
}
// TargetInitializer represents an ObjectTarget constructor: every call
// returns an ObjectTarget.
type TargetInitializer func() ObjectTarget