frostfs-node/pkg/services/object/put/distributed.go
Evgenii Stratonikov 3fcf56f2fb [#643] objsvc/put: Copy config to distributedTarget
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-08-24 11:03:17 +03:00

249 lines
6 KiB
Go

package putsvc
import (
"context"
"fmt"
"sync"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
svcutil "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object/util"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer"
"go.uber.org/zap"
)
type preparedObjectTarget interface {
WriteObject(context.Context, *objectSDK.Object, object.ContentMeta) error
}
type distributedTarget struct {
traversal traversal
obj *objectSDK.Object
objMeta object.ContentMeta
*cfg
payload *payload
nodeTargetInitializer func(nodeDesc) preparedObjectTarget
relay func(context.Context, nodeDesc) error
}
// parameters and state of container traversal.
type traversal struct {
opts []placement.Option
// need of additional broadcast after the object is saved
extraBroadcastEnabled bool
// container nodes which was processed during the primary object placement
mExclude map[string]*bool
}
// updates traversal parameters after the primary placement finish and
// returns true if additional container broadcast is needed.
func (x *traversal) submitPrimaryPlacementFinish() bool {
if x.extraBroadcastEnabled {
// do not track success during container broadcast (best-effort)
x.opts = append(x.opts, placement.WithoutSuccessTracking())
// avoid 2nd broadcast
x.extraBroadcastEnabled = false
return true
}
return false
}
// marks the container node as processed during the primary object placement.
func (x *traversal) submitProcessed(n placement.Node, item *bool) {
if x.extraBroadcastEnabled {
key := string(n.PublicKey())
if x.mExclude == nil {
x.mExclude = make(map[string]*bool, 1)
}
x.mExclude[key] = item
}
}
type nodeDesc struct {
local bool
info placement.Node
}
// errIncompletePut is returned if processing on a container fails.
type errIncompletePut struct {
singleErr error // error from the last responding node
}
func (x errIncompletePut) Error() string {
const commonMsg = "incomplete object PUT by placement"
if x.singleErr != nil {
return fmt.Sprintf("%s: %v", commonMsg, x.singleErr)
}
return commonMsg
}
func (t *distributedTarget) WriteHeader(_ context.Context, obj *objectSDK.Object) error {
t.obj = obj
return nil
}
func (t *distributedTarget) Write(_ context.Context, p []byte) (n int, err error) {
t.payload.Data = append(t.payload.Data, p...)
return len(p), nil
}
func (t *distributedTarget) Close(ctx context.Context) (*transformer.AccessIdentifiers, error) {
defer func() {
putPayload(t.payload)
t.payload = nil
}()
t.obj.SetPayload(t.payload.Data)
if err := t.WriteObject(ctx, t.obj); err != nil {
return nil, err
}
id, _ := t.obj.ID()
return &transformer.AccessIdentifiers{
SelfID: id,
}, nil
}
// WriteObject implements the transformer.ObjectWriter interface.
func (t *distributedTarget) WriteObject(ctx context.Context, obj *objectSDK.Object) error {
t.obj = obj
var err error
if t.objMeta, err = t.fmtValidator.ValidateContent(t.obj); err != nil {
return fmt.Errorf("(%T) could not validate payload content: %w", t, err)
}
if len(t.obj.Children()) > 0 {
// enabling extra broadcast for linking objects
t.traversal.extraBroadcastEnabled = true
}
return t.iteratePlacement(ctx)
}
func (t *distributedTarget) sendObject(ctx context.Context, node nodeDesc) error {
if !node.local && t.relay != nil {
return t.relay(ctx, node)
}
target := t.nodeTargetInitializer(node)
err := target.WriteObject(ctx, t.obj, t.objMeta)
if err != nil {
return fmt.Errorf("could not write header: %w", err)
}
return nil
}
func (t *distributedTarget) iteratePlacement(ctx context.Context) error {
id, _ := t.obj.ID()
traverser, err := placement.NewTraverser(
append(t.traversal.opts, placement.ForObject(id))...,
)
if err != nil {
return fmt.Errorf("(%T) could not create object placement traverser: %w", t, err)
}
resErr := &atomic.Value{}
// Must iterate over all replicas, regardless of whether there are identical nodes there.
// At the same time need to exclude identical nodes from processing.
for {
addrs := traverser.Next()
if len(addrs) == 0 {
break
}
if t.iterateAddresses(ctx, traverser, addrs, resErr) {
break
}
}
if !traverser.Success() {
var err errIncompletePut
err.singleErr, _ = resErr.Load().(error)
return err
}
// perform additional container broadcast if needed
if t.traversal.submitPrimaryPlacementFinish() {
err = t.iteratePlacement(ctx)
if err != nil {
t.log.Error(logs.PutAdditionalContainerBroadcastFailure, zap.Error(err))
// we don't fail primary operation because of broadcast failure
}
}
return nil
}
func (t *distributedTarget) iterateAddresses(ctx context.Context, traverser *placement.Traverser, addrs []placement.Node, resErr *atomic.Value) bool {
wg := &sync.WaitGroup{}
for i := range addrs {
addr := addrs[i]
if val := t.traversal.mExclude[string(addr.PublicKey())]; val != nil {
// Check is node processed successful on the previous iteration.
if *val {
traverser.SubmitSuccess()
}
// it can happen only during additional container broadcast
continue
}
wg.Add(1)
item := new(bool)
workerPool, isLocal := t.getWorkerPool(addr.PublicKey())
if err := workerPool.Submit(func() {
defer wg.Done()
err := t.sendObject(ctx, nodeDesc{local: isLocal, info: addr})
if err != nil {
resErr.Store(err)
svcutil.LogServiceError(t.log, "PUT", addr.Addresses(), err)
return
}
traverser.SubmitSuccess()
*item = true
}); err != nil {
wg.Done()
svcutil.LogWorkerPoolError(t.log, "PUT", err)
return true
}
// mark the container node as processed in order to exclude it
// in subsequent container broadcast. Note that we don't
// process this node during broadcast if primary placement
// on it failed.
t.traversal.submitProcessed(addr, item)
}
wg.Wait()
return false
}