2020-09-21 14:31:31 +00:00
|
|
|
package putsvc
|
|
|
|
|
|
|
|
import (
|
2023-04-03 11:23:53 +00:00
|
|
|
"context"
|
2021-05-18 08:12:51 +00:00
|
|
|
"fmt"
|
2020-09-21 14:31:31 +00:00
|
|
|
"sync"
|
2021-09-08 23:13:55 +00:00
|
|
|
"sync/atomic"
|
2020-09-21 14:31:31 +00:00
|
|
|
|
2023-04-12 14:35:10 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
2023-03-07 13:38:26 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
|
|
|
svcutil "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object/util"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
|
|
|
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
2023-04-27 15:46:42 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer"
|
2022-03-05 10:32:11 +00:00
|
|
|
"go.uber.org/zap"
|
2020-09-21 14:31:31 +00:00
|
|
|
)
|
|
|
|
|
2022-04-28 07:19:26 +00:00
|
|
|
type preparedObjectTarget interface {
|
2022-11-01 17:32:43 +00:00
|
|
|
WriteObject(*objectSDK.Object, object.ContentMeta) error
|
2023-04-03 11:23:53 +00:00
|
|
|
Close(ctx context.Context) (*transformer.AccessIdentifiers, error)
|
2022-04-28 07:19:26 +00:00
|
|
|
}
|
|
|
|
|
2020-09-21 14:31:31 +00:00
|
|
|
type distributedTarget struct {
|
2022-03-05 10:32:11 +00:00
|
|
|
traversal traversal
|
2020-09-21 14:31:31 +00:00
|
|
|
|
2022-11-01 17:32:43 +00:00
|
|
|
obj *objectSDK.Object
|
|
|
|
objMeta object.ContentMeta
|
2020-09-21 14:31:31 +00:00
|
|
|
|
2023-04-03 10:09:44 +00:00
|
|
|
payload *payload
|
2020-09-21 14:31:31 +00:00
|
|
|
|
2022-04-28 07:19:26 +00:00
|
|
|
nodeTargetInitializer func(nodeDesc) preparedObjectTarget
|
2020-09-30 17:54:25 +00:00
|
|
|
|
2023-06-19 10:52:26 +00:00
|
|
|
getWorkerPool func([]byte) (util.WorkerPool, bool)
|
2021-09-24 10:28:58 +00:00
|
|
|
|
2023-04-12 08:02:25 +00:00
|
|
|
relay func(context.Context, nodeDesc) error
|
2021-05-27 14:25:29 +00:00
|
|
|
|
2020-09-30 17:54:25 +00:00
|
|
|
fmt *object.FormatValidator
|
2020-11-23 11:51:02 +00:00
|
|
|
|
|
|
|
log *logger.Logger
|
2020-09-21 14:31:31 +00:00
|
|
|
}
|
|
|
|
|
2022-03-05 10:32:11 +00:00
|
|
|
// parameters and state of container traversal.
|
|
|
|
type traversal struct {
|
|
|
|
opts []placement.Option
|
|
|
|
|
|
|
|
// need of additional broadcast after the object is saved
|
|
|
|
extraBroadcastEnabled bool
|
|
|
|
|
2022-11-10 06:48:46 +00:00
|
|
|
// mtx protects mExclude map.
|
|
|
|
mtx sync.RWMutex
|
|
|
|
|
2022-03-05 10:32:11 +00:00
|
|
|
// container nodes which was processed during the primary object placement
|
|
|
|
mExclude map[string]struct{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// updates traversal parameters after the primary placement finish and
|
|
|
|
// returns true if additional container broadcast is needed.
|
|
|
|
func (x *traversal) submitPrimaryPlacementFinish() bool {
|
|
|
|
if x.extraBroadcastEnabled {
|
|
|
|
// do not track success during container broadcast (best-effort)
|
|
|
|
x.opts = append(x.opts, placement.WithoutSuccessTracking())
|
|
|
|
|
|
|
|
// avoid 2nd broadcast
|
|
|
|
x.extraBroadcastEnabled = false
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// marks the container node as processed during the primary object placement.
|
|
|
|
func (x *traversal) submitProcessed(n placement.Node) {
|
|
|
|
if x.extraBroadcastEnabled {
|
2022-11-10 06:48:46 +00:00
|
|
|
key := string(n.PublicKey())
|
|
|
|
|
|
|
|
x.mtx.Lock()
|
2022-03-05 10:32:11 +00:00
|
|
|
if x.mExclude == nil {
|
|
|
|
x.mExclude = make(map[string]struct{}, 1)
|
|
|
|
}
|
|
|
|
|
2022-11-10 06:48:46 +00:00
|
|
|
x.mExclude[key] = struct{}{}
|
|
|
|
x.mtx.Unlock()
|
2022-03-05 10:32:11 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// checks if specified node was processed during the primary object placement.
|
2022-11-10 06:48:46 +00:00
|
|
|
func (x *traversal) processed(n placement.Node) bool {
|
|
|
|
x.mtx.RLock()
|
2022-03-05 10:32:11 +00:00
|
|
|
_, ok := x.mExclude[string(n.PublicKey())]
|
2022-11-10 06:48:46 +00:00
|
|
|
x.mtx.RUnlock()
|
2022-03-05 10:32:11 +00:00
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
2021-09-24 10:28:58 +00:00
|
|
|
type nodeDesc struct {
|
|
|
|
local bool
|
|
|
|
|
|
|
|
info placement.Node
|
|
|
|
}
|
|
|
|
|
2021-09-10 13:39:50 +00:00
|
|
|
// errIncompletePut is returned if processing on a container fails.
type errIncompletePut struct {
	singleErr error // error from the last responding node
}

// Error returns the common "incomplete PUT" message, followed by the text of
// the node error when one is recorded.
func (x errIncompletePut) Error() string {
	const commonMsg = "incomplete object PUT by placement"

	if x.singleErr != nil {
		return fmt.Sprintf("%s: %v", commonMsg, x.singleErr)
	}

	return commonMsg
}

// Unwrap exposes the recorded node error (nil if there is none) so that
// errors.Is and errors.As can inspect the cause of the incomplete PUT.
func (x errIncompletePut) Unwrap() error {
	return x.singleErr
}
|
2020-09-21 14:31:31 +00:00
|
|
|
|
2023-04-12 14:01:29 +00:00
|
|
|
func (t *distributedTarget) WriteHeader(_ context.Context, obj *objectSDK.Object) error {
|
2020-09-21 14:31:31 +00:00
|
|
|
t.obj = obj
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (t *distributedTarget) Write(_ context.Context, p []byte) (n int, err error) {
|
2023-04-03 10:09:44 +00:00
|
|
|
t.payload.Data = append(t.payload.Data, p...)
|
2020-09-21 14:31:31 +00:00
|
|
|
|
|
|
|
return len(p), nil
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (t *distributedTarget) Close(ctx context.Context) (*transformer.AccessIdentifiers, error) {
|
2022-12-06 10:25:19 +00:00
|
|
|
defer func() {
|
|
|
|
putPayload(t.payload)
|
|
|
|
t.payload = nil
|
|
|
|
}()
|
|
|
|
|
2023-04-03 10:09:44 +00:00
|
|
|
t.obj.SetPayload(t.payload.Data)
|
2020-12-01 11:23:28 +00:00
|
|
|
|
2023-07-11 14:32:00 +00:00
|
|
|
if err := t.WriteObject(ctx, t.obj); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
id, _ := t.obj.ID()
|
|
|
|
return &transformer.AccessIdentifiers{
|
|
|
|
SelfID: id,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// WriteObject implements the transformer.ObjectWriter interface.
|
|
|
|
func (t *distributedTarget) WriteObject(ctx context.Context, obj *objectSDK.Object) error {
|
|
|
|
t.obj = obj
|
|
|
|
|
2022-11-01 17:32:43 +00:00
|
|
|
var err error
|
|
|
|
|
|
|
|
if t.objMeta, err = t.fmt.ValidateContent(t.obj); err != nil {
|
2023-07-11 14:32:00 +00:00
|
|
|
return fmt.Errorf("(%T) could not validate payload content: %w", t, err)
|
2020-09-30 17:54:25 +00:00
|
|
|
}
|
|
|
|
|
2023-02-16 12:33:33 +00:00
|
|
|
if len(t.obj.Children()) > 0 {
|
|
|
|
// enabling extra broadcast for linking objects
|
|
|
|
t.traversal.extraBroadcastEnabled = true
|
|
|
|
}
|
|
|
|
|
2023-07-11 14:32:00 +00:00
|
|
|
return t.iteratePlacement(ctx)
|
2021-05-28 07:34:31 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (t *distributedTarget) sendObject(ctx context.Context, node nodeDesc) error {
|
2021-09-24 10:28:58 +00:00
|
|
|
if !node.local && t.relay != nil {
|
2023-04-12 08:02:25 +00:00
|
|
|
return t.relay(ctx, node)
|
2021-05-27 14:25:29 +00:00
|
|
|
}
|
|
|
|
|
2021-09-06 12:17:14 +00:00
|
|
|
target := t.nodeTargetInitializer(node)
|
2021-05-28 07:34:31 +00:00
|
|
|
|
2022-11-01 17:32:43 +00:00
|
|
|
if err := target.WriteObject(t.obj, t.objMeta); err != nil {
|
2021-05-28 07:34:31 +00:00
|
|
|
return fmt.Errorf("could not write header: %w", err)
|
2023-04-03 11:23:53 +00:00
|
|
|
} else if _, err := target.Close(ctx); err != nil {
|
2021-05-28 07:34:31 +00:00
|
|
|
return fmt.Errorf("could not close object stream: %w", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-07-11 14:33:13 +00:00
|
|
|
func (t *distributedTarget) iteratePlacement(ctx context.Context) error {
|
2022-05-12 16:37:46 +00:00
|
|
|
id, _ := t.obj.ID()
|
|
|
|
|
2021-05-28 07:34:31 +00:00
|
|
|
traverser, err := placement.NewTraverser(
|
2022-05-31 17:00:41 +00:00
|
|
|
append(t.traversal.opts, placement.ForObject(id))...,
|
2021-05-28 07:34:31 +00:00
|
|
|
)
|
|
|
|
if err != nil {
|
2023-07-11 14:33:13 +00:00
|
|
|
return fmt.Errorf("(%T) could not create object placement traverser: %w", t, err)
|
2021-05-28 07:34:31 +00:00
|
|
|
}
|
|
|
|
|
2023-04-03 09:24:01 +00:00
|
|
|
resErr := &atomic.Value{}
|
2021-09-08 23:13:55 +00:00
|
|
|
|
2020-09-21 14:31:31 +00:00
|
|
|
for {
|
|
|
|
addrs := traverser.Next()
|
|
|
|
if len(addrs) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
if t.iterateAddresses(ctx, traverser, addrs, resErr) {
|
2023-04-03 09:24:01 +00:00
|
|
|
break
|
2020-09-21 14:31:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !traverser.Success() {
|
2021-09-10 13:39:50 +00:00
|
|
|
var err errIncompletePut
|
|
|
|
err.singleErr, _ = resErr.Load().(error)
|
2023-07-11 14:33:13 +00:00
|
|
|
return err
|
2020-09-21 14:31:31 +00:00
|
|
|
}
|
|
|
|
|
2022-03-05 10:32:11 +00:00
|
|
|
// perform additional container broadcast if needed
|
|
|
|
if t.traversal.submitPrimaryPlacementFinish() {
|
2023-07-11 14:33:13 +00:00
|
|
|
err = t.iteratePlacement(ctx)
|
2022-03-05 10:32:11 +00:00
|
|
|
if err != nil {
|
2023-04-12 14:35:10 +00:00
|
|
|
t.log.Error(logs.PutAdditionalContainerBroadcastFailure, zap.Error(err))
|
2022-03-05 10:32:11 +00:00
|
|
|
// we don't fail primary operation because of broadcast failure
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-11 14:33:13 +00:00
|
|
|
return nil
|
2020-09-21 14:31:31 +00:00
|
|
|
}
|
2023-04-03 09:24:01 +00:00
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
func (t *distributedTarget) iterateAddresses(ctx context.Context, traverser *placement.Traverser, addrs []placement.Node, resErr *atomic.Value) bool {
|
2023-04-03 09:24:01 +00:00
|
|
|
wg := &sync.WaitGroup{}
|
|
|
|
|
|
|
|
for i := range addrs {
|
|
|
|
if t.traversal.processed(addrs[i]) {
|
|
|
|
// it can happen only during additional container broadcast
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
|
|
|
|
addr := addrs[i]
|
|
|
|
|
2023-06-19 10:52:26 +00:00
|
|
|
workerPool, isLocal := t.getWorkerPool(addr.PublicKey())
|
2023-04-03 09:24:01 +00:00
|
|
|
if err := workerPool.Submit(func() {
|
|
|
|
defer wg.Done()
|
|
|
|
|
2023-04-03 11:23:53 +00:00
|
|
|
err := t.sendObject(ctx, nodeDesc{local: isLocal, info: addr})
|
2023-04-03 09:24:01 +00:00
|
|
|
|
|
|
|
// mark the container node as processed in order to exclude it
|
|
|
|
// in subsequent container broadcast. Note that we don't
|
|
|
|
// process this node during broadcast if primary placement
|
|
|
|
// on it failed.
|
|
|
|
t.traversal.submitProcessed(addr)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
resErr.Store(err)
|
|
|
|
svcutil.LogServiceError(t.log, "PUT", addr.Addresses(), err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
traverser.SubmitSuccess()
|
|
|
|
}); err != nil {
|
|
|
|
wg.Done()
|
|
|
|
svcutil.LogWorkerPoolError(t.log, "PUT", err)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|