frostfs-node/pkg/services/object/put/ec.go

368 lines
9.3 KiB
Go
Raw Normal View History

package putsvc
import (
"context"
"crypto/ecdsa"
"encoding/hex"
"errors"
"fmt"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/client"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/policy"
svcutil "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object/util"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/object_manager/placement"
containerSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/erasurecode"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
var _ transformer.ObjectWriter = (*ecWriter)(nil)
var (
errUnsupportedECObject = errors.New("object is not supported for erasure coding")
errFailedToSaveAllECParts = errors.New("failed to save all EC parts")
)
type ecWriter struct {
cfg *cfg
placementOpts []placement.Option
container containerSDK.Container
key *ecdsa.PrivateKey
commonPrm *svcutil.CommonPrm
relay func(context.Context, client.NodeInfo, client.MultiAddressClient) error
objMeta object.ContentMeta
objMetaValid bool
}
func (e *ecWriter) WriteObject(ctx context.Context, obj *objectSDK.Object) error {
relayed, err := e.relayIfNotContainerNode(ctx, obj)
if err != nil {
return err
}
if relayed {
return nil
}
if !object.IsECSupported(obj) {
// must be resolved by caller
return errUnsupportedECObject
}
if !e.objMetaValid {
if e.objMeta, err = e.cfg.fmtValidator.ValidateContent(obj); err != nil {
return fmt.Errorf("(%T) could not validate payload content: %w", e, err)
}
e.objMetaValid = true
}
if obj.ECHeader() != nil {
return e.writeECPart(ctx, obj)
}
return e.writeRawObject(ctx, obj)
}
func (e *ecWriter) relayIfNotContainerNode(ctx context.Context, obj *objectSDK.Object) (bool, error) {
if e.relay == nil {
return false, nil
}
currentNodeIsContainerNode, err := e.currentNodeIsContainerNode()
if err != nil {
return false, err
}
if currentNodeIsContainerNode {
// object can be splitted or saved local
return false, nil
}
objID := object.AddressOf(obj).Object()
var index uint32
if obj.ECHeader() != nil {
objID = obj.ECHeader().Parent()
index = obj.ECHeader().Index()
}
if err := e.relayToContainerNode(ctx, objID, index); err != nil {
return false, err
}
return true, nil
}
func (e *ecWriter) currentNodeIsContainerNode() (bool, error) {
t, err := placement.NewTraverser(e.placementOpts...)
if err != nil {
return false, err
}
for {
nodes := t.Next()
if len(nodes) == 0 {
break
}
for _, node := range nodes {
if e.cfg.netmapKeys.IsLocalKey(node.PublicKey()) {
return true, nil
}
}
}
return false, nil
}
func (e *ecWriter) relayToContainerNode(ctx context.Context, objID oid.ID, index uint32) error {
t, err := placement.NewTraverser(append(e.placementOpts, placement.ForObject(objID))...)
if err != nil {
return err
}
var lastErr error
offset := int(index)
for {
nodes := t.Next()
if len(nodes) == 0 {
break
}
for idx := range nodes {
node := nodes[(idx+offset)%len(nodes)]
var info client.NodeInfo
client.NodeInfoFromNetmapElement(&info, node)
c, err := e.cfg.clientConstructor.Get(info)
if err != nil {
return fmt.Errorf("could not create SDK client %s: %w", info.AddressGroup(), err)
}
completed := make(chan interface{})
if poolErr := e.cfg.remotePool.Submit(func() {
defer close(completed)
err = e.relay(ctx, info, c)
}); poolErr != nil {
close(completed)
svcutil.LogWorkerPoolError(e.cfg.log, "PUT", poolErr)
return poolErr
}
<-completed
if err == nil {
return nil
}
e.cfg.log.Logger.Warn(logs.ECFailedToSendToContainerNode, zap.Stringers("address_group", info.AddressGroup()))
lastErr = err
}
}
if lastErr == nil {
return nil
}
return errIncompletePut{
singleErr: lastErr,
}
}
func (e *ecWriter) writeECPart(ctx context.Context, obj *objectSDK.Object) error {
if e.commonPrm.LocalOnly() {
return e.writePartLocal(ctx, obj)
}
t, err := placement.NewTraverser(append(e.placementOpts, placement.ForObject(obj.ECHeader().Parent()))...)
if err != nil {
return err
}
eg, egCtx := errgroup.WithContext(ctx)
for {
nodes := t.Next()
if len(nodes) == 0 {
break
}
eg.Go(func() error {
return e.writePart(egCtx, obj, int(obj.ECHeader().Index()), nodes, make([]atomic.Bool, len(nodes)))
})
t.SubmitSuccess()
}
if err := eg.Wait(); err != nil {
return errIncompletePut{
singleErr: err,
}
}
return nil
}
func (e *ecWriter) writeRawObject(ctx context.Context, obj *objectSDK.Object) error {
// now only single EC policy is supported
c, err := erasurecode.NewConstructor(policy.ECDataCount(e.container.PlacementPolicy()), policy.ECParityCount(e.container.PlacementPolicy()))
if err != nil {
return err
}
parts, err := c.Split(obj, e.key)
if err != nil {
return err
}
partsProcessed := make([]atomic.Bool, len(parts))
objID, _ := obj.ID()
t, err := placement.NewTraverser(append(e.placementOpts, placement.ForObject(objID))...)
if err != nil {
return err
}
for {
eg, egCtx := errgroup.WithContext(ctx)
nodes := t.Next()
if len(nodes) == 0 {
break
}
visited := make([]atomic.Bool, len(nodes))
for idx := range parts {
visited[idx%len(nodes)].Store(true)
}
for idx := range parts {
if !partsProcessed[idx].Load() {
eg.Go(func() error {
err := e.writePart(egCtx, parts[idx], idx, nodes, visited)
if err == nil {
partsProcessed[idx].Store(true)
t.SubmitSuccess()
}
return err
})
}
}
err = eg.Wait()
}
if err != nil {
return errIncompletePut{
singleErr: err,
}
}
for idx := range partsProcessed {
if !partsProcessed[idx].Load() {
return errIncompletePut{
singleErr: errFailedToSaveAllECParts,
}
}
}
return nil
}
func (e *ecWriter) writePart(ctx context.Context, obj *objectSDK.Object, partIdx int, nodes []placement.Node, visited []atomic.Bool) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// try to save to node for current part index
node := nodes[partIdx%len(nodes)]
err := e.putECPartToNode(ctx, obj, node)
if err == nil {
return nil
}
e.cfg.log.Warn(logs.ECFailedToSaveECPart, zap.Stringer("part_address", object.AddressOf(obj)),
zap.Stringer("parent_address", obj.ECHeader().Parent()), zap.Int("part_index", partIdx),
zap.String("node", hex.EncodeToString(node.PublicKey())), zap.Error(err))
partVisited := make([]bool, len(nodes))
partVisited[partIdx%len(nodes)] = true
// try to save to any node not visited by any of other parts
for i := 1; i < len(nodes); i++ {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
idx := (partIdx + i) % len(nodes)
if !visited[idx].CompareAndSwap(false, true) {
continue
}
node = nodes[idx]
err := e.putECPartToNode(ctx, obj, node)
if err == nil {
return nil
}
e.cfg.log.Warn(logs.ECFailedToSaveECPart, zap.Stringer("part_address", object.AddressOf(obj)),
zap.Stringer("parent_address", obj.ECHeader().Parent()), zap.Int("part_index", partIdx),
zap.String("node", hex.EncodeToString(node.PublicKey())),
zap.Error(err))
partVisited[idx] = true
}
// try to save to any node not visited by current part
for i := 0; i < len(nodes); i++ {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
if partVisited[i] {
continue
}
node = nodes[i]
err := e.putECPartToNode(ctx, obj, node)
if err == nil {
return nil
}
e.cfg.log.Warn(logs.ECFailedToSaveECPart, zap.Stringer("part_address", object.AddressOf(obj)),
zap.Stringer("parent_address", obj.ECHeader().Parent()), zap.Int("part_index", partIdx),
zap.String("node", hex.EncodeToString(node.PublicKey())),
zap.Error(err))
}
return fmt.Errorf("failed to save EC chunk %s to any node", object.AddressOf(obj))
}
func (e *ecWriter) putECPartToNode(ctx context.Context, obj *objectSDK.Object, node placement.Node) error {
if e.cfg.netmapKeys.IsLocalKey(node.PublicKey()) {
return e.writePartLocal(ctx, obj)
}
return e.writePartRemote(ctx, obj, node)
}
func (e *ecWriter) writePartLocal(ctx context.Context, obj *objectSDK.Object) error {
var err error
localTarget := localTarget{
storage: e.cfg.localStore,
}
completed := make(chan interface{})
if poolErr := e.cfg.localPool.Submit(func() {
defer close(completed)
err = localTarget.WriteObject(ctx, obj, e.objMeta)
}); poolErr != nil {
close(completed)
return poolErr
}
<-completed
return err
}
func (e *ecWriter) writePartRemote(ctx context.Context, obj *objectSDK.Object, node placement.Node) error {
var clientNodeInfo client.NodeInfo
client.NodeInfoFromNetmapElement(&clientNodeInfo, node)
remoteTaget := remoteTarget{
privateKey: e.key,
clientConstructor: e.cfg.clientConstructor,
commonPrm: e.commonPrm,
nodeInfo: clientNodeInfo,
}
var err error
completed := make(chan interface{})
if poolErr := e.cfg.remotePool.Submit(func() {
defer close(completed)
err = remoteTaget.WriteObject(ctx, obj, e.objMeta)
}); poolErr != nil {
close(completed)
return poolErr
}
<-completed
return err
}