2020-10-21 09:24:02 +00:00
|
|
|
package policer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-09-04 09:00:49 +00:00
|
|
|
"fmt"
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2023-04-12 14:35:10 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
2023-08-24 12:35:19 +00:00
|
|
|
containercore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/container"
|
2023-03-07 13:38:26 +00:00
|
|
|
objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
2024-03-28 14:29:30 +00:00
|
|
|
policycore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/policy"
|
2023-03-07 13:38:26 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/replicator"
|
2024-11-06 07:57:01 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
2023-03-07 13:38:26 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
|
2023-07-06 12:36:41 +00:00
|
|
|
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
2023-04-04 12:29:12 +00:00
|
|
|
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
2024-11-06 07:57:01 +00:00
|
|
|
"go.opentelemetry.io/otel/attribute"
|
|
|
|
"go.opentelemetry.io/otel/trace"
|
2020-10-21 09:24:02 +00:00
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
func (p *Policer) processObject(ctx context.Context, objInfo objectcore.Info) error {
|
2024-11-06 07:57:01 +00:00
|
|
|
ctx, span := tracing.StartSpanFromContext(ctx, "Policer.ProcessObject", trace.WithAttributes(
|
|
|
|
attribute.String("address", objInfo.Address.String()),
|
|
|
|
attribute.Bool("is_linking_object", objInfo.IsLinkingObject),
|
|
|
|
attribute.Bool("is_ec_part", objInfo.ECInfo != nil),
|
|
|
|
attribute.String("type", objInfo.Type.String()),
|
|
|
|
))
|
|
|
|
defer span.End()
|
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
cnr, err := p.cnrSrc.Get(objInfo.Address.Container())
|
2020-10-21 09:24:02 +00:00
|
|
|
if err != nil {
|
2023-08-04 11:14:07 +00:00
|
|
|
if client.IsErrContainerNotFound(err) {
|
2024-05-13 13:50:21 +00:00
|
|
|
existed, errWasRemoved := containercore.WasRemoved(p.cnrSrc, objInfo.Address.Container())
|
2023-09-04 09:00:49 +00:00
|
|
|
if errWasRemoved != nil {
|
|
|
|
return fmt.Errorf("%s: %w", logs.PolicerCouldNotConfirmContainerRemoval, errWasRemoved)
|
2023-08-24 12:35:19 +00:00
|
|
|
} else if existed {
|
2024-05-13 13:50:21 +00:00
|
|
|
err := p.buryFn(ctx, objInfo.Address)
|
2023-08-24 12:35:19 +00:00
|
|
|
if err != nil {
|
2023-09-04 09:00:49 +00:00
|
|
|
return fmt.Errorf("%s: %w", logs.PolicerCouldNotInhumeObjectWithMissingContainer, err)
|
2023-08-24 12:35:19 +00:00
|
|
|
}
|
2021-10-16 11:21:03 +00:00
|
|
|
}
|
|
|
|
}
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2023-09-04 09:00:49 +00:00
|
|
|
return fmt.Errorf("%s: %w", logs.PolicerCouldNotGetContainer, err)
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
|
|
|
|
2022-06-22 10:55:31 +00:00
|
|
|
policy := cnr.Value.PlacementPolicy()
|
2024-05-13 13:50:21 +00:00
|
|
|
|
2024-03-28 14:29:30 +00:00
|
|
|
if policycore.IsECPlacement(policy) {
|
2024-10-01 12:27:06 +00:00
|
|
|
return p.processECContainerObject(ctx, objInfo, cnr.Value)
|
2024-03-28 14:29:30 +00:00
|
|
|
}
|
2024-05-13 13:50:21 +00:00
|
|
|
return p.processRepContainerObject(ctx, objInfo, policy)
|
|
|
|
}
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
func (p *Policer) processRepContainerObject(ctx context.Context, objInfo objectcore.Info, policy netmap.PlacementPolicy) error {
|
|
|
|
idObj := objInfo.Address.Object()
|
|
|
|
idCnr := objInfo.Address.Container()
|
2022-11-12 13:48:44 +00:00
|
|
|
nn, err := p.placementBuilder.BuildPlacement(idCnr, &idObj, policy)
|
2020-10-21 09:24:02 +00:00
|
|
|
if err != nil {
|
2023-09-04 09:00:49 +00:00
|
|
|
return fmt.Errorf("%s: %w", logs.PolicerCouldNotBuildPlacementVectorForObject, err)
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
|
|
|
|
2023-04-04 11:57:44 +00:00
|
|
|
c := &placementRequirements{}
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2022-06-09 19:11:35 +00:00
|
|
|
// cached info about already checked nodes
|
2022-10-07 09:19:31 +00:00
|
|
|
checkedNodes := newNodeCache()
|
2022-06-09 19:11:35 +00:00
|
|
|
|
2020-10-21 09:24:02 +00:00
|
|
|
for i := range nn {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2023-09-04 09:00:49 +00:00
|
|
|
return ctx.Err()
|
2020-10-21 09:24:02 +00:00
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2024-05-15 13:11:51 +00:00
|
|
|
shortage := policy.ReplicaDescriptor(i).NumberOfObjects()
|
|
|
|
if objInfo.Type == objectSDK.TypeLock || objInfo.Type == objectSDK.TypeTombstone || objInfo.IsLinkingObject {
|
|
|
|
// all nodes of a container must store the `LOCK`, `TOMBSTONE` and linking objects
|
|
|
|
// for correct object removal protection:
|
|
|
|
// - `LOCK`, `TOMBSTONE` and linking objects are broadcast on their PUT requests;
|
|
|
|
// - `LOCK` object removal is a prohibited action in the GC.
|
|
|
|
shortage = uint32(len(nn[i]))
|
|
|
|
}
|
|
|
|
|
|
|
|
p.processRepNodes(ctx, c, objInfo, nn[i], shortage, checkedNodes)
|
2022-05-20 11:31:55 +00:00
|
|
|
}
|
|
|
|
|
2023-03-06 13:47:35 +00:00
|
|
|
if !c.needLocalCopy && c.removeLocalCopy {
|
2024-10-21 07:22:54 +00:00
|
|
|
p.log.Info(ctx, logs.PolicerRedundantLocalObjectCopyDetected,
|
2024-05-13 13:50:21 +00:00
|
|
|
zap.Stringer("object", objInfo.Address),
|
2022-05-20 11:31:55 +00:00
|
|
|
)
|
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
p.cbRedundantCopy(ctx, objInfo.Address)
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
2023-09-04 09:00:49 +00:00
|
|
|
return nil
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
|
|
|
|
2023-04-04 11:57:44 +00:00
|
|
|
// placementRequirements collects the decisions made while the placement
// vectors of one object are evaluated. A single value is shared between all
// vectors of the object; the local copy is reported as redundant only when
// needLocalCopy is false and removeLocalCopy is true.
type placementRequirements struct {
	// needLocalCopy is set once the local node is found among the nodes that
	// have to hold the object according to the storage policy.
	needLocalCopy bool
	// removeLocalCopy is set once a placement vector is fully satisfied
	// (no shortage left) and none of the counted copies resides on a node
	// under maintenance, i.e. every counted copy has actually been checked.
	removeLocalCopy bool
}
|
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
func (p *Policer) processRepNodes(ctx context.Context, requirements *placementRequirements, objInfo objectcore.Info,
|
2023-10-31 11:56:55 +00:00
|
|
|
nodes []netmap.NodeInfo, shortage uint32, checkedNodes nodeCache,
|
|
|
|
) {
|
2024-05-13 13:50:21 +00:00
|
|
|
addr := objInfo.Address
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2022-10-26 14:10:14 +00:00
|
|
|
// Number of copies that are stored on maintenance nodes.
|
|
|
|
var uncheckedCopies int
|
2022-05-20 11:31:55 +00:00
|
|
|
for i := 0; shortage > 0 && i < len(nodes); i++ {
|
2020-10-21 09:24:02 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2021-09-06 12:17:14 +00:00
|
|
|
if p.netmapKeys.IsLocalKey(nodes[i].PublicKey()) {
|
2023-04-04 11:57:44 +00:00
|
|
|
requirements.needLocalCopy = true
|
2022-05-20 11:31:55 +00:00
|
|
|
|
|
|
|
shortage--
|
2024-09-18 09:21:53 +00:00
|
|
|
} else if nodes[i].Status().IsMaintenance() {
|
2023-04-04 12:29:12 +00:00
|
|
|
shortage, uncheckedCopies = p.handleMaintenance(nodes[i], checkedNodes, shortage, uncheckedCopies)
|
2022-05-30 05:19:59 +00:00
|
|
|
} else {
|
2023-06-29 09:13:01 +00:00
|
|
|
if status := checkedNodes.processStatus(nodes[i]); status.Processed() {
|
|
|
|
if status == nodeHoldsObject {
|
2022-06-09 19:11:35 +00:00
|
|
|
// node already contains replica, no need to replicate
|
|
|
|
nodes = append(nodes[:i], nodes[i+1:]...)
|
|
|
|
i--
|
|
|
|
shortage--
|
|
|
|
}
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-05-30 05:19:59 +00:00
|
|
|
callCtx, cancel := context.WithTimeout(ctx, p.headTimeout)
|
2022-05-20 11:31:55 +00:00
|
|
|
|
2024-05-14 11:43:21 +00:00
|
|
|
_, err := p.remoteHeader(callCtx, nodes[i], addr, false)
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2022-05-30 05:19:59 +00:00
|
|
|
cancel()
|
2020-10-21 09:24:02 +00:00
|
|
|
|
2024-01-10 12:17:05 +00:00
|
|
|
if err == nil {
|
2020-10-21 09:24:02 +00:00
|
|
|
shortage--
|
2022-10-07 09:19:31 +00:00
|
|
|
checkedNodes.submitReplicaHolder(nodes[i])
|
2024-01-10 12:17:05 +00:00
|
|
|
} else {
|
|
|
|
if client.IsErrObjectNotFound(err) {
|
|
|
|
checkedNodes.submitReplicaCandidate(nodes[i])
|
|
|
|
continue
|
2024-06-18 07:20:45 +00:00
|
|
|
} else if client.IsErrNodeUnderMaintenance(err) {
|
2024-01-10 12:17:05 +00:00
|
|
|
shortage, uncheckedCopies = p.handleMaintenance(nodes[i], checkedNodes, shortage, uncheckedCopies)
|
|
|
|
} else {
|
2024-10-21 07:22:54 +00:00
|
|
|
p.log.Error(ctx, logs.PolicerReceiveObjectHeaderToCheckPolicyCompliance,
|
2024-01-10 12:17:05 +00:00
|
|
|
zap.Stringer("object", addr),
|
|
|
|
zap.String("error", err.Error()),
|
|
|
|
)
|
|
|
|
}
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nodes = append(nodes[:i], nodes[i+1:]...)
|
|
|
|
i--
|
|
|
|
}
|
|
|
|
|
2023-04-04 12:29:12 +00:00
|
|
|
p.handleProcessNodesResult(ctx, addr, requirements, nodes, checkedNodes, shortage, uncheckedCopies)
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleMaintenance handles node in maintenance mode and returns new shortage and uncheckedCopies values
|
|
|
|
//
|
|
|
|
// consider remote nodes under maintenance as problem OK. Such
|
|
|
|
// nodes MAY not respond with object, however, this is how we
|
|
|
|
// prevent spam with new replicas.
|
|
|
|
// However, additional copies should not be removed in this case,
|
|
|
|
// because we can remove the only copy this way.
|
2023-06-29 09:13:01 +00:00
|
|
|
func (p *Policer) handleMaintenance(node netmap.NodeInfo, checkedNodes nodeCache, shortage uint32, uncheckedCopies int) (uint32, int) {
|
2023-04-04 12:29:12 +00:00
|
|
|
checkedNodes.submitReplicaHolder(node)
|
|
|
|
shortage--
|
|
|
|
uncheckedCopies++
|
|
|
|
|
2024-10-21 07:22:54 +00:00
|
|
|
p.log.Debug(context.Background(), logs.PolicerConsiderNodeUnderMaintenanceAsOK,
|
2023-04-04 12:29:12 +00:00
|
|
|
zap.String("node", netmap.StringifyPublicKey(node)),
|
|
|
|
)
|
|
|
|
return shortage, uncheckedCopies
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *Policer) handleProcessNodesResult(ctx context.Context, addr oid.Address, requirements *placementRequirements,
|
2023-10-31 11:56:55 +00:00
|
|
|
nodes []netmap.NodeInfo, checkedNodes nodeCache, shortage uint32, uncheckedCopies int,
|
|
|
|
) {
|
2023-06-29 09:13:01 +00:00
|
|
|
switch {
|
|
|
|
case shortage > 0:
|
2024-10-21 07:22:54 +00:00
|
|
|
p.log.Debug(ctx, logs.PolicerShortageOfObjectCopiesDetected,
|
2022-06-02 17:20:27 +00:00
|
|
|
zap.Stringer("object", addr),
|
2020-10-21 09:24:02 +00:00
|
|
|
zap.Uint32("shortage", shortage),
|
|
|
|
)
|
2020-10-21 11:50:48 +00:00
|
|
|
|
2023-06-29 09:13:01 +00:00
|
|
|
task := replicator.Task{
|
|
|
|
NumCopies: shortage,
|
|
|
|
Addr: addr,
|
|
|
|
Nodes: nodes,
|
|
|
|
}
|
2022-09-19 11:01:19 +00:00
|
|
|
|
2024-05-13 13:50:21 +00:00
|
|
|
p.replicator.HandleReplicationTask(ctx, task, checkedNodes)
|
2023-06-29 09:13:01 +00:00
|
|
|
|
|
|
|
case uncheckedCopies > 0:
|
2022-10-26 14:10:14 +00:00
|
|
|
// If we have more copies than needed, but some of them are from the maintenance nodes,
|
|
|
|
// save the local copy.
|
2024-10-21 07:22:54 +00:00
|
|
|
p.log.Debug(ctx, logs.PolicerSomeOfTheCopiesAreStoredOnNodesUnderMaintenance,
|
2022-10-26 14:10:14 +00:00
|
|
|
zap.Int("count", uncheckedCopies))
|
2023-06-29 09:13:01 +00:00
|
|
|
|
|
|
|
case uncheckedCopies == 0:
|
2023-03-06 13:47:35 +00:00
|
|
|
// Safe to remove: checked all copies, shortage == 0.
|
2023-04-04 11:57:44 +00:00
|
|
|
requirements.removeLocalCopy = true
|
2020-10-21 09:24:02 +00:00
|
|
|
}
|
|
|
|
}
|