forked from TrueCloudLab/frostfs-node
[#1604] policer: Properly handle maintenance nodes
Consider a `REP 1 REP 1` placement policy (selects/filters are omitted). The placement is `[1, 2], [1, 0]`. We are the 0-th node. Node 1 is under maintenance, so we do not replicate the object to node 2. In the second replication group, node 1 is under maintenance, but the current caching logic considers it a "replica holder" and removes the local copy. Voilà, we have data loss (DL) if the object is missing from node 1. Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
parent
26e0c82fb8
commit
57efa0bc8e
3 changed files with 14 additions and 1 deletions
|
@ -128,6 +128,10 @@ func (p *Policer) processRepNodes(ctx context.Context, requirements *placementRe
|
|||
if status == nodeHoldsObject {
|
||||
shortage--
|
||||
}
|
||||
if status == nodeIsUnderMaintenance {
|
||||
shortage--
|
||||
uncheckedCopies++
|
||||
}
|
||||
|
||||
nodes = append(nodes[:i], nodes[i+1:]...)
|
||||
i--
|
||||
|
@ -174,7 +178,7 @@ func (p *Policer) processRepNodes(ctx context.Context, requirements *placementRe
|
|||
// However, additional copies should not be removed in this case,
|
||||
// because we can remove the only copy this way.
|
||||
func (p *Policer) handleMaintenance(ctx context.Context, node netmap.NodeInfo, checkedNodes nodeCache, shortage uint32, uncheckedCopies int) (uint32, int) {
|
||||
checkedNodes.set(node, nodeHoldsObject)
|
||||
checkedNodes.set(node, nodeIsUnderMaintenance)
|
||||
shortage--
|
||||
uncheckedCopies++
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ const (
|
|||
nodeDoesNotHoldObject
|
||||
nodeHoldsObject
|
||||
nodeStatusUnknown
|
||||
nodeIsUnderMaintenance
|
||||
)
|
||||
|
||||
func (st nodeProcessStatus) Processed() bool {
|
||||
|
|
|
@ -170,6 +170,14 @@ func TestProcessObject(t *testing.T) {
|
|||
placement: [][]int{{0, 1, 2}},
|
||||
wantReplicateTo: []int{1, 2},
|
||||
},
|
||||
{
|
||||
desc: "do not remove local copy when MAINTENANCE status is cached",
|
||||
objType: objectSDK.TypeRegular,
|
||||
nodeCount: 3,
|
||||
policy: `REP 1 REP 1`,
|
||||
placement: [][]int{{1, 2}, {1, 0}},
|
||||
headResult: map[int]error{1: new(apistatus.NodeUnderMaintenance)},
|
||||
},
|
||||
}
|
||||
|
||||
for i := range tests {
|
||||
|
|
Loading…
Add table
Reference in a new issue