forked from TrueCloudLab/frostfs-node
[#1210] reputation: Resolve race condition
Make all epoch independent in reputation process. Do not reset any timers related to reputation. Make it possible to finish iteration after the unexpected `NewEpoch` event. Signed-off-by: Pavel Karpy <carpawell@nspcc.ru>
This commit is contained in:
parent
77d847dbea
commit
c3db12d71b
6 changed files with 66 additions and 115 deletions
|
@ -31,7 +31,6 @@ import (
|
||||||
nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap"
|
nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/morph/event"
|
"github.com/nspcc-dev/neofs-node/pkg/morph/event"
|
||||||
netmap2 "github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
|
netmap2 "github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/morph/timer"
|
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/network"
|
"github.com/nspcc-dev/neofs-node/pkg/network"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/network/cache"
|
"github.com/nspcc-dev/neofs-node/pkg/network/cache"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/services/control"
|
"github.com/nspcc-dev/neofs-node/pkg/services/control"
|
||||||
|
@ -134,8 +133,7 @@ type cfgMorph struct {
|
||||||
|
|
||||||
disableCache bool
|
disableCache bool
|
||||||
|
|
||||||
blockTimers []*timer.BlockTimer // all combined timers
|
eigenTrustTicker *eigenTrustTickers // timers for EigenTrust iterations
|
||||||
eigenTrustTimer *timer.BlockTimer // timer for EigenTrust iterations
|
|
||||||
|
|
||||||
proxyScriptHash neogoutil.Uint160
|
proxyScriptHash neogoutil.Uint160
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,7 +93,6 @@ func bootUp(c *cfg) {
|
||||||
makeAndWaitNotaryDeposit(c)
|
makeAndWaitNotaryDeposit(c)
|
||||||
bootstrapNode(c)
|
bootstrapNode(c)
|
||||||
startWorkers(c)
|
startWorkers(c)
|
||||||
startBlockTimers(c)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func wait(c *cfg) {
|
func wait(c *cfg) {
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/common"
|
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/common"
|
||||||
intermediatereputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/intermediate"
|
intermediatereputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/intermediate"
|
||||||
localreputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/local"
|
localreputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/local"
|
||||||
|
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/ticker"
|
||||||
repClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/reputation"
|
repClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/reputation"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/morph/event"
|
"github.com/nspcc-dev/neofs-node/pkg/morph/event"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
|
"github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
|
||||||
|
@ -215,36 +216,40 @@ func initReputationService(c *cfg) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize eigen trust block timer
|
// initialize eigen trust block timer
|
||||||
durationMeter := NewEigenTrustDuration(c.cfgNetmap.wrapper)
|
newEigenTrustIterTimer(c)
|
||||||
|
|
||||||
newEigenTrustIterTimer(c, durationMeter, func() {
|
addNewEpochAsyncNotificationHandler(
|
||||||
epoch, err := c.cfgNetmap.wrapper.Epoch()
|
c,
|
||||||
|
func(e event.Event) {
|
||||||
|
epoch := e.(netmap.NewEpoch).EpochNumber()
|
||||||
|
|
||||||
|
log := c.log.With(zap.Uint64("epoch", epoch))
|
||||||
|
|
||||||
|
duration, err := c.cfgNetmap.wrapper.EpochDuration()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.log.Debug(
|
log.Debug("could not fetch epoch duration", zap.Error(err))
|
||||||
"could not get current epoch",
|
|
||||||
zap.String("error", err.Error()),
|
|
||||||
)
|
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iterations, err := c.cfgNetmap.wrapper.EigenTrustIterations()
|
||||||
|
if err != nil {
|
||||||
|
log.Debug("could not fetch iteration number", zap.Error(err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
epochTimer, err := ticker.NewIterationsTicker(duration, iterations, func() {
|
||||||
eigenTrustController.Continue(
|
eigenTrustController.Continue(
|
||||||
eigentrustctrl.ContinuePrm{
|
eigentrustctrl.ContinuePrm{
|
||||||
Epoch: epoch - 1,
|
Epoch: epoch - 1,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
addNewEpochAsyncNotificationHandler(
|
|
||||||
c,
|
|
||||||
func(e event.Event) {
|
|
||||||
durationMeter.Update() // recalculate duration of one iteration round
|
|
||||||
|
|
||||||
err := c.cfgMorph.eigenTrustTimer.Reset() // start iteration rounds again
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.log.Warn("can't reset block timer to start eigen trust calculations again",
|
log.Debug("could not create fixed epoch timer", zap.Error(err))
|
||||||
zap.String("error", err.Error()))
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.cfgMorph.eigenTrustTicker.addEpochTimer(epoch, epochTimer)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,83 +3,41 @@ package main
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap"
|
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/ticker"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/morph/timer"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type (
|
type eigenTrustTickers struct {
|
||||||
// EigenTrustDuration is a structure that provides duration of one
|
m sync.Mutex
|
||||||
// eigen trust iteration round in blocks for block timer.
|
|
||||||
EigenTrustDuration struct {
|
|
||||||
sync.Mutex
|
|
||||||
|
|
||||||
nm *nmClient.Client
|
timers map[uint64]*ticker.IterationsTicker
|
||||||
val uint32
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewEigenTrustDuration returns instance of EigenTrustDuration.
|
|
||||||
func NewEigenTrustDuration(nm *nmClient.Client) *EigenTrustDuration {
|
|
||||||
return &EigenTrustDuration{
|
|
||||||
nm: nm,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Value returns number of blocks between two iterations of EigenTrust
|
func (e *eigenTrustTickers) addEpochTimer(epoch uint64, timer *ticker.IterationsTicker) {
|
||||||
// calculation. This value is not changed between `Update` calls.
|
e.m.Lock()
|
||||||
func (e *EigenTrustDuration) Value() (uint32, error) {
|
defer e.m.Unlock()
|
||||||
e.Lock()
|
|
||||||
defer e.Unlock()
|
|
||||||
|
|
||||||
if e.val == 0 {
|
e.timers[epoch] = timer
|
||||||
e.update()
|
|
||||||
}
|
|
||||||
|
|
||||||
return e.val, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update function recalculate duration of EigenTrust iteration based on
|
func (e *eigenTrustTickers) tick() {
|
||||||
// NeoFS epoch duration and amount of iteration rounds from global config.
|
e.m.Lock()
|
||||||
func (e *EigenTrustDuration) Update() {
|
defer e.m.Unlock()
|
||||||
e.Lock()
|
|
||||||
defer e.Unlock()
|
|
||||||
|
|
||||||
e.update()
|
for epoch, t := range e.timers {
|
||||||
}
|
if !t.Tick() {
|
||||||
|
delete(e.timers, epoch)
|
||||||
func (e *EigenTrustDuration) update() {
|
|
||||||
iterationAmount, err := e.nm.EigenTrustIterations()
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
epochDuration, err := e.nm.EpochDuration()
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
e.val = uint32(epochDuration / iterationAmount)
|
|
||||||
}
|
|
||||||
|
|
||||||
func startBlockTimers(c *cfg) {
|
|
||||||
for i := range c.cfgMorph.blockTimers {
|
|
||||||
if err := c.cfgMorph.blockTimers[i].Reset(); err != nil {
|
|
||||||
fatalOnErr(err)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func tickBlockTimers(c *cfg) {
|
func tickBlockTimers(c *cfg) {
|
||||||
for i := range c.cfgMorph.blockTimers {
|
c.cfgMorph.eigenTrustTicker.tick()
|
||||||
c.cfgMorph.blockTimers[i].Tick()
|
}
|
||||||
|
|
||||||
|
func newEigenTrustIterTimer(c *cfg) {
|
||||||
|
c.cfgMorph.eigenTrustTicker = &eigenTrustTickers{
|
||||||
|
// it is expected to have max 2 concurrent epoch
|
||||||
|
// in normal mode work
|
||||||
|
timers: make(map[uint64]*ticker.IterationsTicker, 2),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newEigenTrustIterTimer(c *cfg, it *EigenTrustDuration, handler timer.BlockTickHandler) {
|
|
||||||
c.cfgMorph.eigenTrustTimer = timer.NewBlockTimer(
|
|
||||||
it.Value,
|
|
||||||
handler,
|
|
||||||
)
|
|
||||||
|
|
||||||
c.cfgMorph.blockTimers = append(c.cfgMorph.blockTimers, c.cfgMorph.eigenTrustTimer)
|
|
||||||
}
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ type iterContext struct {
|
||||||
|
|
||||||
eigentrust.EpochIteration
|
eigentrust.EpochIteration
|
||||||
|
|
||||||
|
iterationNumber uint32
|
||||||
last bool
|
last bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,11 +43,20 @@ func (c *Controller) Continue(prm ContinuePrm) {
|
||||||
|
|
||||||
iterCtx.Context, iterCtx.cancel = context.WithCancel(context.Background())
|
iterCtx.Context, iterCtx.cancel = context.WithCancel(context.Background())
|
||||||
iterCtx.EpochIteration.SetEpoch(prm.Epoch)
|
iterCtx.EpochIteration.SetEpoch(prm.Epoch)
|
||||||
|
|
||||||
|
iterations, err := c.prm.IterationsProvider.EigenTrustIterations()
|
||||||
|
if err != nil {
|
||||||
|
c.opts.log.Error("could not get EigenTrust iteration number",
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
iterCtx.iterationNumber = uint32(iterations)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
iterCtx.cancel()
|
iterCtx.cancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
iterCtx.last = iterCtx.I() == c.iterationNumber-1
|
iterCtx.last = iterCtx.I() == iterCtx.iterationNumber-1
|
||||||
|
|
||||||
err := c.prm.WorkerPool.Submit(func() {
|
err := c.prm.WorkerPool.Submit(func() {
|
||||||
c.prm.DaughtersTrustCalculator.Calculate(iterCtx.iterContext)
|
c.prm.DaughtersTrustCalculator.Calculate(iterCtx.iterContext)
|
||||||
|
@ -66,16 +76,6 @@ func (c *Controller) Continue(prm ContinuePrm) {
|
||||||
// number as already processed, but in any case it grows up
|
// number as already processed, but in any case it grows up
|
||||||
// In this case and worker pool failure we can mark epoch
|
// In this case and worker pool failure we can mark epoch
|
||||||
delete(c.mCtx, prm.Epoch)
|
delete(c.mCtx, prm.Epoch)
|
||||||
|
|
||||||
iterations, err := c.prm.IterationsProvider.EigenTrustIterations()
|
|
||||||
if err != nil {
|
|
||||||
c.opts.log.Debug(
|
|
||||||
"could not get iteration numbers",
|
|
||||||
zap.String("error", err.Error()),
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
c.iterationNumber = uint32(iterations)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,9 +46,6 @@ type Controller struct {
|
||||||
|
|
||||||
opts *options
|
opts *options
|
||||||
|
|
||||||
// Number of iterations
|
|
||||||
iterationNumber uint32
|
|
||||||
|
|
||||||
mtx sync.Mutex
|
mtx sync.Mutex
|
||||||
mCtx map[uint64]*iterContextCancel
|
mCtx map[uint64]*iterContextCancel
|
||||||
}
|
}
|
||||||
|
@ -75,11 +72,6 @@ func New(prm Prm, opts ...Option) *Controller {
|
||||||
panicOnPrmValue("DaughtersTrustCalculator", prm.DaughtersTrustCalculator)
|
panicOnPrmValue("DaughtersTrustCalculator", prm.DaughtersTrustCalculator)
|
||||||
}
|
}
|
||||||
|
|
||||||
iterations, err := prm.IterationsProvider.EigenTrustIterations()
|
|
||||||
if err != nil {
|
|
||||||
panic(fmt.Errorf("could not init EigenTrust controller: could not get num of iterations: %w", err))
|
|
||||||
}
|
|
||||||
|
|
||||||
o := defaultOpts()
|
o := defaultOpts()
|
||||||
|
|
||||||
for _, opt := range opts {
|
for _, opt := range opts {
|
||||||
|
@ -87,7 +79,6 @@ func New(prm Prm, opts ...Option) *Controller {
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Controller{
|
return &Controller{
|
||||||
iterationNumber: uint32(iterations),
|
|
||||||
prm: prm,
|
prm: prm,
|
||||||
opts: o,
|
opts: o,
|
||||||
mCtx: make(map[uint64]*iterContextCancel),
|
mCtx: make(map[uint64]*iterContextCancel),
|
||||||
|
|
Loading…
Reference in a new issue