[#1210] reputation: Resolve race condition

Make all epoch independent in reputation process. Do not reset any timers
related to reputation. Make it possible to finish iteration after the
unexpected `NewEpoch` event.

Signed-off-by: Pavel Karpy <carpawell@nspcc.ru>
This commit is contained in:
Pavel Karpy 2022-03-02 19:31:56 +03:00 committed by Alex Vanin
parent 77d847dbea
commit c3db12d71b
6 changed files with 66 additions and 115 deletions

View file

@ -31,7 +31,6 @@ import (
nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap" nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap"
"github.com/nspcc-dev/neofs-node/pkg/morph/event" "github.com/nspcc-dev/neofs-node/pkg/morph/event"
netmap2 "github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap" netmap2 "github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
"github.com/nspcc-dev/neofs-node/pkg/morph/timer"
"github.com/nspcc-dev/neofs-node/pkg/network" "github.com/nspcc-dev/neofs-node/pkg/network"
"github.com/nspcc-dev/neofs-node/pkg/network/cache" "github.com/nspcc-dev/neofs-node/pkg/network/cache"
"github.com/nspcc-dev/neofs-node/pkg/services/control" "github.com/nspcc-dev/neofs-node/pkg/services/control"
@ -134,8 +133,7 @@ type cfgMorph struct {
disableCache bool disableCache bool
blockTimers []*timer.BlockTimer // all combined timers eigenTrustTicker *eigenTrustTickers // timers for EigenTrust iterations
eigenTrustTimer *timer.BlockTimer // timer for EigenTrust iterations
proxyScriptHash neogoutil.Uint160 proxyScriptHash neogoutil.Uint160
} }

View file

@ -93,7 +93,6 @@ func bootUp(c *cfg) {
makeAndWaitNotaryDeposit(c) makeAndWaitNotaryDeposit(c)
bootstrapNode(c) bootstrapNode(c)
startWorkers(c) startWorkers(c)
startBlockTimers(c)
} }
func wait(c *cfg) { func wait(c *cfg) {

View file

@ -10,6 +10,7 @@ import (
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/common" "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/common"
intermediatereputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/intermediate" intermediatereputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/intermediate"
localreputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/local" localreputation "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/local"
"github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/ticker"
repClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/reputation" repClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/reputation"
"github.com/nspcc-dev/neofs-node/pkg/morph/event" "github.com/nspcc-dev/neofs-node/pkg/morph/event"
"github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap" "github.com/nspcc-dev/neofs-node/pkg/morph/event/netmap"
@ -215,36 +216,40 @@ func initReputationService(c *cfg) {
} }
// initialize eigen trust block timer // initialize eigen trust block timer
durationMeter := NewEigenTrustDuration(c.cfgNetmap.wrapper) newEigenTrustIterTimer(c)
newEigenTrustIterTimer(c, durationMeter, func() { addNewEpochAsyncNotificationHandler(
epoch, err := c.cfgNetmap.wrapper.Epoch() c,
func(e event.Event) {
epoch := e.(netmap.NewEpoch).EpochNumber()
log := c.log.With(zap.Uint64("epoch", epoch))
duration, err := c.cfgNetmap.wrapper.EpochDuration()
if err != nil { if err != nil {
c.log.Debug( log.Debug("could not fetch epoch duration", zap.Error(err))
"could not get current epoch",
zap.String("error", err.Error()),
)
return return
} }
iterations, err := c.cfgNetmap.wrapper.EigenTrustIterations()
if err != nil {
log.Debug("could not fetch iteration number", zap.Error(err))
return
}
epochTimer, err := ticker.NewIterationsTicker(duration, iterations, func() {
eigenTrustController.Continue( eigenTrustController.Continue(
eigentrustctrl.ContinuePrm{ eigentrustctrl.ContinuePrm{
Epoch: epoch - 1, Epoch: epoch - 1,
}, },
) )
}) })
addNewEpochAsyncNotificationHandler(
c,
func(e event.Event) {
durationMeter.Update() // recalculate duration of one iteration round
err := c.cfgMorph.eigenTrustTimer.Reset() // start iteration rounds again
if err != nil { if err != nil {
c.log.Warn("can't reset block timer to start eigen trust calculations again", log.Debug("could not create fixed epoch timer", zap.Error(err))
zap.String("error", err.Error())) return
} }
c.cfgMorph.eigenTrustTicker.addEpochTimer(epoch, epochTimer)
}, },
) )
} }

View file

@ -3,83 +3,41 @@ package main
import ( import (
"sync" "sync"
nmClient "github.com/nspcc-dev/neofs-node/pkg/morph/client/netmap" "github.com/nspcc-dev/neofs-node/cmd/neofs-node/reputation/ticker"
"github.com/nspcc-dev/neofs-node/pkg/morph/timer"
) )
type ( type eigenTrustTickers struct {
// EigenTrustDuration is a structure that provides duration of one m sync.Mutex
// eigen trust iteration round in blocks for block timer.
EigenTrustDuration struct {
sync.Mutex
nm *nmClient.Client timers map[uint64]*ticker.IterationsTicker
val uint32
}
)
// NewEigenTrustDuration returns instance of EigenTrustDuration.
func NewEigenTrustDuration(nm *nmClient.Client) *EigenTrustDuration {
return &EigenTrustDuration{
nm: nm,
}
} }
// Value returns number of blocks between two iterations of EigenTrust func (e *eigenTrustTickers) addEpochTimer(epoch uint64, timer *ticker.IterationsTicker) {
// calculation. This value is not changed between `Update` calls. e.m.Lock()
func (e *EigenTrustDuration) Value() (uint32, error) { defer e.m.Unlock()
e.Lock()
defer e.Unlock()
if e.val == 0 { e.timers[epoch] = timer
e.update()
} }
return e.val, nil func (e *eigenTrustTickers) tick() {
} e.m.Lock()
defer e.m.Unlock()
// Update function recalculate duration of EigenTrust iteration based on for epoch, t := range e.timers {
// NeoFS epoch duration and amount of iteration rounds from global config. if !t.Tick() {
func (e *EigenTrustDuration) Update() { delete(e.timers, epoch)
e.Lock()
defer e.Unlock()
e.update()
}
func (e *EigenTrustDuration) update() {
iterationAmount, err := e.nm.EigenTrustIterations()
if err != nil {
return
}
epochDuration, err := e.nm.EpochDuration()
if err != nil {
return
}
e.val = uint32(epochDuration / iterationAmount)
}
func startBlockTimers(c *cfg) {
for i := range c.cfgMorph.blockTimers {
if err := c.cfgMorph.blockTimers[i].Reset(); err != nil {
fatalOnErr(err)
} }
} }
} }
func tickBlockTimers(c *cfg) { func tickBlockTimers(c *cfg) {
for i := range c.cfgMorph.blockTimers { c.cfgMorph.eigenTrustTicker.tick()
c.cfgMorph.blockTimers[i].Tick()
}
} }
func newEigenTrustIterTimer(c *cfg, it *EigenTrustDuration, handler timer.BlockTickHandler) { func newEigenTrustIterTimer(c *cfg) {
c.cfgMorph.eigenTrustTimer = timer.NewBlockTimer( c.cfgMorph.eigenTrustTicker = &eigenTrustTickers{
it.Value, // it is expected to have max 2 concurrent epoch
handler, // in normal mode work
) timers: make(map[uint64]*ticker.IterationsTicker, 2),
}
c.cfgMorph.blockTimers = append(c.cfgMorph.blockTimers, c.cfgMorph.eigenTrustTimer)
} }

View file

@ -17,6 +17,7 @@ type iterContext struct {
eigentrust.EpochIteration eigentrust.EpochIteration
iterationNumber uint32
last bool last bool
} }
@ -42,11 +43,20 @@ func (c *Controller) Continue(prm ContinuePrm) {
iterCtx.Context, iterCtx.cancel = context.WithCancel(context.Background()) iterCtx.Context, iterCtx.cancel = context.WithCancel(context.Background())
iterCtx.EpochIteration.SetEpoch(prm.Epoch) iterCtx.EpochIteration.SetEpoch(prm.Epoch)
iterations, err := c.prm.IterationsProvider.EigenTrustIterations()
if err != nil {
c.opts.log.Error("could not get EigenTrust iteration number",
zap.Error(err),
)
} else {
iterCtx.iterationNumber = uint32(iterations)
}
} else { } else {
iterCtx.cancel() iterCtx.cancel()
} }
iterCtx.last = iterCtx.I() == c.iterationNumber-1 iterCtx.last = iterCtx.I() == iterCtx.iterationNumber-1
err := c.prm.WorkerPool.Submit(func() { err := c.prm.WorkerPool.Submit(func() {
c.prm.DaughtersTrustCalculator.Calculate(iterCtx.iterContext) c.prm.DaughtersTrustCalculator.Calculate(iterCtx.iterContext)
@ -66,16 +76,6 @@ func (c *Controller) Continue(prm ContinuePrm) {
// number as already processed, but in any case it grows up // number as already processed, but in any case it grows up
// In this case and worker pool failure we can mark epoch // In this case and worker pool failure we can mark epoch
delete(c.mCtx, prm.Epoch) delete(c.mCtx, prm.Epoch)
iterations, err := c.prm.IterationsProvider.EigenTrustIterations()
if err != nil {
c.opts.log.Debug(
"could not get iteration numbers",
zap.String("error", err.Error()),
)
} else {
c.iterationNumber = uint32(iterations)
}
} }
} }

View file

@ -46,9 +46,6 @@ type Controller struct {
opts *options opts *options
// Number of iterations
iterationNumber uint32
mtx sync.Mutex mtx sync.Mutex
mCtx map[uint64]*iterContextCancel mCtx map[uint64]*iterContextCancel
} }
@ -75,11 +72,6 @@ func New(prm Prm, opts ...Option) *Controller {
panicOnPrmValue("DaughtersTrustCalculator", prm.DaughtersTrustCalculator) panicOnPrmValue("DaughtersTrustCalculator", prm.DaughtersTrustCalculator)
} }
iterations, err := prm.IterationsProvider.EigenTrustIterations()
if err != nil {
panic(fmt.Errorf("could not init EigenTrust controller: could not get num of iterations: %w", err))
}
o := defaultOpts() o := defaultOpts()
for _, opt := range opts { for _, opt := range opts {
@ -87,7 +79,6 @@ func New(prm Prm, opts ...Option) *Controller {
} }
return &Controller{ return &Controller{
iterationNumber: uint32(iterations),
prm: prm, prm: prm,
opts: o, opts: o,
mCtx: make(map[uint64]*iterContextCancel), mCtx: make(map[uint64]*iterContextCancel),