From 96449d803a91b258e42bed8a8e1fb8970cbe3383 Mon Sep 17 00:00:00 2001 From: Anna Shaleva Date: Wed, 30 Aug 2023 14:02:42 +0300 Subject: [PATCH] native: rework native NEO next block validators cache Blockchain passes his own pure unwrapped DAO to (*Blockchain).ComputeNextBlockValidators which means that native RW NEO cache structure stored inside this DAO can be modified by anyone who uses exported ComputeNextBlockValidators Blockchain API, and technically it's valid, and we should allow this, because it's the only purpose of `validators` caching. However, at the same time some RPC server is allowed to request a subsequent wrapped DAO for some test invocation. It means that descendant wrapped DAO eventually will request RW NEO cache and try to `Copy()` the underlying's DAO cache which is in direct use of ComputeNextBlockValidators. Here's the race: ComputeNextBlockValidators called by Consensus service tries to update cached `validators` value, and descendant wrapped DAO created by the RPC server tries to copy DAO's native cache and read the cached `validators` value. So the problem is that native cache not designated to handle concurrent access between parent DAO layer and derived (wrapped) DAO layer. I've carefully reviewed all the usages of native cache, and turns out that the described situation is the only place where parent DAO is used directly to modify its cache concurrently with some descendant DAO that is trying to access the cache. All other usages of native cache (not only NEO, but also all other native contrcts) strictly rely on the hierarchical DAO structure and don't try to perform these concurrent operations between DAO layers. There's also persist operation, but it keeps cache RW lock taken, so it doesn't have this problem as far. Thus, in this commit we rework NEO's `validators` cache value so that it always contain the relevant list for upper Blockchain's DAO and is updated every PostPersist (if needed). Note: we must be very careful extending our native cache in the future, every usage of native cache must be checked against the described problem. Close #2989. Signed-off-by: Anna Shaleva --- pkg/consensus/consensus.go | 6 +- pkg/core/blockchain.go | 14 +++-- pkg/core/blockchain_core_test.go | 5 +- pkg/core/native/native_neo.go | 78 ++++++++++++++++++------- pkg/core/native/native_test/neo_test.go | 8 +-- 5 files changed, 76 insertions(+), 35 deletions(-) diff --git a/pkg/consensus/consensus.go b/pkg/consensus/consensus.go index 9a29f8c2d..8120e1d16 100644 --- a/pkg/consensus/consensus.go +++ b/pkg/consensus/consensus.go @@ -48,7 +48,7 @@ type Ledger interface { GetNextBlockValidators() ([]*keys.PublicKey, error) GetStateRoot(height uint32) (*state.MPTRoot, error) GetTransaction(util.Uint256) (*transaction.Transaction, uint32, error) - GetValidators() ([]*keys.PublicKey, error) + GetValidators() []*keys.PublicKey PoolTx(t *transaction.Transaction, pools ...*mempool.Pool) error SubscribeForBlocks(ch chan *coreb.Block) UnsubscribeFromBlocks(ch chan *coreb.Block) @@ -679,7 +679,7 @@ func (s *service) getValidators(txes ...block.Transaction) []crypto.PublicKey { if txes == nil { pKeys, err = s.Chain.GetNextBlockValidators() } else { - pKeys, err = s.Chain.GetValidators() + pKeys = s.Chain.GetValidators() } if err != nil { s.log.Error("error while trying to get validators", zap.Error(err)) @@ -725,7 +725,7 @@ func (s *service) newBlockFromContext(ctx *dbft.Context) block.Block { var err error cfg := s.Chain.GetConfig().ProtocolConfiguration if cfg.ShouldUpdateCommitteeAt(ctx.BlockIndex) { - validators, err = s.Chain.GetValidators() + validators = s.Chain.GetValidators() } else { validators, err = s.Chain.GetNextBlockValidators() } diff --git a/pkg/core/blockchain.go b/pkg/core/blockchain.go index c2f3e9e00..a75fdad95 100644 --- a/pkg/core/blockchain.go +++ b/pkg/core/blockchain.go @@ -2697,12 +2697,18 @@ func (bc *Blockchain) GetCommittee() (keys.PublicKeys, error) { return pubs, nil } -// GetValidators returns current validators. -func (bc *Blockchain) GetValidators() ([]*keys.PublicKey, error) { - return bc.contracts.NEO.ComputeNextBlockValidators(bc.blockHeight, bc.dao) +// GetValidators returns current validators. Validators list returned from this +// method may be updated every block (depending on register/unregister/vote +// calls to NeoToken contract), not only once per (committee size) number of +// blocks. +func (bc *Blockchain) GetValidators() []*keys.PublicKey { + return bc.contracts.NEO.ComputeNextBlockValidators(bc.dao) } -// GetNextBlockValidators returns next block validators. +// GetNextBlockValidators returns next block validators. Validators list returned +// from this method is the sorted top NumOfCNs number of public keys from the +// current committee, thus, validators list returned from this method is being +// updated once per (committee size) number of blocks. func (bc *Blockchain) GetNextBlockValidators() ([]*keys.PublicKey, error) { return bc.contracts.NEO.GetNextBlockValidatorsInternal(bc.dao), nil } diff --git a/pkg/core/blockchain_core_test.go b/pkg/core/blockchain_core_test.go index edfeabc25..e17ee7df9 100644 --- a/pkg/core/blockchain_core_test.go +++ b/pkg/core/blockchain_core_test.go @@ -261,8 +261,7 @@ func TestChainWithVolatileNumOfValidators(t *testing.T) { priv0 := testchain.PrivateKeyByID(0) - vals, err := bc.GetValidators() - require.NoError(t, err) + vals := bc.GetValidators() script, err := smartcontract.CreateDefaultMultiSigRedeemScript(vals) require.NoError(t, err) curWit := transaction.Witness{ @@ -280,7 +279,7 @@ func TestChainWithVolatileNumOfValidators(t *testing.T) { } // Mimic consensus. if bc.config.ShouldUpdateCommitteeAt(uint32(i)) { - vals, err = bc.GetValidators() + vals = bc.GetValidators() } else { vals, err = bc.GetNextBlockValidators() } diff --git a/pkg/core/native/native_neo.go b/pkg/core/native/native_neo.go index 6c8742dbb..ce5ddd5bc 100644 --- a/pkg/core/native/native_neo.go +++ b/pkg/core/native/native_neo.go @@ -50,7 +50,12 @@ type NeoCache struct { votesChanged bool nextValidators keys.PublicKeys - validators keys.PublicKeys + // validators contains cached next block validators. This list is updated every + // PostPersist if candidates votes ratio has been changed or register/unregister + // operation was performed within the persisted block. The updated value is + // being persisted following the standard layered DAO persist rules, so that + // external users will always get the proper value with upper Blockchain's DAO. + validators keys.PublicKeys // committee contains cached committee members and their votes. // It is updated once in a while depending on committee size // (every 28 blocks for mainnet). It's value @@ -269,6 +274,7 @@ func (n *NEO) Initialize(ic *interop.Context) error { cache := &NeoCache{ gasPerVoteCache: make(map[string]big.Int), votesChanged: true, + validators: nil, // will be updated in genesis PostPersist. } // We need cache to be present in DAO before the subsequent call to `mint`. @@ -325,6 +331,12 @@ func (n *NEO) InitializeCache(blockHeight uint32, d *dao.Simple) error { cache.gasPerBlock = n.getSortedGASRecordFromDAO(d) cache.registerPrice = getIntWithKey(n.ID, d, []byte{prefixRegisterPrice}) + numOfCNs := n.cfg.GetNumOfCNs(blockHeight + 1) + err := n.updateCachedValidators(d, cache, blockHeight, numOfCNs) + if err != nil { + return fmt.Errorf("failed to update next block validators cache: %w", err) + } + d.SetCache(n.ID, cache) return nil } @@ -353,6 +365,20 @@ func (n *NEO) updateCache(cache *NeoCache, cvs keysWithVotes, blockHeight uint32 return nil } +// updateCachedValidators sets validators cache that will be used by external users +// to retrieve next block validators list. Thus, it stores the list of validators +// computed using the currently persisted block state. +func (n *NEO) updateCachedValidators(d *dao.Simple, cache *NeoCache, blockHeight uint32, numOfCNs int) error { + result, _, err := n.computeCommitteeMembers(blockHeight, d) + if err != nil { + return fmt.Errorf("failed to compute committee members: %w", err) + } + result = result[:numOfCNs] + sort.Sort(result) + cache.validators = result + return nil +} + func (n *NEO) updateCommittee(cache *NeoCache, ic *interop.Context) error { if !cache.votesChanged { // We need to put in storage anyway, as it affects dumps @@ -399,6 +425,7 @@ func (n *NEO) PostPersist(ic *interop.Context) error { committeeReward := new(big.Int).Mul(gas, bigCommitteeRewardRatio) n.GAS.mint(ic, pubs[index].GetScriptHash(), committeeReward.Div(committeeReward, big100), false) + var isCacheRW bool if n.cfg.ShouldUpdateCommitteeAt(ic.Block.Index) { var voterReward = new(big.Int).Set(bigVoterRewardRatio) voterReward.Mul(voterReward, gas) @@ -408,9 +435,8 @@ func (n *NEO) PostPersist(ic *interop.Context) error { voterReward.Div(voterReward, big100) var ( - cs = cache.committee - isCacheRW bool - key = make([]byte, 34) + cs = cache.committee + key = make([]byte, 34) ) for i := range cs { if cs[i].Votes.Sign() > 0 { @@ -437,6 +463,21 @@ func (n *NEO) PostPersist(ic *interop.Context) error { } } } + // Update next block validators cache for external users. + var ( + h = ic.Block.Index // consider persisting block as stored to get _next_ block validators + numOfCNs = n.cfg.GetNumOfCNs(h + 1) + ) + if cache.validators == nil || numOfCNs != len(cache.validators) { + if !isCacheRW { + cache = ic.DAO.GetRWCache(n.ID).(*NeoCache) + } + err := n.updateCachedValidators(ic.DAO, cache, h, numOfCNs) + if err != nil { + return fmt.Errorf("failed to update next block validators cache: %w", err) + } + } + return nil } @@ -1042,24 +1083,21 @@ func (n *NEO) getAccountState(ic *interop.Context, args []stackitem.Item) stacki return item } -// ComputeNextBlockValidators returns an actual list of current validators. -func (n *NEO) ComputeNextBlockValidators(blockHeight uint32, d *dao.Simple) (keys.PublicKeys, error) { - numOfCNs := n.cfg.GetNumOfCNs(blockHeight + 1) - // Most of the time it should be OK with RO cache, thus try to retrieve - // validators without RW cache creation to avoid cached values copying. +// ComputeNextBlockValidators computes an actual list of current validators that is +// relevant for the latest persisted block and based on the latest changes made by +// register/unregister/vote calls. +// Note: this method isn't actually "computes" new committee list and calculates +// new validators list from it. Instead, it uses cache, but the cache itself is +// updated every block. +func (n *NEO) ComputeNextBlockValidators(d *dao.Simple) keys.PublicKeys { + // It should always be OK with RO cache if using lower-layered DAO with proper + // cache set. cache := d.GetROCache(n.ID).(*NeoCache) - if vals := cache.validators; vals != nil && numOfCNs == len(vals) { - return vals.Copy(), nil + if vals := cache.validators; vals != nil { + return vals.Copy() } - cache = d.GetRWCache(n.ID).(*NeoCache) - result, _, err := n.computeCommitteeMembers(blockHeight, d) - if err != nil { - return nil, err - } - result = result[:numOfCNs] - sort.Sort(result) - cache.validators = result - return result, nil + // It's a program error not to have the right value in lower cache. + panic(fmt.Errorf("unexpected validators cache content: %v", cache.validators)) } func (n *NEO) getCommittee(ic *interop.Context, _ []stackitem.Item) stackitem.Item { diff --git a/pkg/core/native/native_test/neo_test.go b/pkg/core/native/native_test/neo_test.go index cbaefa883..dcb9ac415 100644 --- a/pkg/core/native/native_test/neo_test.go +++ b/pkg/core/native/native_test/neo_test.go @@ -138,8 +138,7 @@ func TestNEO_Vote(t *testing.T) { require.NoError(t, err) standBySorted = standBySorted[:validatorsCount] sort.Sort(standBySorted) - pubs, err := e.Chain.GetValidators() - require.NoError(t, err) + pubs := e.Chain.GetValidators() require.Equal(t, standBySorted, keys.PublicKeys(pubs)) // voters vote for candidates. The aim of this test is to check if voting @@ -176,7 +175,7 @@ func TestNEO_Vote(t *testing.T) { } // We still haven't voted enough validators in. - pubs, err = e.Chain.GetValidators() + pubs = e.Chain.GetValidators() require.NoError(t, err) require.Equal(t, standBySorted, keys.PublicKeys(pubs)) @@ -267,8 +266,7 @@ func TestNEO_Vote(t *testing.T) { advanceChain(t) - pubs, err = e.Chain.GetValidators() - require.NoError(t, err) + pubs = e.Chain.GetValidators() for i := range pubs { require.NotEqual(t, candidates[0], pubs[i]) require.NotEqual(t, candidates[len(candidates)-1], pubs[i])