forked from TrueCloudLab/frostfs-node
[#19] node: Make policer read shards concurrently
* Introduce ListWithMultiCursor that simultaneously reads objects from different shards

Signed-off-by: Airat Arifullin a.arifullin@yadro.com
parent 1f4061c0e2
commit ada081dfd5
2 changed files with 187 additions and 92 deletions
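As context for the diff below (the engine's object-listing code), here is a minimal sketch of how a caller such as the policer might drain the engine with this cursor API: pass a nil cursor to start, feed the returned cursor back in, and stop on ErrEndOfListing. It assumes the engine-level ListWithCursorPrm exposes WithCount/WithCursor setters mirroring the shard-level ones visible in the diff, and that the package lives at pkg/local_object_storage/engine; treat these names as illustrative, not verified.

package main

import (
    "errors"
    "fmt"

    "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/engine"
)

// listAll walks every object address the engine knows about, batch by batch,
// passing the returned cursor back in until ErrEndOfListing is reported.
func listAll(e *engine.StorageEngine) error {
    var cursor *engine.Cursor // nil cursor starts a fresh listing
    for {
        var prm engine.ListWithCursorPrm
        prm.WithCount(100)     // assumed setter, mirrors shard.ListWithCursorPrm.WithCount
        prm.WithCursor(cursor) // assumed setter; resumes from the previous position

        res, err := e.ListWithCursor(prm)
        if errors.Is(err, engine.ErrEndOfListing) {
            return nil // every shard is read out
        }
        if err != nil {
            return err
        }
        for _, addr := range res.AddressList() {
            fmt.Println(addr) // objectcore.AddressWithType
        }
        cursor = res.Cursor() // carries per-shard cursors into the next call
    }
}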
@@ -1,6 +1,7 @@
 package engine
 
 import (
+    "math/rand"
     "sort"
 
     objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
@@ -12,10 +13,38 @@ import (
 // cursor. Use nil cursor object to start listing again.
 var ErrEndOfListing = shard.ErrEndOfListing
 
-// Cursor is a type for continuous object listing.
+// Cursor is a type for continuous object listing. Cursor contains shard IDs to read
+// and shard cursors that contain state from previous read.
 type Cursor struct {
-    shardID     string
-    shardCursor *shard.Cursor
+    current         string
+    shardIDs        map[string]bool
+    shardIDToCursor map[string]*shard.Cursor
+}
+
+func (c *Cursor) getCurrentShardCursor() *shard.Cursor {
+    return c.shardIDToCursor[c.current]
+}
+
+func (c *Cursor) setCurrentShardCursor(sc *shard.Cursor) {
+    c.shardIDToCursor[c.current] = sc
+}
+
+func (c *Cursor) nextShard() bool {
+    var shardsToRead []string
+    for shardID, read := range c.shardIDs {
+        if !read {
+            shardsToRead = append(shardsToRead, shardID)
+        }
+    }
+    if len(shardsToRead) == 0 {
+        return false
+    }
+    c.current = shardsToRead[rand.Intn(len(shardsToRead))]
+    return true
+}
+
+func (c *Cursor) setShardRead(shardID string) {
+    c.shardIDs[shardID] = true
 }
 
 // ListWithCursorPrm contains parameters for ListWithCursor operation.
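The nextShard helper added in this hunk picks uniformly at random among the shards that are not yet read out, which is what spreads consecutive batches across shards. A self-contained sketch of the same selection technique, with hypothetical names, runnable outside the engine:

package main

import (
    "fmt"
    "math/rand"
)

// pickUnread mirrors Cursor.nextShard from the hunk above: collect the IDs
// whose "read" flag is still false, then choose one uniformly at random.
// An empty result means every shard has been read out.
func pickUnread(read map[string]bool) string {
    var candidates []string
    for id, done := range read {
        if !done {
            candidates = append(candidates, id)
        }
    }
    if len(candidates) == 0 {
        return ""
    }
    return candidates[rand.Intn(len(candidates))]
}

func main() {
    shards := map[string]bool{"shard-a": false, "shard-b": false, "shard-c": true}
    // Prints shard-a or shard-b with equal probability; shard-c is read out.
    fmt.Println(pickUnread(shards))
}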
@@ -57,65 +86,69 @@ func (l ListWithCursorRes) Cursor() *Cursor {
 // Does not include inhumed objects. Use cursor value from the response
 // for consecutive requests.
 //
+// If count param is big enough, then the method reads objects from different shards
+// by portions. In this case shards are chosen randomly, if they're not read out yet.
+//
+// Adding a shard between ListWithCursor does not invalidate the cursor but new shard
+// won't be listed.
+// Removing a shard between ListWithCursor leads to the undefined behavior
+// (e.g. usage of the objects from the removed shard).
+//
 // Returns ErrEndOfListing if there are no more objects to return or count
 // parameter set to zero.
 func (e *StorageEngine) ListWithCursor(prm ListWithCursorPrm) (ListWithCursorRes, error) {
     result := make([]objectcore.AddressWithType, 0, prm.count)
 
-    // 1. Get available shards and sort them.
-    e.mtx.RLock()
-    shardIDs := make([]string, 0, len(e.shards))
-    for id := range e.shards {
-        shardIDs = append(shardIDs, id)
-    }
-    e.mtx.RUnlock()
-
-    if len(shardIDs) == 0 {
-        return ListWithCursorRes{}, ErrEndOfListing
-    }
-
-    sort.Slice(shardIDs, func(i, j int) bool {
-        return shardIDs[i] < shardIDs[j]
-    })
-
-    // 2. Prepare cursor object.
+    // Set initial cursors
     cursor := prm.cursor
     if cursor == nil {
-        cursor = &Cursor{shardID: shardIDs[0]}
+        shardIDs := getSortedShardIDs(e)
+        if len(shardIDs) == 0 {
+            return ListWithCursorRes{}, ErrEndOfListing
+        }
+        cursor = newCursor(shardIDs)
     }
 
-    // 3. Iterate over available shards. Skip unavailable shards.
-    for i := range shardIDs {
+    const (
+        splitShardCountLimit = 100
+        shardsNum            = 4
+    )
+
+    batchSize := prm.count
+    if batchSize >= splitShardCountLimit {
+        batchSize /= shardsNum
+    }
+
+    for cursor.nextShard() {
         if len(result) >= int(prm.count) {
             break
         }
-
-        if shardIDs[i] < cursor.shardID {
-            continue
-        }
+        curr := cursor.current
 
         e.mtx.RLock()
-        shardInstance, ok := e.shards[shardIDs[i]]
+        shardInstance, ok := e.shards[curr]
         e.mtx.RUnlock()
         if !ok {
+            cursor.setShardRead(curr)
             continue
         }
 
-        count := uint32(int(prm.count) - len(result))
+        count := prm.count - uint32(len(result))
+        if count > batchSize {
+            count = batchSize
+        }
 
         var shardPrm shard.ListWithCursorPrm
         shardPrm.WithCount(count)
-        if shardIDs[i] == cursor.shardID {
-            shardPrm.WithCursor(cursor.shardCursor)
-        }
+        shardPrm.WithCursor(cursor.getCurrentShardCursor())
 
         res, err := shardInstance.ListWithCursor(shardPrm)
         if err != nil {
+            cursor.setShardRead(curr)
            continue
         }
 
         result = append(result, res.AddressList()...)
-        cursor.shardCursor = res.Cursor()
-        cursor.shardID = shardIDs[i]
+        cursor.setCurrentShardCursor(res.Cursor())
     }
 
     if len(result) == 0 {
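The splitShardCountLimit and shardsNum constants in this hunk cap each shard read at a quarter of the requested count once the request reaches 100 objects, so a single ListWithCursor call touches several shards instead of draining one. A small sketch reproducing that arithmetic, using the constants from the diff:

package main

import "fmt"

// Constants copied from the diff: requests of at least splitShardCountLimit
// objects are split so that a single shard serves at most a quarter of them.
const (
    splitShardCountLimit = 100
    shardsNum            = 4
)

func batchSize(count uint32) uint32 {
    b := count
    if b >= splitShardCountLimit {
        b /= shardsNum
    }
    return b
}

func main() {
    fmt.Println(batchSize(40))  // 40: small requests are not split
    fmt.Println(batchSize(400)) // 100: a 400-object request needs at least 4 shard reads
}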
@@ -127,3 +160,23 @@ func (e *StorageEngine) ListWithCursor(prm ListWithCursorPrm) (ListWithCursorRes, error) {
         cursor: cursor,
     }, nil
 }
+
+func getSortedShardIDs(e *StorageEngine) []string {
+    e.mtx.RLock()
+    shardIDs := make([]string, 0, len(e.shards))
+    for id := range e.shards {
+        shardIDs = append(shardIDs, id)
+    }
+    e.mtx.RUnlock()
+    sort.Strings(shardIDs)
+    return shardIDs
+}
+
+func newCursor(shardIDs []string) *Cursor {
+    shardIDsMap := make(map[string]bool)
+    shardIDToCursor := make(map[string]*shard.Cursor)
+    for _, shardID := range shardIDs {
+        shardIDsMap[shardID] = false
+    }
+    return &Cursor{shardIDs: shardIDsMap, shardIDToCursor: shardIDToCursor}
+}
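Since newCursor, nextShard, and setShardRead are unexported, they can only be exercised from within package engine. A hypothetical test-style sketch (not part of this commit; the second changed file, presumably the tests, is not shown above) of the contract the helpers establish: every shard ID is visited exactly once, then nextShard reports false.

package engine

import "testing"

// A sketch of the behavior the new helpers guarantee: nextShard keeps
// returning shards whose flag in shardIDs is false, so marking the current
// shard with setShardRead after each pass visits every ID exactly once.
func TestCursorVisitsEachShardOnce(t *testing.T) {
    c := newCursor([]string{"a", "b", "c"})

    seen := make(map[string]bool)
    for c.nextShard() {
        if seen[c.current] {
            t.Fatalf("shard %s returned twice", c.current)
        }
        seen[c.current] = true
        c.setShardRead(c.current)
    }
    if len(seen) != 3 {
        t.Fatalf("visited %d shards, want 3", len(seen))
    }
}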