[#1337] blobovniczatree: Add rebuild by fill percent

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-08-29 11:34:18 +03:00
parent 007827255e
commit d508da8397
9 changed files with 470 additions and 119 deletions

View file

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
@ -59,7 +60,7 @@ func (b *Blobovniczas) Rebuild(ctx context.Context, prm common.RebuildPrm) (comm
b.log.Debug(logs.BlobovniczaTreeCompletedPreviousRebuildSuccess)
b.log.Debug(logs.BlobovniczaTreeCollectingDBToRebuild)
dbsToMigrate, err := b.getDBsToRebuild(ctx)
dbsToMigrate, err := b.getDBsToRebuild(ctx, prm.Action)
if err != nil {
b.log.Warn(logs.BlobovniczaTreeCollectingDBToRebuildFailed, zap.Error(err))
success = false
@ -93,7 +94,33 @@ func (b *Blobovniczas) migrateDBs(ctx context.Context, dbs []string, prm common.
return res, nil
}
func (b *Blobovniczas) getDBsToRebuild(ctx context.Context) ([]string, error) {
// getDBsToRebuild collects the paths of the DBs that have to be rebuilt
// according to action.
//
// When action.SchemaChange is set, the DBs reported by
// selectDBsDoNotMatchSchema are selected; when action.FillPercent is set, the
// DBs reported by selectDBsDoNotMatchFillPercent for action.FillPercentValue
// are added. A path reported by both selectors appears only once. The order of
// the returned paths is unspecified because it follows map iteration.
func (b *Blobovniczas) getDBsToRebuild(ctx context.Context, action common.RebuildAction) ([]string, error) {
	selected := make(map[string]struct{})

	if action.SchemaChange {
		bySchema, err := b.selectDBsDoNotMatchSchema(ctx)
		if err != nil {
			return nil, err
		}
		selected = bySchema
	}

	if action.FillPercent {
		byFill, err := b.selectDBsDoNotMatchFillPercent(ctx, action.FillPercentValue)
		if err != nil {
			return nil, err
		}
		// Merge into the same set so that duplicates collapse.
		for dbPath := range byFill {
			selected[dbPath] = struct{}{}
		}
	}

	paths := make([]string, 0, len(selected))
	for dbPath := range selected {
		paths = append(paths, dbPath)
	}
	return paths, nil
}
func (b *Blobovniczas) selectDBsDoNotMatchSchema(ctx context.Context) (map[string]struct{}, error) {
dbsToMigrate := make(map[string]struct{})
if err := b.iterateExistingDBPaths(ctx, func(s string) (bool, error) {
dbsToMigrate[s] = struct{}{}
@ -107,13 +134,69 @@ func (b *Blobovniczas) getDBsToRebuild(ctx context.Context) ([]string, error) {
}); err != nil {
return nil, err
}
result := make([]string, 0, len(dbsToMigrate))
for db := range dbsToMigrate {
result = append(result, db)
return dbsToMigrate, nil
}
// selectDBsDoNotMatchFillPercent returns the set of DB paths (relative to the
// tree root path) whose fill percent is lower than target.
//
// target must be in (0; 100], otherwise an error is returned. Every deepest
// level directory is scanned; sub-directories and files with rebuildSuffix are
// ignored. Within a directory, the DB with the highest index is never
// selected, because it could be the active one. A directory that does not
// exist (non initialized tree) is silently skipped.
//
// The scan stops with ctx.Err() as soon as ctx is cancelled.
func (b *Blobovniczas) selectDBsDoNotMatchFillPercent(ctx context.Context, target int) (map[string]struct{}, error) {
	if target <= 0 || target > 100 {
		return nil, fmt.Errorf("invalid fill percent value %d: must be (0; 100]", target)
	}

	// dbFile keeps the parsed index next to the file name so that every name
	// is parsed only once.
	type dbFile struct {
		name string
		idx  uint64
	}

	result := make(map[string]struct{})
	if err := b.iterateDeepest(ctx, oid.Address{}, func(lvlPath string) (bool, error) {
		entries, err := os.ReadDir(filepath.Join(b.rootPath, lvlPath))
		if errors.Is(err, os.ErrNotExist) { // non initialized tree
			return false, nil
		}
		if err != nil {
			return false, err
		}

		// The DB with maxIdx could be the active one, so it must not be rebuilt.
		var maxIdx uint64
		dbs := make([]dbFile, 0, len(entries))
		for _, e := range entries {
			if e.IsDir() || strings.HasSuffix(e.Name(), rebuildSuffix) {
				continue
			}
			idx := u64FromHexString(e.Name())
			maxIdx = max(idx, maxIdx)
			dbs = append(dbs, dbFile{name: e.Name(), idx: idx})
		}

		for _, db := range dbs {
			if db.idx == maxIdx {
				continue
			}

			// Every check opens a DB, so give up promptly on cancellation.
			select {
			case <-ctx.Done():
				return false, ctx.Err()
			default:
			}

			path := filepath.Join(lvlPath, db.name)
			resettlementRequired, err := b.fillPercentIsLow(path, target)
			if err != nil {
				return false, err
			}
			if resettlementRequired {
				result[path] = struct{}{}
			}
		}
		return false, nil
	}); err != nil {
		return nil, err
	}
	return result, nil
}
// fillPercentIsLow opens the DB located at path and reports whether its fill
// percent is strictly less than target. The DB is closed before returning.
// An error from opening the DB is returned as is.
func (b *Blobovniczas) fillPercentIsLow(path string, target int) (bool, error) {
	shared := b.getBlobovnicza(path)

	db, openErr := shared.Open()
	if openErr != nil {
		return false, openErr
	}
	defer shared.Close()

	isLow := db.FillPercent() < target
	return isLow, nil
}
func (b *Blobovniczas) rebuildDB(ctx context.Context, path string, meta common.MetaStorage, limiter common.ConcurrentWorkersLimiter) (uint64, error) {
shDB := b.getBlobovnicza(path)
blz, err := shDB.Open()

View file

@ -15,7 +15,7 @@ import (
"golang.org/x/sync/errgroup"
)
func TestBlobovniczaTreeRebuild(t *testing.T) {
func TestBlobovniczaTreeSchemaRebuild(t *testing.T) {
t.Parallel()
t.Run("width increased", func(t *testing.T) {
@ -39,6 +39,197 @@ func TestBlobovniczaTreeRebuild(t *testing.T) {
})
}
// TestBlobovniczaTreeFillPercentRebuild covers Rebuild with the FillPercent
// action on a tree that has a single directory:
//   - DBs that are filled enough are left untouched;
//   - a single DB is left untouched even when its fill percent is below the
//     requested value;
//   - half-emptied DBs are rebuilt and the objects stay readable.
func TestBlobovniczaTreeFillPercentRebuild(t *testing.T) {
	t.Parallel()

	t.Run("no rebuild by fill percent", func(t *testing.T) {
		t.Parallel()

		rootDir := t.TempDir()
		ctx := context.Background()
		tree := NewBlobovniczaTree(
			ctx,
			WithLogger(test.NewLogger(t)),
			WithObjectSizeLimit(64*1024),
			WithBlobovniczaShallowWidth(1), // single directory
			WithBlobovniczaShallowDepth(1),
			WithRootPath(rootDir),
			WithBlobovniczaSize(100*1024), // 100 KB limit for each blobovnicza
			WithWaitBeforeDropDB(0),
			WithOpenedCacheSize(1000),
			WithMoveBatchSize(3))
		require.NoError(t, tree.Open(mode.ComponentReadWrite))
		require.NoError(t, tree.Init())

		ids := make(map[oid.Address][]byte)
		for i := 0; i < 100; i++ {
			obj := blobstortest.NewObject(64 * 1024) // 64KB object
			payload, err := obj.Marshal()
			require.NoError(t, err)

			addr := object.AddressOf(obj)
			putRes, err := tree.Put(ctx, common.PutPrm{
				Address: addr,
				RawData: payload,
			})
			require.NoError(t, err)
			ids[addr] = putRes.StorageID
		}

		meta := &storageIDUpdateStub{
			storageIDs: ids,
			guard:      &sync.Mutex{},
		}
		rebuildRes, err := tree.Rebuild(ctx, common.RebuildPrm{
			MetaStorage:   meta,
			WorkerLimiter: &rebuildLimiterStub{},
			Action: common.RebuildAction{
				SchemaChange:     false,
				FillPercent:      true,
				FillPercentValue: 60,
			},
		})
		require.NoError(t, err)

		// Nothing must be moved, removed or updated.
		require.False(t, rebuildRes.ObjectsMoved > 0 || rebuildRes.FilesRemoved > 0 || meta.updatedCount > 0)

		for addr, id := range ids {
			_, err := tree.Get(ctx, common.GetPrm{
				Address:   addr,
				StorageID: id,
			})
			require.NoError(t, err)
		}

		require.NoError(t, tree.Close())
	})

	t.Run("no rebuild single db", func(t *testing.T) {
		t.Parallel()

		rootDir := t.TempDir()
		ctx := context.Background()
		tree := NewBlobovniczaTree(
			ctx,
			WithLogger(test.NewLogger(t)),
			WithObjectSizeLimit(64*1024),
			WithBlobovniczaShallowWidth(1), // single directory
			WithBlobovniczaShallowDepth(1),
			WithRootPath(rootDir),
			WithBlobovniczaSize(100*1024), // 100 KB soft limit for each blobovnicza
			WithWaitBeforeDropDB(0),
			WithOpenedCacheSize(1000),
			WithMoveBatchSize(3))
		require.NoError(t, tree.Open(mode.ComponentReadWrite))
		require.NoError(t, tree.Init())

		ids := make(map[oid.Address][]byte)
		obj := blobstortest.NewObject(64 * 1024) // 64KB object
		payload, err := obj.Marshal()
		require.NoError(t, err)

		addr := object.AddressOf(obj)
		putRes, err := tree.Put(ctx, common.PutPrm{
			Address: addr,
			RawData: payload,
		})
		require.NoError(t, err)
		ids[addr] = putRes.StorageID

		meta := &storageIDUpdateStub{
			storageIDs: ids,
			guard:      &sync.Mutex{},
		}
		rebuildRes, err := tree.Rebuild(ctx, common.RebuildPrm{
			MetaStorage:   meta,
			WorkerLimiter: &rebuildLimiterStub{},
			Action: common.RebuildAction{
				SchemaChange:     false,
				FillPercent:      true,
				FillPercentValue: 90, // 64KB / 100KB = 64%
			},
		})
		require.NoError(t, err)

		// Nothing must be moved, removed or updated.
		require.False(t, rebuildRes.ObjectsMoved > 0 || rebuildRes.FilesRemoved > 0 || meta.updatedCount > 0)

		for addr, id := range ids {
			_, err := tree.Get(ctx, common.GetPrm{
				Address:   addr,
				StorageID: id,
			})
			require.NoError(t, err)
		}

		require.NoError(t, tree.Close())
	})

	t.Run("rebuild by fill percent", func(t *testing.T) {
		t.Parallel()

		rootDir := t.TempDir()
		ctx := context.Background()
		tree := NewBlobovniczaTree(
			ctx,
			WithLogger(test.NewLogger(t)),
			WithObjectSizeLimit(64*1024),
			WithBlobovniczaShallowWidth(1), // single directory
			WithBlobovniczaShallowDepth(1),
			WithRootPath(rootDir),
			WithBlobovniczaSize(100*1024), // 100 KB limit for each blobovnicza
			WithWaitBeforeDropDB(0),
			WithOpenedCacheSize(1000),
			WithMoveBatchSize(3))
		require.NoError(t, tree.Open(mode.ComponentReadWrite))
		require.NoError(t, tree.Init())

		ids := make(map[oid.Address][]byte)
		toDelete := make(map[oid.Address][]byte)
		for i := 0; i < 100; i++ { // 2 objects for one blobovnicza, so 50 DBs total will be created
			obj := blobstortest.NewObject(64 * 1024)
			payload, err := obj.Marshal()
			require.NoError(t, err)

			addr := object.AddressOf(obj)
			putRes, err := tree.Put(ctx, common.PutPrm{
				Address: addr,
				RawData: payload,
			})
			require.NoError(t, err)
			ids[addr] = putRes.StorageID

			// Every second object is removed later to leave DBs half-filled.
			if i%2 == 1 {
				toDelete[addr] = putRes.StorageID
			}
		}

		for addr, id := range toDelete {
			_, err := tree.Delete(ctx, common.DeletePrm{
				Address:   addr,
				StorageID: id,
			})
			require.NoError(t, err)
		}

		meta := &storageIDUpdateStub{
			storageIDs: ids,
			guard:      &sync.Mutex{},
		}
		rebuildRes, err := tree.Rebuild(ctx, common.RebuildPrm{
			MetaStorage:   meta,
			WorkerLimiter: &rebuildLimiterStub{},
			Action: common.RebuildAction{
				SchemaChange:     false,
				FillPercent:      true,
				FillPercentValue: 80,
			},
		})
		require.NoError(t, err)
		require.Equal(t, uint64(49), rebuildRes.FilesRemoved)
		require.Equal(t, uint64(49), rebuildRes.ObjectsMoved) // 49 DBs with 1 object each
		require.Equal(t, uint64(49), meta.updatedCount)

		for addr, id := range ids {
			if _, deleted := toDelete[addr]; !deleted {
				_, err := tree.Get(ctx, common.GetPrm{
					Address:   addr,
					StorageID: id,
				})
				require.NoError(t, err)
			}
		}

		require.NoError(t, tree.Close())
	})
}
func TestBlobovniczaTreeRebuildLargeObject(t *testing.T) {
t.Parallel()
@ -92,6 +283,7 @@ func TestBlobovniczaTreeRebuildLargeObject(t *testing.T) {
var rPrm common.RebuildPrm
rPrm.MetaStorage = metaStub
rPrm.WorkerLimiter = &rebuildLimiterStub{}
rPrm.Action = common.RebuildAction{SchemaChange: true}
rRes, err := b.Rebuild(context.Background(), rPrm)
require.NoError(t, err)
dataMigrated := rRes.ObjectsMoved > 0 || rRes.FilesRemoved > 0 || metaStub.updatedCount > 0
@ -180,6 +372,7 @@ func testBlobovniczaTreeRebuildHelper(t *testing.T, sourceDepth, sourceWidth, ta
var rPrm common.RebuildPrm
rPrm.MetaStorage = metaStub
rPrm.WorkerLimiter = &rebuildLimiterStub{}
rPrm.Action = common.RebuildAction{SchemaChange: true}
rRes, err := b.Rebuild(context.Background(), rPrm)
require.NoError(t, err)
dataMigrated := rRes.ObjectsMoved > 0 || rRes.FilesRemoved > 0 || metaStub.updatedCount > 0

View file

@ -11,9 +11,17 @@ type RebuildRes struct {
FilesRemoved uint64
}
// RebuildAction selects the criteria by which a storage picks data to rebuild.
// Several criteria may be enabled at once.
type RebuildAction struct {
// SchemaChange enables the schema-based selection; in the blobovnicza tree
// this picks the DBs reported by selectDBsDoNotMatchSchema.
SchemaChange bool
// FillPercent enables the selection of DBs whose fill percent is below
// FillPercentValue.
FillPercent bool
// FillPercentValue is the threshold used when FillPercent is set. The
// blobovnicza tree rejects values outside (0; 100].
FillPercentValue int
}
// RebuildPrm groups the parameters of a storage Rebuild call.
type RebuildPrm struct {
// MetaStorage is handed over to the rebuild routine.
// NOTE(review): presumably used to update object storage IDs after objects
// are moved — confirm against the MetaStorage interface.
MetaStorage MetaStorage
// WorkerLimiter is handed over to the rebuild routine.
// NOTE(review): presumably bounds the number of concurrent rebuild workers —
// confirm against ConcurrentWorkersLimiter.
WorkerLimiter ConcurrentWorkersLimiter
// Action selects which rebuild criteria are applied.
Action RebuildAction
}
type MetaStorage interface {

View file

@ -18,13 +18,14 @@ type ConcurrentWorkersLimiter interface {
ReleaseWorkSlot()
}
func (b *BlobStor) Rebuild(ctx context.Context, upd StorageIDUpdate, limiter ConcurrentWorkersLimiter) error {
func (b *BlobStor) Rebuild(ctx context.Context, upd StorageIDUpdate, limiter ConcurrentWorkersLimiter, action common.RebuildAction) error {
var summary common.RebuildRes
var rErr error
for _, storage := range b.storage {
res, err := storage.Storage.Rebuild(ctx, common.RebuildPrm{
MetaStorage: upd,
WorkerLimiter: limiter,
Action: action,
})
summary.FilesRemoved += res.FilesRemoved
summary.ObjectsMoved += res.ObjectsMoved