diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 78bcd0c0e..97b189529 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -543,4 +543,5 @@ const ( WritecacheSealCompletedAsync = "writecache seal completed successfully" FailedToSealWritecacheAsync = "failed to seal writecache async" WritecacheShrinkSkippedNotEmpty = "writecache shrink skipped: database is not empty" + BlobovniczatreeFailedToRemoveRebuildTempFile = "failed to remove rebuild temp file" ) diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go b/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go index 952203367..c909113c7 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go @@ -135,7 +135,7 @@ func getBlobovniczaMaxIndex(directory string) (bool, uint64, error) { var hasDBs bool var maxIdx uint64 for _, e := range entries { - if e.IsDir() { + if e.IsDir() || strings.HasSuffix(e.Name(), rebuildSuffix) { continue } hasDBs = true diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/control.go b/pkg/local_object_storage/blobstor/blobovniczatree/control.go index a31e9d6cb..681cf876c 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/control.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/control.go @@ -2,6 +2,7 @@ package blobovniczatree import ( "context" + "strings" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" @@ -41,10 +42,9 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error { eg, egCtx := errgroup.WithContext(ctx) eg.SetLimit(b.blzInitWorkerCount) - visited := make(map[string]struct{}) - err = b.iterateExistingDBPaths(egCtx, func(p string) (bool, error) { - visited[p] = struct{}{} + err = b.iterateIncompletedRebuildDBPaths(egCtx, func(p string) (bool, error) { eg.Go(func() error { + p = strings.TrimSuffix(p, rebuildSuffix) shBlz := b.getBlobovniczaWithoutCaching(p) blz, err := shBlz.Open() if err != nil { diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go b/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go index af3d9e720..f6acb46aa 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go @@ -188,11 +188,11 @@ func (b *Blobovniczas) iterateExistingDBPaths(ctx context.Context, f func(string b.dbFilesGuard.RLock() defer b.dbFilesGuard.RUnlock() - _, err := b.iterateExistingDBPathsDFS(ctx, "", f) + _, err := b.iterateExistingPathsDFS(ctx, "", f, func(path string) bool { return !strings.HasSuffix(path, rebuildSuffix) }) return err } -func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path string, f func(string) (bool, error)) (bool, error) { +func (b *Blobovniczas) iterateExistingPathsDFS(ctx context.Context, path string, f func(string) (bool, error), fileFilter func(path string) bool) (bool, error) { sysPath := filepath.Join(b.rootPath, path) entries, err := os.ReadDir(sysPath) if os.IsNotExist(err) && b.readOnly && path == "" { // non initialized tree in read only mode @@ -208,7 +208,7 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin default: } if entry.IsDir() { - stop, err := b.iterateExistingDBPathsDFS(ctx, filepath.Join(path, entry.Name()), f) + stop, err := b.iterateExistingPathsDFS(ctx, filepath.Join(path, entry.Name()), f, fileFilter) if err != nil { return false, err } @@ -216,6 +216,9 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin return true, nil } } else { + if !fileFilter(entry.Name()) { + continue + } stop, err := f(filepath.Join(path, entry.Name())) if err != nil { return false, err @@ -228,6 +231,15 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin return false, nil } +// iterateIncompletedRebuildDBPaths iterates over the paths of Blobovniczas with incompleted rebuild files without any order. +func (b *Blobovniczas) iterateIncompletedRebuildDBPaths(ctx context.Context, f func(string) (bool, error)) error { + b.dbFilesGuard.RLock() + defer b.dbFilesGuard.RUnlock() + + _, err := b.iterateExistingPathsDFS(ctx, "", f, func(path string) bool { return strings.HasSuffix(path, rebuildSuffix) }) + return err +} + func (b *Blobovniczas) iterateSortedDBPaths(ctx context.Context, addr oid.Address, f func(string) (bool, error)) error { b.dbFilesGuard.RLock() defer b.dbFilesGuard.RUnlock() @@ -249,6 +261,9 @@ func (b *Blobovniczas) iterateSordedDBPathsInternal(ctx context.Context, path st var dirIdxs []uint64 for _, entry := range entries { + if strings.HasSuffix(entry.Name(), rebuildSuffix) { + continue + } idx := u64FromHexString(entry.Name()) if entry.IsDir() { dirIdxs = append(dirIdxs, idx) diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go index cfc17cfae..058fe1fb6 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go @@ -6,6 +6,7 @@ import ( "errors" "os" "path/filepath" + "strings" "sync" "sync/atomic" "time" @@ -19,6 +20,8 @@ import ( "golang.org/x/sync/errgroup" ) +const rebuildSuffix = ".rebuild" + var ( errRebuildInProgress = errors.New("rebuild is in progress, the operation cannot be performed") errBatchFull = errors.New("batch full") @@ -124,15 +127,36 @@ func (b *Blobovniczas) rebuildDB(ctx context.Context, path string, meta common.M } shDB.Close() }() - + dropTempFile, err := b.addRebuildTempFile(path) + if err != nil { + return 0, err + } migratedObjects, err := b.moveObjects(ctx, blz, shDB.SystemPath(), meta, limiter) if err != nil { return migratedObjects, err } shDBClosed, err = b.dropDB(ctx, path, shDB) + if err == nil { + // drop only on success to continue rebuild on error + dropTempFile() + } return migratedObjects, err } +func (b *Blobovniczas) addRebuildTempFile(path string) (func(), error) { + sysPath := filepath.Join(b.rootPath, path) + sysPath = sysPath + rebuildSuffix + _, err := os.OpenFile(sysPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, b.perm) + if err != nil { + return nil, err + } + return func() { + if err := os.Remove(sysPath); err != nil { + b.log.Warn(logs.BlobovniczatreeFailedToRemoveRebuildTempFile, zap.Error(err)) + } + }, nil +} + func (b *Blobovniczas) moveObjects(ctx context.Context, blz *blobovnicza.Blobovnicza, blzPath string, meta common.MetaStorage, limiter common.ConcurrentWorkersLimiter) (uint64, error) { var result atomic.Uint64 batch := make(map[oid.Address][]byte) @@ -256,7 +280,10 @@ func (b *Blobovniczas) dropDirectoryIfEmpty(path string) error { func (b *Blobovniczas) completeIncompletedMove(ctx context.Context, metaStore common.MetaStorage) (uint64, error) { var count uint64 - return count, b.iterateExistingDBPaths(ctx, func(s string) (bool, error) { + var rebuildTempFilesToRemove []string + err := b.iterateIncompletedRebuildDBPaths(ctx, func(s string) (bool, error) { + rebuildTmpFilePath := s + s = strings.TrimSuffix(s, rebuildSuffix) shDB := b.getBlobovnicza(s) blz, err := shDB.Open() if err != nil { @@ -276,8 +303,15 @@ func (b *Blobovniczas) completeIncompletedMove(ctx context.Context, metaStore co count++ } + rebuildTempFilesToRemove = append(rebuildTempFilesToRemove, rebuildTmpFilePath) return false, nil }) + for _, tmp := range rebuildTempFilesToRemove { + if err := os.Remove(filepath.Join(b.rootPath, tmp)); err != nil { + b.log.Warn(logs.BlobovniczatreeFailedToRemoveRebuildTempFile, zap.Error(err)) + } + } + return count, err } func (b *Blobovniczas) performMove(ctx context.Context, source *blobovnicza.Blobovnicza, sourcePath string, diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go index a6afed60c..9fec795ca 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go @@ -3,6 +3,7 @@ package blobovniczatree import ( "bytes" "context" + "os" "path/filepath" "sync" "testing" @@ -53,6 +54,8 @@ func testRebuildFailoverOnlyMoveInfoSaved(t *testing.T) { })) require.NoError(t, blz.Close()) + _, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm) + require.NoError(t, err) testRebuildFailoverValidate(t, dir, obj, true) } @@ -82,6 +85,9 @@ func testRebuildFailoverObjectSavedToTarget(t *testing.T) { require.NoError(t, blz.Close()) + _, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm) + require.NoError(t, err) + blz = blobovnicza.New(blobovnicza.WithPath(filepath.Join(dir, "0", "0", "0.db"))) require.NoError(t, blz.Open()) require.NoError(t, blz.Init()) @@ -113,6 +119,9 @@ func testRebuildFailoverObjectDeletedFromSource(t *testing.T) { require.NoError(t, blz.Close()) + _, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm) + require.NoError(t, err) + blz = blobovnicza.New(blobovnicza.WithPath(filepath.Join(dir, "0", "0", "0.db"))) require.NoError(t, blz.Open()) require.NoError(t, blz.Init()) @@ -194,4 +203,7 @@ func testRebuildFailoverValidate(t *testing.T, dir string, obj *objectSDK.Object } require.NoError(t, blz.Close()) + + _, err = os.Stat(filepath.Join(dir, "0", "0", "1.db.rebuild")) + require.True(t, os.IsNotExist(err)) }