From 007827255ee65e9d7fd61c0f6bdd6bb59479bac1 Mon Sep 17 00:00:00 2001
From: Dmitrii Stepanov <d.stepanov@yadro.com>
Date: Tue, 27 Aug 2024 15:51:55 +0300
Subject: [PATCH] [#1337] blobovniczatree: Add .rebuild temp files

This allows to reduce open/close DBs to check incompleted rebuilds.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
---
 internal/logs/logs.go                         |  1 +
 .../blobstor/blobovniczatree/blobovnicza.go   |  2 +-
 .../blobstor/blobovniczatree/control.go       |  6 +--
 .../blobstor/blobovniczatree/iterate.go       | 21 ++++++++--
 .../blobstor/blobovniczatree/rebuild.go       | 38 ++++++++++++++++++-
 .../blobovniczatree/rebuild_failover_test.go  | 12 ++++++
 6 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/internal/logs/logs.go b/internal/logs/logs.go
index 78bcd0c0e..97b189529 100644
--- a/internal/logs/logs.go
+++ b/internal/logs/logs.go
@@ -543,4 +543,5 @@ const (
 	WritecacheSealCompletedAsync                                         = "writecache seal completed successfully"
 	FailedToSealWritecacheAsync                                          = "failed to seal writecache async"
 	WritecacheShrinkSkippedNotEmpty                                      = "writecache shrink skipped: database is not empty"
+	BlobovniczatreeFailedToRemoveRebuildTempFile                         = "failed to remove rebuild temp file"
 )
diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go b/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go
index 952203367..c909113c7 100644
--- a/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go
+++ b/pkg/local_object_storage/blobstor/blobovniczatree/blobovnicza.go
@@ -135,7 +135,7 @@ func getBlobovniczaMaxIndex(directory string) (bool, uint64, error) {
 	var hasDBs bool
 	var maxIdx uint64
 	for _, e := range entries {
-		if e.IsDir() {
+		if e.IsDir() || strings.HasSuffix(e.Name(), rebuildSuffix) {
 			continue
 		}
 		hasDBs = true
diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/control.go b/pkg/local_object_storage/blobstor/blobovniczatree/control.go
index a31e9d6cb..681cf876c 100644
--- a/pkg/local_object_storage/blobstor/blobovniczatree/control.go
+++ b/pkg/local_object_storage/blobstor/blobovniczatree/control.go
@@ -2,6 +2,7 @@ package blobovniczatree
 
 import (
 	"context"
+	"strings"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
@@ -41,10 +42,9 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error {
 
 	eg, egCtx := errgroup.WithContext(ctx)
 	eg.SetLimit(b.blzInitWorkerCount)
-	visited := make(map[string]struct{})
-	err = b.iterateExistingDBPaths(egCtx, func(p string) (bool, error) {
-		visited[p] = struct{}{}
+	err = b.iterateIncompletedRebuildDBPaths(egCtx, func(p string) (bool, error) {
 		eg.Go(func() error {
+			p = strings.TrimSuffix(p, rebuildSuffix)
 			shBlz := b.getBlobovniczaWithoutCaching(p)
 			blz, err := shBlz.Open()
 			if err != nil {
diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go b/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go
index af3d9e720..f6acb46aa 100644
--- a/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go
+++ b/pkg/local_object_storage/blobstor/blobovniczatree/iterate.go
@@ -188,11 +188,11 @@ func (b *Blobovniczas) iterateExistingDBPaths(ctx context.Context, f func(string
 	b.dbFilesGuard.RLock()
 	defer b.dbFilesGuard.RUnlock()
 
-	_, err := b.iterateExistingDBPathsDFS(ctx, "", f)
+	_, err := b.iterateExistingPathsDFS(ctx, "", f, func(path string) bool { return !strings.HasSuffix(path, rebuildSuffix) })
 	return err
 }
 
-func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path string, f func(string) (bool, error)) (bool, error) {
+func (b *Blobovniczas) iterateExistingPathsDFS(ctx context.Context, path string, f func(string) (bool, error), fileFilter func(path string) bool) (bool, error) {
 	sysPath := filepath.Join(b.rootPath, path)
 	entries, err := os.ReadDir(sysPath)
 	if os.IsNotExist(err) && b.readOnly && path == "" { // non initialized tree in read only mode
@@ -208,7 +208,7 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin
 		default:
 		}
 		if entry.IsDir() {
-			stop, err := b.iterateExistingDBPathsDFS(ctx, filepath.Join(path, entry.Name()), f)
+			stop, err := b.iterateExistingPathsDFS(ctx, filepath.Join(path, entry.Name()), f, fileFilter)
 			if err != nil {
 				return false, err
 			}
@@ -216,6 +216,9 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin
 				return true, nil
 			}
 		} else {
+			if !fileFilter(entry.Name()) {
+				continue
+			}
 			stop, err := f(filepath.Join(path, entry.Name()))
 			if err != nil {
 				return false, err
@@ -228,6 +231,15 @@ func (b *Blobovniczas) iterateExistingDBPathsDFS(ctx context.Context, path strin
 	return false, nil
 }
 
+// iterateIncompletedRebuildDBPaths iterates over the paths of Blobovniczas with incompleted rebuild files without any order.
+func (b *Blobovniczas) iterateIncompletedRebuildDBPaths(ctx context.Context, f func(string) (bool, error)) error {
+	b.dbFilesGuard.RLock()
+	defer b.dbFilesGuard.RUnlock()
+
+	_, err := b.iterateExistingPathsDFS(ctx, "", f, func(path string) bool { return strings.HasSuffix(path, rebuildSuffix) })
+	return err
+}
+
 func (b *Blobovniczas) iterateSortedDBPaths(ctx context.Context, addr oid.Address, f func(string) (bool, error)) error {
 	b.dbFilesGuard.RLock()
 	defer b.dbFilesGuard.RUnlock()
@@ -249,6 +261,9 @@ func (b *Blobovniczas) iterateSordedDBPathsInternal(ctx context.Context, path st
 	var dirIdxs []uint64
 
 	for _, entry := range entries {
+		if strings.HasSuffix(entry.Name(), rebuildSuffix) {
+			continue
+		}
 		idx := u64FromHexString(entry.Name())
 		if entry.IsDir() {
 			dirIdxs = append(dirIdxs, idx)
diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go
index cfc17cfae..058fe1fb6 100644
--- a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go
+++ b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go
@@ -6,6 +6,7 @@ import (
 	"errors"
 	"os"
 	"path/filepath"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -19,6 +20,8 @@ import (
 	"golang.org/x/sync/errgroup"
 )
 
+const rebuildSuffix = ".rebuild"
+
 var (
 	errRebuildInProgress = errors.New("rebuild is in progress, the operation cannot be performed")
 	errBatchFull         = errors.New("batch full")
@@ -124,15 +127,36 @@ func (b *Blobovniczas) rebuildDB(ctx context.Context, path string, meta common.M
 		}
 		shDB.Close()
 	}()
-
+	dropTempFile, err := b.addRebuildTempFile(path)
+	if err != nil {
+		return 0, err
+	}
 	migratedObjects, err := b.moveObjects(ctx, blz, shDB.SystemPath(), meta, limiter)
 	if err != nil {
 		return migratedObjects, err
 	}
 	shDBClosed, err = b.dropDB(ctx, path, shDB)
+	if err == nil {
+		// drop only on success to continue rebuild on error
+		dropTempFile()
+	}
 	return migratedObjects, err
 }
 
+func (b *Blobovniczas) addRebuildTempFile(path string) (func(), error) {
+	sysPath := filepath.Join(b.rootPath, path)
+	sysPath = sysPath + rebuildSuffix
+	_, err := os.OpenFile(sysPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, b.perm)
+	if err != nil {
+		return nil, err
+	}
+	return func() {
+		if err := os.Remove(sysPath); err != nil {
+			b.log.Warn(logs.BlobovniczatreeFailedToRemoveRebuildTempFile, zap.Error(err))
+		}
+	}, nil
+}
+
 func (b *Blobovniczas) moveObjects(ctx context.Context, blz *blobovnicza.Blobovnicza, blzPath string, meta common.MetaStorage, limiter common.ConcurrentWorkersLimiter) (uint64, error) {
 	var result atomic.Uint64
 	batch := make(map[oid.Address][]byte)
@@ -256,7 +280,10 @@ func (b *Blobovniczas) dropDirectoryIfEmpty(path string) error {
 
 func (b *Blobovniczas) completeIncompletedMove(ctx context.Context, metaStore common.MetaStorage) (uint64, error) {
 	var count uint64
-	return count, b.iterateExistingDBPaths(ctx, func(s string) (bool, error) {
+	var rebuildTempFilesToRemove []string
+	err := b.iterateIncompletedRebuildDBPaths(ctx, func(s string) (bool, error) {
+		rebuildTmpFilePath := s
+		s = strings.TrimSuffix(s, rebuildSuffix)
 		shDB := b.getBlobovnicza(s)
 		blz, err := shDB.Open()
 		if err != nil {
@@ -276,8 +303,15 @@ func (b *Blobovniczas) completeIncompletedMove(ctx context.Context, metaStore co
 			count++
 		}
 
+		rebuildTempFilesToRemove = append(rebuildTempFilesToRemove, rebuildTmpFilePath)
 		return false, nil
 	})
+	for _, tmp := range rebuildTempFilesToRemove {
+		if err := os.Remove(filepath.Join(b.rootPath, tmp)); err != nil {
+			b.log.Warn(logs.BlobovniczatreeFailedToRemoveRebuildTempFile, zap.Error(err))
+		}
+	}
+	return count, err
 }
 
 func (b *Blobovniczas) performMove(ctx context.Context, source *blobovnicza.Blobovnicza, sourcePath string,
diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go
index a6afed60c..9fec795ca 100644
--- a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go
+++ b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild_failover_test.go
@@ -3,6 +3,7 @@ package blobovniczatree
 import (
 	"bytes"
 	"context"
+	"os"
 	"path/filepath"
 	"sync"
 	"testing"
@@ -53,6 +54,8 @@ func testRebuildFailoverOnlyMoveInfoSaved(t *testing.T) {
 	}))
 
 	require.NoError(t, blz.Close())
+	_, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm)
+	require.NoError(t, err)
 
 	testRebuildFailoverValidate(t, dir, obj, true)
 }
@@ -82,6 +85,9 @@ func testRebuildFailoverObjectSavedToTarget(t *testing.T) {
 
 	require.NoError(t, blz.Close())
 
+	_, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm)
+	require.NoError(t, err)
+
 	blz = blobovnicza.New(blobovnicza.WithPath(filepath.Join(dir, "0", "0", "0.db")))
 	require.NoError(t, blz.Open())
 	require.NoError(t, blz.Init())
@@ -113,6 +119,9 @@ func testRebuildFailoverObjectDeletedFromSource(t *testing.T) {
 
 	require.NoError(t, blz.Close())
 
+	_, err = os.OpenFile(filepath.Join(dir, "0", "0", "1.db.rebuild"), os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, defaultPerm)
+	require.NoError(t, err)
+
 	blz = blobovnicza.New(blobovnicza.WithPath(filepath.Join(dir, "0", "0", "0.db")))
 	require.NoError(t, blz.Open())
 	require.NoError(t, blz.Init())
@@ -194,4 +203,7 @@ func testRebuildFailoverValidate(t *testing.T, dir string, obj *objectSDK.Object
 	}
 
 	require.NoError(t, blz.Close())
+
+	_, err = os.Stat(filepath.Join(dir, "0", "0", "1.db.rebuild"))
+	require.True(t, os.IsNotExist(err))
 }