From d4d905ecc6cd00a04be5f2b0d8dcf0b91312b551 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Wed, 27 Sep 2023 18:21:37 +0300 Subject: [PATCH] [#661] metrics: Add blobovniczatree rebuild metrics Signed-off-by: Dmitrii Stepanov --- .../blobstor/blobovniczatree/control.go | 1 + .../blobstor/blobovniczatree/metrics.go | 12 ++++++ .../blobstor/blobovniczatree/rebuild.go | 17 ++++++++ .../metrics/blobovnicza.go | 8 ++++ pkg/metrics/blobovnicza.go | 42 +++++++++++++++---- pkg/metrics/consts.go | 1 + 6 files changed, 74 insertions(+), 7 deletions(-) diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/control.go b/pkg/local_object_storage/blobstor/blobovniczatree/control.go index 561b0859..f1d78dc5 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/control.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/control.go @@ -19,6 +19,7 @@ var errFailedToChangeExtensionReadOnly = errors.New("failed to change blobovnicz func (b *Blobovniczas) Open(readOnly bool) error { b.readOnly = readOnly b.metrics.SetMode(readOnly) + b.metrics.SetRebuildStatus(rebuildStatusNotStarted) b.openManagers() return nil } diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/metrics.go b/pkg/local_object_storage/blobstor/blobovniczatree/metrics.go index 032d8cf7..ee0326bc 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/metrics.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/metrics.go @@ -6,6 +6,13 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobovnicza" ) +const ( + rebuildStatusNotStarted = "not_started" + rebuildStatusRunning = "running" + rebuildStatusCompleted = "completed" + rebuildStatusFailed = "failed" +) + type Metrics interface { Blobovnicza() blobovnicza.Metrics @@ -14,6 +21,9 @@ type Metrics interface { SetMode(readOnly bool) Close() + SetRebuildStatus(status string) + ObjectMoved(d time.Duration) + Delete(d time.Duration, success, withStorageID bool) Exists(d time.Duration, success, withStorageID bool) GetRange(d time.Duration, size int, success, withStorageID bool) @@ -27,6 +37,8 @@ type noopMetrics struct{} func (m *noopMetrics) SetParentID(string) {} func (m *noopMetrics) SetMode(bool) {} func (m *noopMetrics) Close() {} +func (m *noopMetrics) SetRebuildStatus(string) {} +func (m *noopMetrics) ObjectMoved(time.Duration) {} func (m *noopMetrics) Delete(time.Duration, bool, bool) {} func (m *noopMetrics) Exists(time.Duration, bool, bool) {} func (m *noopMetrics) GetRange(time.Duration, int, bool, bool) {} diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go index 5f87933b..c1f21703 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/rebuild.go @@ -23,6 +23,16 @@ func (b *Blobovniczas) Rebuild(ctx context.Context, prm common.RebuildPrm) (comm return common.RebuildRes{}, common.ErrReadOnly } + b.metrics.SetRebuildStatus(rebuildStatusRunning) + success := true + defer func() { + if success { + b.metrics.SetRebuildStatus(rebuildStatusCompleted) + } else { + b.metrics.SetRebuildStatus(rebuildStatusFailed) + } + }() + b.rebuildGuard.Lock() defer b.rebuildGuard.Unlock() @@ -33,6 +43,7 @@ func (b *Blobovniczas) Rebuild(ctx context.Context, prm common.RebuildPrm) (comm res.ObjectsMoved += completedPreviosMoves if err != nil { b.log.Warn(logs.BlobovniczaTreeCompletedPreviousRebuildFailed, zap.Error(err)) + success = false return res, err } b.log.Debug(logs.BlobovniczaTreeCompletedPreviousRebuildSuccess) @@ -41,6 +52,7 @@ func (b *Blobovniczas) Rebuild(ctx context.Context, prm common.RebuildPrm) (comm dbsToMigrate, err := b.getDBsToRebuild(ctx) if err != nil { b.log.Warn(logs.BlobovniczaTreeCollectingDBToRebuildFailed, zap.Error(err)) + success = false return res, err } b.log.Info(logs.BlobovniczaTreeCollectingDBToRebuildSuccess, zap.Int("blobovniczas_to_rebuild", len(dbsToMigrate))) @@ -50,6 +62,7 @@ func (b *Blobovniczas) Rebuild(ctx context.Context, prm common.RebuildPrm) (comm res.ObjectsMoved += movedObjects if err != nil { b.log.Warn(logs.BlobovniczaTreeRebuildingBlobovniczaFailed, zap.String("path", db), zap.Uint64("moved_objects_count", movedObjects), zap.Error(err)) + success = false return res, err } b.log.Debug(logs.BlobovniczaTreeRebuildingBlobovniczaSuccess, zap.String("path", db), zap.Uint64("moved_objects_count", movedObjects)) @@ -121,6 +134,10 @@ func (b *Blobovniczas) moveObjects(ctx context.Context, blz *blobovnicza.Blobovn func (b *Blobovniczas) moveObject(ctx context.Context, source *blobovnicza.Blobovnicza, sourcePath string, addr oid.Address, data []byte, metaStore common.MetaStorage) error { + startedAt := time.Now() + defer func() { + b.metrics.ObjectMoved(time.Since(startedAt)) + }() it := &moveIterator{ B: b, ID: nil, diff --git a/pkg/local_object_storage/metrics/blobovnicza.go b/pkg/local_object_storage/metrics/blobovnicza.go index 0d0318b3..3d751ba3 100644 --- a/pkg/local_object_storage/metrics/blobovnicza.go +++ b/pkg/local_object_storage/metrics/blobovnicza.go @@ -42,6 +42,14 @@ func (m *blobovniczaTreeMetrics) Close() { m.m.CloseBlobobvnizcaTree(m.shardID, m.path) } +func (m *blobovniczaTreeMetrics) SetRebuildStatus(status string) { + m.m.BlobovniczaTreeRebuildStatus(m.shardID, m.path, status) +} + +func (m *blobovniczaTreeMetrics) ObjectMoved(d time.Duration) { + m.m.BlobovniczaTreeObjectMoved(m.shardID, m.path, d) +} + func (m *blobovniczaTreeMetrics) Delete(d time.Duration, success, withStorageID bool) { m.m.BlobobvnizcaTreeMethodDuration(m.shardID, m.path, "Delete", d, success, metrics_impl.NullBool{Valid: true, Bool: withStorageID}) } diff --git a/pkg/metrics/blobovnicza.go b/pkg/metrics/blobovnicza.go index a1ecbc70..ddeeb406 100644 --- a/pkg/metrics/blobovnicza.go +++ b/pkg/metrics/blobovnicza.go @@ -23,16 +23,21 @@ type BlobobvnizcaMetrics interface { IncOpenBlobovniczaCount(shardID, path string) DecOpenBlobovniczaCount(shardID, path string) + + BlobovniczaTreeRebuildStatus(shardID, path, status string) + BlobovniczaTreeObjectMoved(shardID, path string, d time.Duration) } type blobovnicza struct { - treeMode *shardIDPathModeValue - treeReqDuration *prometheus.HistogramVec - treePut *prometheus.CounterVec - treeGet *prometheus.CounterVec - treeOpenSize *prometheus.GaugeVec - treeOpenItems *prometheus.GaugeVec - treeOpenCounter *prometheus.GaugeVec + treeMode *shardIDPathModeValue + treeReqDuration *prometheus.HistogramVec + treePut *prometheus.CounterVec + treeGet *prometheus.CounterVec + treeOpenSize *prometheus.GaugeVec + treeOpenItems *prometheus.GaugeVec + treeOpenCounter *prometheus.GaugeVec + treeObjectMoveDuration *prometheus.HistogramVec + treeRebuildStatus *shardIDPathModeValue } func newBlobovnicza() *blobovnicza { @@ -75,6 +80,13 @@ func newBlobovnicza() *blobovnicza { Name: "open_blobovnicza_count", Help: "Count of opened blobovniczas of Blobovnicza tree", }, []string{shardIDLabel, pathLabel}), + treeObjectMoveDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: blobovniczaTreeSubSystem, + Name: "object_move_duration_seconds", + Help: "Accumulated Blobovnicza tree object move duration", + }, []string{shardIDLabel, pathLabel}), + treeRebuildStatus: newShardIDPathMode(blobovniczaTreeSubSystem, "rebuild_status", "Blobovnicza tree rebuild status"), } } @@ -96,6 +108,11 @@ func (b *blobovnicza) CloseBlobobvnizcaTree(shardID, path string) { shardIDLabel: shardID, pathLabel: path, }) + b.treeObjectMoveDuration.DeletePartialMatch(prometheus.Labels{ + shardIDLabel: shardID, + pathLabel: path, + }) + b.treeRebuildStatus.SetMode(shardID, path, undefinedStatus) } func (b *blobovnicza) BlobobvnizcaTreeMethodDuration(shardID, path string, method string, d time.Duration, success bool, withStorageID NullBool) { @@ -163,3 +180,14 @@ func (b *blobovnicza) SubOpenBlobovniczaItems(shardID, path string, items uint64 pathLabel: path, }).Sub(float64(items)) } + +func (b *blobovnicza) BlobovniczaTreeRebuildStatus(shardID, path, status string) { + b.treeRebuildStatus.SetMode(shardID, path, status) +} + +func (b *blobovnicza) BlobovniczaTreeObjectMoved(shardID, path string, d time.Duration) { + b.treeObjectMoveDuration.With(prometheus.Labels{ + shardIDLabel: shardID, + pathLabel: path, + }).Observe(d.Seconds()) +} diff --git a/pkg/metrics/consts.go b/pkg/metrics/consts.go index f7a8fd77..f3b83a2e 100644 --- a/pkg/metrics/consts.go +++ b/pkg/metrics/consts.go @@ -45,4 +45,5 @@ const ( failedToDeleteStatus = "failed_to_delete" deletedStatus = "deleted" + undefinedStatus = "undefined" )