[#424] metrics: Refactor engine metrics

Use a histogram vector to measure request duration.
Fix metric names to follow Prometheus naming best practices.

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2023-06-13 19:48:15 +03:00
parent c8023a9c8d
commit 1b364d8cf4
13 changed files with 46 additions and 122 deletions

View file

@ -67,7 +67,7 @@ func ContainerSize(e *StorageEngine, id cid.ID) (uint64, error) {
func (e *StorageEngine) containerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddEstimateContainerSizeDuration)()
defer elapsed("EstimateContainerSize", e.metrics.AddMethodDuration)()
}
e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) {
@ -115,7 +115,7 @@ func ListContainers(e *StorageEngine) ([]cid.ID, error) {
func (e *StorageEngine) listContainers() (ListContainersRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddListContainersDuration)()
defer elapsed("ListContainers", e.metrics.AddMethodDuration)()
}
uniqueIDs := make(map[string]cid.ID)

View file

@ -67,7 +67,7 @@ func (e *StorageEngine) Delete(ctx context.Context, prm DeletePrm) (res DeleteRe
func (e *StorageEngine) delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddDeleteDuration)()
defer elapsed("Delete", e.metrics.AddMethodDuration)()
}
var locked struct {

View file

@ -64,7 +64,7 @@ func (e *StorageEngine) Get(ctx context.Context, prm GetPrm) (res GetRes, err er
func (e *StorageEngine) get(ctx context.Context, prm GetPrm) (GetRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddGetDuration)()
defer elapsed("Get", e.metrics.AddMethodDuration)()
}
var errNotFound apistatus.ObjectNotFound

View file

@ -68,7 +68,7 @@ func (e *StorageEngine) head(ctx context.Context, prm HeadPrm) (HeadRes, error)
defer span.End()
if e.metrics != nil {
defer elapsed(e.metrics.AddHeadDuration)()
defer elapsed("Head", e.metrics.AddMethodDuration)()
}
var (

View file

@ -78,7 +78,7 @@ func (e *StorageEngine) Inhume(ctx context.Context, prm InhumePrm) (res InhumeRe
func (e *StorageEngine) inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddInhumeDuration)()
defer elapsed("Inhume", e.metrics.AddMethodDuration)()
}
var shPrm shard.InhumePrm

View file

@ -7,17 +7,7 @@ import (
)
type MetricRegister interface {
AddListContainersDuration(d time.Duration)
AddEstimateContainerSizeDuration(d time.Duration)
AddDeleteDuration(d time.Duration)
AddExistsDuration(d time.Duration)
AddGetDuration(d time.Duration)
AddHeadDuration(d time.Duration)
AddInhumeDuration(d time.Duration)
AddPutDuration(d time.Duration)
AddRangeDuration(d time.Duration)
AddSearchDuration(d time.Duration)
AddListObjectsDuration(d time.Duration)
AddMethodDuration(method string, d time.Duration)
SetObjectCounter(shardID, objectType string, v uint64)
AddToObjectCounter(shardID, objectType string, delta int)
@ -28,17 +18,17 @@ type MetricRegister interface {
AddToPayloadCounter(shardID string, size int64)
IncErrorCounter(shardID string)
ClearErrorCounter(shardID string)
DeleteErrorCounter(shardID string)
DeleteShardMetrics(shardID string)
WriteCache() metrics.WriteCacheMetrics
GC() metrics.GCMetrics
}
// elapsed captures the current time and returns a function that, when invoked,
// passes the time elapsed since that capture to addFunc. In the two-argument
// form the method string is forwarded to addFunc unchanged. It is used as
// `defer elapsed(...)()`: the outer call runs immediately, and the returned
// function runs when the surrounding function returns.
func elapsed(addFunc func(d time.Duration)) func() {
func elapsed(method string, addFunc func(method string, d time.Duration)) func() {
t := time.Now()
return func() {
addFunc(time.Since(t))
addFunc(method, time.Since(t))
}
}

View file

@ -57,7 +57,7 @@ func (e *StorageEngine) Put(ctx context.Context, prm PutPrm) (err error) {
func (e *StorageEngine) put(ctx context.Context, prm PutPrm) error {
if e.metrics != nil {
defer elapsed(e.metrics.AddPutDuration)()
defer elapsed("Put", e.metrics.AddMethodDuration)()
}
addr := object.AddressOf(prm.obj)

View file

@ -80,7 +80,7 @@ func (e *StorageEngine) getRange(ctx context.Context, prm RngPrm) (RngRes, error
defer span.End()
if e.metrics != nil {
defer elapsed(e.metrics.AddRangeDuration)()
defer elapsed("GetRange", e.metrics.AddMethodDuration)()
}
var errNotFound apistatus.ObjectNotFound

View file

@ -60,7 +60,7 @@ func (e *StorageEngine) Select(ctx context.Context, prm SelectPrm) (res SelectRe
func (e *StorageEngine) _select(ctx context.Context, prm SelectPrm) (SelectRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddSearchDuration)()
defer elapsed("Search", e.metrics.AddMethodDuration)()
}
addrList := make([]oid.Address, 0)
@ -109,7 +109,7 @@ func (e *StorageEngine) List(limit uint64) (res SelectRes, err error) {
func (e *StorageEngine) list(limit uint64) (SelectRes, error) {
if e.metrics != nil {
defer elapsed(e.metrics.AddListObjectsDuration)()
defer elapsed("ListObjects", e.metrics.AddMethodDuration)()
}
addrList := make([]oid.Address, 0, limit)

View file

@ -70,8 +70,8 @@ func (m *metricsWithID) ClearErrorCounter() {
m.mw.ClearErrorCounter(m.id)
}
// DeleteShardMetrics forwards the call to the wrapped metrics writer, passing
// the ID stored in this wrapper.
func (m *metricsWithID) DeleteErrorCounter() {
m.mw.DeleteErrorCounter(m.id)
func (m *metricsWithID) DeleteShardMetrics() {
m.mw.DeleteShardMetrics(m.id)
}
// AddShard adds a new shard to the storage engine.
@ -186,7 +186,7 @@ func (e *StorageEngine) removeShards(ids ...string) {
continue
}
sh.DeleteErrorCounter()
sh.DeleteShardMetrics()
ss = append(ss, sh)
delete(e.shards, id)

View file

@ -77,7 +77,7 @@ func (m *metricsStore) ClearErrorCounter() {
m.errCounter = 0
}
// DeleteShardMetrics resets the stored error counter to zero.
func (m *metricsStore) DeleteErrorCounter() {
func (m *metricsStore) DeleteShardMetrics() {
m.errCounter = 0
}

View file

@ -79,8 +79,8 @@ type MetricsWriter interface {
IncErrorCounter()
// ClearErrorCounter clear error counter.
ClearErrorCounter()
// DeleteErrorCounter delete error counter.
DeleteErrorCounter()
// DeleteShardMetrics deletes shard metrics from registry.
DeleteShardMetrics()
}
type cfg struct {
@ -447,8 +447,8 @@ func (s *Shard) ClearErrorCounter() {
}
}
// DeleteShardMetrics asks the configured metrics writer to delete the shard's
// metrics. It does nothing when no metrics writer is set.
func (s *Shard) DeleteErrorCounter() {
func (s *Shard) DeleteShardMetrics() {
if s.cfg.metricsWriter != nil {
s.cfg.metricsWriter.DeleteErrorCounter()
s.cfg.metricsWriter.DeleteShardMetrics()
}
}

View file

@ -1,8 +1,6 @@
package metrics
import (
"fmt"
"strings"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-observability/metrics"
@ -11,60 +9,33 @@ import (
type (
// engineMetrics groups the Prometheus collectors used for engine metrics:
// a histogram vector of method durations and gauge vectors for container
// size, payload size and the error counter.
engineMetrics struct {
listContainersDuration prometheus.Counter
estimateContainerSizeDuration prometheus.Counter
deleteDuration prometheus.Counter
existsDuration prometheus.Counter
getDuration prometheus.Counter
headDuration prometheus.Counter
inhumeDuration prometheus.Counter
putDuration prometheus.Counter
rangeDuration prometheus.Counter
searchDuration prometheus.Counter
listObjectsDuration prometheus.Counter
containerSize *prometheus.GaugeVec
payloadSize *prometheus.GaugeVec
errorCounter *prometheus.GaugeVec
methodDuration *prometheus.HistogramVec
containerSize *prometheus.GaugeVec
payloadSize *prometheus.GaugeVec
errorCounter *prometheus.GaugeVec
}
)
const engineSubsystem = "engine"
// Names used when registering engine metrics.
const (
// engineSubsystem is the Subsystem value set in engine metric options.
engineSubsystem = "engine"
// engineMethod is the label key of the method duration histogram vector.
engineMethod = "method"
)
// newEngineMetrics builds the engine metric collectors: gauge vectors for
// container size, payload size and the shard error counter, and a histogram
// vector of request durations keyed by the engineMethod label.
func newEngineMetrics() engineMetrics {
return engineMetrics{
listContainersDuration: newEngineMethodDurationCounter("list_containers_"),
estimateContainerSizeDuration: newEngineCounter("estimate_container_size_duration", "Accumulated duration of engine container size estimate operations"),
deleteDuration: newEngineMethodDurationCounter("delete"),
existsDuration: newEngineMethodDurationCounter("exists"),
getDuration: newEngineMethodDurationCounter("get"),
headDuration: newEngineMethodDurationCounter("head"),
inhumeDuration: newEngineMethodDurationCounter("inhume"),
putDuration: newEngineMethodDurationCounter("put"),
rangeDuration: newEngineMethodDurationCounter("range"),
searchDuration: newEngineMethodDurationCounter("search"),
listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
containerSize: newEngineGaugeVector("container_size_bytes", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
payloadSize: newEngineGaugeVector("payload_size_bytes", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
methodDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: engineSubsystem,
Name: "request_duration_seconds",
Help: "Duration of Engine requests",
}, []string{engineMethod}),
}
}
func newEngineCounter(name, help string) prometheus.Counter {
return metrics.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: engineSubsystem,
Name: name,
Help: help,
})
}
func newEngineMethodDurationCounter(method string) prometheus.Counter {
return newEngineCounter(
fmt.Sprintf("%s_duration", method),
fmt.Sprintf("Accumulated duration of engine %s operations", strings.ReplaceAll(method, "_", " ")),
)
}
func newEngineGaugeVector(name, help string, labels []string) *prometheus.GaugeVec {
return metrics.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
@ -74,48 +45,10 @@ func newEngineGaugeVector(name, help string, labels []string) *prometheus.GaugeV
}, labels)
}
func (m engineMetrics) AddListContainersDuration(d time.Duration) {
m.listObjectsDuration.Add(float64(d))
}
func (m engineMetrics) AddEstimateContainerSizeDuration(d time.Duration) {
m.estimateContainerSizeDuration.Add(float64(d))
}
func (m engineMetrics) AddDeleteDuration(d time.Duration) {
m.deleteDuration.Add(float64(d))
}
func (m engineMetrics) AddExistsDuration(d time.Duration) {
m.existsDuration.Add(float64(d))
}
func (m engineMetrics) AddGetDuration(d time.Duration) {
m.getDuration.Add(float64(d))
}
func (m engineMetrics) AddHeadDuration(d time.Duration) {
m.headDuration.Add(float64(d))
}
func (m engineMetrics) AddInhumeDuration(d time.Duration) {
m.inhumeDuration.Add(float64(d))
}
func (m engineMetrics) AddPutDuration(d time.Duration) {
m.putDuration.Add(float64(d))
}
func (m engineMetrics) AddRangeDuration(d time.Duration) {
m.rangeDuration.Add(float64(d))
}
func (m engineMetrics) AddSearchDuration(d time.Duration) {
m.searchDuration.Add(float64(d))
}
func (m engineMetrics) AddListObjectsDuration(d time.Duration) {
m.listObjectsDuration.Add(float64(d))
// AddMethodDuration observes d, converted to seconds, in the method duration
// histogram under the given method label value.
//
// A value receiver is used to match the other engineMetrics methods. The
// struct holds only pointers to collectors, so the copy still updates the
// same underlying histogram, and the method stays callable on both
// engineMetrics and *engineMetrics.
func (m engineMetrics) AddMethodDuration(method string, d time.Duration) {
	m.methodDuration.With(prometheus.Labels{
		engineMethod: method,
	}).Observe(d.Seconds())
}
func (m engineMetrics) AddToContainerSize(cnrID string, size int64) {
@ -134,6 +67,7 @@ func (m engineMetrics) ClearErrorCounter(shardID string) {
m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Set(0)
}
// DeleteShardMetrics removes the error counter and payload size series that
// carry the given shard ID label value.
func (m engineMetrics) DeleteErrorCounter(shardID string) {
func (m engineMetrics) DeleteShardMetrics(shardID string) {
m.errorCounter.Delete(prometheus.Labels{shardIDLabelKey: shardID})
m.payloadSize.Delete(prometheus.Labels{shardIDLabelKey: shardID})
}