writecache: Fix metric values #759

Merged
dstepanov-yadro merged 1 commit from dstepanov-yadro/frostfs-node:fix/wc_counter into master 2023-10-30 13:05:01 +00:00
11 changed files with 19 additions and 31 deletions

View file

@ -68,16 +68,10 @@ func (m *writeCacheMetrics) Get(d time.Duration, success bool, st writecache.Sto
func (m *writeCacheMetrics) Delete(d time.Duration, success bool, st writecache.StorageType) {
m.metrics.AddMethodDuration(m.shardID, "Delete", success, d, st.String())
if success {
m.metrics.DecActualCount(m.shardID, st.String())
}
}
func (m *writeCacheMetrics) Put(d time.Duration, success bool, st writecache.StorageType) {
m.metrics.AddMethodDuration(m.shardID, "Put", success, d, st.String())
if success {
m.metrics.IncActualCount(m.shardID, st.String())
}
}
func (m *writeCacheMetrics) SetEstimateSize(db, fstree uint64) {
@ -99,7 +93,6 @@ func (m *writeCacheMetrics) Flush(success bool, st writecache.StorageType) {
}
func (m *writeCacheMetrics) Evict(st writecache.StorageType) {
m.metrics.DecActualCount(m.shardID, st.String())
m.metrics.IncOperationCounter(m.shardID, "Evict", metrics.NullBool{}, st.String())
}

View file

@ -59,7 +59,7 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
storagelog.OpField("db DELETE"),
)
deleted = true
c.objCounters.DecDB()
c.decDB()
}
return err
}

View file

@ -76,7 +76,7 @@ func (c *cache) put(obj objectInfo) error {
storagelog.StorageTypeField(wcStorageType),
storagelog.OpField("db PUT"),
)
c.objCounters.IncDB()
c.incDB()
}
return err
}

View file

@ -55,3 +55,13 @@ func (c *cache) initCounters() error {
return nil
}
func (c *cache) incDB() {
c.objCounters.IncDB()
c.metrics.SetActualCounters(c.objCounters.DB(), 0)
}
func (c *cache) decDB() {
c.objCounters.DecDB()
c.metrics.SetActualCounters(c.objCounters.DB(), 0)
}

View file

@ -73,7 +73,7 @@ func (c *cache) deleteFromDB(keys []internalKey) []internalKey {
}
for i := 0; i < errorIndex; i++ {
c.objCounters.DecDB()
c.decDB()
c.metrics.Evict(writecache.StorageTypeDB)
storagelog.Write(c.log,
storagelog.AddressField(keys[i]),

View file

@ -83,6 +83,7 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
storagelog.OpField("fstree DELETE"),
)
deleted = true
// counter changed by fstree
c.estimateCacheSize()
}
return metaerr.Wrap(err)

View file

@ -60,6 +60,7 @@ func (c *cache) runFlushLoop(ctx context.Context) {
case <-tt.C:
c.flushSmallObjects(ctx)
tt.Reset(defaultFlushInterval)
c.estimateCacheSize()
fyrchik marked this conversation as resolved Outdated

Scenario

Cache DB contains two object
Goroutine 1: flush and delete object 1, decrements counter, counter value = 1
Goroutine 2: flush and delete object 2, decrements counter, counter value = 0, set metric value to 0
Goroutine 1: set metric value to 1

Now metric will be updated every 1 second in case of empty cache.

Scenario Cache DB contains two object Goroutine 1: flush and delete object 1, decrements counter, counter value = 1 Goroutine 2: flush and delete object 2, decrements counter, counter value = 0, set metric value to 0 Goroutine 1: set metric value to 1 Now metric will be updated every 1 second in case of empty cache.

In you scenario we have metric=1, when it should be 0. What is the scenario for the negative metric?

In you scenario we have metric=1, when it should be 0. What is the scenario for the negative metric?

Described in main comment.

When we flush the same object twice: we can put the same object in channel twice, if worker haven't deleted it yet. c.metrics.Evict is called twice, so metric counter also was decremented twice, because recordDeleted is not checked in c.metrics.Evict: https://git.frostfs.info/TrueCloudLab/frostfs-node/src/branch/master/pkg/local_object_storage/writecache/writecachebbolt/storage.go#L81

Described in main comment. When we flush the same object twice: we can put the same object in channel twice, if worker haven't deleted it yet. `c.metrics.Evict` is called twice, so metric counter also was decremented twice, because `recordDeleted` is not checked in `c.metrics.Evict`: https://git.frostfs.info/TrueCloudLab/frostfs-node/src/branch/master/pkg/local_object_storage/writecache/writecachebbolt/storage.go#L81
case <-ctx.Done():
return
}

View file

@ -131,6 +131,7 @@ func (c *cache) putBig(ctx context.Context, addr string, prm common.PutPrm) erro
storagelog.StorageTypeField(wcStorageType),
storagelog.OpField("fstree PUT"),
)
// counter changed by fstree
c.estimateCacheSize()
return nil

View file

@ -72,5 +72,6 @@ func (c *cache) initCounters() error {
return fmt.Errorf("could not read write-cache DB counter: %w", err)
}
c.objCounters.cDB.Store(inDB)
c.estimateCacheSize()
return nil
}

View file

@ -73,7 +73,7 @@ func (c *cache) deleteFromDB(key string) {
err := c.db.Batch(func(tx *bbolt.Tx) error {
b := tx.Bucket(defaultBucket)
key := []byte(key)
recordDeleted = !recordDeleted && b.Get(key) != nil
fyrchik marked this conversation as resolved
Review

Why this change?

Why this change?
Review

We discussed it in previous PR with metrics, so it was just not deleted.

We discussed it in previous PR with metrics, so it was just not deleted.
recordDeleted = b.Get(key) != nil
return b.Delete(key)
})
@ -122,6 +122,7 @@ func (c *cache) deleteFromDisk(ctx context.Context, keys []string) []string {
storagelog.OpField("fstree DELETE"),
)
c.metrics.Evict(writecache.StorageTypeFSTree)
// counter changed by fstree
c.estimateCacheSize()
}
}

View file

@ -10,16 +10,10 @@ import (
type WriteCacheMetrics interface {
AddMethodDuration(shardID string, method string, success bool, d time.Duration, storageType string)
IncActualCount(shardID string, storageType string)
DecActualCount(shardID string, storageType string)
SetActualCount(shardID string, count uint64, storageType string)
SetEstimateSize(shardID string, size uint64, storageType string)
SetMode(shardID string, mode string)
IncOperationCounter(shardID string, operation string, success NullBool, storageType string)
Close(shardID string)
}
@ -65,20 +59,6 @@ func (m *writeCacheMetrics) AddMethodDuration(shardID string, method string, suc
).Observe(d.Seconds())
}
func (m *writeCacheMetrics) IncActualCount(shardID string, storageType string) {
m.actualCount.With(prometheus.Labels{
shardIDLabel: shardID,
storageLabel: storageType,
}).Inc()
}
func (m *writeCacheMetrics) DecActualCount(shardID string, storageType string) {
m.actualCount.With(prometheus.Labels{
shardIDLabel: shardID,
storageLabel: storageType,
}).Dec()
}
func (m *writeCacheMetrics) SetActualCount(shardID string, count uint64, storageType string) {
m.actualCount.With(prometheus.Labels{
shardIDLabel: shardID,