writecache: Fix metric values #759

Merged
dstepanov-yadro merged 1 commit from dstepanov-yadro/frostfs-node:fix/wc_counter into master 2023-10-30 13:05:01 +00:00
11 changed files with 19 additions and 31 deletions
Showing only changes of commit d4b6ebe7e7

View file

@@ -68,16 +68,10 @@ func (m *writeCacheMetrics) Get(d time.Duration, success bool, st writecache.StorageType) {

 func (m *writeCacheMetrics) Delete(d time.Duration, success bool, st writecache.StorageType) {
 	m.metrics.AddMethodDuration(m.shardID, "Delete", success, d, st.String())
-	if success {
-		m.metrics.DecActualCount(m.shardID, st.String())
-	}
 }

 func (m *writeCacheMetrics) Put(d time.Duration, success bool, st writecache.StorageType) {
 	m.metrics.AddMethodDuration(m.shardID, "Put", success, d, st.String())
-	if success {
-		m.metrics.IncActualCount(m.shardID, st.String())
-	}
 }

 func (m *writeCacheMetrics) SetEstimateSize(db, fstree uint64) {

@@ -99,7 +93,6 @@ func (m *writeCacheMetrics) Flush(success bool, st writecache.StorageType) {
 }

 func (m *writeCacheMetrics) Evict(st writecache.StorageType) {
-	m.metrics.DecActualCount(m.shardID, st.String())
 	m.metrics.IncOperationCounter(m.shardID, "Evict", metrics.NullBool{}, st.String())
 }

View file

@@ -59,7 +59,7 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
 			storagelog.OpField("db DELETE"),
 		)
 		deleted = true
-		c.objCounters.DecDB()
+		c.decDB()
 	}
 	return err
 }

View file

@@ -76,7 +76,7 @@ func (c *cache) put(obj objectInfo) error {
 			storagelog.StorageTypeField(wcStorageType),
 			storagelog.OpField("db PUT"),
 		)
-		c.objCounters.IncDB()
+		c.incDB()
 	}
 	return err
 }

View file

@@ -55,3 +55,13 @@ func (c *cache) initCounters() error {
 	return nil
 }
+
+func (c *cache) incDB() {
+	c.objCounters.IncDB()
+	c.metrics.SetActualCounters(c.objCounters.DB(), 0)
+}
+
+func (c *cache) decDB() {
+	c.objCounters.DecDB()
+	c.metrics.SetActualCounters(c.objCounters.DB(), 0)
+}
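These helpers replace the per-operation Inc/Dec calls: every counter change goes through the authoritative objCounters value and is then published as an absolute number. The shard-level adapter behind c.metrics is not part of this hunk; the following is only a sketch of how its SetActualCounters method could map onto the SetActualCount metric kept at the end of this PR. The body and the (db, fstree) parameter order are assumptions; only the called names come from this diff.

// Sketch only: assumed shard-level adapter method, not code from this PR.
func (m *writeCacheMetrics) SetActualCounters(db, fstree uint64) {
	// Publish absolute values per storage type instead of Inc/Dec deltas,
	// so the gauge cannot drift away from the real object counters.
	m.metrics.SetActualCount(m.shardID, db, writecache.StorageTypeDB.String())
	m.metrics.SetActualCount(m.shardID, fstree, writecache.StorageTypeFSTree.String())
}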

View file

@@ -73,7 +73,7 @@ func (c *cache) deleteFromDB(keys []internalKey) []internalKey {
 	}
 	for i := 0; i < errorIndex; i++ {
-		c.objCounters.DecDB()
+		c.decDB()
 		c.metrics.Evict(writecache.StorageTypeDB)
 		storagelog.Write(c.log,
 			storagelog.AddressField(keys[i]),

View file

@@ -83,6 +83,7 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
 			storagelog.OpField("fstree DELETE"),
 		)
 		deleted = true
+		// counter changed by fstree
 		c.estimateCacheSize()
 	}
 	return metaerr.Wrap(err)

View file

@@ -60,6 +60,7 @@ func (c *cache) runFlushLoop(ctx context.Context) {
 		case <-tt.C:
 			c.flushSmallObjects(ctx)
 			tt.Reset(defaultFlushInterval)
+			c.estimateCacheSize()
fyrchik marked this conversation as resolved (Outdated)

Scenario:

The cache DB contains two objects.
Goroutine 1: flushes and deletes object 1, decrements the counter; counter value = 1.
Goroutine 2: flushes and deletes object 2, decrements the counter; counter value = 0, sets the metric value to 0.
Goroutine 1: sets the metric value to 1.

With this change the metric is also refreshed every second, even when the cache is empty. (A minimal sketch of this interleaving follows the diff below.)

In your scenario we end up with metric = 1 when it should be 0. What is the scenario for the negative metric?

Described in the main comment. When we flush the same object twice: we can put the same object into the channel twice if a worker has not deleted it yet. c.metrics.Evict is then called twice, so the metric counter was also decremented twice, because recordDeleted is not checked before calling c.metrics.Evict: https://git.frostfs.info/TrueCloudLab/frostfs-node/src/branch/master/pkg/local_object_storage/writecache/writecachebbolt/storage.go#L81
 		case <-ctx.Done():
 			return
 		}
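To make the interleaving from the thread above concrete, here is a small, self-contained sketch (illustrative names only, not the actual frostfs-node types). Even when the gauge is always written from the authoritative counter, an older snapshot can be published last; the estimateCacheSize() call added to the flush tick is what lets the stale value converge.

// Sketch only: reproduces the review-thread race with illustrative types.
package main

import (
	"fmt"
	"sync/atomic"
)

type cacheSketch struct {
	objCount atomic.Int64 // authoritative counter (like objCounters)
	gauge    atomic.Int64 // exported metric value (like the actualCount gauge)
}

// decDB mirrors the new helper: change the counter, then publish its value.
// Two goroutines may still publish out of order, so the gauge can be briefly stale.
func (c *cacheSketch) decDB() {
	c.objCount.Add(-1)
	c.gauge.Store(c.objCount.Load())
}

// estimateCacheSize mirrors the call added to the flush-loop tick: it re-reads
// the authoritative counter, so any stale gauge value converges within one tick.
func (c *cacheSketch) estimateCacheSize() {
	c.gauge.Store(c.objCount.Load())
}

func main() {
	c := &cacheSketch{}
	c.objCount.Store(2)

	// Worst-case interleaving from the review thread: both deletes happen,
	// but the older snapshot (1) is published last.
	c.objCount.Add(-1)              // goroutine 1 decrements: counter = 1
	snapshotG1 := c.objCount.Load() // goroutine 1 reads 1, then is preempted
	c.decDB()                       // goroutine 2 decrements and publishes 0
	c.gauge.Store(snapshotG1)       // goroutine 1 finally publishes its stale 1

	fmt.Println("gauge right after the race:", c.gauge.Load())    // 1, should be 0
	c.estimateCacheSize()                                          // next flush tick
	fmt.Println("gauge after periodic refresh:", c.gauge.Load())  // 0
}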

View file

@@ -131,6 +131,7 @@ func (c *cache) putBig(ctx context.Context, addr string, prm common.PutPrm) error {
 		storagelog.StorageTypeField(wcStorageType),
 		storagelog.OpField("fstree PUT"),
 	)
+	// counter changed by fstree
 	c.estimateCacheSize()
 	return nil

View file

@@ -72,5 +72,6 @@ func (c *cache) initCounters() error {
 		return fmt.Errorf("could not read write-cache DB counter: %w", err)
 	}
 	c.objCounters.cDB.Store(inDB)
+	c.estimateCacheSize()
 	return nil
 }

View file

@@ -73,7 +73,7 @@ func (c *cache) deleteFromDB(key string) {
 	err := c.db.Batch(func(tx *bbolt.Tx) error {
 		b := tx.Bucket(defaultBucket)
 		key := []byte(key)
-		recordDeleted = !recordDeleted && b.Get(key) != nil
+		recordDeleted = b.Get(key) != nil
fyrchik marked this conversation as resolved

Why this change?

We discussed it in the previous PR with metrics; this expression was simply never removed back then. (A short note on deriving this flag inside db.Batch follows this file's diff below.)
 		return b.Delete(key)
 	})

@@ -122,6 +122,7 @@ func (c *cache) deleteFromDisk(ctx context.Context, keys []string) []string {
 			storagelog.OpField("fstree DELETE"),
 		)
 		c.metrics.Evict(writecache.StorageTypeFSTree)
+		// counter changed by fstree
 		c.estimateCacheSize()
 	}
 }
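Additional context on the simplified assignment discussed above: bbolt's DB.Batch may invoke the supplied function more than once when a batch is retried, so a closure-captured flag like recordDeleted is safer when derived from the current bucket contents on every call rather than folded into its previous value. A minimal, self-contained sketch of that pattern, with a hypothetical bucket and key (not the PR's code):

// Sketch only: recompute closure-captured state on every Batch attempt.
package main

import (
	"fmt"
	"log"

	bolt "go.etcd.io/bbolt"
)

func main() {
	db, err := bolt.Open("example.db", 0o600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	bucketName := []byte("default")
	key := []byte("object-1")

	// Seed a record so the delete below has something to remove.
	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(bucketName)
		if err != nil {
			return err
		}
		return b.Put(key, []byte("payload"))
	}); err != nil {
		log.Fatal(err)
	}

	var recordDeleted bool
	err = db.Batch(func(tx *bolt.Tx) error {
		b := tx.Bucket(bucketName)
		// Batch may call this function more than once, so derive the flag
		// from the current bucket contents only, as in the simplified line above.
		recordDeleted = b.Get(key) != nil
		return b.Delete(key)
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("record existed and was deleted:", recordDeleted)
}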

View file

@@ -10,16 +10,10 @@ import (

 type WriteCacheMetrics interface {
 	AddMethodDuration(shardID string, method string, success bool, d time.Duration, storageType string)
-	IncActualCount(shardID string, storageType string)
-	DecActualCount(shardID string, storageType string)
 	SetActualCount(shardID string, count uint64, storageType string)
 	SetEstimateSize(shardID string, size uint64, storageType string)
 	SetMode(shardID string, mode string)
 	IncOperationCounter(shardID string, operation string, success NullBool, storageType string)
 	Close(shardID string)
 }

@@ -65,20 +59,6 @@ func (m *writeCacheMetrics) AddMethodDuration(shardID string, method string, success bool, d time.Duration, storageType string) {
 	).Observe(d.Seconds())
 }

-func (m *writeCacheMetrics) IncActualCount(shardID string, storageType string) {
-	m.actualCount.With(prometheus.Labels{
-		shardIDLabel: shardID,
-		storageLabel: storageType,
-	}).Inc()
-}
-
-func (m *writeCacheMetrics) DecActualCount(shardID string, storageType string) {
-	m.actualCount.With(prometheus.Labels{
-		shardIDLabel: shardID,
-		storageLabel: storageType,
-	}).Dec()
-}
-
 func (m *writeCacheMetrics) SetActualCount(shardID string, count uint64, storageType string) {
 	m.actualCount.With(prometheus.Labels{
 		shardIDLabel: shardID,