Fix writecache counters #595

Merged
fyrchik merged 5 commits from dstepanov-yadro/frostfs-node:fix/writecache_bbolt_db_counter into master 2024-09-04 19:51:02 +00:00
6 changed files with 55 additions and 42 deletions
Showing only changes of commit eec97d177e - Show all commits

View file

@ -513,4 +513,5 @@ const (
FrostFSNodeCantUnmarshalObjectFromDB = "can't unmarshal an object from the DB" // Error in ../node/cmd/frostfs-node/morph.go
RuntimeSoftMemoryLimitUpdated = "soft runtime memory limit value updated"
RuntimeSoftMemoryDefinedWithGOMEMLIMIT = "soft runtime memory defined with GOMEMLIMIT environment variable, config value skipped"
FailedToCountWritecacheItems = "failed to count writecache items"
)

View file

@ -107,13 +107,14 @@ func (c *cache) Open(readOnly bool) error {
// thus we need to create a channel here.
c.closeCh = make(chan struct{})
return metaerr.Wrap(c.initCounters())
return metaerr.Wrap(c.setCounters())
}
// Init reports the cache's current mode to the metrics and then starts the
// background work of the cache: the flush loop and the DB counter loop.
// It always returns nil.
func (c *cache) Init() error {
c.metrics.SetMode(c.mode)
c.runFlushLoop()
// Start the periodic counter refresh after the flush loop has been started.
c.runDBCounterLoop()
return nil
}

View file

@ -63,7 +63,6 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
storagelog.OpField("db DELETE"),
)
deleted = true
c.objCounters.DecDB()
return nil
}
@ -75,7 +74,6 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
storagelog.StorageTypeField(wcStorageType),
storagelog.OpField("fstree DELETE"),
)
c.objCounters.DecFS()
deleted = true
}

View file

@ -95,7 +95,6 @@ func (c *cache) putSmall(obj objectInfo) error {
storagelog.StorageTypeField(wcStorageType),
storagelog.OpField("db PUT"),
)
c.objCounters.IncDB()
}
return err
}
@ -117,7 +116,6 @@ func (c *cache) putBig(ctx context.Context, addr string, prm common.PutPrm) erro
c.compressFlags[addr] = struct{}{}
c.mtx.Unlock()
}
c.objCounters.IncFS()
storagelog.Write(c.log,
storagelog.AddressField(addr),
storagelog.StorageTypeField(wcStorageType),

View file

@ -2,10 +2,13 @@ package writecachebbolt
import (
"fmt"
"math"
"sync/atomic"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"go.etcd.io/bbolt"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
func (c *cache) estimateCacheSize() uint64 {
@ -27,54 +30,68 @@ type counters struct {
cDB, cFS atomic.Uint64
}
// IncDB atomically adds one to the DB object counter.
func (x *counters) IncDB() {
x.cDB.Add(1)
}
// DecDB atomically subtracts one from the DB object counter.
//
// The subtraction is written as an unsigned addition of math.MaxUint64,
// which wraps around and is therefore equivalent to adding -1. There is no
// guard at zero: decrementing a zero counter leaves it at math.MaxUint64.
func (x *counters) DecDB() {
	const minusOne uint64 = math.MaxUint64
	x.cDB.Add(minusOne)
}
// DB returns the current value of the DB object counter using an atomic load.
func (x *counters) DB() uint64 {
return x.cDB.Load()
}
// IncFS atomically adds one to the FS object counter.
func (x *counters) IncFS() {
x.cFS.Add(1)
}
// DecFS atomically subtracts one from the FS object counter.
//
// The subtraction is written as an unsigned addition of math.MaxUint64,
// which wraps around and is therefore equivalent to adding -1. There is no
// guard at zero: decrementing a zero counter leaves it at math.MaxUint64.
func (x *counters) DecFS() {
	const minusOne uint64 = math.MaxUint64
	x.cFS.Add(minusOne)
}
// FS returns the current value of the FS object counter using an atomic load.
func (x *counters) FS() uint64 {
return x.cFS.Load()
}
func (c *cache) initCounters() error {
func (c *cache) setCounters() error {
var inDB uint64
err := c.db.View(func(tx *bbolt.Tx) error {
b := tx.Bucket(defaultBucket)
if b != nil {
inDB = uint64(b.Stats().KeyN)
var inFS uint64
var eg errgroup.Group
eg.Go(func() error {
err := c.db.View(func(tx *bbolt.Tx) error {
b := tx.Bucket(defaultBucket)
if b != nil {
inDB = uint64(b.Stats().KeyN)
}
return nil
})
if err != nil {
return fmt.Errorf("could not read write-cache DB counter: %w", err)
}
c.objCounters.cDB.Store(inDB)
return nil
})
if err != nil {
return fmt.Errorf("could not read write-cache DB counter: %w", err)
}
inFS, err := c.fsTree.NumberOfObjects()
if err != nil {
return fmt.Errorf("could not read write-cache FS counter: %w", err)
eg.Go(func() error {
var err error
inFS, err = c.fsTree.NumberOfObjects()
if err != nil {
return fmt.Errorf("could not read write-cache FS counter: %w", err)
}
if inFS > 0 {
inFS-- //small.bolt DB file
}
c.objCounters.cFS.Store(inFS)
return nil
})
if err := eg.Wait(); err != nil {
fyrchik marked this conversation as resolved Outdated

Why do we update counters for db and fstree in different places?

Why do we update counters for db and fstree in different places?

The db counter relates to the db and the fstree counter relates to the fstree, but we update both counters in the cache.Open method.

The db counter relates to the db and the fstree counter relates to the fstree, but _we_ update both counters in the `cache.Open` method.

Ok, I don't mind, but to me it adds cognitive complexity, so at some point something similar to #610 will happen.

Ok, I don't mind, but to me it adds cognitive complexity, so at some point something similar to https://git.frostfs.info/TrueCloudLab/frostfs-node/issues/610 will happen.
return err
}
if inFS > 0 {
inFS-- //small.bolt DB file
}
c.objCounters.cDB.Store(inDB)
c.objCounters.cFS.Store(inFS)
c.metrics.SetActualCounters(inDB, inFS)
return nil
}
// runDBCounterLoop starts a background goroutine that re-reads the cache
// counters via setCounters every 30 seconds.
//
// A failed refresh is only logged as a warning; the loop keeps running.
// The goroutine returns once a receive from c.closeCh succeeds (for example,
// when the channel is closed).
func (c *cache) runDBCounterLoop() {
	const refreshInterval = 30 * time.Second

	// refresh performs one recount and reports a failure without stopping the loop.
	refresh := func() {
		if err := c.setCounters(); err != nil {
			c.log.Warn(logs.FailedToCountWritecacheItems, zap.Error(err))
		}
	}

	go func() {
		ticker := time.NewTicker(refreshInterval)
		defer ticker.Stop()

		for {
			select {
			case <-c.closeCh:
				return
			case <-ticker.C:
				refresh()
			}
		}
	}()
}

View file

@ -69,7 +69,6 @@ func (c *cache) deleteFromDB(key string) {
})
if err == nil {
c.objCounters.DecDB()
c.metrics.Evict(writecache.StorageTypeDB)
storagelog.Write(c.log,
storagelog.AddressField(key),
@ -111,7 +110,6 @@ func (c *cache) deleteFromDisk(ctx context.Context, keys []string) []string {
storagelog.OpField("fstree DELETE"),
)
c.metrics.Evict(writecache.StorageTypeFSTree)
c.objCounters.DecFS()
c.estimateCacheSize()
}
}