From 8f776b2f417fb1fc976fcccff03fda8e1c53dd32 Mon Sep 17 00:00:00 2001 From: Aleksey Savchuk Date: Tue, 18 Feb 2025 10:51:43 +0300 Subject: [PATCH] [#1648] writecache: Fix race condition when reporting cache size metrics There is a race condition when multiple cache operation try to report the cache size metrics simultaneously. Consider the following example: - the initial total size of objects stored in the cache size is 2 - worker X deletes an object and reads the cache size, which is 1 - worker Y deletes an object and reads the cache size, which is 0 - worker Y reports the cache size it learnt, which is 0 - worker X reports the cache size it learnt, which is 1 As a result, the observed cache size is 1 (i. e. one object remains in the cache), which is incorrect because the actual cache size is 0. To fix this, let's report the metrics periodically in the flush loop. Signed-off-by: Aleksey Savchuk --- pkg/local_object_storage/writecache/cache.go | 1 - pkg/local_object_storage/writecache/delete.go | 2 -- pkg/local_object_storage/writecache/flush.go | 3 +++ pkg/local_object_storage/writecache/put.go | 2 -- pkg/local_object_storage/writecache/state.go | 4 ---- pkg/local_object_storage/writecache/storage.go | 2 -- 6 files changed, 3 insertions(+), 11 deletions(-) diff --git a/pkg/local_object_storage/writecache/cache.go b/pkg/local_object_storage/writecache/cache.go index b99d73d3a..ac29d5b77 100644 --- a/pkg/local_object_storage/writecache/cache.go +++ b/pkg/local_object_storage/writecache/cache.go @@ -94,7 +94,6 @@ func (c *cache) Open(_ context.Context, mod mode.Mode) error { if err != nil { return metaerr.Wrap(err) } - c.initCounters() return nil } diff --git a/pkg/local_object_storage/writecache/delete.go b/pkg/local_object_storage/writecache/delete.go index 94a0a40db..f3f021875 100644 --- a/pkg/local_object_storage/writecache/delete.go +++ b/pkg/local_object_storage/writecache/delete.go @@ -52,8 +52,6 @@ func (c *cache) Delete(ctx context.Context, addr oid.Address) error { storagelog.OpField("fstree DELETE"), ) deleted = true - // counter changed by fstree - c.estimateCacheSize() } return metaerr.Wrap(err) } diff --git a/pkg/local_object_storage/writecache/flush.go b/pkg/local_object_storage/writecache/flush.go index 3f9b36f9d..2d07d8b32 100644 --- a/pkg/local_object_storage/writecache/flush.go +++ b/pkg/local_object_storage/writecache/flush.go @@ -87,6 +87,9 @@ func (c *cache) pushToFlushQueue(ctx context.Context, fl *flushLimiter) { } c.modeMtx.RUnlock() + + // counter changed by fstree + c.estimateCacheSize() case <-ctx.Done(): return } diff --git a/pkg/local_object_storage/writecache/put.go b/pkg/local_object_storage/writecache/put.go index 7da5c4d3a..a935e451d 100644 --- a/pkg/local_object_storage/writecache/put.go +++ b/pkg/local_object_storage/writecache/put.go @@ -73,8 +73,6 @@ func (c *cache) putBig(ctx context.Context, prm common.PutPrm) error { storagelog.StorageTypeField(wcStorageType), storagelog.OpField("fstree PUT"), ) - // counter changed by fstree - c.estimateCacheSize() return nil } diff --git a/pkg/local_object_storage/writecache/state.go b/pkg/local_object_storage/writecache/state.go index 44caa2603..586b3cc0d 100644 --- a/pkg/local_object_storage/writecache/state.go +++ b/pkg/local_object_storage/writecache/state.go @@ -18,7 +18,3 @@ func (c *cache) hasEnoughSpace(objectSize uint64) bool { } return c.maxCacheSize >= size+objectSize } - -func (c *cache) initCounters() { - c.estimateCacheSize() -} diff --git a/pkg/local_object_storage/writecache/storage.go b/pkg/local_object_storage/writecache/storage.go index e88566cdf..16e086882 100644 --- a/pkg/local_object_storage/writecache/storage.go +++ b/pkg/local_object_storage/writecache/storage.go @@ -51,7 +51,5 @@ func (c *cache) deleteFromDisk(ctx context.Context, addr oid.Address, size uint6 storagelog.OpField("fstree DELETE"), ) c.metrics.Evict(StorageTypeFSTree) - // counter changed by fstree - c.estimateCacheSize() } }