From 02f3a7f65c0d649f29db04f5c49703e48621b95b Mon Sep 17 00:00:00 2001 From: Aleksey Savchuk Date: Tue, 18 Feb 2025 10:51:43 +0300 Subject: [PATCH] [#1648] writecache: Fix race condition when reporting cache size metrics There is a race condition when multiple cache operations try to report the cache size metrics simultaneously. Consider the following example: - the initial total size of objects stored in the cache is 2 - worker X deletes an object and reads the cache size, which is 1 - worker Y deletes an object and reads the cache size, which is 0 - worker Y reports the cache size it learnt, which is 0 - worker X reports the cache size it learnt, which is 1 As a result, the observed cache size is 1 (i.e. one object remains in the cache), which is incorrect because the actual cache size is 0. To fix this, let's report the metrics periodically in the flush loop. Signed-off-by: Aleksey Savchuk --- pkg/local_object_storage/writecache/flush.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/local_object_storage/writecache/flush.go b/pkg/local_object_storage/writecache/flush.go index 3f9b36f9de..2d07d8b327 100644 --- a/pkg/local_object_storage/writecache/flush.go +++ b/pkg/local_object_storage/writecache/flush.go @@ -87,6 +87,9 @@ func (c *cache) pushToFlushQueue(ctx context.Context, fl *flushLimiter) { } c.modeMtx.RUnlock() + + // counter changed by fstree + c.estimateCacheSize() case <-ctx.Done(): return }