frostfs-node/pkg/local_object_storage/writecache/put.go
Aleksey Savchuk 8f776b2f41
[#1648] writecache: Fix race condition when reporting cache size metrics
There is a race condition when multiple cache operations try to report
the cache size metrics simultaneously. Consider the following example:
- the initial total size of objects stored in the cache is 2
- worker X deletes an object and reads the cache size, which is 1
- worker Y deletes an object and reads the cache size, which is 0
- worker Y reports the cache size it observed, which is 0
- worker X reports the cache size it observed, which is 1

As a result, the observed cache size is 1 (i.e. one object remains
in the cache), which is incorrect because the actual cache size is 0.
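
A minimal sketch of the hazard (the gauge type, size counter, and
onDelete helper are illustrative, not frostfs-node code): the decrement
is atomic, but reading the new value and publishing it to the gauge are
two separate steps, so the interleaving above is possible.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// gauge stands in for a metrics gauge (e.g. a Prometheus gauge).
type gauge struct{ last atomic.Int64 }

func (g *gauge) Set(v int64) { g.last.Store(v) }

var size atomic.Int64 // number of cached objects

// onDelete removes one object and reports the new size. The decrement is
// atomic, but read-then-report is not: between Add and Set another worker
// may decrement and publish, after which this worker's stale value wins.
func onDelete(g *gauge) {
	observed := size.Add(-1)
	g.Set(observed)
}

func main() {
	size.Store(2)
	var g gauge
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() { defer wg.Done(); onDelete(&g) }()
	}
	wg.Wait()
	// May print "reported size: 1" even though the cache is empty.
	fmt.Println("reported size:", g.last.Load(), "actual size:", size.Load())
}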

To fix this, let's report the metrics periodically in the flush loop;
a sketch of that single-reporter pattern follows below.

Signed-off-by: Aleksey Savchuk <a.savchuk@yadro.com>
2025-02-19 15:07:28 +03:00
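
A sketch of the single-reporter pattern under the same assumptions
(reportSizeLoop, gauge, and size are illustrative names; the actual
reporting lives in the writecache flush loop): all gauge writes are
funneled through one goroutine, so mutators only touch the atomic
counter and no stale read-then-report pair can win.

package sketch

import (
	"context"
	"sync/atomic"
	"time"
)

// gauge stands in for a metrics gauge, as in the previous sketch.
type gauge struct{ last atomic.Int64 }

func (g *gauge) Set(v int64) { g.last.Store(v) }

var size atomic.Int64 // mutated atomically by cache workers

// reportSizeLoop publishes the current size from a single goroutine.
// Mutators never call Set themselves, so there is no read-then-report
// interleaving left to race on.
func reportSizeLoop(ctx context.Context, g *gauge, period time.Duration) {
	t := time.NewTicker(period)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			g.Set(size.Load())
		}
	}
}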


package writecache

import (
	"context"
	"time"

	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/common"
	storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)
// Put puts object to write-cache.
//
// Returns ErrReadOnly if write-cache is in R/O mode.
// Returns ErrNotInitialized if write-cache has not been initialized yet.
// Returns ErrDegraded if write-cache is in degraded mode (no metabase).
// Returns ErrOutOfSpace if saving an object leads to WC's size overflow.
// Returns ErrBigObject if an object exceeds the maximum object size.
func (c *cache) Put(ctx context.Context, prm common.PutPrm) (common.PutRes, error) {
	ctx, span := tracing.StartSpanFromContext(ctx, "writecache.Put",
		trace.WithAttributes(
			attribute.String("address", prm.Address.EncodeToString()),
			attribute.Bool("dont_compress", prm.DontCompress),
		))
	defer span.End()

	startedAt := time.Now()
	added := false
	storageType := StorageTypeUndefined
	defer func() {
		c.metrics.Put(time.Since(startedAt), added, storageType)
	}()
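
	// TryRLock fails while the mode mutex is write-locked, i.e. while
	// initialization or a mode change is in progress, so contention is
	// reported as ErrNotInitialized instead of blocking the caller.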
	if !c.modeMtx.TryRLock() {
		return common.PutRes{}, ErrNotInitialized
	}
	defer c.modeMtx.RUnlock()
	if c.readOnly() {
		return common.PutRes{}, ErrReadOnly
	}
	if c.noMetabase() {
		return common.PutRes{}, ErrDegraded
	}

	sz := uint64(len(prm.RawData))
	if sz > c.maxObjectSize {
		return common.PutRes{}, ErrBigObject
	}

	storageType = StorageTypeFSTree
	err := c.putBig(ctx, prm)
	if err == nil {
		added = true
	}
	return common.PutRes{}, metaerr.Wrap(err)
}

// putBig writes object to FSTree and pushes it to the flush workers queue.
func (c *cache) putBig(ctx context.Context, prm common.PutPrm) error {
	if !c.hasEnoughSpaceFS() {
		return ErrOutOfSpace
	}

	_, err := c.fsTree.Put(ctx, prm)
	if err != nil {
		return err
	}

	storagelog.Write(ctx, c.log,
		storagelog.AddressField(prm.Address.EncodeToString()),
		storagelog.StorageTypeField(wcStorageType),
		storagelog.OpField("fstree PUT"),
	)
	return nil
}
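
For reference, a hedged caller-side sketch (wc, ctx, addr, and data are
assumed to exist in the caller; this is not code from the repository).
It branches on the sentinel errors documented on Put, assuming
metaerr.Wrap preserves errors.Is matching via Unwrap:

	var prm common.PutPrm
	prm.Address = addr
	prm.RawData = data
	if _, err := wc.Put(ctx, prm); err != nil {
		switch {
		case errors.Is(err, writecache.ErrOutOfSpace),
			errors.Is(err, writecache.ErrReadOnly),
			errors.Is(err, writecache.ErrDegraded):
			// Cache cannot accept the object right now; a caller would
			// typically write directly to the main storage instead.
		case errors.Is(err, writecache.ErrBigObject):
			// Object never fits the cache; always bypass it.
		default:
			// I/O or other unexpected failure.
		}
	}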