frostfs-node/pkg/local_object_storage/writecache/delete.go
Aleksey Savchuk 8f776b2f41
All checks were successful
DCO action / DCO (pull_request) Successful in 38s
Vulncheck / Vulncheck (pull_request) Successful in 55s
Build / Build Components (pull_request) Successful in 1m29s
Pre-commit hooks / Pre-commit (pull_request) Successful in 1m30s
Tests and linters / gopls check (pull_request) Successful in 2m24s
Tests and linters / Run gofumpt (pull_request) Successful in 2m47s
Tests and linters / Tests (pull_request) Successful in 3m1s
Tests and linters / Staticcheck (pull_request) Successful in 3m10s
Tests and linters / Lint (pull_request) Successful in 3m18s
Tests and linters / Tests with -race (pull_request) Successful in 4m0s
[#1648] writecache: Fix race condition when reporting cache size metrics
There is a race condition when multiple cache operations try to report
the cache size metrics simultaneously. Consider the following example:
- the initial total size of objects stored in the cache is 2
- worker X deletes an object and reads the cache size, which is 1
- worker Y deletes an object and reads the cache size, which is 0
- worker Y reports the cache size it learnt, which is 0
- worker X reports the cache size it learnt, which is 1

As a result, the observed cache size is 1 (i. e. one object remains
in the cache), which is incorrect because the actual cache size is 0.

To fix this, let's report the metrics periodically in the flush loop.

Signed-off-by: Aleksey Savchuk <a.savchuk@yadro.com>
2025-02-19 15:07:28 +03:00

57 lines
1.7 KiB
Go

package writecache
import (
"context"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/common"
storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)
// Delete removes the object stored under addr from the write-cache.
//
// Returns ErrNotInitialized if the mode lock cannot be read-locked without
// blocking (documented contract: write-cache has not been initialized yet).
// Returns ErrReadOnly if the write-cache is read-only.
// Returns ErrDegraded if the write-cache works without a metabase (DEGRADED mode).
// Any error of the underlying FSTree removal is passed through metaerr.Wrap;
// for an object missing in the write-cache this is expected to be an error of
// type apistatus.ObjectNotFound.
//
// Every call is reported to the metrics with its duration, whether the object
// was actually removed and the storage type that was reached.
func (c *cache) Delete(ctx context.Context, addr oid.Address) error {
	spanAttrs := trace.WithAttributes(attribute.String("address", addr.EncodeToString()))
	ctx, span := tracing.StartSpanFromContext(ctx, "writecache.Delete", spanAttrs)
	defer span.End()

	var (
		removed bool
		stType  = StorageTypeUndefined
	)
	// The argument of a deferred call is evaluated at the defer statement,
	// so the measured interval starts here; removed and stType are captured
	// by reference and read only when the function returns.
	defer func(begin time.Time) {
		c.metrics.Delete(time.Since(begin), removed, stType)
	}(time.Now())

	// Never wait for the mode lock: bail out if it is not available right now.
	if !c.modeMtx.TryRLock() {
		return ErrNotInitialized
	}
	defer c.modeMtx.RUnlock()

	switch {
	case c.readOnly():
		return ErrReadOnly
	case c.noMetabase():
		return ErrDegraded
	}

	// From this point on the operation is attributed to the FSTree storage.
	stType = StorageTypeFSTree
	prm := common.DeletePrm{Address: addr}
	_, err := c.fsTree.Delete(ctx, prm)
	if err == nil {
		storagelog.Write(ctx, c.log,
			storagelog.AddressField(addr.EncodeToString()),
			storagelog.StorageTypeField(wcStorageType),
			storagelog.OpField("fstree DELETE"),
		)
		removed = true
	}
	return metaerr.Wrap(err)
}