engine: Optimize Inhume operation to improve speed with large object sets #1476

Merged
fyrchik merged 3 commits from a-savchuk/frostfs-node:boost-inhume-speed into master 2024-12-04 07:37:17 +00:00
2 changed files with 170 additions and 74 deletions
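In short, the change replaces the old per-address flow, where each address was lock-checked and inhumed through its own scan over all shards, with a two-phase flow: group all addresses by the shard(s) holding them, then issue one batched Inhume call per shard. A minimal standalone sketch of the grouping idea (the types and the hash-based shard pick are illustrative stand-ins, not the engine's actual sorted-shard selection):

package main

import (
	"fmt"
	"hash/fnv"
)

// shardID picks a shard for an address; a stand-in for the engine's
// real shard lookup, used here only to make the example self-contained.
func shardID(addr string, numShards int) int {
	h := fnv.New32a()
	h.Write([]byte(addr))
	return int(h.Sum32()) % numShards
}

// groupByShard buckets addresses by shard so that each shard receives
// a single batched call instead of one call per address.
func groupByShard(addrs []string, numShards int) map[int][]string {
	groups := make(map[int][]string)
	for _, addr := range addrs {
		id := shardID(addr, numShards)
		groups[id] = append(groups[id], addr)
	}
	return groups
}

func main() {
	addrs := []string{"obj-1", "obj-2", "obj-3", "obj-4"}
	for id, group := range groupByShard(addrs, 2) {
		// One batched call per shard replaces len(group) separate calls.
		fmt.Printf("shard %d: inhume %v\n", id, group)
	}
}

In the PR itself the same idea is carried by `groupObjectsByShard`/`findShards` below: the per-address existence and lock checks still walk the sorted shards, but the metabase writes are batched into one `Inhume` call per shard.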

View file

@@ -81,110 +81,146 @@ func (e *StorageEngine) Inhume(ctx context.Context, prm InhumePrm) (res InhumeRes
 }
 
 func (e *StorageEngine) inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
+	addrsPerShard, err := e.groupObjectsByShard(ctx, prm.addrs, !prm.forceRemoval)
+	if err != nil {
+		return InhumeRes{}, err
+	}
+
 	var shPrm shard.InhumePrm
 	if prm.forceRemoval {
 		shPrm.ForceRemoval()
 	}
 
-	for i := range prm.addrs {
-		if !prm.forceRemoval {
-			locked, err := e.IsLocked(ctx, prm.addrs[i])
-			if err != nil {
-				e.log.Warn(ctx, logs.EngineRemovingAnObjectWithoutFullLockingCheck,
-					zap.Error(err),
-					zap.Stringer("addr", prm.addrs[i]),
-					zap.String("trace_id", tracingPkg.GetTraceID(ctx)))
-			} else if locked {
-				return InhumeRes{}, new(apistatus.ObjectLocked)
-			}
-		}
-
+	var errLocked *apistatus.ObjectLocked
+	for shardID, addrs := range addrsPerShard {
 		if prm.tombstone != nil {
-			shPrm.SetTarget(*prm.tombstone, prm.addrs[i])
+			shPrm.SetTarget(*prm.tombstone, addrs...)
 		} else {
-			shPrm.MarkAsGarbage(prm.addrs[i])
+			shPrm.MarkAsGarbage(addrs...)
 		}
 
-		ok, err := e.inhumeAddr(ctx, prm.addrs[i], shPrm, true)
-		if err != nil {
-			return InhumeRes{}, err
+		sh, exists := e.shards[shardID]
+		if !exists {
+			e.log.Warn(ctx, logs.EngineCouldNotInhumeObjectInShard,
+				zap.Error(errors.New("this shard was expected to exist")),
+				zap.String("shard_id", shardID),
+				zap.String("trace_id", tracingPkg.GetTraceID(ctx)),
+			)
+			return InhumeRes{}, errInhumeFailure
 		}
-		if !ok {
-			ok, err := e.inhumeAddr(ctx, prm.addrs[i], shPrm, false)
-			if err != nil {
-				return InhumeRes{}, err
-			} else if !ok {
-				return InhumeRes{}, errInhumeFailure
-			}
+
+		if _, err := sh.Inhume(ctx, shPrm); err != nil {
+			switch {
+			case errors.As(err, &errLocked):
+			case errors.Is(err, shard.ErrLockObjectRemoval):
+			case errors.Is(err, shard.ErrReadOnlyMode):
+			case errors.Is(err, shard.ErrDegradedMode):
+			default:
+				e.reportShardError(ctx, sh, "couldn't inhume object in shard", err)
+			}
+			return InhumeRes{}, err
 		}
 	}
 
 	return InhumeRes{}, nil
 }
 
-// Returns ok if object was inhumed during this invocation or before.
-func (e *StorageEngine) inhumeAddr(ctx context.Context, addr oid.Address, prm shard.InhumePrm, checkExists bool) (bool, error) {
-	root := false
-	var existPrm shard.ExistsPrm
-	var retErr error
-	var ok bool
-
+// groupObjectsByShard groups objects based on the shard(s) they are stored on.
+//
+// If checkLocked is set, [apistatus.ObjectLocked] will be returned if any of
+// the objects are locked.
+func (e *StorageEngine) groupObjectsByShard(ctx context.Context, addrs []oid.Address, checkLocked bool) (map[string][]oid.Address, error) {
+	groups := make(map[string][]oid.Address)
+
+	for _, addr := range addrs {
+		ids, err := e.findShards(ctx, addr, checkLocked)
+		if err != nil {
+			return nil, err
+		}
+		for _, id := range ids {
+			groups[id] = append(groups[id], addr)
+		}
+	}
+
+	return groups, nil
+}
+
+// findShards determines the shard(s) where the object is stored.
+//
+// If the object is a root object, multiple shards will be returned.
+//
+// If checkLocked is set, [apistatus.ObjectLocked] will be returned if any of
+// the objects are locked.
+func (e *StorageEngine) findShards(ctx context.Context, addr oid.Address, checkLocked bool) ([]string, error) {
+	var (
+		ids    []string
+		retErr error
+
+		prm shard.ExistsPrm
+
+		siErr *objectSDK.SplitInfoError
+		ecErr *objectSDK.ECInfoError
+
+		isRootObject bool
+		objectExists bool
+	)
+
 	e.iterateOverSortedShards(addr, func(_ int, sh hashedShard) (stop bool) {
Review

`stop` is no longer needed.

Review

Sure, but it makes the function's signature more explicit.
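For readers outside the thread: the point is that a named boolean result documents the callback's contract at the signature even when every return statement is explicit. A tiny standalone sketch (the iterator and its types are illustrative, not the engine's):

package main

import "fmt"

// iterate calls fn for each item and stops early once fn reports stop = true.
// Naming the result makes the boolean's meaning visible in the signature.
func iterate(items []int, fn func(item int) (stop bool)) {
	for _, it := range items {
		if fn(it) {
			return
		}
	}
}

func main() {
	iterate([]int{1, 2, 3, 4}, func(item int) (stop bool) {
		fmt.Println(item)
		return item == 3 // explicit return; the name still documents intent
	})
}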
-		defer func() {
-			// if object is root we continue since information about it
-			// can be presented in other shards
-			if checkExists && root {
-				stop = false
-			}
-		}()
-
-		if checkExists {
-			existPrm.Address = addr
-			exRes, err := sh.Exists(ctx, existPrm)
-			if err != nil {
-				if client.IsErrObjectAlreadyRemoved(err) || shard.IsErrObjectExpired(err) {
-					// inhumed once - no need to be inhumed again
-					ok = true
-					return true
-				}
-
-				var siErr *objectSDK.SplitInfoError
-				var ecErr *objectSDK.ECInfoError
-				if !(errors.As(err, &siErr) || errors.As(err, &ecErr)) {
-					e.reportShardError(ctx, sh, "could not check for presents in shard", err, zap.Stringer("address", addr))
-					return
-				}
-
-				root = true
-			} else if !exRes.Exists() {
-				return
-			}
-		}
+		objectExists = false
 
-		_, err := sh.Inhume(ctx, prm)
-		if err != nil {
-			var errLocked *apistatus.ObjectLocked
-			switch {
-			case errors.As(err, &errLocked):
-				retErr = new(apistatus.ObjectLocked)
-				return true
-			case errors.Is(err, shard.ErrLockObjectRemoval):
-				retErr = meta.ErrLockObjectRemoval
-				return true
-			case errors.Is(err, shard.ErrReadOnlyMode) || errors.Is(err, shard.ErrDegradedMode):
-				retErr = err
-				return true
-			}
-
-			e.reportShardError(ctx, sh, "could not inhume object in shard", err, zap.Stringer("address", addr))
-			return false
-		}
-
-		ok = true
-		return true
+		prm.Address = addr
+		switch res, err := sh.Exists(ctx, prm); {
+		case client.IsErrObjectAlreadyRemoved(err) || shard.IsErrObjectExpired(err):
+			// NOTE(@a-savchuk): there were some considerations that we can stop
+			// immediately if the object is already removed or expired. However,
+			// the previous method behavior was:
+			// - keep iterating if it's a root object and already removed,
+			// - stop iterating if it's not a root object and removed.
+			//
+			// Since my task was only improving method speed, let's keep the
+			// previous method behavior. Continue if it's a root object.
+			return !isRootObject
+		case errors.As(err, &siErr) || errors.As(err, &ecErr):
+			isRootObject = true
+			objectExists = true
+		case err != nil:
+			e.reportShardError(
+				ctx, sh, "couldn't check for presence in shard",
+				err, zap.Stringer("address", addr),
+			)
+		case res.Exists():
+			objectExists = true
+		default:
+		}
+
+		if !objectExists {
+			return
+		}
+
+		if checkLocked {
+			if isLocked, err := sh.IsLocked(ctx, addr); err != nil {
+				e.log.Warn(ctx, logs.EngineRemovingAnObjectWithoutFullLockingCheck,
+					zap.Error(err),
+					zap.Stringer("address", addr),
+					zap.String("trace_id", tracingPkg.GetTraceID(ctx)),
+				)
+			} else if isLocked {
+				retErr = new(apistatus.ObjectLocked)
+				return true
+			}
+		}
+
+		ids = append(ids, sh.ID().String())
+
+		// Continue if it's a root object.
+		return !isRootObject
 	})
 
-	return ok, retErr
+	if retErr != nil {
+		return nil, retErr
+	}
+	return ids, nil
 }
 
 // IsLocked checks whether an object is locked according to StorageEngine's state.
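The error handling in the new `inhume` loop relies on Go's standard split between typed errors, matched (and extracted) with `errors.As`, and sentinel errors, matched by identity with `errors.Is`. A self-contained illustration of the pattern, with stand-in error types rather than the shard package's own:

package main

import (
	"errors"
	"fmt"
)

// Stand-in sentinel error, matched by identity with errors.Is.
var errReadOnly = errors.New("shard is in read-only mode")

// Stand-in typed error, matched and unwrapped with errors.As.
type objectLockedError struct{ addr string }

func (e *objectLockedError) Error() string { return "object " + e.addr + " is locked" }

func classify(err error) string {
	var locked *objectLockedError
	switch {
	case errors.As(err, &locked):
		return "locked: " + locked.addr // the typed error carries data
	case errors.Is(err, errReadOnly):
		return "read-only" // the sentinel carries identity only
	default:
		return "unexpected: " + err.Error()
	}
}

func main() {
	fmt.Println(classify(&objectLockedError{addr: "obj-1"}))
	fmt.Println(classify(fmt.Errorf("inhume: %w", errReadOnly))) // wrapping still matches
	fmt.Println(classify(errors.New("disk failure")))
}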

View file

@@ -2,6 +2,7 @@ package engine
 import (
 	"context"
+	"fmt"
 	"strconv"
 	"testing"
@@ -12,8 +13,11 @@ import (
 	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/api/object"
 	cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
+	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
 	oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
+	objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test"
 	"github.com/stretchr/testify/require"
+	"golang.org/x/sync/errgroup"
 )
 
 func TestStorageEngine_Inhume(t *testing.T) {
@@ -137,3 +141,59 @@ func TestInhumeExpiredRegularObject(t *testing.T) {
 		require.NoError(t, err)
 	})
 }
+
+func BenchmarkInhumeMultipart(b *testing.B) {
+	// The benchmark result insignificantly depends on the number of shards,
+	// so do not use it as a benchmark parameter, just set it big enough.
+	numShards := 100
+
+	for numObjects := 1; numObjects <= 10000; numObjects *= 10 {
+		b.Run(
+			fmt.Sprintf("objects=%d", numObjects),
+			func(b *testing.B) {
+				benchmarkInhumeMultipart(b, numShards, numObjects)
+			},
+		)
+	}
+}
+
+func benchmarkInhumeMultipart(b *testing.B, numShards, numObjects int) {
+	b.StopTimer()
+
+	engine := testNewEngine(b, WithShardPoolSize(uint32(numObjects))).
+		setShardsNum(b, numShards).prepare(b).engine
+	defer func() { require.NoError(b, engine.Close(context.Background())) }()
+
+	cnt := cidtest.ID()
+	eg := errgroup.Group{}
+
+	for range b.N {
+		addrs := make([]oid.Address, numObjects)
+
+		for i := range numObjects {
+			prm := PutPrm{}
+
+			prm.Object = objecttest.Object().Parent()
+			prm.Object.SetContainerID(cnt)
+			prm.Object.SetType(objectSDK.TypeRegular)
+
+			addrs[i] = object.AddressOf(prm.Object)
+
+			eg.Go(func() error {
+				return engine.Put(context.Background(), prm)
+			})
+		}
+		require.NoError(b, eg.Wait())
+
+		ts := oidtest.Address()
+		ts.SetContainer(cnt)
+
+		prm := InhumePrm{}
+		prm.WithTarget(ts, addrs...)
+
+		b.StartTimer()
+		_, err := engine.Inhume(context.Background(), prm)
+		require.NoError(b, err)
+		b.StopTimer()
+	}
+}
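To reproduce the measurements, the benchmark can be run on its own; `-run '^$'` skips the regular tests and `-benchmem` adds allocation stats (the flag choice is a suggestion, not part of the PR):

go test -run '^$' -bench 'BenchmarkInhumeMultipart' -benchmem ./...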