frostfs-node/pkg/local_object_storage/engine/shards.go
2024-01-23 11:16:40 +03:00

345 lines
8.1 KiB
Go

package engine
import (
"context"
"fmt"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"git.frostfs.info/TrueCloudLab/hrw"
"github.com/google/uuid"
"github.com/panjf2000/ants/v2"
"go.uber.org/zap"
)
var errShardNotFound = logicerr.New("shard not found")
type hashedShard struct {
shardWrapper
hash uint64
}
type metricsWithID struct {
id string
mw MetricRegister
}
func (m *metricsWithID) SetShardID(id string) {
// concurrent settings are not expected =>
// no mutex protection
m.id = id
}
func (m *metricsWithID) SetObjectCounter(objectType string, v uint64) {
m.mw.SetObjectCounter(m.id, objectType, v)
}
func (m *metricsWithID) AddToObjectCounter(objectType string, delta int) {
m.mw.AddToObjectCounter(m.id, objectType, delta)
}
func (m *metricsWithID) IncObjectCounter(objectType string) {
m.mw.AddToObjectCounter(m.id, objectType, +1)
}
func (m *metricsWithID) SetMode(mode mode.Mode) {
m.mw.SetMode(m.id, mode)
}
func (m *metricsWithID) AddToContainerSize(cnr string, size int64) {
m.mw.AddToContainerSize(cnr, size)
}
func (m *metricsWithID) AddToPayloadSize(size int64) {
m.mw.AddToPayloadCounter(m.id, size)
}
func (m *metricsWithID) IncErrorCounter() {
m.mw.IncErrorCounter(m.id)
}
func (m *metricsWithID) ClearErrorCounter() {
m.mw.ClearErrorCounter(m.id)
}
func (m *metricsWithID) DeleteShardMetrics() {
m.mw.DeleteShardMetrics(m.id)
}
func (m *metricsWithID) SetContainerObjectsCount(cnrID string, objectType string, value uint64) {
m.mw.SetContainerObjectCounter(m.id, cnrID, objectType, value)
}
func (m *metricsWithID) IncContainerObjectsCount(cnrID string, objectType string) {
m.mw.IncContainerObjectCounter(m.id, cnrID, objectType)
}
func (m *metricsWithID) SubContainerObjectsCount(cnrID string, objectType string, value uint64) {
m.mw.SubContainerObjectCounter(m.id, cnrID, objectType, value)
}
// AddShard adds a new shard to the storage engine.
//
// Returns any error encountered that did not allow adding a shard.
// Otherwise returns the ID of the added shard.
func (e *StorageEngine) AddShard(ctx context.Context, opts ...shard.Option) (*shard.ID, error) {
sh, err := e.createShard(ctx, opts)
if err != nil {
return nil, fmt.Errorf("could not create a shard: %w", err)
}
err = e.addShard(sh)
if err != nil {
return nil, fmt.Errorf("could not add %s shard: %w", sh.ID().String(), err)
}
if e.cfg.metrics != nil {
e.cfg.metrics.SetMode(sh.ID().String(), sh.GetMode())
}
return sh.ID(), nil
}
func (e *StorageEngine) createShard(ctx context.Context, opts []shard.Option) (*shard.Shard, error) {
id, err := generateShardID()
if err != nil {
return nil, fmt.Errorf("could not generate shard ID: %w", err)
}
opts = e.appendMetrics(id, opts)
sh := shard.New(append(opts,
shard.WithID(id),
shard.WithExpiredTombstonesCallback(e.processExpiredTombstones),
shard.WithExpiredLocksCallback(e.processExpiredLocks),
shard.WithDeletedLockCallback(e.processDeletedLocks),
shard.WithReportErrorFunc(e.reportShardErrorBackground),
shard.WithRebuildWorkerLimiter(e.rebuildLimiter),
shard.WithZeroSizeCallback(e.processZeroSizeContainers),
shard.WithZeroCountCallback(e.processZeroCountContainers),
)...)
if err := sh.UpdateID(ctx); err != nil {
e.log.Warn(logs.FailedToUpdateShardID, zap.Stringer("shard_id", sh.ID()), zap.String("metabase_path", sh.DumpInfo().MetaBaseInfo.Path), zap.Error(err))
}
return sh, nil
}
func (e *StorageEngine) appendMetrics(id *shard.ID, opts []shard.Option) []shard.Option {
e.mtx.RLock()
defer e.mtx.RUnlock()
if e.metrics != nil {
opts = append(opts,
shard.WithMetricsWriter(
&metricsWithID{
id: id.String(),
mw: e.metrics,
},
),
shard.WithWriteCacheMetrics(
&writeCacheMetrics{
shardID: id.String(),
metrics: e.metrics.WriteCache(),
},
),
shard.WithGCMetrics(
&gcMetrics{
storage: e.metrics.GC(),
shardID: id.String(),
},
),
)
}
return opts
}
func (e *StorageEngine) addShard(sh *shard.Shard) error {
e.mtx.Lock()
defer e.mtx.Unlock()
pool, err := ants.NewPool(int(e.shardPoolSize), ants.WithNonblocking(true))
if err != nil {
return fmt.Errorf("could not create pool: %w", err)
}
strID := sh.ID().String()
if _, ok := e.shards[strID]; ok {
return fmt.Errorf("shard with id %s was already added", strID)
}
e.shards[strID] = hashedShard{
shardWrapper: shardWrapper{
errorCount: new(atomic.Uint32),
Shard: sh,
},
hash: hrw.StringHash(strID),
}
e.shardPools[strID] = pool
return nil
}
// removeShards removes specified shards. Skips non-existent shards.
// Logs errors about shards that it could not Close after the removal.
func (e *StorageEngine) removeShards(ids ...string) {
if len(ids) == 0 {
return
}
ss := make([]hashedShard, 0, len(ids))
e.mtx.Lock()
for _, id := range ids {
sh, found := e.shards[id]
if !found {
continue
}
sh.DeleteShardMetrics()
ss = append(ss, sh)
delete(e.shards, id)
pool, ok := e.shardPools[id]
if ok {
pool.Release()
delete(e.shardPools, id)
}
e.log.Info(logs.EngineShardHasBeenRemoved,
zap.String("id", id))
}
e.mtx.Unlock()
for _, sh := range ss {
err := sh.SetMode(mode.Disabled)
if err != nil {
e.log.Error(logs.EngineCouldNotChangeShardModeToDisabled,
zap.Stringer("id", sh.ID()),
zap.Error(err),
)
}
err = sh.Close()
if err != nil {
e.log.Error(logs.EngineCouldNotCloseRemovedShard,
zap.Stringer("id", sh.ID()),
zap.Error(err),
)
}
}
}
func generateShardID() (*shard.ID, error) {
uid, err := uuid.NewRandom()
if err != nil {
return nil, err
}
bin, err := uid.MarshalBinary()
if err != nil {
return nil, err
}
return shard.NewIDFromBytes(bin), nil
}
func (e *StorageEngine) shardWeight(sh *shard.Shard) float64 {
weightValues := sh.WeightValues()
return float64(weightValues.FreeSpace)
}
func (e *StorageEngine) sortShardsByWeight(objAddr interface{ EncodeToString() string }) []hashedShard {
e.mtx.RLock()
defer e.mtx.RUnlock()
h := hrw.StringHash(objAddr.EncodeToString())
shards := make([]hashedShard, 0, len(e.shards))
for _, sh := range e.shards {
shards = append(shards, hashedShard(sh))
}
return sortShardsByWeight(shards, h)
}
func sortShardsByWeight(shards []hashedShard, h uint64) []hashedShard {
weights := make([]float64, 0, len(shards))
for _, sh := range shards {
weights = append(weights, float64(sh.Shard.WeightValues().FreeSpace))
}
hrw.SortHasherSliceByWeightValue(shards, weights, h)
return shards
}
func (e *StorageEngine) unsortedShards() []hashedShard {
e.mtx.RLock()
defer e.mtx.RUnlock()
shards := make([]hashedShard, 0, len(e.shards))
for _, sh := range e.shards {
shards = append(shards, hashedShard(sh))
}
return shards
}
func (e *StorageEngine) iterateOverSortedShards(addr oid.Address, handler func(int, hashedShard) (stop bool)) {
for i, sh := range e.sortShardsByWeight(addr) {
if handler(i, sh) {
break
}
}
}
func (e *StorageEngine) iterateOverUnsortedShards(handler func(hashedShard) (stop bool)) {
for _, sh := range e.unsortedShards() {
if handler(sh) {
break
}
}
}
// SetShardMode sets mode of the shard with provided identifier.
//
// Returns an error if shard mode was not set, or shard was not found in storage engine.
func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounter bool) error {
e.mtx.RLock()
defer e.mtx.RUnlock()
for shID, sh := range e.shards {
if id.String() == shID {
if resetErrorCounter {
sh.errorCount.Store(0)
sh.Shard.ClearErrorCounter()
}
return sh.SetMode(m)
}
}
return errShardNotFound
}
// HandleNewEpoch notifies every shard about NewEpoch event.
func (e *StorageEngine) HandleNewEpoch(epoch uint64) {
ev := shard.EventNewEpoch(epoch)
e.mtx.RLock()
defer e.mtx.RUnlock()
for _, sh := range e.shards {
sh.NotificationChannel() <- ev
}
}
func (s hashedShard) Hash() uint64 {
return s.hash
}