package shard

import (
	"context"
	"errors"
	"sync"

	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
	"go.uber.org/zap"
)

// ErrRebuildInProgress is returned when a rebuild task cannot be scheduled
// because another one is already queued or running.
var ErrRebuildInProgress = errors.New("shard rebuild in progress")

// RebuildWorkerLimiter limits the number of concurrently executing
// rebuild workers.
type RebuildWorkerLimiter interface {
	AcquireWorkSlot(ctx context.Context) error
	ReleaseWorkSlot()
}

// rebuildLimiter implements RebuildWorkerLimiter with a buffered channel
// used as a counting semaphore.
type rebuildLimiter struct {
	semaphore chan struct{}
}

// NewRebuildLimiter returns a RebuildWorkerLimiter that allows at most
// workersCount concurrent work slots.
func NewRebuildLimiter(workersCount uint32) RebuildWorkerLimiter {
	return &rebuildLimiter{
		semaphore: make(chan struct{}, workersCount),
	}
}

func (l *rebuildLimiter) AcquireWorkSlot(ctx context.Context) error {
	select {
	case l.semaphore <- struct{}{}:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func (l *rebuildLimiter) ReleaseWorkSlot() {
	<-l.semaphore
}

type rebuildTask struct {
	limiter     RebuildWorkerLimiter
	fillPercent int
}

// rebuilder runs a background loop that executes scheduled rebuild tasks.
type rebuilder struct {
	mtx    *sync.Mutex
	wg     *sync.WaitGroup
	cancel func()
	done   chan struct{}
	tasks  chan rebuildTask
}

func newRebuilder() *rebuilder {
	return &rebuilder{
		mtx:   &sync.Mutex{},
		wg:    &sync.WaitGroup{},
		tasks: make(chan rebuildTask),
	}
}

func (r *rebuilder) Start(ctx context.Context, bs *blobstor.BlobStor, mb *meta.DB, log *logger.Logger) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	if r.done != nil {
		return // already started
	}
	ctx, cancel := context.WithCancel(ctx)
	r.cancel = cancel
	r.done = make(chan struct{})
	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		for {
			select {
			case <-r.done:
				return
			case t, ok := <-r.tasks:
				if !ok {
					continue
				}
				runRebuild(ctx, bs, mb, log, t.fillPercent, t.limiter)
			}
		}
	}()
}

func runRebuild(ctx context.Context, bs *blobstor.BlobStor, mb *meta.DB, log *logger.Logger,
	fillPercent int, limiter RebuildWorkerLimiter,
) {
	select {
	case <-ctx.Done():
		return
	default:
	}
	log.Info(ctx, logs.BlobstoreRebuildStarted)
	if err := bs.Rebuild(ctx, &mbStorageIDUpdate{mb: mb}, limiter, fillPercent); err != nil {
		log.Warn(ctx, logs.FailedToRebuildBlobstore, zap.Error(err))
	} else {
		log.Info(ctx, logs.BlobstoreRebuildCompletedSuccessfully)
	}
}

func (r *rebuilder) ScheduleRebuild(ctx context.Context, limiter RebuildWorkerLimiter, fillPercent int,
) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case r.tasks <- rebuildTask{
		limiter:     limiter,
		fillPercent: fillPercent,
	}:
		return nil
	default:
		return ErrRebuildInProgress
	}
}

func (r *rebuilder) Stop(log *logger.Logger) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	if r.done != nil {
		close(r.done)
	}
	if r.cancel != nil {
		r.cancel()
	}
	r.wg.Wait()
	r.cancel = nil
	r.done = nil
	log.Info(context.Background(), logs.BlobstoreRebuildStopped)
}

var errMBIsNotAvailable = errors.New("metabase is not available")

// mbStorageIDUpdate persists storage ID updates produced during rebuild
// to the metabase.
type mbStorageIDUpdate struct {
	mb *meta.DB
}

func (u *mbStorageIDUpdate) UpdateStorageID(ctx context.Context, addr oid.Address, storageID []byte) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	if u.mb == nil {
		return errMBIsNotAvailable
	}

	var prm meta.UpdateStorageIDPrm
	prm.SetAddress(addr)
	prm.SetStorageID(storageID)
	_, err := u.mb.UpdateStorageID(ctx, prm)
	return err
}

// RebuildPrm groups the parameters of Shard.ScheduleRebuild.
type RebuildPrm struct {
	ConcurrencyLimiter RebuildWorkerLimiter
	TargetFillPercent  uint32
}

// ScheduleRebuild queues a blobstor rebuild task for the shard. It returns
// ErrReadOnlyMode or ErrDegradedMode if the shard mode forbids rebuilding,
// and ErrRebuildInProgress if a task is already queued.
func (s *Shard) ScheduleRebuild(ctx context.Context, p RebuildPrm) error {
	ctx, span := tracing.StartSpanFromContext(ctx, "Shard.ScheduleRebuild",
		trace.WithAttributes(
			attribute.String("shard_id", s.ID().String()),
			attribute.Int64("target_fill_percent", int64(p.TargetFillPercent)),
		))
	defer span.End()

	s.m.RLock()
	defer s.m.RUnlock()

	if s.info.Mode.ReadOnly() {
		return ErrReadOnlyMode
	}
	if s.info.Mode.NoMetabase() {
		return ErrDegradedMode
	}

	return s.rb.ScheduleRebuild(ctx, p.ConcurrencyLimiter, int(p.TargetFillPercent))
}
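
// Usage sketch (illustrative only, not part of the original file): a caller
// shares one limiter across shards and handles ErrRebuildInProgress. Here
// `sh` is assumed to be an initialized *Shard whose construction is elided.
//
//	limiter := NewRebuildLimiter(4) // at most 4 concurrent rebuild workers
//	err := sh.ScheduleRebuild(ctx, RebuildPrm{
//		ConcurrencyLimiter: limiter,
//		TargetFillPercent:  80, // forwarded to blobstor.Rebuild as fillPercent
//	})
//	if errors.Is(err, ErrRebuildInProgress) {
//		// a rebuild task is already queued on this shard; retry later
//	}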