package fstree import ( "context" "crypto/sha256" "errors" "fmt" "io/fs" "os" "path/filepath" "strconv" "strings" "syscall" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/common" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor/compression" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) type keyLock interface { Lock(string) Unlock(string) } type noopKeyLock struct{} func (l *noopKeyLock) Lock(string) {} func (l *noopKeyLock) Unlock(string) {} // FSTree represents an object storage as a filesystem tree. type FSTree struct { Info log *logger.Logger *compression.Config Depth uint64 DirNameLen int noSync bool readOnly bool metrics Metrics fileCounter FileCounter writer writer } // Info groups the information about file storage. type Info struct { // Permission bits of the root directory. Permissions fs.FileMode // Full path to the root directory. RootPath string } const ( // DirNameLen is how many bytes is used to group keys into directories. DirNameLen = 1 // in bytes // MaxDepth is maximum depth of nested directories. MaxDepth = (sha256.Size - 1) / DirNameLen ) var _ common.Storage = (*FSTree)(nil) func New(opts ...Option) *FSTree { f := &FSTree{ Info: Info{ Permissions: 0o700, RootPath: "./", }, Config: nil, Depth: 4, DirNameLen: DirNameLen, metrics: &noopMetrics{}, fileCounter: &noopCounter{}, log: logger.NewLoggerWrapper(zap.L()), } for i := range opts { opts[i](f) } f.writer = newGenericWriteData(f.fileCounter, f.Permissions, f.noSync) return f } func stringifyAddress(addr oid.Address) string { return addr.Object().EncodeToString() + "." + addr.Container().EncodeToString() } func addressFromString(s string) (oid.Address, error) { before, after, found := strings.Cut(s, ".") if !found { return oid.Address{}, errors.New("invalid address") } var obj oid.ID if err := obj.DecodeString(before); err != nil { return oid.Address{}, err } var cnr cid.ID if err := cnr.DecodeString(after); err != nil { return oid.Address{}, err } var addr oid.Address addr.SetObject(obj) addr.SetContainer(cnr) return addr, nil } // Iterate iterates over all stored objects. func (t *FSTree) Iterate(ctx context.Context, prm common.IteratePrm) (common.IterateRes, error) { var ( err error startedAt = time.Now() ) defer func() { t.metrics.Iterate(time.Since(startedAt), err == nil) }() _, span := tracing.StartSpanFromContext(ctx, "FSTree.Iterate", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.Bool("ignore_errors", prm.IgnoreErrors), )) defer span.End() err = t.iterate(ctx, 0, []string{t.RootPath}, prm) return common.IterateRes{}, err } func (t *FSTree) iterate(ctx context.Context, depth uint64, curPath []string, prm common.IteratePrm) error { curName := strings.Join(curPath[1:], "") dirPath := filepath.Join(curPath...) des, err := os.ReadDir(dirPath) if err != nil { if prm.IgnoreErrors { t.log.Warn(ctx, logs.BlobstorErrorOccurredDuringTheIteration, zap.String("err", err.Error()), zap.String("directory_path", dirPath)) return nil } return err } isLast := depth >= t.Depth l := len(curPath) curPath = append(curPath, "") for i := range des { select { case <-ctx.Done(): return ctx.Err() default: } curPath[l] = des[i].Name() if !isLast && des[i].IsDir() { err := t.iterate(ctx, depth+1, curPath, prm) if err != nil { // Must be error from handler in case errors are ignored. // Need to report. return err } } if depth != t.Depth { continue } addr, err := addressFromString(curName + des[i].Name()) if err != nil { continue } path := filepath.Join(curPath...) data, err := os.ReadFile(path) if err != nil && os.IsNotExist(err) { continue } if err == nil { data, err = t.Decompress(data) } if err != nil { if prm.IgnoreErrors { t.log.Warn(ctx, logs.BlobstorErrorOccurredDuringTheIteration, zap.Stringer("address", addr), zap.String("err", err.Error()), zap.String("path", path)) continue } return err } err = prm.Handler(common.IterationElement{ Address: addr, ObjectData: data, StorageID: []byte{}, }) if err != nil { return err } } return nil } type ObjectInfo struct { Address oid.Address DataSize uint64 } type IterateInfoHandler func(ObjectInfo) error func (t *FSTree) IterateInfo(ctx context.Context, handler IterateInfoHandler) error { var ( err error startedAt = time.Now() ) defer func() { t.metrics.IterateInfo(time.Since(startedAt), err == nil) }() _, span := tracing.StartSpanFromContext(ctx, "FSTree.IterateInfo") defer span.End() return t.iterateInfo(ctx, 0, []string{t.RootPath}, handler) } func (t *FSTree) iterateInfo(ctx context.Context, depth uint64, curPath []string, handler IterateInfoHandler) error { curName := strings.Join(curPath[1:], "") dirPath := filepath.Join(curPath...) entries, err := os.ReadDir(dirPath) if err != nil { return fmt.Errorf("read fstree dir '%s': %w", dirPath, err) } isLast := depth >= t.Depth l := len(curPath) curPath = append(curPath, "") for i := range entries { select { case <-ctx.Done(): return ctx.Err() default: } curPath[l] = entries[i].Name() if !isLast && entries[i].IsDir() { err := t.iterateInfo(ctx, depth+1, curPath, handler) if err != nil { return err } } if depth != t.Depth { continue } addr, err := addressFromString(curName + entries[i].Name()) if err != nil { continue } info, err := entries[i].Info() if err != nil { if os.IsNotExist(err) { continue } return err } err = handler(ObjectInfo{ Address: addr, DataSize: uint64(info.Size()), }) if err != nil { return err } } return nil } func (t *FSTree) treePath(addr oid.Address) string { sAddr := stringifyAddress(addr) var sb strings.Builder sb.Grow(len(t.RootPath) + len(sAddr) + int(t.Depth) + 1) sb.WriteString(t.RootPath) for i := 0; uint64(i) < t.Depth; i++ { sb.WriteRune(filepath.Separator) sb.WriteString(sAddr[:t.DirNameLen]) sAddr = sAddr[t.DirNameLen:] } sb.WriteRune(filepath.Separator) sb.WriteString(sAddr) return sb.String() } // Delete removes the object with the specified address from the storage. func (t *FSTree) Delete(ctx context.Context, prm common.DeletePrm) (common.DeleteRes, error) { var ( err error startedAt = time.Now() ) defer func() { t.metrics.Delete(time.Since(startedAt), err == nil) }() _, span := tracing.StartSpanFromContext(ctx, "FSTree.Delete", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.String("address", prm.Address.EncodeToString()), )) defer span.End() if t.readOnly { err = common.ErrReadOnly return common.DeleteRes{}, err } p := t.treePath(prm.Address) err = t.writer.removeFile(p, prm.Size) return common.DeleteRes{}, err } // Exists returns the path to the file with object contents if it exists in the storage // and an error otherwise. func (t *FSTree) Exists(ctx context.Context, prm common.ExistsPrm) (common.ExistsRes, error) { var ( success = false startedAt = time.Now() ) defer func() { t.metrics.Exists(time.Since(startedAt), success) }() _, span := tracing.StartSpanFromContext(ctx, "FSTree.Exists", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.String("address", prm.Address.EncodeToString()), )) defer span.End() p := t.treePath(prm.Address) _, err := os.Stat(p) found := err == nil if os.IsNotExist(err) { err = nil } success = err == nil return common.ExistsRes{Exists: found}, err } // Put puts an object in the storage. func (t *FSTree) Put(ctx context.Context, prm common.PutPrm) (common.PutRes, error) { var ( size int startedAt = time.Now() err error ) defer func() { t.metrics.Put(time.Since(startedAt), size, err == nil) }() _, span := tracing.StartSpanFromContext(ctx, "FSTree.Put", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.String("address", prm.Address.EncodeToString()), attribute.Bool("dont_compress", prm.DontCompress), )) defer span.End() if t.readOnly { err = common.ErrReadOnly return common.PutRes{}, err } p := t.treePath(prm.Address) if err = util.MkdirAllX(filepath.Dir(p), t.Permissions); err != nil { if errors.Is(err, syscall.ENOSPC) { err = common.ErrNoSpace return common.PutRes{}, err } return common.PutRes{}, err } if !prm.DontCompress { prm.RawData = t.Compress(prm.RawData) } size = len(prm.RawData) return common.PutRes{StorageID: []byte{}}, t.writer.writeData(p, prm.RawData) } // Get returns an object from the storage by address. func (t *FSTree) Get(ctx context.Context, prm common.GetPrm) (common.GetRes, error) { var ( startedAt = time.Now() success = false size = 0 ) defer func() { t.metrics.Get(time.Since(startedAt), size, success) }() ctx, span := tracing.StartSpanFromContext(ctx, "FSTree.Get", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.Bool("raw", prm.Raw), attribute.String("address", prm.Address.EncodeToString()), )) defer span.End() p := t.treePath(prm.Address) var data []byte var err error { _, span := tracing.StartSpanFromContext(ctx, "FSTree.Get.ReadFile") defer span.End() data, err = os.ReadFile(p) if err != nil { if os.IsNotExist(err) { return common.GetRes{}, logicerr.Wrap(new(apistatus.ObjectNotFound)) } return common.GetRes{}, err } } data, err = t.Decompress(data) if err != nil { return common.GetRes{}, err } size = len(data) obj := objectSDK.New() if err := obj.Unmarshal(data); err != nil { return common.GetRes{}, err } success = true return common.GetRes{Object: obj, RawData: data}, nil } // GetRange implements common.Storage. func (t *FSTree) GetRange(ctx context.Context, prm common.GetRangePrm) (common.GetRangeRes, error) { var ( startedAt = time.Now() success = false size = 0 ) defer func() { t.metrics.GetRange(time.Since(startedAt), size, success) }() ctx, span := tracing.StartSpanFromContext(ctx, "FSTree.GetRange", trace.WithAttributes( attribute.String("path", t.RootPath), attribute.String("address", prm.Address.EncodeToString()), attribute.String("offset", strconv.FormatUint(prm.Range.GetOffset(), 10)), attribute.String("length", strconv.FormatUint(prm.Range.GetLength(), 10)), )) defer span.End() res, err := t.Get(ctx, common.GetPrm{Address: prm.Address}) if err != nil { return common.GetRangeRes{}, err } payload := res.Object.Payload() from := prm.Range.GetOffset() to := from + prm.Range.GetLength() if pLen := uint64(len(payload)); to < from || pLen < from || pLen < to { return common.GetRangeRes{}, logicerr.Wrap(new(apistatus.ObjectOutOfRange)) } success = true data := payload[from:to] size = len(data) return common.GetRangeRes{ Data: data, }, nil } // initFileCounter walks the file tree rooted at FSTree's root, // counts total items count, inits counter and returns number of stored objects. func (t *FSTree) initFileCounter() error { if !counterEnabled(t.fileCounter) { return nil } count, size, err := t.countFiles() if err != nil { return err } t.fileCounter.Set(count, size) return nil } func (t *FSTree) countFiles() (uint64, uint64, error) { var count, size uint64 // it is simpler to just consider every file // that is not directory as an object err := filepath.WalkDir(t.RootPath, func(_ string, d fs.DirEntry, _ error) error { if d.IsDir() { return nil } count++ info, err := d.Info() if err != nil { return err } size += uint64(info.Size()) return nil }, ) if err != nil { return 0, 0, fmt.Errorf("could not walk through %s directory: %w", t.RootPath, err) } return count, size, nil } func (t *FSTree) ObjectsCount(ctx context.Context) (uint64, error) { var ( startedAt = time.Now() success = false ) defer func() { t.metrics.ObjectsCount(time.Since(startedAt), success) }() ctx, span := tracing.StartSpanFromContext(ctx, "FSTree.ObjectsCount", trace.WithAttributes( attribute.String("path", t.RootPath), )) defer span.End() var result uint64 err := filepath.WalkDir(t.RootPath, func(_ string, d fs.DirEntry, _ error) error { select { case <-ctx.Done(): return ctx.Err() default: } if !d.IsDir() { result++ } return nil }, ) if err != nil { return 0, fmt.Errorf("could not walk through %s directory: %w", t.RootPath, err) } success = true return result, nil } // Type is fstree storage type used in logs and configuration. const Type = "fstree" // Type implements common.Storage. func (*FSTree) Type() string { return Type } // Path implements common.Storage. func (t *FSTree) Path() string { return t.RootPath } // SetCompressor implements common.Storage. func (t *FSTree) SetCompressor(cc *compression.Config) { t.Config = cc } func (t *FSTree) Compressor() *compression.Config { return t.Config } // SetReportErrorFunc implements common.Storage. func (t *FSTree) SetReportErrorFunc(_ func(context.Context, string, error)) { // Do nothing, FSTree can encounter only one error which is returned. } func (t *FSTree) SetParentID(parentID string) { t.metrics.SetParentID(parentID) } func (t *FSTree) Rebuild(_ context.Context, _ common.RebuildPrm) (common.RebuildRes, error) { return common.RebuildRes{}, nil }