[#9999] metabase: Fix db engine to pebble in db.go and control.go

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-07-02 15:33:02 +03:00
parent c229b08f46
commit d0a5d96089
2 changed files with 94 additions and 225 deletions

View file

@ -2,8 +2,8 @@ package meta
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"os"
"path/filepath" "path/filepath"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
@ -11,7 +11,7 @@ import (
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
"go.etcd.io/bbolt" "github.com/cockroachdb/pebble"
"go.uber.org/zap" "go.uber.org/zap"
) )
@ -21,23 +21,7 @@ var ErrDegradedMode = logicerr.New("metabase is in a degraded mode")
// ErrReadOnlyMode is returned when metabase is in a read-only mode. // ErrReadOnlyMode is returned when metabase is in a read-only mode.
var ErrReadOnlyMode = logicerr.New("metabase is in a read-only mode") var ErrReadOnlyMode = logicerr.New("metabase is in a read-only mode")
var ( // Open metabase.
mStaticBuckets = map[string]struct{}{
string(containerVolumeBucketName): {},
string(containerCounterBucketName): {},
string(graveyardBucketName): {},
string(garbageBucketName): {},
string(shardInfoBucket): {},
string(bucketNameLocked): {},
}
// deprecatedBuckets buckets that are not used anymore.
deprecatedBuckets = [][]byte{
toMoveItBucketName,
}
)
// Open boltDB instance for metabase.
func (db *DB) Open(_ context.Context, m mode.Mode) error { func (db *DB) Open(_ context.Context, m mode.Mode) error {
db.modeMtx.Lock() db.modeMtx.Lock()
defer db.modeMtx.Unlock() defer db.modeMtx.Unlock()
@ -47,147 +31,122 @@ func (db *DB) Open(_ context.Context, m mode.Mode) error {
if m.NoMetabase() { if m.NoMetabase() {
return nil return nil
} }
return db.openDB(m) return db.openDB(m)
} }
func (db *DB) openDB(mode mode.Mode) error { func (db *DB) openDB(mode mode.Mode) error {
err := util.MkdirAllX(filepath.Dir(db.info.Path), db.info.Permission) err := util.MkdirAllX(db.info.Path, db.info.Permission)
if err != nil { if err != nil {
return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err) return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err)
} }
db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path)) db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path))
if db.boltOptions == nil { return metaerr.Wrap(db.openDatabase(mode.ReadOnly()))
opts := *bbolt.DefaultOptions
db.boltOptions = &opts
}
db.boltOptions.ReadOnly = mode.ReadOnly()
return metaerr.Wrap(db.openBolt())
} }
func (db *DB) openBolt() error { func (db *DB) pebbleOptions(readOnly bool) *pebble.Options {
var err error return &pebble.Options{
ReadOnly: readOnly,
db.database, err = bbolt.Open(db.info.Path, db.info.Permission, db.boltOptions)
if err != nil {
return fmt.Errorf("can't open boltDB database: %w", err)
} }
db.database.MaxBatchDelay = db.boltBatchDelay }
db.database.MaxBatchSize = db.boltBatchSize
db.log.Debug(logs.MetabaseOpenedBoltDBInstanceForMetabase) func (db *DB) openDatabase(readOnly bool) error {
opts := db.pebbleOptions(readOnly)
db.log.Debug(logs.MetabaseCheckingMetabaseVersion) var err error
return db.database.View(func(tx *bbolt.Tx) error { db.database, err = pebble.Open(db.info.Path, opts)
// The safest way to check if the metabase is fresh is to check if it has no buckets. if err != nil {
// However, shard info can be present. So here we check that the number of buckets is return fmt.Errorf("can't open badger database: %w", err)
// at most 1. }
// Another thing to consider is that tests do not persist shard ID, we want to support
// this case too.
var n int
err := tx.ForEach(func([]byte, *bbolt.Bucket) error {
if n++; n >= 2 { // do not iterate a lot
return errBreakBucketForEach
}
return nil
})
if err == errBreakBucketForEach { return db.snapshot(func(s *pebble.Snapshot) error {
db.initialized = true data, err := valueSafe(s, shardInfoKey(versionKey))
err = nil if err != nil {
return err
} }
return err db.initialized = len(data) > 0
return nil
}) })
} }
// Init initializes metabase. It creates static (CID-independent) buckets in underlying BoltDB instance. // Init initializes metabase.
// //
// Returns ErrOutdatedVersion if a database at the provided path is outdated. // Returns ErrOutdatedVersion if a database at the provided path is outdated.
// //
// Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state, // Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state,
// use Reset. // use Reset.
func (db *DB) Init() error { func (db *DB) Init() error {
return metaerr.Wrap(db.init(false)) db.modeMtx.Lock()
defer db.modeMtx.Unlock()
return metaerr.Wrap(db.init(context.TODO(), false))
} }
// Reset resets metabase. Works similar to Init but cleans up all static buckets and func (db *DB) Init2(ctx context.Context) error {
// removes all dynamic (CID-dependent) ones in non-blank BoltDB instances. db.modeMtx.Lock()
func (db *DB) Reset() error { defer db.modeMtx.Unlock()
db.modeMtx.RLock()
defer db.modeMtx.RUnlock() return metaerr.Wrap(db.init(ctx, false))
}
// Reset resets metabase. Works similar to Init but cleans up all data records.
func (db *DB) Reset(ctx context.Context) error {
db.modeMtx.Lock()
defer db.modeMtx.Unlock()
if db.mode.NoMetabase() { if db.mode.NoMetabase() {
return ErrDegradedMode return ErrDegradedMode
} }
return metaerr.Wrap(db.init(true)) return metaerr.Wrap(db.init(ctx, true))
} }
func (db *DB) init(reset bool) error { func (db *DB) init(ctx context.Context, reset bool) error {
if db.mode.NoMetabase() || db.mode.ReadOnly() { if db.mode.NoMetabase() || db.mode.ReadOnly() {
return nil return nil
} }
if reset {
return db.database.Update(func(tx *bbolt.Tx) error { if err := db.reset(); err != nil {
var err error return err
if !reset {
// Normal open, check version and update if not initialized.
err := checkVersion(tx, db.initialized)
if err != nil {
return err
}
} }
for k := range mStaticBuckets { }
name := []byte(k)
if reset {
err := tx.DeleteBucket(name)
if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) {
return fmt.Errorf("could not delete static bucket %s: %w", k, err)
}
}
_, err := tx.CreateBucketIfNotExists(name) return db.batch(func(b *pebble.Batch) error {
if err != nil { err := checkVersion(b, db.initialized)
return fmt.Errorf("could not create static bucket %s: %w", k, err) if err != nil {
} return err
}
err = syncCounter(ctx, b, false)
if err != nil {
return fmt.Errorf("could not sync object counter: %w", err)
} }
for _, b := range deprecatedBuckets { return nil
err := tx.DeleteBucket(b) })
if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { }
return fmt.Errorf("could not delete deprecated bucket %s: %w", string(b), err)
}
}
if !reset { // counters will be recalculated by refill metabase func (db *DB) reset() error {
err = syncCounter(tx, false) if err := db.database.Close(); err != nil {
if err != nil { return err
return fmt.Errorf("could not sync object counter: %w", err) }
} db.database = nil
if err := os.RemoveAll(db.info.Path); err != nil {
return nil return err
} }
var err error
bucketCursor := tx.Cursor() db.database, err = pebble.Open(db.info.Path, db.pebbleOptions(false))
name, _ := bucketCursor.First() if err != nil {
for name != nil { return fmt.Errorf("can't open badger database: %w", err)
if _, ok := mStaticBuckets[string(name)]; !ok { }
if err := tx.DeleteBucket(name); err != nil { return db.batch(func(b *pebble.Batch) error {
return err return updateVersion(b, version)
}
name, _ = bucketCursor.Seek(name)
continue
}
name, _ = bucketCursor.Next()
}
return updateVersion(tx, version)
}) })
} }
// SyncCounters forces to synchronize the object counters. // SyncCounters forces to synchronize the object counters.
func (db *DB) SyncCounters() error { func (db *DB) SyncCounters(ctx context.Context) error {
db.modeMtx.RLock() db.modeMtx.RLock()
defer db.modeMtx.RUnlock() defer db.modeMtx.RUnlock()
@ -197,26 +156,29 @@ func (db *DB) SyncCounters() error {
return ErrReadOnlyMode return ErrReadOnlyMode
} }
return metaerr.Wrap(db.database.Update(func(tx *bbolt.Tx) error { return metaerr.Wrap(db.batch(func(b *pebble.Batch) error {
return syncCounter(tx, true) return syncCounter(ctx, b, true)
})) }))
} }
// Close closes boltDB instance // Close closes metabase.
// and reports metabase metric.
func (db *DB) Close() error { func (db *DB) Close() error {
var err error db.modeMtx.Lock()
if db.database != nil { defer db.modeMtx.Unlock()
err = db.close()
} return db.close()
if err == nil {
db.metrics.Close()
}
return err
} }
func (db *DB) close() error { func (db *DB) close() error {
return metaerr.Wrap(db.database.Close()) var err error
if db.database != nil {
err = metaerr.Wrap(db.database.Close())
}
if err == nil {
db.database = nil
db.metrics.Close()
}
return err
} }
// Reload reloads part of the configuration. // Reload reloads part of the configuration.
@ -235,14 +197,14 @@ func (db *DB) Reload(opts ...Option) (bool, error) {
defer db.modeMtx.Unlock() defer db.modeMtx.Unlock()
if db.mode.NoMetabase() || c.info.Path != "" && filepath.Clean(db.info.Path) != filepath.Clean(c.info.Path) { if db.mode.NoMetabase() || c.info.Path != "" && filepath.Clean(db.info.Path) != filepath.Clean(c.info.Path) {
if err := db.Close(); err != nil { if err := db.close(); err != nil {
return false, err return false, err
} }
db.mode = mode.Disabled db.mode = mode.Disabled
db.metrics.SetMode(mode.ComponentDisabled) db.metrics.SetMode(mode.ComponentDisabled)
db.info.Path = c.info.Path db.info.Path = c.info.Path
if err := db.openBolt(); err != nil { if err := db.openDatabase(false); err != nil {
return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err)) return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err))
} }

View file

@ -1,10 +1,8 @@
package meta package meta
import ( import (
"bytes"
"encoding/binary" "encoding/binary"
"encoding/hex" "encoding/hex"
"errors"
"io/fs" "io/fs"
"os" "os"
"strconv" "strconv"
@ -85,20 +83,16 @@ func New(opts ...Option) *DB {
cfg: c, cfg: c,
matchers: map[objectSDK.SearchMatchType]matcher{ matchers: map[objectSDK.SearchMatchType]matcher{
objectSDK.MatchUnknown: { objectSDK.MatchUnknown: {
matchSlow: unknownMatcher, matchSlow: unknownMatcher,
matchBucket: unknownMatcherBucket,
}, },
objectSDK.MatchStringEqual: { objectSDK.MatchStringEqual: {
matchSlow: stringEqualMatcher, matchSlow: stringEqualMatcher,
matchBucket: stringEqualMatcherBucket,
}, },
objectSDK.MatchStringNotEqual: { objectSDK.MatchStringNotEqual: {
matchSlow: stringNotEqualMatcher, matchSlow: stringNotEqualMatcher,
matchBucket: stringNotEqualMatcherBucket,
}, },
objectSDK.MatchCommonPrefix: { objectSDK.MatchCommonPrefix: {
matchSlow: stringCommonPrefixMatcher, matchSlow: stringCommonPrefixMatcher,
matchBucket: stringCommonPrefixMatcherBucket,
}, },
}, },
mode: mode.Disabled, mode: mode.Disabled,
@ -180,105 +174,18 @@ func stringEqualMatcher(key string, objVal []byte, filterVal string) bool {
return stringifyValue(key, objVal) == filterVal return stringifyValue(key, objVal) == filterVal
} }
func stringEqualMatcherBucket(r pebble.Reader, fKey string, fValue string, f func([]byte, []byte) error) error {
// Ignore the second return value because we check for strict equality.
val, _, ok := destringifyValue(fKey, fValue, false)
if !ok {
return nil
}
data, err := valueSafe(r, val)
if err != nil {
return err
}
if data != nil {
return f(val, data)
}
return nil
}
func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool { func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool {
return stringifyValue(key, objVal) != filterVal return stringifyValue(key, objVal) != filterVal
} }
func stringNotEqualMatcherBucket(r pebble.Reader, fKey string, fValue string, f func([]byte, []byte) error) error {
// Ignore the second return value because we check for strict inequality.
val, _, ok := destringifyValue(fKey, fValue, false)
it, err := r.NewIter(&pebble.IterOptions{
SkipPoint: func(k []byte) bool {
return ok && bytes.Equal(val, k)
},
})
if err != nil {
return err
}
for v := it.First(); v; v = it.Next() {
if err := f(it.Key(), it.Value()); err != nil {
err = errors.Join(err, it.Close())
return err
}
}
return it.Close()
}
func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool { func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool {
return strings.HasPrefix(stringifyValue(key, objVal), filterVal) return strings.HasPrefix(stringifyValue(key, objVal), filterVal)
} }
func stringCommonPrefixMatcherBucket(r pebble.Reader, fKey string, fVal string, f func([]byte, []byte) error) error {
val, checkLast, ok := destringifyValue(fKey, fVal, true)
if !ok {
return nil
}
prefix := val
if checkLast {
prefix = val[:len(val)-1]
}
if len(val) == 0 {
it, err := r.NewIter(&pebble.IterOptions{})
if err != nil {
return err
}
for v := it.First(); v; v = it.Next() {
if err := f(it.Key(), it.Value()); err != nil {
err = errors.Join(err, it.Close())
return err
}
}
return it.Close()
}
it, err := r.NewIter(&pebble.IterOptions{
LowerBound: prefix,
})
if err != nil {
return err
}
for v := it.First(); v && bytes.HasPrefix(it.Key(), prefix); v = it.Next() {
if checkLast && (len(it.Key()) == len(prefix) || it.Key()[len(prefix)]>>4 != val[len(val)-1]) {
// If the last byte doesn't match, this means the prefix does no longer match,
// so we need to break here.
break
}
if err := f(it.Key(), it.Value()); err != nil {
err = errors.Join(err, it.Close())
return err
}
}
return it.Close()
}
func unknownMatcher(_ string, _ []byte, _ string) bool { func unknownMatcher(_ string, _ []byte, _ string) bool {
return false return false
} }
func unknownMatcherBucket(_ pebble.Reader, _ string, _ string, _ func([]byte, []byte) error) error {
return nil
}
// bucketKeyHelper returns byte representation of val that is used as a key // bucketKeyHelper returns byte representation of val that is used as a key
// in boltDB. Useful for getting filter values from unique and list indexes. // in boltDB. Useful for getting filter values from unique and list indexes.
func bucketKeyHelper(hdr string, val string) []byte { func bucketKeyHelper(hdr string, val string) []byte {