From d8c915985347faab70a758d122d1d785b590e6a6 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Fri, 26 Apr 2024 12:11:42 +0300 Subject: [PATCH] [#1099] metabase: Change engine to badger Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-lens/internal/meta/root.go | 7 - cmd/frostfs-node/config.go | 43 +- cmd/frostfs-node/config/engine/config_test.go | 27 +- .../config/engine/shard/metabase/config.go | 123 +- config/example/node.env | 13 +- config/example/node.json | 15 +- config/example/node.yaml | 13 +- docs/storage-node-configuration.md | 30 +- go.mod | 10 + go.sum | Bin 42738 -> 50119 bytes internal/logs/logs.go | 1121 +++++++++-------- .../blobstor/blobovniczatree/control.go | 2 +- pkg/local_object_storage/engine/container.go | 51 - .../engine/control_test.go | 7 +- pkg/local_object_storage/engine/inhume.go | 4 +- pkg/local_object_storage/metabase/badger.go | 137 ++ pkg/local_object_storage/metabase/bucket.go | 106 ++ pkg/local_object_storage/metabase/children.go | 37 +- .../metabase/containers.go | 237 +++- .../metabase/containers_test.go | 4 +- pkg/local_object_storage/metabase/control.go | 254 ++-- .../metabase/control_test.go | 4 +- pkg/local_object_storage/metabase/counter.go | 728 ++++------- .../metabase/counter_test.go | 46 +- pkg/local_object_storage/metabase/db.go | 251 ++-- pkg/local_object_storage/metabase/delete.go | 432 ++----- .../metabase/delete_test.go | 18 +- pkg/local_object_storage/metabase/exists.go | 165 ++- pkg/local_object_storage/metabase/expired.go | 260 ++-- .../metabase/expired_test.go | 39 + pkg/local_object_storage/metabase/get.go | 161 ++- pkg/local_object_storage/metabase/get_test.go | 8 +- .../metabase/graveyard.go | 118 +- .../metabase/index_test.go | 65 - pkg/local_object_storage/metabase/inhume.go | 222 ++-- .../metabase/iterators.go | 230 ++-- pkg/local_object_storage/metabase/list.go | 258 ++-- .../metabase/list_test.go | 7 +- pkg/local_object_storage/metabase/lock.go | 266 ++-- .../metabase/lock_test.go | 4 +- pkg/local_object_storage/metabase/logger.go | 32 + pkg/local_object_storage/metabase/mode.go | 8 +- .../metabase/mode_test.go | 8 +- pkg/local_object_storage/metabase/parse.go | 62 + pkg/local_object_storage/metabase/put.go | 371 ++---- pkg/local_object_storage/metabase/put_test.go | 10 +- .../metabase/reset_test.go | 21 +- pkg/local_object_storage/metabase/select.go | 840 +++++++++--- pkg/local_object_storage/metabase/shard_id.go | 28 +- .../metabase/storage_id.go | 46 +- pkg/local_object_storage/metabase/util.go | 684 ++++++++-- pkg/local_object_storage/metabase/version.go | 56 +- .../metabase/version_test.go | 31 +- pkg/local_object_storage/shard/container.go | 4 +- pkg/local_object_storage/shard/control.go | 9 +- .../shard/control_test.go | 23 +- pkg/local_object_storage/shard/count.go | 2 +- pkg/local_object_storage/shard/delete.go | 27 +- pkg/local_object_storage/shard/gc.go | 6 +- .../shard/gc_internal_test.go | 4 +- pkg/local_object_storage/shard/id.go | 2 +- pkg/local_object_storage/shard/lock_test.go | 4 +- pkg/local_object_storage/shard/shard.go | 17 +- pkg/local_object_storage/shard/shard_test.go | 4 +- 64 files changed, 4322 insertions(+), 3500 deletions(-) create mode 100644 pkg/local_object_storage/metabase/badger.go create mode 100644 pkg/local_object_storage/metabase/bucket.go delete mode 100644 pkg/local_object_storage/metabase/index_test.go create mode 100644 pkg/local_object_storage/metabase/logger.go create mode 100644 pkg/local_object_storage/metabase/parse.go diff --git a/cmd/frostfs-lens/internal/meta/root.go 
b/cmd/frostfs-lens/internal/meta/root.go index 6741abd0c..1fb65d3eb 100644 --- a/cmd/frostfs-lens/internal/meta/root.go +++ b/cmd/frostfs-lens/internal/meta/root.go @@ -1,13 +1,10 @@ package meta import ( - "time" - common "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-lens/internal" meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" "github.com/spf13/cobra" - "go.etcd.io/bbolt" ) var ( @@ -38,10 +35,6 @@ func init() { func openMeta(cmd *cobra.Command) *meta.DB { db := meta.New( meta.WithPath(vPath), - meta.WithBoltDBOptions(&bbolt.Options{ - ReadOnly: true, - Timeout: 100 * time.Millisecond, - }), meta.WithEpochState(epochState{}), ) common.ExitOnErr(cmd, common.Errf("could not open metabase: %w", db.Open(cmd.Context(), mode.ReadOnly))) diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index 3aded7020..e7912d4ae 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -76,7 +76,6 @@ import ( "github.com/nspcc-dev/neo-go/pkg/crypto/keys" neogoutil "github.com/nspcc-dev/neo-go/pkg/util" "github.com/panjf2000/ants/v2" - "go.etcd.io/bbolt" "go.uber.org/zap" "go.uber.org/zap/zapcore" "google.golang.org/grpc" @@ -128,10 +127,17 @@ type shardCfg struct { mode shardmode.Mode metaCfg struct { - path string - perm fs.FileMode - maxBatchSize int - maxBatchDelay time.Duration + path string + perm fs.FileMode + noSync bool + verbose bool + memtablesCount int + valueThreshold int64 + valueLogFileSize int64 + indexCacheSize int64 + numCompactors int + gcInterval time.Duration + gcDiscardRatio float64 } subStorages []subStorageCfg @@ -341,9 +347,16 @@ func (a *applicationConfiguration) setMetabaseConfig(newConfig *shardCfg, oldCon m := &newConfig.metaCfg m.path = metabaseCfg.Path() - m.perm = metabaseCfg.BoltDB().Perm() - m.maxBatchDelay = metabaseCfg.BoltDB().MaxBatchDelay() - m.maxBatchSize = metabaseCfg.BoltDB().MaxBatchSize() + m.perm = metabaseCfg.Perm() + m.noSync = metabaseCfg.NoSync() + m.verbose = metabaseCfg.Verbose() + m.memtablesCount = metabaseCfg.MemtablesCount() + m.valueThreshold = metabaseCfg.ValueThreshold() + m.valueLogFileSize = metabaseCfg.ValueLogFileSize() + m.indexCacheSize = metabaseCfg.IndexCacheSize() + m.numCompactors = metabaseCfg.CompactorsCount() + m.gcDiscardRatio = float64(metabaseCfg.GCDiscardPercent()) / 100.0 + m.gcInterval = metabaseCfg.GCInterval() } func (a *applicationConfiguration) setGCConfig(newConfig *shardCfg, oldConfig *shardconfig.Config) { @@ -980,13 +993,17 @@ func (c *cfg) getShardOpts(ctx context.Context, shCfg shardCfg) shardOptsWithID mbOptions := []meta.Option{ meta.WithPath(shCfg.metaCfg.path), meta.WithPermissions(shCfg.metaCfg.perm), - meta.WithMaxBatchSize(shCfg.metaCfg.maxBatchSize), - meta.WithMaxBatchDelay(shCfg.metaCfg.maxBatchDelay), - meta.WithBoltDBOptions(&bbolt.Options{ - Timeout: 100 * time.Millisecond, - }), meta.WithLogger(c.log), meta.WithEpochState(c.cfgNetmap.state), + meta.WithNoSync(shCfg.metaCfg.noSync), + meta.WithVerbose(shCfg.metaCfg.verbose), + meta.WithMemtablesCount(shCfg.metaCfg.memtablesCount), + meta.WithValueThreshold(shCfg.metaCfg.valueThreshold), + meta.WithValueLogFileSize(shCfg.metaCfg.valueLogFileSize), + meta.WithIndexCacheSize(shCfg.metaCfg.indexCacheSize), + meta.WithNumCompactors(shCfg.metaCfg.numCompactors), + meta.WithGCInterval(shCfg.metaCfg.gcInterval), + meta.WithGCDiscardRatio(shCfg.metaCfg.gcDiscardRatio), } if c.metricsCollector != nil { 
mbOptions = append(mbOptions, meta.WithMetrics(lsmetrics.NewMetabaseMetrics(shCfg.metaCfg.path, c.metricsCollector.MetabaseMetrics()))) diff --git a/cmd/frostfs-node/config/engine/config_test.go b/cmd/frostfs-node/config/engine/config_test.go index 3f9c7ec71..f31e86cd7 100644 --- a/cmd/frostfs-node/config/engine/config_test.go +++ b/cmd/frostfs-node/config/engine/config_test.go @@ -11,6 +11,7 @@ import ( blobovniczaconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/blobstor/blobovnicza" fstreeconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/blobstor/fstree" gcconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/gc" + metabaseconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/metabase" piloramaconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/pilorama" configtest "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/test" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" @@ -80,9 +81,16 @@ func TestEngineSection(t *testing.T) { require.EqualValues(t, 3221225472, wc.SizeLimit()) require.Equal(t, "tmp/0/meta", meta.Path()) - require.Equal(t, fs.FileMode(0o644), meta.BoltDB().Perm()) - require.Equal(t, 100, meta.BoltDB().MaxBatchSize()) - require.Equal(t, 10*time.Millisecond, meta.BoltDB().MaxBatchDelay()) + require.Equal(t, fs.FileMode(0o644), meta.Perm()) + require.Equal(t, true, meta.NoSync()) + require.Equal(t, true, meta.Verbose()) + require.Equal(t, 10, meta.MemtablesCount()) + require.Equal(t, int64(1024), meta.ValueThreshold()) + require.Equal(t, int64(512*1024*1024), meta.ValueLogFileSize()) + require.Equal(t, int64(10*1024*1024), meta.IndexCacheSize()) + require.Equal(t, 10, meta.CompactorsCount()) + require.Equal(t, 20*time.Minute, meta.GCInterval()) + require.Equal(t, 60, meta.GCDiscardPercent()) require.Equal(t, true, sc.Compress()) require.Equal(t, []string{"audio/*", "video/*"}, sc.UncompressableContentTypes()) @@ -137,9 +145,16 @@ func TestEngineSection(t *testing.T) { require.EqualValues(t, 4294967296, wc.SizeLimit()) require.Equal(t, "tmp/1/meta", meta.Path()) - require.Equal(t, fs.FileMode(0o644), meta.BoltDB().Perm()) - require.Equal(t, 200, meta.BoltDB().MaxBatchSize()) - require.Equal(t, 20*time.Millisecond, meta.BoltDB().MaxBatchDelay()) + require.Equal(t, fs.FileMode(0o644), meta.Perm()) + require.Equal(t, false, meta.NoSync()) + require.Equal(t, false, meta.Verbose()) + require.Equal(t, metabaseconfig.MemtablesCountDefault, meta.MemtablesCount()) + require.Equal(t, int64(metabaseconfig.ValueThresholdDefault), meta.ValueThreshold()) + require.Equal(t, int64(metabaseconfig.ValueLogFileSizeDefault), meta.ValueLogFileSize()) + require.Equal(t, int64(metabaseconfig.IndexCacheSizeDefault), meta.IndexCacheSize()) + require.Equal(t, metabaseconfig.CompactorsCountDefault, meta.CompactorsCount()) + require.Equal(t, metabaseconfig.GCIntervalDefault, meta.GCInterval()) + require.Equal(t, metabaseconfig.GCDiscardPercentDefault, meta.GCDiscardPercent()) require.Equal(t, false, sc.Compress()) require.Equal(t, []string(nil), sc.UncompressableContentTypes()) diff --git a/cmd/frostfs-node/config/engine/shard/metabase/config.go b/cmd/frostfs-node/config/engine/shard/metabase/config.go index 3730094d9..45eed7a9c 100644 --- a/cmd/frostfs-node/config/engine/shard/metabase/config.go +++ b/cmd/frostfs-node/config/engine/shard/metabase/config.go @@ -1,8 +1,21 @@ 
package metabaseconfig import ( + "io/fs" + "time" + "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config" - boltdbconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/boltdb" +) + +const ( + PermDefault = 0o660 + MemtablesCountDefault = 32 + ValueThresholdDefault = 512 + ValueLogFileSizeDefault = 1<<30 - 1 + IndexCacheSizeDefault = 256 << 20 + CompactorsCountDefault = 8 + GCIntervalDefault = 10 * time.Minute + GCDiscardPercentDefault = 50 ) // Config is a wrapper over the config section @@ -30,7 +43,109 @@ func (x *Config) Path() string { return p } -// BoltDB returns config instance for querying bolt db specific parameters. -func (x *Config) BoltDB() *boltdbconfig.Config { - return (*boltdbconfig.Config)(x) +// Perm returns the value of "perm" config parameter as a fs.FileMode. +// +// Returns PermDefault if the value is not a positive number. +func (x *Config) Perm() fs.FileMode { + p := config.UintSafe( + (*config.Config)(x), + "perm", + ) + + if p == 0 { + p = PermDefault + } + + return fs.FileMode(p) +} + +// NoSync returns the value of "no_sync" config parameter. +// +// Returns false if the value is not a boolean. +func (x *Config) NoSync() bool { + return config.BoolSafe((*config.Config)(x), "no_sync") +} + +// Verbose returns the value of "verbose" config parameter. +// +// Returns false if the value is not a boolean. +func (x *Config) Verbose() bool { + return config.BoolSafe((*config.Config)(x), "verbose") +} + +// MemtablesCount returns the value of "memtables_count" config parameter. +// +// Returns MemtablesCountDefault if the value is not a positive number. +func (x *Config) MemtablesCount() int { + s := int(config.IntSafe((*config.Config)(x), "memtables_count")) + if s <= 0 { + s = MemtablesCountDefault + } + return s +} + +// ValueThreshold returns the value of "value_threshold" config parameter. +// +// Returns ValueThresholdDefault if the value is not a positive number. +func (x *Config) ValueThreshold() int64 { + s := config.SizeInBytesSafe((*config.Config)(x), "value_threshold") + if s <= 0 { + s = ValueThresholdDefault + } + return int64(s) +} + +// ValueLogFileSize returns the value of "valuelog_file_size" config parameter. +// +// Returns ValueLogFileSizeDefault if the value is not a positive number. +func (x *Config) ValueLogFileSize() int64 { + s := config.SizeInBytesSafe((*config.Config)(x), "valuelog_file_size") + if s <= 0 { + s = ValueLogFileSizeDefault + } + return int64(s) +} + +// IndexCacheSize returns the value of "index_cache_size" config parameter. +// +// Returns IndexCacheSizeDefault if the value is not a positive number. +func (x *Config) IndexCacheSize() int64 { + s := config.SizeInBytesSafe((*config.Config)(x), "index_cache_size") + if s <= 0 { + s = IndexCacheSizeDefault + } + return int64(s) +} + +// CompactorsCount returns the value of "compactors_count" config parameter. +// +// Returns CompactorsCountDefault if the value is not a positive number. +func (x *Config) CompactorsCount() int { + s := int(config.IntSafe((*config.Config)(x), "compactors_count")) + if s <= 0 { + s = CompactorsCountDefault + } + return s +} + +// GCInterval returns the value of "gc_interval" config parameter. +// +// Returns GCIntervalDefault if the value is not a positive number. 
+func (x *Config) GCInterval() time.Duration { + s := config.DurationSafe((*config.Config)(x), "gc_interval") + if s <= 0 { + s = GCIntervalDefault + } + return s +} + +// GCDiscardPercent returns the value of "gc_discard_percent" config parameter. +// +// Returns GCDiscardPercentDefault if the value is not a number in range (0;100]. +func (x *Config) GCDiscardPercent() int { + s := int(config.IntSafe((*config.Config)(x), "gc_discard_percent")) + if s <= 0 || s > 100 { + s = GCDiscardPercentDefault + } + return s } diff --git a/config/example/node.env b/config/example/node.env index 72f56e96c..10ee2feb0 100644 --- a/config/example/node.env +++ b/config/example/node.env @@ -108,8 +108,15 @@ FROSTFS_STORAGE_SHARD_0_WRITECACHE_CAPACITY=3221225472 ### Metabase config FROSTFS_STORAGE_SHARD_0_METABASE_PATH=tmp/0/meta FROSTFS_STORAGE_SHARD_0_METABASE_PERM=0644 -FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_SIZE=100 -FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_DELAY=10ms +FROSTFS_STORAGE_SHARD_0_METABASE_NO_SYNC=TRUE +FROSTFS_STORAGE_SHARD_0_METABASE_VERBOSE=TRUE +FROSTFS_STORAGE_SHARD_0_METABASE_MEMTABLES_COUNT=10 +FROSTFS_STORAGE_SHARD_0_METABASE_VALUE_THRESHOLD=1024 +FROSTFS_STORAGE_SHARD_0_METABASE_VALUELOG_FILE_SIZE=512mb +FROSTFS_STORAGE_SHARD_0_METABASE_INDEX_CACHE_SIZE=10mb +FROSTFS_STORAGE_SHARD_0_METABASE_COMPACTORS_COUNT=10 +FROSTFS_STORAGE_SHARD_0_METABASE_GC_INTERVAL=20m +FROSTFS_STORAGE_SHARD_0_METABASE_GC_DISCARD_PERCENT=60 ### Blobstor config FROSTFS_STORAGE_SHARD_0_COMPRESS=true FROSTFS_STORAGE_SHARD_0_COMPRESSION_EXCLUDE_CONTENT_TYPES="audio/* video/*" @@ -164,8 +171,6 @@ FROSTFS_STORAGE_SHARD_1_WRITECACHE_CAPACITY=4294967296 ### Metabase config FROSTFS_STORAGE_SHARD_1_METABASE_PATH=tmp/1/meta FROSTFS_STORAGE_SHARD_1_METABASE_PERM=0644 -FROSTFS_STORAGE_SHARD_1_METABASE_MAX_BATCH_SIZE=200 -FROSTFS_STORAGE_SHARD_1_METABASE_MAX_BATCH_DELAY=20ms ### Blobstor config FROSTFS_STORAGE_SHARD_1_COMPRESS=false FROSTFS_STORAGE_SHARD_1_SMALL_OBJECT_SIZE=102400 diff --git a/config/example/node.json b/config/example/node.json index b9dc6014c..855f6dbbf 100644 --- a/config/example/node.json +++ b/config/example/node.json @@ -153,8 +153,15 @@ "metabase": { "path": "tmp/0/meta", "perm": "0644", - "max_batch_size": 100, - "max_batch_delay": "10ms" + "no_sync": true, + "verbose": true, + "memtables_count": 10, + "value_threshold": 1024, + "valuelog_file_size": "512mb", + "index_cache_size": "10mb", + "compactors_count": 10, + "gc_interval": "20m", + "gc_discard_percent": 60 }, "compress": true, "compression_exclude_content_types": [ @@ -212,9 +219,7 @@ }, "metabase": { "path": "tmp/1/meta", - "perm": "0644", - "max_batch_size": 200, - "max_batch_delay": "20ms" + "perm": "0644" }, "compress": false, "small_object_size": 102400, diff --git a/config/example/node.yaml b/config/example/node.yaml index bad67816a..e596f0212 100644 --- a/config/example/node.yaml +++ b/config/example/node.yaml @@ -131,8 +131,6 @@ storage: metabase: perm: 0644 # permissions for metabase files(directories: +x for current user and group) - max_batch_size: 200 - max_batch_delay: 20ms pilorama: max_batch_delay: 5ms # maximum delay for a batch of operations to be executed @@ -175,8 +173,15 @@ storage: metabase: path: tmp/0/meta # metabase path - max_batch_size: 100 - max_batch_delay: 10ms + no_sync: true + verbose: true + memtables_count: 10 + value_threshold: 1024 + valuelog_file_size: 512mb + index_cache_size: 10mb + compactors_count: 10 + gc_interval: 20m + gc_discard_percent: 60 compress: true # turn on/off zstd(level 3) 
compression of stored objects compression_exclude_content_types: diff --git a/docs/storage-node-configuration.md b/docs/storage-node-configuration.md index 5389bfbb5..e359a0335 100644 --- a/docs/storage-node-configuration.md +++ b/docs/storage-node-configuration.md @@ -270,16 +270,30 @@ gc: metabase: path: /path/to/meta.db perm: 0644 - max_batch_size: 200 - max_batch_delay: 20ms + no_sync: true + verbose: true + memtables_count: 10 + value_threshold: 1024 + valuelog_file_size: 512mb + index_cache_size: 10mb + compactors_count: 10 + gc_interval: 20m + gc_discard_percent: 60 ``` -| Parameter | Type | Default value | Description | |-------------------|------------|---------------|------------------------------------------------------------------------| -| `path` | `string` | | Path to the metabase file. | -| `perm` | file mode | `0660` | Permissions to set for the database file. | -| `max_batch_size` | `int` | `1000` | Maximum amount of write operations to perform in a single transaction. | -| `max_batch_delay` | `duration` | `10ms` | Maximum delay before a batch starts. | +| Parameter | Type | Default value | Description | +| -------------------- | ---------- | ------------- | ------------------------------------------------------------------------------------------------------------------ | +| `path` | `string` | | Path to the metabase file. | +| `perm` | file mode | `0660` | Permissions to set for the database file. | +| `no_sync` | `bool` | `false` | If `true`, the metabase storage engine does not flush changes to disk after each write operation. | +| `verbose` | `bool` | `false` | If `true`, the metabase storage engine's logs are passed to the node logger. | +| `memtables_count` | `int` | `32` | Maximum number of memtables to keep in memory before writes are stalled. | +| `value_threshold` | `size` | `512B` | Threshold used to decide whether a value is stored directly in the LSM tree or separately in the value log files. | +| `valuelog_file_size` | `size` | `1GB` | Maximum size of a single value log file. | +| `index_cache_size` | `size` | `256MB` | Maximum size of memory used by table indices. | +| `compactors_count` | `int` | `8` | Number of compaction workers to run concurrently. | +| `gc_interval` | `duration` | `10m` | Time interval between value log garbage collection runs. | +| `gc_discard_percent` | `int` | `50` | A value log file is rewritten during GC if more than `gc_discard_percent` of its space can be discarded.
| ### `writecache` subsection diff --git a/go.mod b/go.mod index 38e66a20f..52b3d32ee 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02 github.com/cheggaaa/pb v1.0.29 github.com/chzyer/readline v1.5.1 + github.com/dgraph-io/badger/v4 v4.2.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 github.com/go-pkgz/expirable-cache/v3 v3.0.0 github.com/google/uuid v1.6.0 @@ -67,11 +68,18 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/davidmz/go-pageant v1.0.2 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect + github.com/dgraph-io/ristretto v0.1.1 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-fed/httpsig v1.1.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/glog v1.2.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/google/flatbuffers v1.12.1 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1 // indirect @@ -99,6 +107,7 @@ require ( github.com/nspcc-dev/neo-go/pkg/interop v0.0.0-20240521091047-78685785716d // indirect github.com/nspcc-dev/rfc6979 v0.2.1 // indirect github.com/pelletier/go-toml/v2 v2.1.1 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.48.0 // indirect @@ -111,6 +120,7 @@ require ( github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect github.com/twmb/murmur3 v1.1.8 // indirect github.com/urfave/cli v1.22.14 // indirect + go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.22.0 // indirect diff --git a/go.sum b/go.sum index 0849b74a11c343918575b55cfbcb1ac83d4c9299..6b2047766a7a1a91504a9331399d20365b6ff4b4 100644 GIT binary patch delta 5337 zcmb_gTc|AQRc6j{9Gw{t$uQ^4LE>ajl4Ay&-u?e|uQ5@ptGlaj)wk;E>S}^c-MhN3 zUAL}o&@C!4Y6MLo84^ZRmsyy^tyS;-)t$fi<-3yMN4a_JM3LiL*G3eyoam|m*BEsT(684)WL|CI8`@k} zz4d98=C;`A9C}deHb7|`a)YFX2VNVyh6i{*5SNF~KL3XgUifivjbFnWK-pLLiLh&v z(cbMO%*hTTTS3CvO*c%lBoC}G(=s;-TTyI< zhV}TXIEk_+p5C%!Q=?hnCef}OaS%(w<9^DK?e>_MvJ5`8En&9b^%ZFuT5{#x`$YB6 z(|grd@3GbK-Ye(N+zY<%#&?l5wiUQ(8e6`9i3NU5)R^pd)Na7@_?$lVHpNPb=+T%v z@%XVM1=>3nQpYn`5XZA43Weil+gyJfS0gm+R&K`Hdl3fLposy*5iEq|X1*ea>LFNK>zUH<7|E#q<)ugGZ@l!! 
z&)%3$qQ%>$8?|)JbgZOR5LX3pja`FFLcL+)Ydd|t9Ef!Blt@*pR`Tfmy7Ac+Ha8W`hCg+F4QHYq*gTCIE%VNZaBQO;>&3g)Z z;-bu3*^?B*Yk2XQ;*|rL4JS@)Ib0(3<4J!w^39302Y7GJAJoDYSBkcqj+oCre^U$n z2Tp$&w&t`1g_+dmRerDyRukLEwvawzr%g6rQiqdwat?A~fmKIQxDWe$Og$GNHEk^k zE(LQ*?ad{(D6cpMNc7XBG#0Q7W*L@+UC3mnzv1IeYsPlA#))4_Zs`n0!C_WVjl+!W z2)q4QJ&=;@eoyAU!`jUU=ISK(ax$ICVcOf-!FnN1W=GwSTmiHlS;4rx*Joy2C}HlU zkLwZ4)65Mo9JhI(h3?6^D&U3Zh000_m#NB!ey^{ISj%;$a7*2+@jgexoe}BtytD$x z+Y=`>`rc4$9`~~((|g7SvN9>~^suuJ3|VS3^k7eRF>(}2lr5?%BEsB6Lp@yN34tj>AwmN)!RivZsq6geA zN8DoYbR#$KS%FO?rk6Gc7==oK0c4Z38C}B|lOSlk#`OQ!hnpJHx_nVdoO33>8RIOS z;;|a3#@NYWFz^>L!v$n7AI<=2@FI(vs8#_rrjMo9XgXHdEHvLWwd^rhR1Y;a4Oa?E z;>oa+(&-nckOUf(2XcImd3odv2=B(oLrnJ%z&6z!z=G|jvqPicO8l}1t5wvPU3)RVz_D9 zghd&U&}+U)?LK{UN|%Xk+1I9?DCXQen+@b1V;Rbf9CZB_zo1t$toog+`)@ok0NArEX^SSS82Lc?2 z7)H<}*(9K@Lq@kP*3>TPiUwrK?^8Y^DZ*#miH!DxHfhCWkZSvOBiRaL5Rkh}-FKf( z>fE`)fu-uf*#s>0666B zvD#kUJO9nq<_=v4kr_KK#KZ`WWV}fxBj{&D$)`dQAC-A)wJNB2CuQVaw7yipRrTpl zs&|QNy!x?E{%3XXwHGn+nz%&HrLEzZ*>9Gy)Lla*`U0VFP7xPH|>vt~6*W zmUn?+&|@_wez)fg1_w3u8+n>YaJd$+KCyGCK_DEe zk34#L@LgbPzCq4f<)d_~6hy%-qK%fFsKB3tVJR>AU{sKe)5IE%hP=igv{zZQl+>P% z)+bwx;t@0CcD~A%J)-DKD^AkDaeQ3UQkadWq_$s-fO!7MqestF|M2J!p24ese6%`$ z@kjsinY*Y{t9M@SRv&-#?0Nk9yPtlnD!p!dT*8xtK_WWDyCc0%+m>7E^1{ovbMBCg zNck`pmWPZuU(J5$j{Ky>&o6bF#xfILe(X?K*06ZVJ( z-XKyvY8hZW6f#vXZ6V9{T=n(&$Io9~yzwGS+@idrEy0TN{(e-hS7a>bqb_ke(PnSQ zD0aSSFmNV~Rm&g)qWar6UpxQ(PrrKS8CZS$P4)b5Z_a=4=E+ig?LOIv5%#6Njx?T2 zGBt!GJr`$~7h(wxih}|}iLE?pwTa}j90Z8hj>kqc;8c;1S#bm??Y)osMTX=d9 zB{oqs=)O}nKDtT4QEdQwnmKKIHy=gSzXm^imUqAM!URPS1rX903(@EUtZnI*%`jR$ zg8UF@iX;M(#sEIHslm8mvrFhMS)qOGl=f!C&6oTF_vY23f~x*K|9!NlhgnVanag~I zEDq3oH7FVY%+PUY)aR?bGQ$oO0;=pUCcL`z7Bbe_b#veDJo9VwoyLOaDQ^pl(kWmsYt5joIK9` z6;Ehj#EE>KuH#uG|4+tnh(i*mF{Wt(2!KxMCZpfl94JWveSX(+AnZA6*7qG*l@Gi( zu&rs2VIyO(I0@4hQ+UStH;V^%9?%$CObG_yO`M`}ivVaJ!N}_CrSu8|80<1aaS}FB zk|3~Xox+*wPs(NWP8riDexTJyPA5L*JnP?X80ZeK1uNShj?SL~No zBC4OuE{I^8HjUA!A1s<=j9RO)s!x|Y4Q#?a?)J8LKN=*Fxn`q@xGqe?nw)%j!JKwt zp;jOLt%ucLSKmKHjdlqx#8`rALdcRNq;cIe%^L2QKdpqRwxG)Obmz_SD9mAR*zF}i zQlP`r#>j(tkMV>>TE``k`Vj~nYhnh_EYw~U#K{`&!r94e;I!8pcdz!_VY$}w4HOF4 z?f1D8JKC+>In9*qHJHDTd;aw=-G8>L#ptAW+{;dW8P)YM(AB#s;8q2LZAh)uN&7RNl<{ zY2q#ER<}*Ay9H70-}+efwJ$$Eg{ToAjS7LnXrw3r zU9~@?UED2CBRt*ddr9aFm9RYa)*IdL;g+0EiYk8VN6ugS^0)510dL<)!9FL(dS{sv zYhkoah+uJ&(?BE^*($PvrKB>DD>s)nJQvUIb4hGm5{u!tGOHpP$b#b`qHbT;P^FG zuLQ9#D*d^lEe&HopZkG_i^Gl75R~=QJX`S!Tl zy}fx2y)AouD58}fU1D36^O>Ep3i1uQjPc(a|Y&n}V^_Df;l0C$m^yW&*Q2 zVKEG|#%&$~vp#T936ptx8M8rpfeVu@hQhNmhzA0b5Q}TGQH&J=v(b*M1CuY550mm%a$Iyr3-yvz@6E1(Qy&6O+lZ9JBJUz5=sewc7%dlek*5zq#}WvyH+! 
E0U7F5+yDRo diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 4f0b29124..60b1c416d 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -19,527 +19,602 @@ const ( ) const ( - InnerringNonalphabetModeDoNotStopContainerEstimations = "non-alphabet mode, do not stop container estimations" - InnerringCantStopEpochEstimation = "can't stop epoch estimation" - InnerringCantMakeNotaryDepositInMainChain = "can't make notary deposit in main chain" - InnerringCantMakeNotaryDepositInSideChain = "can't make notary deposit in side chain" - InnerringNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" - InnerringCantGetInnerRingIndex = "can't get inner ring index" - InnerringCantGetInnerRingSize = "can't get inner ring size" - InnerringCantGetAlphabetIndex = "can't get alphabet index" - InnerringIgnoreValidatorVoteNodeNotInAlphabetRange = "ignore validator vote: node not in alphabet range" - InnerringIgnoreValidatorVoteEmptyValidatorsList = "ignore validator vote: empty validators list" - InnerringCantInvokeVoteMethodInAlphabetContract = "can't invoke vote method in alphabet contract" - InnerringCantGetLastProcessedMainChainBlockNumber = "can't get last processed main chain block number" - InnerringNotarySupport = "notary support" - InnerringAlphabetKeysSyncIsDisabled = "alphabet keys sync is disabled" - InnerringNoControlServerEndpointSpecified = "no Control server endpoint specified, service is disabled" - InnerringCantGetLastProcessedSideChainBlockNumber = "can't get last processed side chain block number" - InnerringFailedToSetGroupSignerScope = "failed to set group signer scope, continue with Global" - InnerringCantVoteForPreparedValidators = "can't vote for prepared validators" - InnerringNewBlock = "new block" - InnerringCantUpdatePersistentState = "can't update persistent state" - InnerringCloserError = "closer error" - InnerringReadConfigFromBlockchain = "read config from blockchain" - NotificatorNotificatorStartProcessingObjectNotifications = "notificator: start processing object notifications" - NotificatorNotificatorProcessingObjectNotification = "notificator: processing object notification" - PolicerCouldNotGetContainer = "could not get container" - PolicerCouldNotConfirmContainerRemoval = "could not confirm container removal" - PolicerCouldNotInhumeObjectWithMissingContainer = "could not inhume object with missing container" - PolicerCouldNotBuildPlacementVectorForObject = "could not build placement vector for object" - PolicerRedundantLocalObjectCopyDetected = "redundant local object copy detected" - PolicerReceiveObjectHeaderToCheckPolicyCompliance = "receive object header to check policy compliance" - PolicerConsiderNodeUnderMaintenanceAsOK = "consider node under maintenance as OK" - PolicerShortageOfObjectCopiesDetected = "shortage of object copies detected" - PolicerSomeOfTheCopiesAreStoredOnNodesUnderMaintenance = "some of the copies are stored on nodes under maintenance, save local copy" - PolicerRoutineStopped = "routine stopped" - PolicerFailureAtObjectSelectForReplication = "failure at object select for replication" - PolicerPoolSubmission = "pool submission" - PolicerUnableToProcessObj = "unable to process object" - ReplicatorFinishWork = "finish work" - ReplicatorCouldNotGetObjectFromLocalStorage = "could not get object from local storage" - ReplicatorCouldNotReplicateObject = "could not replicate object" - ReplicatorObjectSuccessfullyReplicated = "object successfully replicated" - TreeRedirectingTreeServiceQuery = "redirecting tree 
service query" - TreeBearerPresentedButNotAllowedByACL = "bearer presented but not allowed by ACL" - TreeCouldNotGetLastSynchronizedHeightForATree = "could not get last synchronized height for a tree" - TreeCouldNotUpdateLastSynchronizedHeightForATree = "could not update last synchronized height for a tree" - TreeSynchronizeTree = "synchronize tree" - TreeFailedToRunTreeSynchronizationOverAllNodes = "failed to run tree synchronization over all nodes" - TreeFailedToRunTreeSynchronizationForSpecificNode = "failed to run tree synchronization for specific node" - TreeFailedToParseAddressForTreeSynchronization = "failed to parse address for tree synchronization" - TreeFailedToConnectForTreeSynchronization = "failed to connect for tree synchronization" - TreeSyncingTrees = "syncing trees..." - TreeCouldNotFetchContainers = "could not fetch containers" - TreeTreesHaveBeenSynchronized = "trees have been synchronized" - TreeSyncingContainerTrees = "syncing container trees..." - TreeCouldNotSyncTrees = "could not sync trees" - TreeContainerTreesHaveBeenSynced = "container trees have been synced" - TreeCouldNotQueryTreesForSynchronization = "could not query trees for synchronization" - TreeRemovingRedundantTrees = "removing redundant trees..." - TreeCouldNotCheckIfContainerExisted = "could not check if the container ever existed" - TreeCouldNotRemoveRedundantTree = "could not remove redundant tree" - TreeCouldNotCalculateContainerNodes = "could not calculate container nodes" - TreeFailedToApplyReplicatedOperation = "failed to apply replicated operation" - TreeDoNotSendUpdateToTheNode = "do not send update to the node" - TreeFailedToSentUpdateToTheNode = "failed to sent update to the node" - TreeErrorDuringReplication = "error during replication" - PersistentCouldNotGetSessionFromPersistentStorage = "could not get session from persistent storage" - PersistentCouldNotDeleteSToken = "could not delete token" - PersistentCouldNotCleanUpExpiredTokens = "could not clean up expired tokens" - TombstoneCouldNotGetTheTombstoneTheSource = "tombstone getter: could not get the tombstone the source" - DeleteNoSplitInfoObjectIsPHY = "no split info, object is PHY" - DeleteAssemblingChain = "assembling chain..." - DeleteCollectingChildren = "collecting children..." - DeleteSupplementBySplitID = "supplement by split ID" - DeleteFormingTombstoneStructure = "forming tombstone structure..." - DeleteTombstoneStructureSuccessfullyFormedSaving = "tombstone structure successfully formed, saving..." - DeleteFormingSplitInfo = "forming split info..." - DeleteSplitInfoSuccessfullyFormedCollectingMembers = "split info successfully formed, collecting members..." - DeleteMembersSuccessfullyCollected = "members successfully collected" - DeleteECObjectReceived = "erasure-coded object received, form tombstone" - GetRemoteCallFailed = "remote call failed" - GetCanNotAssembleTheObject = "can not assemble the object" - GetTryingToAssembleTheObject = "trying to assemble the object..." - GetTryingToAssembleTheECObject = "trying to assemble the ec object..." - GetAssemblingSplittedObject = "assembling splitted object..." - GetAssemblingECObject = "assembling erasure-coded object..." 
- GetUnableToGetAllPartsECObject = "unable to get all parts, continue to reconstruct with existed" - GetUnableToGetPartECObject = "unable to get part of the erasure-encoded object" - GetUnableToHeadPartECObject = "unable to head part of the erasure-encoded object" - GetUnableToGetECObjectContainer = "unable to get container for erasure-coded object" - GetAssemblingSplittedObjectCompleted = "assembling splitted object completed" - GetAssemblingECObjectCompleted = "assembling erasure-coded object completed" - GetFailedToAssembleSplittedObject = "failed to assemble splitted object" - GetFailedToAssembleECObject = "failed to assemble erasure-coded object" - GetCouldNotGenerateContainerTraverser = "could not generate container traverser" - GetCouldNotConstructRemoteNodeClient = "could not construct remote node client" - GetCouldNotWriteHeader = "could not write header" - GetCouldNotWritePayloadChunk = "could not write payload chunk" - GetLocalGetFailed = "local get failed" - GetReturnResultDirectly = "return result directly" - GetCompletingTheOperation = "completing the operation" - GetRequestedObjectWasMarkedAsRemoved = "requested object was marked as removed" - GetRequestedObjectIsVirtual = "requested object is virtual" - GetRequestedObjectIsEC = "requested object is erasure-coded" - GetRequestedRangeIsOutOfObjectBounds = "requested range is out of object bounds" - PutAdditionalContainerBroadcastFailure = "additional container broadcast failure" - SearchReturnResultDirectly = "return result directly" - SearchCouldNotConstructRemoteNodeClient = "could not construct remote node client" - SearchRemoteOperationFailed = "remote operation failed" - SearchCouldNotGenerateContainerTraverser = "could not generate container traverser" - SearchCouldNotWriteObjectIdentifiers = "could not write object identifiers" - SearchLocalOperationFailed = "local operation failed" - UtilObjectServiceError = "object service error" - UtilCouldNotPushTaskToWorkerPool = "could not push task to worker pool" - V2CantCheckIfRequestFromInnerRing = "can't check if request from inner ring" - V2CantCheckIfRequestFromContainerNode = "can't check if request from container node" - ClientCouldNotRestoreBlockSubscriptionAfterRPCSwitch = "could not restore block subscription after RPC switch" - ClientCouldNotRestoreNotificationSubscriptionAfterRPCSwitch = "could not restore notification subscription after RPC switch" - ClientCouldNotRestoreNotaryNotificationSubscriptionAfterRPCSwitch = "could not restore notary notification subscription after RPC switch" - ClientCouldNotEstablishConnectionToTheSwitchedRPCNode = "could not establish connection to the switched RPC node" - ClientConnectionToTheNewRPCNodeHasBeenEstablished = "connection to the new RPC node has been established" - ClientCouldNotCreateClientToTheHigherPriorityNode = "could not create client to the higher priority node" - ClientSwitchedToTheHigherPriorityRPC = "switched to the higher priority RPC" - ClientNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" - ClientNotaryDepositInvoke = "notary deposit invoke" - ClientNotaryRequestWithPreparedMainTXInvoked = "notary request with prepared main TX invoked" - ClientNotaryRequestInvoked = "notary request invoked" - ClientNotaryDepositTransactionWasSuccessfullyPersisted = "notary deposit transaction was successfully persisted" - ClientAttemptToWaitForNotaryDepositTransactionToGetPersisted = "attempt to wait for notary deposit transaction to get persisted" - ClientNeoClientInvoke = "neo client invoke" - 
ClientNativeGasTransferInvoke = "native gas transfer invoke" - ClientBatchGasTransferInvoke = "batch gas transfer invoke" - ClientCantGetBlockchainHeight = "can't get blockchain height" - ClientCantGetBlockchainHeight243 = "can't get blockchain height" - EventCouldNotSubmitHandlerToWorkerPool = "could not Submit handler to worker pool" - EventCouldNotStartListenToEvents = "could not start listen to events" - EventStopEventListenerByError = "stop event listener by error" - EventStopEventListenerByContext = "stop event listener by context" - EventStopEventListenerByNotificationChannel = "stop event listener by notification channel" - EventNilNotificationEventWasCaught = "nil notification event was caught" - EventStopEventListenerByNotaryChannel = "stop event listener by notary channel" - EventNilNotaryEventWasCaught = "nil notary event was caught" - EventStopEventListenerByBlockChannel = "stop event listener by block channel" - EventNilBlockWasCaught = "nil block was caught" - EventListenerWorkerPoolDrained = "listener worker pool drained" - EventEventParserNotSet = "event parser not set" - EventCouldNotParseNotificationEvent = "could not parse notification event" - EventNotificationHandlersForParsedNotificationEventWereNotRegistered = "notification handlers for parsed notification event were not registered" - EventSkipExpiredMainTXNotaryEvent = "skip expired main TX notary event" - EventCouldNotPrepareAndValidateNotaryEvent = "could not prepare and validate notary event" - EventNotaryParserNotSet = "notary parser not set" - EventCouldNotParseNotaryEvent = "could not parse notary event" - EventNotaryHandlersForParsedNotificationEventWereNotRegistered = "notary handlers for parsed notification event were not registered" - EventIgnoreNilEventParser = "ignore nil event parser" - EventListenerHasBeenAlreadyStartedIgnoreParser = "listener has been already started, ignore parser" - EventRegisteredNewEventParser = "registered new event parser" - EventIgnoreNilEventHandler = "ignore nil event handler" - EventIgnoreHandlerOfEventWoParser = "ignore handler of event w/o parser" - EventRegisteredNewEventHandler = "registered new event handler" - EventIgnoreNilNotaryEventParser = "ignore nil notary event parser" - EventListenerHasBeenAlreadyStartedIgnoreNotaryParser = "listener has been already started, ignore notary parser" - EventIgnoreNilNotaryEventHandler = "ignore nil notary event handler" - EventIgnoreHandlerOfNotaryEventWoParser = "ignore handler of notary event w/o parser" - EventIgnoreNilBlockHandler = "ignore nil block handler" - StorageOperation = "local object storage operation" - BlobovniczaCreatingDirectoryForBoltDB = "creating directory for BoltDB" - BlobovniczaOpeningBoltDB = "opening BoltDB" - BlobovniczaInitializing = "initializing..." - BlobovniczaAlreadyInitialized = "already initialized" - BlobovniczaCreatingBucketForSizeRange = "creating bucket for size range" - BlobovniczaClosingBoltDB = "closing BoltDB" - BlobovniczaObjectWasRemovedFromBucket = "object was removed from bucket" - BlobstorOpening = "opening..." - BlobstorInitializing = "initializing..." - BlobstorClosing = "closing..." 
- BlobstorCouldntCloseStorage = "couldn't close storage" - BlobstorErrorOccurredDuringObjectExistenceChecking = "error occurred during object existence checking" - BlobstorErrorOccurredDuringTheIteration = "error occurred during the iteration" - EngineShardHasBeenRemoved = "shard has been removed" - EngineCouldNotCloseRemovedShard = "could not close removed shard" - EngineCouldNotOpenShardClosingAndSkipping = "could not open shard, closing and skipping" - EngineCouldNotClosePartiallyInitializedShard = "could not close partially initialized shard" - EngineCouldNotInitializeShardClosingAndSkipping = "could not initialize shard, closing and skipping" - EngineCouldNotCloseShard = "could not close shard" - EngineCouldNotReloadAShard = "could not reload a shard" - EngineAddedNewShard = "added new shard" - EngineCouldNotPutObjectToShard = "could not put object to shard" - EngineCouldNotCheckObjectExistence = "could not check object existence when put object to shard" - EngineErrorDuringSearchingForObjectChildren = "error during searching for object children" - EngineCouldNotInhumeObjectInShard = "could not inhume object in shard" - EngineStartingRemovalOfLocallyredundantCopies = "starting removal of locally-redundant copies" - EngineStartedDuplicatesRemovalRoutine = "started duplicates removal routine" - EngineFinishedRemovalOfLocallyredundantCopies = "finished removal of locally-redundant copies" - EngineRemovingAnObjectWithoutFullLockingCheck = "removing an object without full locking check" - EngineInterruptProcessingTheExpiredLocks = "interrupt processing the expired locks" - EngineInterruptGettingLockers = "can't get object's lockers" - EngineInterruptProcessingTheDeletedLocks = "interrupt processing the deleted locks" - EngineFailedToMoveShardInDegradedreadonlyModeMovingToReadonly = "failed to move shard in degraded-read-only mode, moving to read-only" - EngineFailedToMoveShardInReadonlyMode = "failed to move shard in read-only mode" - EngineShardIsMovedInReadonlyModeDueToErrorThreshold = "shard is moved in read-only mode due to error threshold" - EngineShardIsMovedInDegradedModeDueToErrorThreshold = "shard is moved in degraded mode due to error threshold" - EngineModeChangeIsInProgressIgnoringSetmodeRequest = "mode change is in progress, ignoring set-mode request" - EngineStartedShardsEvacuation = "started shards evacuation" - EngineFinishedSuccessfullyShardsEvacuation = "shards evacuation finished successfully" - EngineFinishedWithErrorShardsEvacuation = "shards evacuation finished with error" - EngineObjectIsMovedToAnotherShard = "object is moved to another shard" - MetabaseMissingMatcher = "missing matcher" - MetabaseErrorInFKBTSelection = "error in FKBT selection" - MetabaseCantDecodeListBucketLeaf = "can't decode list bucket leaf" - MetabaseUnknownOperation = "unknown operation" - MetabaseCantIterateOverTheBucket = "can't iterate over the bucket" - MetabaseCouldNotIterateOverTheBuckets = "could not iterate over the buckets" - MetabaseCreatedDirectoryForMetabase = "created directory for Metabase" - MetabaseOpenedBoltDBInstanceForMetabase = "opened boltDB instance for Metabase" - MetabaseCheckingMetabaseVersion = "checking metabase version" - ShardCantSelectAllObjects = "can't select all objects" - ShardSettingShardMode = "setting shard mode" - ShardShardModeSetSuccessfully = "shard mode set successfully" - ShardFetchingObjectWithoutMeta = "fetching object without meta" - ShardObjectIsMissingInWritecache = "object is missing in write-cache" - ShardFailedToFetchObjectFromWritecache = 
"failed to fetch object from write-cache" - ShardCantPutObjectToTheWritecacheTryingBlobstor = "can't put object to the write-cache, trying blobstor" - ShardMetaObjectCounterRead = "meta: object counter read" - ShardMetaCantReadContainerList = "meta: can't read container list" - ShardMetaCantReadContainerSize = "meta: can't read container size" - ShardMetaInfoPresentButObjectNotFound = "meta info was present, but the object is missing" - ShardMetabaseFailureSwitchingMode = "metabase failure, switching mode" - ShardCantMoveShardToReadonlySwitchMode = "can't move shard to readonly, switch mode" - ShardCouldNotUnmarshalObject = "could not unmarshal object" - ShardCouldNotCloseShardComponent = "could not close shard component" - ShardCantOpenMetabaseMoveToADegradedMode = "can't open metabase, move to a degraded mode" - ShardCantInitializeMetabaseMoveToADegradedreadonlyMode = "can't initialize metabase, move to a degraded-read-only mode" - ShardStopEventListenerByClosedEventChannel = "stop event listener by closed `event` channel" - ShardStopEventListenerByClosedStopChannel = "stop event listener by closed `stop` channel" - ShardStopEventListenerByContext = "stop event listener by context" - ShardCouldNotSubmitGCJobToWorkerPool = "could not submit GC job to worker pool" - ShardGCIsStopped = "GC is stopped" - ShardWaitingForGCWorkersToStop = "waiting for GC workers to stop..." - ShardIteratorOverMetabaseGraveyardFailed = "iterator over metabase graveyard failed" - ShardCouldNotDeleteTheObjects = "could not delete the objects" - ShardIteratorOverExpiredObjectsFailed = "iterator over expired objects failed" - ShardCouldNotInhumeTheObjects = "could not inhume the objects" - ShardStartedExpiredTombstonesHandling = "started expired tombstones handling" - ShardIteratingTombstones = "iterating tombstones" - ShardShardIsInADegradedModeSkipCollectingExpiredTombstones = "shard is in a degraded mode, skip collecting expired tombstones" - ShardIteratorOverGraveyardFailed = "iterator over graveyard failed" - ShardHandlingExpiredTombstonesBatch = "handling expired tombstones batch" - ShardFinishedExpiredTombstonesHandling = "finished expired tombstones handling" - ShardIteratorOverExpiredLocksFailed = "iterator over expired locks failed" - ShardCouldNotMarkTombstonesAsGarbage = "could not mark tombstones as garbage" - ShardCouldNotDropExpiredGraveRecords = "could not drop expired grave records" - ShardFailureToUnlockObjects = "failure to unlock objects" - ShardFailureToMarkLockersAsGarbage = "failure to mark lockers as garbage" - ShardFailureToGetExpiredUnlockedObjects = "failure to get expired unlocked objects" - ShardCouldNotMarkObjectToDeleteInMetabase = "could not mark object to delete in metabase" - WritecacheTriedToFlushItemsFromWritecache = "tried to flush items from write-cache" - WritecacheWaitingForChannelsToFlush = "waiting for channels to flush" - WritecacheCantRemoveObjectsFromTheDatabase = "can't remove objects from the database" - WritecacheCantRemoveObjectFromWritecache = "can't remove object from write-cache" - BlobovniczatreeCouldNotGetObjectFromLevel = "could not get object from level" - BlobovniczatreeCouldNotCloseBlobovnicza = "could not close Blobovnicza" - BlobovniczatreeCouldNotRemoveObjectFromLevel = "could not remove object from level" - BlobovniczatreeCouldNotGetActiveBlobovnicza = "could not get active blobovnicza" - BlobovniczatreeBlobovniczaOverflowed = "blobovnicza overflowed" - BlobovniczatreeCouldNotPutObjectToActiveBlobovnicza = "could not put object to active blobovnicza" - 
BlobovniczatreeInitializingBlobovniczas = "initializing Blobovnicza's" - BlobovniczatreeReadonlyModeSkipBlobovniczasInitialization = "read-only mode, skip blobovniczas initialization..." - BlobovniczatreeBlobovniczaSuccessfullyInitializedClosing = "blobovnicza successfully initialized, closing..." - AlphabetTick = "tick" - AlphabetAlphabetProcessorWorkerPoolDrained = "alphabet processor worker pool drained" - AlphabetNonAlphabetModeIgnoreGasEmissionEvent = "non alphabet mode, ignore gas emission event" - AlphabetNodeIsOutOfAlphabetRangeIgnoreGasEmissionEvent = "node is out of alphabet range, ignore gas emission event" - AlphabetCantInvokeAlphabetEmitMethod = "can't invoke alphabet emit method" - AlphabetStorageNodeEmissionIsOff = "storage node emission is off" - AlphabetCantGetNetmapSnapshotToEmitGasToStorageNodes = "can't get netmap snapshot to emit gas to storage nodes" - AlphabetGasEmission = "gas emission" - AlphabetCantParseNodePublicKey = "can't parse node public key" - AlphabetCantTransferGas = "can't transfer gas" - AlphabetCantTransferGasToWallet = "can't transfer gas to wallet" - AlphabetAlphabetWorkerPool = "alphabet worker pool" - BalanceBalanceWorkerPoolDrained = "balance worker pool drained" - BalanceNonAlphabetModeIgnoreBalanceLock = "non alphabet mode, ignore balance lock" - BalanceCantSendLockAssetTx = "can't send lock asset tx" - BalanceBalanceWorkerPool = "balance worker pool" - ContainerContainerWorkerPool = "container worker pool" - ContainerContainerProcessorWorkerPoolDrained = "container processor worker pool drained" - ContainerNonAlphabetModeIgnoreContainerPut = "non alphabet mode, ignore container put" - ContainerPutContainerCheckFailed = "put container check failed" - ContainerCouldNotApprovePutContainer = "could not approve put container" - ContainerNonAlphabetModeIgnoreContainerDelete = "non alphabet mode, ignore container delete" - ContainerDeleteContainerCheckFailed = "delete container check failed" - ContainerCouldNotApproveDeleteContainer = "could not approve delete container" - ContainerNonAlphabetModeIgnoreSetEACL = "non alphabet mode, ignore set EACL" - ContainerSetEACLCheckFailed = "set EACL check failed" - ContainerCouldNotApproveSetEACL = "could not approve set EACL" - FrostFSNonAlphabetModeIgnoreConfig = "non alphabet mode, ignore config" - FrostFSCantRelaySetConfigEvent = "can't relay set config event" - FrostFSFrostfsWorkerPool = "frostfs worker pool" - FrostFSFrostfsProcessorWorkerPoolDrained = "frostfs processor worker pool drained" - FrostFSNonAlphabetModeIgnoreDeposit = "non alphabet mode, ignore deposit" - FrostFSCantTransferAssetsToBalanceContract = "can't transfer assets to balance contract" - FrostFSDoubleMintEmissionDeclined = "double mint emission declined" - FrostFSCantGetGasBalanceOfTheNode = "can't get gas balance of the node" - FrostFSGasBalanceThresholdHasBeenReached = "gas balance threshold has been reached" - FrostFSCantTransferNativeGasToReceiver = "can't transfer native gas to receiver" - FrostFSNonAlphabetModeIgnoreWithdraw = "non alphabet mode, ignore withdraw" - FrostFSCantCreateLockAccount = "can't create lock account" - FrostFSCantLockAssetsForWithdraw = "can't lock assets for withdraw" - FrostFSNonAlphabetModeIgnoreCheque = "non alphabet mode, ignore cheque" - FrostFSCantTransferAssetsToFedContract = "can't transfer assets to fed contract" - GovernanceNewEvent = "new event" - GovernanceGovernanceWorkerPoolDrained = "governance worker pool drained" - GovernanceNonAlphabetModeIgnoreAlphabetSync = "non alphabet mode, ignore 
alphabet sync" - GovernanceCantFetchAlphabetListFromMainNet = "can't fetch alphabet list from main net" - GovernanceCantFetchAlphabetListFromSideChain = "can't fetch alphabet list from side chain" - GovernanceCantMergeAlphabetListsFromMainNetAndSideChain = "can't merge alphabet lists from main net and side chain" - GovernanceNoGovernanceUpdateAlphabetListHasNotBeenChanged = "no governance update, alphabet list has not been changed" - GovernanceAlphabetListHasBeenChangedStartingUpdate = "alphabet list has been changed, starting update" - GovernanceCantVoteForSideChainCommittee = "can't vote for side chain committee" - GovernanceFinishedAlphabetListUpdate = "finished alphabet list update" - GovernanceCantFetchInnerRingListFromSideChain = "can't fetch inner ring list from side chain" - GovernanceCantCreateNewInnerRingListWithNewAlphabetKeys = "can't create new inner ring list with new alphabet keys" - GovernanceUpdateOfTheInnerRingList = "update of the inner ring list" - GovernanceCantUpdateInnerRingListWithNewAlphabetKeys = "can't update inner ring list with new alphabet keys" - GovernanceCantUpdateListOfNotaryNodesInSideChain = "can't update list of notary nodes in side chain" - GovernanceCantUpdateListOfAlphabetNodesInFrostfsContract = "can't update list of alphabet nodes in frostfs contract" - NetmapNetmapWorkerPool = "netmap worker pool" - NetmapTick = "tick" - NetmapNetmapWorkerPoolDrained = "netmap worker pool drained" - NetmapNetmapCleanUpRoutineIsDisabled518 = "netmap clean up routine is disabled" - NetmapNonAlphabetModeIgnoreNewNetmapCleanupTick = "non alphabet mode, ignore new netmap cleanup tick" - NetmapCantDecodePublicKeyOfNetmapNode = "can't decode public key of netmap node" - NetmapVoteToRemoveNodeFromNetmap = "vote to remove node from netmap" - NetmapCantInvokeNetmapUpdateState = "can't invoke netmap.UpdateState" - NetmapCantIterateOnNetmapCleanerCache = "can't iterate on netmap cleaner cache" - NetmapCantGetEpochDuration = "can't get epoch duration" - NetmapCantGetTransactionHeight = "can't get transaction height" - NetmapCantResetEpochTimer = "can't reset epoch timer" - NetmapCantGetNetmapSnapshotToPerformCleanup = "can't get netmap snapshot to perform cleanup" - NetmapCantStartContainerSizeEstimation = "can't start container size estimation" - NetmapNonAlphabetModeIgnoreNewEpochTick = "non alphabet mode, ignore new epoch tick" - NetmapNextEpoch = "next epoch" - NetmapCantInvokeNetmapNewEpoch = "can't invoke netmap.NewEpoch" - NetmapNonAlphabetModeIgnoreNewPeerNotification = "non alphabet mode, ignore new peer notification" - NetmapNonhaltNotaryTransaction = "non-halt notary transaction" - NetmapCantParseNetworkMapCandidate = "can't parse network map candidate" - NetmapCouldNotVerifyAndUpdateInformationAboutNetworkMapCandidate = "could not verify and update information about network map candidate" - NetmapApprovingNetworkMapCandidate = "approving network map candidate" - NetmapCantInvokeNetmapAddPeer = "can't invoke netmap.AddPeer" - NetmapNonAlphabetModeIgnoreUpdatePeerNotification = "non alphabet mode, ignore update peer notification" - NetmapPreventSwitchingNodeToMaintenanceState = "prevent switching node to maintenance state" - NetmapCantInvokeNetmapUpdatePeer = "can't invoke netmap.UpdatePeer" - FrostFSIRInternalError = "internal error" - FrostFSIRCouldNotShutdownHTTPServer = "could not shutdown HTTP server" - FrostFSIRApplicationStopped = "application stopped" - FrostFSIRCouldntCreateRPCClientForEndpoint = "could not create RPC client for endpoint" - 
FrostFSIRCreatedRPCClientForEndpoint = "created RPC client for endpoint" - FrostFSIRReloadExtraWallets = "reload extra wallets" - FrostFSNodeStartListeningEndpoint = "start listening endpoint" - FrostFSNodeCouldNotReadCertificateFromFile = "could not read certificate from file" - FrostFSNodeCantListenGRPCEndpoint = "can't listen gRPC endpoint" - FrostFSNodeStopListeningGRPCEndpoint = "stop listening gRPC endpoint" - FrostFSNodeStoppingGRPCServer = "stopping gRPC server..." - FrostFSNodeGRPCCannotShutdownGracefullyForcingStop = "gRPC cannot shutdown gracefully, forcing stop" - FrostFSNodeGRPCServerStoppedSuccessfully = "gRPC server stopped successfully" - FrostFSNodeGRPCServerError = "gRPC server error" - FrostFSNodeGRPCReconnecting = "reconnecting gRPC server..." - FrostFSNodeGRPCReconnectedSuccessfully = "gRPC server reconnected successfully" - FrostFSNodeGRPCServerConfigNotFound = "gRPC server config not found" - FrostFSNodeGRPCReconnectFailed = "failed to reconnect gRPC server" - FrostFSNodeWaitingForAllProcessesToStop = "waiting for all processes to stop" - FrostFSNodeStartedLocalNodesMaintenance = "started local node's maintenance" - FrostFSNodeStoppedLocalNodesMaintenance = "stopped local node's maintenance" - FrostFSNodeFailedToAttachShardToEngine = "failed to attach shard to engine" - FrostFSNodeShardAttachedToEngine = "shard attached to engine" - FrostFSNodeClosingComponentsOfTheStorageEngine = "closing components of the storage engine..." - FrostFSNodeAccessPolicyEngineClosingFailure = "ape closing failure" - FrostFSNodeStorageEngineClosingFailure = "storage engine closing failure" - FrostFSNodePersistentRuleStorageDBPathIsNotSetInmemoryWillBeUsed = "persistent rule storage db path is not set: in-memory will be used" - FrostFSNodeAllComponentsOfTheStorageEngineClosedSuccessfully = "all components of the storage engine closed successfully" - FrostFSNodeBootstrappingWithTheMaintenanceState = "bootstrapping with the maintenance state" - FrostFSNodeBootstrappingWithOnlineState = "bootstrapping with online state" - FrostFSNodeTerminationSignalHasBeenReceivedStopping = "termination signal has been received, stopping..." - FrostFSNodeTerminationSignalProcessingIsComplete = "termination signal processing is complete" - FrostFSNodeInternalApplicationError = "internal application error" - FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete" - FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..." 
- FrostFSNodeSIGHUPSkip = "node is not ready for reconfiguration, skipped SIGHUP" - FrostFSNodeShutdownSkip = "node is already shutting down, skipped shutdown" - FrostFSNodeShutdownWhenNotReady = "node is going to shut down when subsystems are still initializing" - FrostFSNodeConfigurationReading = "configuration reading" - FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation" - FrostFSNodeTracingConfigationUpdated = "tracing configation updated" - FrostFSNodeStorageEngineConfigurationUpdate = "storage engine configuration update" - FrostFSNodePoolConfigurationUpdate = "adjust pool configuration" - FrostFSNodeUpdatedConfigurationApplying = "updated configuration applying" - FrostFSNodeConfigurationHasBeenReloadedSuccessfully = "configuration has been reloaded successfully" - FrostFSNodeReadNewlyCreatedContainerAfterTheNotification = "read newly created container after the notification" - FrostFSNodeContainerCreationEventsReceipt = "container creation event's receipt" - FrostFSNodeContainerRemovalEventsReceipt = "container removal event's receipt" - FrostFSNodeNotificatorCouldNotListContainers = "notificator: could not list containers" - FrostFSNodeNotificatorCouldNotSelectObjectsFromContainer = "notificator: could not select objects from container" - FrostFSNodeNotificatorCouldNotProcessObject = "notificator: could not process object" - FrostFSNodeNotificatorFinishedProcessingObjectNotifications = "notificator: finished processing object notifications" - FrostFSNodeCouldNotWriteObjectNotification = "could not write object notification" - FrostFSNodeCouldNotGetMaxObjectSizeValue = "could not get max object size value" - FrostFSNodeCouldNotInhumeMarkRedundantCopyAsGarbage = "could not inhume mark redundant copy as garbage" - FrostFSNodeFailedInitTracing = "failed init tracing" - FrostFSNodeFailedShutdownTracing = "failed shutdown tracing" - FrostFSNodeFailedToCreateNeoRPCClient = "failed to create neo RPC client" - FrostFSNodeClosingMorphComponents = "closing morph components..." 
- FrostFSNodeFailedToSetGroupSignerScopeContinueWithGlobal = "failed to set group signer scope, continue with Global" - FrostFSNodeNotarySupport = "notary support" - FrostFSNodeMorphcacheTTLFetchedFromNetwork = "morph.cache_ttl fetched from network" - FrostFSNodeNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" - FrostFSNodeCantGetLastProcessedSideChainBlockNumber = "can't get last processed side chain block number" - FrostFSNodeNewEpochEventFromSidechain = "new epoch event from sidechain" - FrostFSNodeNewBlock = "new block" - FrostFSNodeCantUpdatePersistentState = "can't update persistent state" - FrostFSNodeCantSendRebootstrapTx = "can't send re-bootstrap tx" - FrostFSNodeCouldNotUpdateNodeStateOnNewEpoch = "could not update node state on new epoch" - FrostFSNodeCouldNotMakeNotaryDeposit = "could not make notary deposit" - FrostFSNodeInitialNetworkState = "initial network state" - FrostFSNodeTreeServiceIsNotEnabledSkipInitialization = "tree service is not enabled, skip initialization" - FrostFSNodeCouldNotSynchronizeTreeService = "could not synchronize Tree Service" - FrostFSNodeRemovingAllTreesForContainer = "removing all trees for container" - FrostFSNodeContainerRemovalEventReceivedButTreesWerentRemoved = "container removal event received, but trees weren't removed" - FrostFSNodeCantListenGRPCEndpointControl = "can't listen gRPC endpoint (control)" - FrostFSNodePolicerIsDisabled = "policer is disabled" - CommonApplicationStarted = "application started" - ShardGCCollectingExpiredObjectsStarted = "collecting expired objects started" - ShardGCCollectingExpiredObjectsCompleted = "collecting expired objects completed" - ShardGCCollectingExpiredLocksStarted = "collecting expired locks started" - ShardGCCollectingExpiredLocksCompleted = "collecting expired locks completed" - ShardGCRemoveGarbageStarted = "garbage remove started" - ShardGCRemoveGarbageCompleted = "garbage remove completed" - EngineShardsEvacuationFailedToCount = "failed to get total objects count to evacuate" - EngineShardsEvacuationFailedToListObjects = "failed to list objects to evacuate" - EngineShardsEvacuationFailedToReadObject = "failed to read object to evacuate" - EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node" - ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked" - FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap" - EngineCouldNotChangeShardModeToDisabled = "could not change shard mode to disabled" - RPConnectionLost = "RPC connection lost, attempting reconnect" - RPCNodeSwitchFailure = "can't switch RPC node" - FSTreeCantUnmarshalObject = "can't unmarshal an object" - FSTreeCantFushObjectBlobstor = "can't flush an object to blobstor" - FSTreeCantUpdateID = "can't update object storage ID" - FSTreeCantDecodeDBObjectAddress = "can't decode object address from the DB" - PutSingleRedirectFailure = "failed to redirect PutSingle request" - StorageIDRetrievalFailure = "can't get storage ID from metabase" - ObjectRemovalFailureBlobStor = "can't remove object from blobStor" - CandidateStatusPriority = "candidate status is different from the netmap status, the former takes priority" - TombstoneExpirationParseFailure = "tombstone getter: could not parse tombstone expiration epoch" - RuntimeSoftMemoryLimitUpdated = "soft runtime memory limit value updated" - RuntimeSoftMemoryDefinedWithGOMEMLIMIT = "soft runtime memory defined with GOMEMLIMIT environment variable, config value 
skipped" - AttemtToCloseAlreadyClosedBlobovnicza = "attempt to close an already closed blobovnicza" - FailedToGetContainerCounters = "failed to get container counters values" - FailedToRebuildBlobstore = "failed to rebuild blobstore" - BlobstoreRebuildStarted = "blobstore rebuild started" - BlobstoreRebuildCompletedSuccessfully = "blobstore rebuild completed successfully" - BlobstoreRebuildStopped = "blobstore rebuild stopped" - BlobovniczaTreeFixingFileExtensions = "fixing blobovnicza tree file extensions..." - BlobovniczaTreeFixingFileExtensionsCompletedSuccessfully = "fixing blobovnicza tree file extensions completed successfully" - BlobovniczaTreeFixingFileExtensionsFailed = "failed to fix blobovnicza tree file extensions" - BlobovniczaTreeFixingFileExtensionForFile = "fixing blobovnicza file extension..." - BlobovniczaTreeFixingFileExtensionCompletedSuccessfully = "fixing blobovnicza file extension completed successfully" - BlobovniczaTreeFixingFileExtensionFailed = "failed to fix blobovnicza file extension" - BlobstorRebuildFailedToRebuildStorages = "failed to rebuild storages" - BlobstorRebuildRebuildStoragesCompleted = "storages rebuild completed" - BlobovniczaTreeCollectingDBToRebuild = "collecting blobovniczas to rebuild..." - BlobovniczaTreeCollectingDBToRebuildFailed = "collecting blobovniczas to rebuild failed" - BlobovniczaTreeCollectingDBToRebuildSuccess = "collecting blobovniczas to rebuild completed successfully" - BlobovniczaTreeRebuildingBlobovnicza = "rebuilding blobovnicza..." - BlobovniczaTreeRebuildingBlobovniczaFailed = "rebuilding blobovnicza failed" - BlobovniczaTreeRebuildingBlobovniczaSuccess = "rebuilding blobovnicza completed successfully" - BlobovniczatreeCouldNotPutMoveInfoToSourceBlobovnicza = "could not put move info to source blobovnicza" - BlobovniczatreeCouldNotUpdateStorageID = "could not update storage ID" - BlobovniczatreeCouldNotDropMoveInfo = "could not drop move info from source blobovnicza" - BlobovniczatreeCouldNotDeleteFromSource = "could not delete object from source blobovnicza" - BlobovniczaTreeCompletingPreviousRebuild = "completing previous rebuild if failed..." - BlobovniczaTreeCompletedPreviousRebuildSuccess = "previous rebuild completed successfully" - BlobovniczaTreeCompletedPreviousRebuildFailed = "failed to complete previous rebuild" - BlobovniczatreeCouldNotCheckExistenceInTargetDB = "could not check object existence in target blobovnicza" - BlobovniczatreeCouldNotPutObjectToTargetDB = "could not put object to target blobovnicza" - BlobovniczaSavingCountersToMeta = "saving counters to blobovnicza's meta..." 
- BlobovniczaSavingCountersToMetaSuccess = "saving counters to blobovnicza's meta completed successfully" - BlobovniczaSavingCountersToMetaFailed = "saving counters to blobovnicza's meta failed" - ObjectRemovalFailureExistsInWritecache = "can't remove object: object must be flushed from writecache" - FailedToReportStatusToSystemd = "failed to report status to systemd" - ShardGCCollectingExpiredMetricsStarted = "collecting expired metrics started" - ShardGCCollectingExpiredMetricsCompleted = "collecting expired metrics completed" - ShardGCFailedToCollectZeroSizeContainers = "failed to collect zero-size containers" - ShardGCFailedToCollectZeroCountContainers = "failed to collect zero-count containers" - EngineFailedToCheckContainerAvailability = "failed to check container availability" - EngineFailedToGetContainerSize = "failed to get container size" - EngineFailedToDeleteContainerSize = "failed to delete container size" - EngineInterruptProcessingZeroSizeContainers = "interrupt processing zero-size containers" - EngineInterruptProcessingZeroCountContainers = "interrupt processing zero-count containers" - EngineFailedToGetContainerCounters = "failed to get container counters" - GetSvcV2FailedToParseNodeEndpoints = "failed to parse node endpoints" - GetSvcV2FailedToParseNodeExternalAddresses = "failed to parse node external addresses" - GetSvcV2FailedToGetRangeHashFromNode = "failed to get range hash from node" - GetSvcV2FailedToGetRangeHashFromAllOfContainerNodes = "failed to get range hash from all of container nodes" - FailedToUpdateShardID = "failed to update shard id" - EngineShardsEvacuationFailedToMoveTree = "failed to evacuate tree to other node" - EngineShardsEvacuationTreeEvacuatedLocal = "tree evacuated to local node" - EngineShardsEvacuationTreeEvacuatedRemote = "tree evacuated to other node" - EngineRefillFailedToGetObjectsCount = "failed to get blobstor objects count, no resync percent estimation is available" - ECFailedToSendToContainerNode = "failed to send EC object to container node" - ECFailedToSaveECPart = "failed to save EC part" - PolicerNodeIsNotECObjectNode = "current node is not EC object node" - PolicerFailedToGetLocalECChunks = "failed to get local EC chunks" - PolicerMissingECChunk = "failed to find EC chunk on any of the nodes" - PolicerFailedToDecodeECChunkID = "failed to decode EC chunk ID" - PolicerDifferentObjectIDForTheSameECChunk = "different object IDs for the same EC chunk" - ReplicatorCouldNotGetObjectFromRemoteStorage = "could not get object from remote storage" - ReplicatorCouldNotPutObjectToLocalStorage = "could not put object to local storage" - PolicerCouldNotGetObjectFromNodeMoving = "could not get EC object from the node, moving current chunk to the node" - PolicerCouldNotRestoreObjectNotEnoughChunks = "could not restore EC object: not enough chunks" - PolicerFailedToRestoreObject = "failed to restore EC object" - PolicerCouldNotGetChunk = "could not get EC chunk" - PolicerCouldNotGetChunks = "could not get EC chunks" - AuditEventLogRecord = "audit event log record" + InnerringNonalphabetModeDoNotStopContainerEstimations = "non-alphabet mode, do not stop container estimations" + InnerringCantStopEpochEstimation = "can't stop epoch estimation" + InnerringCantMakeNotaryDepositInMainChain = "can't make notary deposit in main chain" + InnerringCantMakeNotaryDepositInSideChain = "can't make notary deposit in side chain" + InnerringNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" + InnerringCantGetInnerRingIndex = "can't get inner 
ring index" + InnerringCantGetInnerRingSize = "can't get inner ring size" + InnerringCantGetAlphabetIndex = "can't get alphabet index" + InnerringIgnoreValidatorVoteNodeNotInAlphabetRange = "ignore validator vote: node not in alphabet range" + InnerringIgnoreValidatorVoteEmptyValidatorsList = "ignore validator vote: empty validators list" + InnerringCantInvokeVoteMethodInAlphabetContract = "can't invoke vote method in alphabet contract" + InnerringCantGetLastProcessedMainChainBlockNumber = "can't get last processed main chain block number" + InnerringNotarySupport = "notary support" + InnerringAlphabetKeysSyncIsDisabled = "alphabet keys sync is disabled" + InnerringNoControlServerEndpointSpecified = "no Control server endpoint specified, service is disabled" + InnerringCantGetLastProcessedSideChainBlockNumber = "can't get last processed side chain block number" + InnerringFailedToSetGroupSignerScope = "failed to set group signer scope, continue with Global" + InnerringCantVoteForPreparedValidators = "can't vote for prepared validators" + InnerringNewBlock = "new block" + InnerringCantUpdatePersistentState = "can't update persistent state" + InnerringCloserError = "closer error" + InnerringReadConfigFromBlockchain = "read config from blockchain" + NotificatorNotificatorStartProcessingObjectNotifications = "notificator: start processing object notifications" + NotificatorNotificatorProcessingObjectNotification = "notificator: processing object notification" + PolicerCouldNotGetContainer = "could not get container" + PolicerCouldNotConfirmContainerRemoval = "could not confirm container removal" + PolicerCouldNotInhumeObjectWithMissingContainer = "could not inhume object with missing container" + PolicerCouldNotBuildPlacementVectorForObject = "could not build placement vector for object" + PolicerRedundantLocalObjectCopyDetected = "redundant local object copy detected" + PolicerReceiveObjectHeaderToCheckPolicyCompliance = "receive object header to check policy compliance" + PolicerConsiderNodeUnderMaintenanceAsOK = "consider node under maintenance as OK" + PolicerShortageOfObjectCopiesDetected = "shortage of object copies detected" + PolicerSomeOfTheCopiesAreStoredOnNodesUnderMaintenance = "some of the copies are stored on nodes under maintenance, save local copy" + PolicerRoutineStopped = "routine stopped" + PolicerFailureAtObjectSelectForReplication = "failure at object select for replication" + PolicerPoolSubmission = "pool submission" + PolicerUnableToProcessObj = "unable to process object" + ReplicatorFinishWork = "finish work" + ReplicatorCouldNotGetObjectFromLocalStorage = "could not get object from local storage" + ReplicatorCouldNotReplicateObject = "could not replicate object" + ReplicatorObjectSuccessfullyReplicated = "object successfully replicated" + TreeRedirectingTreeServiceQuery = "redirecting tree service query" + TreeBearerPresentedButNotAllowedByACL = "bearer presented but not allowed by ACL" + TreeCouldNotGetLastSynchronizedHeightForATree = "could not get last synchronized height for a tree" + TreeCouldNotUpdateLastSynchronizedHeightForATree = "could not update last synchronized height for a tree" + TreeSynchronizeTree = "synchronize tree" + TreeFailedToRunTreeSynchronizationOverAllNodes = "failed to run tree synchronization over all nodes" + TreeFailedToRunTreeSynchronizationForSpecificNode = "failed to run tree synchronization for specific node" + TreeFailedToParseAddressForTreeSynchronization = "failed to parse address for tree synchronization" + 
TreeFailedToConnectForTreeSynchronization = "failed to connect for tree synchronization" + TreeSyncingTrees = "syncing trees..." + TreeCouldNotFetchContainers = "could not fetch containers" + TreeTreesHaveBeenSynchronized = "trees have been synchronized" + TreeSyncingContainerTrees = "syncing container trees..." + TreeCouldNotSyncTrees = "could not sync trees" + TreeContainerTreesHaveBeenSynced = "container trees have been synced" + TreeCouldNotQueryTreesForSynchronization = "could not query trees for synchronization" + TreeRemovingRedundantTrees = "removing redundant trees..." + TreeCouldNotCheckIfContainerExisted = "could not check if the container ever existed" + TreeCouldNotRemoveRedundantTree = "could not remove redundant tree" + TreeCouldNotCalculateContainerNodes = "could not calculate container nodes" + TreeFailedToApplyReplicatedOperation = "failed to apply replicated operation" + TreeDoNotSendUpdateToTheNode = "do not send update to the node" + TreeFailedToSentUpdateToTheNode = "failed to send update to the node" + TreeErrorDuringReplication = "error during replication" + PersistentCouldNotGetSessionFromPersistentStorage = "could not get session from persistent storage" + PersistentCouldNotDeleteSToken = "could not delete token" + PersistentCouldNotCleanUpExpiredTokens = "could not clean up expired tokens" + ControllerReportIsAlreadyStarted = "report is already started" + TombstoneCouldNotGetTheTombstoneTheSource = "tombstone getter: could not get the tombstone from the source" + DeleteNoSplitInfoObjectIsPHY = "no split info, object is PHY" + DeleteAssemblingChain = "assembling chain..." + DeleteCollectingChildren = "collecting children..." + DeleteSupplementBySplitID = "supplement by split ID" + DeleteFormingTombstoneStructure = "forming tombstone structure..." + DeleteTombstoneStructureSuccessfullyFormedSaving = "tombstone structure successfully formed, saving..." + DeleteFormingSplitInfo = "forming split info..." + DeleteSplitInfoSuccessfullyFormedCollectingMembers = "split info successfully formed, collecting members..." + DeleteMembersSuccessfullyCollected = "members successfully collected" + DeleteECObjectReceived = "erasure-coded object received, form tombstone" + GetRemoteCallFailed = "remote call failed" + GetCanNotAssembleTheObject = "cannot assemble the object" + GetTryingToAssembleTheObject = "trying to assemble the object..." + GetTryingToAssembleTheECObject = "trying to assemble the EC object..." + GetAssemblingSplittedObject = "assembling split object..." + GetAssemblingECObject = "assembling erasure-coded object..."
+ GetUnableToGetAllPartsECObject = "unable to get all parts, continue to reconstruct with existing parts" + GetUnableToGetPartECObject = "unable to get part of the erasure-coded object" + GetUnableToHeadPartECObject = "unable to head part of the erasure-coded object" + GetUnableToGetECObjectContainer = "unable to get container for erasure-coded object" + GetAssemblingSplittedObjectCompleted = "assembling split object completed" + GetAssemblingECObjectCompleted = "assembling erasure-coded object completed" + GetFailedToAssembleSplittedObject = "failed to assemble split object" + GetFailedToAssembleECObject = "failed to assemble erasure-coded object" + GetCouldNotGenerateContainerTraverser = "could not generate container traverser" + GetCouldNotConstructRemoteNodeClient = "could not construct remote node client" + GetCouldNotWriteHeader = "could not write header" + GetCouldNotWritePayloadChunk = "could not write payload chunk" + GetLocalGetFailed = "local get failed" + GetReturnResultDirectly = "return result directly" + GetCompletingTheOperation = "completing the operation" + GetRequestedObjectWasMarkedAsRemoved = "requested object was marked as removed" + GetRequestedObjectIsVirtual = "requested object is virtual" + GetRequestedObjectIsEC = "requested object is erasure-coded" + GetRequestedRangeIsOutOfObjectBounds = "requested range is out of object bounds" + PutAdditionalContainerBroadcastFailure = "additional container broadcast failure" + SearchReturnResultDirectly = "return result directly" + SearchCouldNotConstructRemoteNodeClient = "could not construct remote node client" + SearchRemoteOperationFailed = "remote operation failed" + SearchCouldNotGenerateContainerTraverser = "could not generate container traverser" + SearchCouldNotWriteObjectIdentifiers = "could not write object identifiers" + SearchLocalOperationFailed = "local operation failed" + UtilObjectServiceError = "object service error" + UtilCouldNotPushTaskToWorkerPool = "could not push task to worker pool" + V2CantCheckIfRequestFromInnerRing = "can't check if request from inner ring" + V2CantCheckIfRequestFromContainerNode = "can't check if request from container node" + NatsNatsConnectionWasLost = "nats: connection was lost" + NatsNatsReconnectedToTheServer = "nats: reconnected to the server" + NatsNatsClosingConnectionAsTheContextIsDone = "nats: closing connection as the context is done" + NatsConnectedToEndpoint = "nats: successfully connected to endpoint" + ControllerStartingToAnnounceTheValuesOfTheMetrics = "starting to announce the values of the metrics" + ControllerCouldNotInitializeIteratorOverLocallyCollectedMetrics = "could not initialize iterator over locally collected metrics" + ControllerCouldNotInitializeAnnouncementAccumulator = "could not initialize announcement accumulator" + ControllerIteratorOverLocallyCollectedMetricsAborted = "iterator over locally collected metrics aborted" + ControllerCouldNotFinishWritingLocalAnnouncements = "could not finish writing local announcements" + ControllerTrustAnnouncementSuccessfullyFinished = "trust announcement successfully finished" + ControllerAnnouncementIsAlreadyStarted = "announcement is already started" + ControllerAnnouncementSuccessfullyInterrupted = "announcement successfully interrupted" + ControllerAnnouncementIsNotStartedOrAlreadyInterrupted = "announcement is not started or already interrupted" + ControllerCouldNotInitializeIteratorOverLocallyAccumulatedAnnouncements = "could not initialize iterator over locally accumulated announcements" +
ControllerCouldNotInitializeResultTarget = "could not initialize result target" + ControllerIteratorOverLocalAnnouncementsAborted = "iterator over local announcements aborted" + ControllerCouldNotFinishWritingLoadEstimations = "could not finish writing load estimations" + RouteCouldNotInitializeWriterProvider = "could not initialize writer provider" + RouteCouldNotInitializeWriter = "could not initialize writer" + RouteCouldNotPutTheValue = "could not put the value" + RouteCouldNotCloseRemoteServerWriter = "could not close remote server writer" + ClientCouldNotRestoreBlockSubscriptionAfterRPCSwitch = "could not restore block subscription after RPC switch" + ClientCouldNotRestoreNotificationSubscriptionAfterRPCSwitch = "could not restore notification subscription after RPC switch" + ClientCouldNotRestoreNotaryNotificationSubscriptionAfterRPCSwitch = "could not restore notary notification subscription after RPC switch" + ClientCouldNotEstablishConnectionToTheSwitchedRPCNode = "could not establish connection to the switched RPC node" + ClientConnectionToTheNewRPCNodeHasBeenEstablished = "connection to the new RPC node has been established" + ClientSwitchingToTheNextRPCNode = "switching to the next RPC node" + ClientCouldNotEstablishConnectionToAnyRPCNode = "could not establish connection to any RPC node" + ClientCouldNotCreateClientToTheHigherPriorityNode = "could not create client to the higher priority node" + ClientSwitchedToTheHigherPriorityRPC = "switched to the higher priority RPC" + ClientCouldNotRestoreSideChainSubscriptionsUsingNode = "could not restore side chain subscriptions using node" + ClientNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" + ClientNotaryDepositInvoke = "notary deposit invoke" + ClientNotaryRequestWithPreparedMainTXInvoked = "notary request with prepared main TX invoked" + ClientNotaryRequestInvoked = "notary request invoked" + ClientNotaryDepositTransactionWasSuccessfullyPersisted = "notary deposit transaction was successfully persisted" + ClientAttemptToWaitForNotaryDepositTransactionToGetPersisted = "attempt to wait for notary deposit transaction to get persisted" + ClientNeoClientInvoke = "neo client invoke" + ClientNativeGasTransferInvoke = "native gas transfer invoke" + ClientBatchGasTransferInvoke = "batch gas transfer invoke" + ClientCantGetBlockchainHeight = "can't get blockchain height" + ClientCantGetBlockchainHeight243 = "can't get blockchain height" + EventCouldNotSubmitHandlerToWorkerPool = "could not Submit handler to worker pool" + EventCouldNotStartListenToEvents = "could not start listen to events" + EventStopEventListenerByError = "stop event listener by error" + EventStopEventListenerByContext = "stop event listener by context" + EventStopEventListenerByNotificationChannel = "stop event listener by notification channel" + EventNilNotificationEventWasCaught = "nil notification event was caught" + EventStopEventListenerByNotaryChannel = "stop event listener by notary channel" + EventNilNotaryEventWasCaught = "nil notary event was caught" + EventStopEventListenerByBlockChannel = "stop event listener by block channel" + EventNilBlockWasCaught = "nil block was caught" + EventListenerWorkerPoolDrained = "listener worker pool drained" + EventEventParserNotSet = "event parser not set" + EventCouldNotParseNotificationEvent = "could not parse notification event" + EventNotificationHandlersForParsedNotificationEventWereNotRegistered = "notification handlers for parsed notification event were not registered" + 
EventSkipExpiredMainTXNotaryEvent = "skip expired main TX notary event" + EventCouldNotPrepareAndValidateNotaryEvent = "could not prepare and validate notary event" + EventNotaryParserNotSet = "notary parser not set" + EventCouldNotParseNotaryEvent = "could not parse notary event" + EventNotaryHandlersForParsedNotificationEventWereNotRegistered = "notary handlers for parsed notification event were not registered" + EventIgnoreNilEventParser = "ignore nil event parser" + EventListenerHasBeenAlreadyStartedIgnoreParser = "listener has been already started, ignore parser" + EventRegisteredNewEventParser = "registered new event parser" + EventIgnoreNilEventHandler = "ignore nil event handler" + EventIgnoreHandlerOfEventWoParser = "ignore handler of event w/o parser" + EventRegisteredNewEventHandler = "registered new event handler" + EventIgnoreNilNotaryEventParser = "ignore nil notary event parser" + EventListenerHasBeenAlreadyStartedIgnoreNotaryParser = "listener has been already started, ignore notary parser" + EventIgnoreNilNotaryEventHandler = "ignore nil notary event handler" + EventIgnoreHandlerOfNotaryEventWoParser = "ignore handler of notary event w/o parser" + EventIgnoreNilBlockHandler = "ignore nil block handler" + SubscriberRemoteNotificationChannelHasBeenClosed = "remote notification channel has been closed" + SubscriberCantCastNotifyEventValueToTheNotifyStruct = "can't cast notify event value to the notify struct" + SubscriberNewNotificationEventFromSidechain = "new notification event from sidechain" + SubscriberCantCastBlockEventValueToBlock = "can't cast block event value to block" + SubscriberCantCastNotifyEventValueToTheNotaryRequestStruct = "can't cast notify event value to the notary request struct" + SubscriberUnsupportedNotificationFromTheChain = "unsupported notification from the chain" + StorageOperation = "local object storage operation" + BlobovniczaCreatingDirectoryForBoltDB = "creating directory for BoltDB" + BlobovniczaOpeningBoltDB = "opening BoltDB" + BlobovniczaInitializing = "initializing..." + BlobovniczaAlreadyInitialized = "already initialized" + BlobovniczaCreatingBucketForSizeRange = "creating bucket for size range" + BlobovniczaClosingBoltDB = "closing BoltDB" + BlobovniczaObjectWasRemovedFromBucket = "object was removed from bucket" + BlobstorOpening = "opening..." + BlobstorInitializing = "initializing..." + BlobstorClosing = "closing..." 
+ BlobstorCouldntCloseStorage = "couldn't close storage" + BlobstorErrorOccurredDuringObjectExistenceChecking = "error occurred during object existence checking" + BlobstorErrorOccurredDuringTheIteration = "error occurred during the iteration" + EngineShardHasBeenRemoved = "shard has been removed" + EngineCouldNotCloseRemovedShard = "could not close removed shard" + EngineCouldNotOpenShardClosingAndSkipping = "could not open shard, closing and skipping" + EngineCouldNotClosePartiallyInitializedShard = "could not close partially initialized shard" + EngineCouldNotInitializeShardClosingAndSkipping = "could not initialize shard, closing and skipping" + EngineCouldNotCloseShard = "could not close shard" + EngineCouldNotReloadAShard = "could not reload a shard" + EngineAddedNewShard = "added new shard" + EngineCouldNotPutObjectToShard = "could not put object to shard" + EngineCouldNotCheckObjectExistence = "could not check object existence when put object to shard" + EngineErrorDuringSearchingForObjectChildren = "error during searching for object children" + EngineCouldNotInhumeObjectInShard = "could not inhume object in shard" + EngineStartingRemovalOfLocallyredundantCopies = "starting removal of locally-redundant copies" + EngineStartedDuplicatesRemovalRoutine = "started duplicates removal routine" + EngineFinishedRemovalOfLocallyredundantCopies = "finished removal of locally-redundant copies" + EngineRemovingAnObjectWithoutFullLockingCheck = "removing an object without full locking check" + EngineInterruptProcessingTheExpiredLocks = "interrupt processing the expired locks" + EngineInterruptGettingLockers = "can't get object's lockers" + EngineInterruptProcessingTheDeletedLocks = "interrupt processing the deleted locks" + EngineFailedToMoveShardInDegradedreadonlyModeMovingToReadonly = "failed to move shard in degraded-read-only mode, moving to read-only" + EngineFailedToMoveShardInReadonlyMode = "failed to move shard in read-only mode" + EngineShardIsMovedInReadonlyModeDueToErrorThreshold = "shard is moved in read-only mode due to error threshold" + EngineShardIsMovedInDegradedModeDueToErrorThreshold = "shard is moved in degraded mode due to error threshold" + EngineModeChangeIsInProgressIgnoringSetmodeRequest = "mode change is in progress, ignoring set-mode request" + EngineStartedShardsEvacuation = "started shards evacuation" + EngineFinishedSuccessfullyShardsEvacuation = "shards evacuation finished successfully" + EngineFinishedWithErrorShardsEvacuation = "shards evacuation finished with error" + EngineObjectIsMovedToAnotherShard = "object is moved to another shard" + MetabaseMissingMatcher = "missing matcher" + MetabaseCantDecodeListBucketLeaf = "can't decode list bucket leaf" + MetabaseUnknownOperation = "unknown operation" + MetabaseCantIterateOverTheBucket = "can't iterate over the bucket" + MetabaseCouldNotIterateOverThePrefix = "could not iterate over the prefix" + MetabaseCreatedDirectoryForMetabase = "created directory for Metabase" + MetabaseOpenedBoltDBInstanceForMetabase = "opened boltDB instance for Metabase" + MetabaseCheckingMetabaseVersion = "checking metabase version" + ShardCantSelectAllObjects = "can't select all objects" + ShardSettingShardMode = "setting shard mode" + ShardShardModeSetSuccessfully = "shard mode set successfully" + ShardCouldNotMarkObjectForShardRelocationInMetabase = "could not mark object for shard relocation in metabase" + ShardCantDeleteObjectFromWriteCache = "can't delete object from write cache" + ShardCantGetStorageIDFromMetabase = "can't get 
storage ID from metabase" + ShardCantRemoveObjectFromBlobStor = "can't remove object from blobStor" + ShardFetchingObjectWithoutMeta = "fetching object without meta" + ShardObjectIsMissingInWritecache = "object is missing in write-cache" + ShardFailedToFetchObjectFromWritecache = "failed to fetch object from write-cache" + ShardCantPutObjectToTheWritecacheTryingBlobstor = "can't put object to the write-cache, trying blobstor" + ShardMetaObjectCounterRead = "meta: object counter read" + ShardMetaCantReadContainerList = "meta: can't read container list" + ShardMetaCantReadContainerSize = "meta: can't read container size" + ShardMetaInfoPresentButObjectNotFound = "meta info was present, but the object is missing" + ShardMetabaseFailureSwitchingMode = "metabase failure, switching mode" + ShardCantMoveShardToReadonlySwitchMode = "can't move shard to readonly, switch mode" + ShardCouldNotUnmarshalObject = "could not unmarshal object" + ShardCouldNotCloseShardComponent = "could not close shard component" + ShardCantOpenMetabaseMoveToADegradedMode = "can't open metabase, move to a degraded mode" + ShardCantInitializeMetabaseMoveToADegradedreadonlyMode = "can't initialize metabase, move to a degraded-read-only mode" + ShardTryingToRestoreReadwriteMode = "trying to restore read-write mode" + ShardStopEventListenerByClosedEventChannel = "stop event listener by closed `event` channel" + ShardStopEventListenerByClosedStopChannel = "stop event listener by closed `stop` channel" + ShardStopEventListenerByContext = "stop event listener by context" + ShardCouldNotSubmitGCJobToWorkerPool = "could not submit GC job to worker pool" + ShardGCIsStopped = "GC is stopped" + ShardWaitingForGCWorkersToStop = "waiting for GC workers to stop..." + ShardIteratorOverMetabaseGraveyardFailed = "iterator over metabase graveyard failed" + ShardCouldNotDeleteTheObjects = "could not delete the objects" + ShardIteratorOverExpiredObjectsFailed = "iterator over expired objects failed" + ShardCouldNotInhumeTheObjects = "could not inhume the objects" + ShardStartedExpiredTombstonesHandling = "started expired tombstones handling" + ShardIteratingTombstones = "iterating tombstones" + ShardShardIsInADegradedModeSkipCollectingExpiredTombstones = "shard is in a degraded mode, skip collecting expired tombstones" + ShardIteratorOverGraveyardFailed = "iterator over graveyard failed" + ShardHandlingExpiredTombstonesBatch = "handling expired tombstones batch" + ShardFinishedExpiredTombstonesHandling = "finished expired tombstones handling" + ShardIteratorOverExpiredLocksFailed = "iterator over expired locks failed" + ShardCouldNotMarkTombstonesAsGarbage = "could not mark tombstones as garbage" + ShardCouldNotDropExpiredGraveRecords = "could not drop expired grave records" + ShardFailureToUnlockObjects = "failure to unlock objects" + ShardFailureToMarkLockersAsGarbage = "failure to mark lockers as garbage" + ShardFailureToGetExpiredUnlockedObjects = "failure to get expired unlocked objects" + ShardCouldNotMarkObjectToDeleteInMetabase = "could not mark object to delete in metabase" + WritecacheTriedToFlushItemsFromWritecache = "tried to flush items from write-cache" + WritecacheWaitingForChannelsToFlush = "waiting for channels to flush" + WritecacheFillingFlushMarksForObjectsInFSTree = "filling flush marks for objects in FSTree" + WritecacheFinishedUpdatingFSTreeFlushMarks = "finished updating FSTree flush marks" + WritecacheFillingFlushMarksForObjectsInDatabase = "filling flush marks for objects in database" + 
WritecacheFinishedUpdatingFlushMarks = "finished updating flush marks" + WritecacheCantRemoveObjectsFromTheDatabase = "can't remove objects from the database" + WritecacheCantRemoveObjectFromWritecache = "can't remove object from write-cache" + BlobovniczatreeCouldNotGetObjectFromLevel = "could not get object from level" + BlobovniczatreeCouldNotReadPayloadRangeFromOpenedBlobovnicza = "could not read payload range from opened blobovnicza" + BlobovniczatreeCouldNotReadPayloadRangeFromActiveBlobovnicza = "could not read payload range from active blobovnicza" + BlobovniczatreeCouldNotCloseBlobovnicza = "could not close Blobovnicza" + BlobovniczatreeBlobovniczaSuccessfullyClosedOnEvict = "blobovnicza successfully closed on evict" + BlobovniczatreeUpdatingActiveBlobovnicza = "updating active blobovnicza..." + BlobovniczatreeActiveBlobovniczaSuccessfullyUpdated = "active blobovnicza successfully updated" + BlobovniczatreeBlobovniczaSuccessfullyActivated = "blobovnicza successfully activated" + BlobovniczatreeCouldNotRemoveObjectFromLevel = "could not remove object from level" + BlobovniczatreeCouldNotRemoveObjectFromOpenedBlobovnicza = "could not remove object from opened blobovnicza" + BlobovniczatreeCouldNotRemoveObjectFromActiveBlobovnicza = "could not remove object from active blobovnicza" + BlobovniczatreeCouldNotGetActiveBlobovnicza = "could not get active blobovnicza" + BlobovniczatreeBlobovniczaOverflowed = "blobovnicza overflowed" + BlobovniczatreeCouldNotUpdateActiveBlobovnicza = "could not update active blobovnicza" + BlobovniczatreeCouldNotPutObjectToActiveBlobovnicza = "could not put object to active blobovnicza" + BlobovniczatreeCouldNotReadObjectFromOpenedBlobovnicza = "could not read object from opened blobovnicza" + BlobovniczatreeCouldNotGetObjectFromActiveBlobovnicza = "could not get object from active blobovnicza" + BlobovniczatreeInitializingBlobovniczas = "initializing Blobovnicza's" + BlobovniczatreeReadonlyModeSkipBlobovniczasInitialization = "read-only mode, skip blobovniczas initialization..." + BlobovniczatreeBlobovniczaSuccessfullyInitializedClosing = "blobovnicza successfully initialized, closing..." 
+ BlobovniczatreeCouldNotCloseActiveBlobovnicza = "could not close active blobovnicza" + AlphabetTick = "tick" + AlphabetAlphabetProcessorWorkerPoolDrained = "alphabet processor worker pool drained" + AlphabetNonAlphabetModeIgnoreGasEmissionEvent = "non alphabet mode, ignore gas emission event" + AlphabetNodeIsOutOfAlphabetRangeIgnoreGasEmissionEvent = "node is out of alphabet range, ignore gas emission event" + AlphabetCantInvokeAlphabetEmitMethod = "can't invoke alphabet emit method" + AlphabetStorageNodeEmissionIsOff = "storage node emission is off" + AlphabetCantGetNetmapSnapshotToEmitGasToStorageNodes = "can't get netmap snapshot to emit gas to storage nodes" + AlphabetGasEmission = "gas emission" + AlphabetCantParseNodePublicKey = "can't parse node public key" + AlphabetCantTransferGas = "can't transfer gas" + AlphabetCantTransferGasToWallet = "can't transfer gas to wallet" + AlphabetAlphabetWorkerPool = "alphabet worker pool" + BalanceBalanceWorkerPoolDrained = "balance worker pool drained" + BalanceNonAlphabetModeIgnoreBalanceLock = "non alphabet mode, ignore balance lock" + BalanceCantSendLockAssetTx = "can't send lock asset tx" + BalanceBalanceWorkerPool = "balance worker pool" + ContainerContainerWorkerPool = "container worker pool" + ContainerContainerProcessorWorkerPoolDrained = "container processor worker pool drained" + ContainerNonAlphabetModeIgnoreContainerPut = "non alphabet mode, ignore container put" + ContainerPutContainerCheckFailed = "put container check failed" + ContainerCouldNotApprovePutContainer = "could not approve put container" + ContainerNonAlphabetModeIgnoreContainerDelete = "non alphabet mode, ignore container delete" + ContainerDeleteContainerCheckFailed = "delete container check failed" + ContainerCouldNotApproveDeleteContainer = "could not approve delete container" + ContainerNonAlphabetModeIgnoreSetEACL = "non alphabet mode, ignore set EACL" + ContainerSetEACLCheckFailed = "set EACL check failed" + ContainerCouldNotApproveSetEACL = "could not approve set EACL" + FrostFSNonAlphabetModeIgnoreBind = "non alphabet mode, ignore bind" + FrostFSInvalidManageKeyEvent = "invalid manage key event" + FrostFSCouldNotDecodeScriptHashFromBytes = "could not decode script hash from bytes" + FrostFSNonAlphabetModeIgnoreConfig = "non alphabet mode, ignore config" + FrostFSCantRelaySetConfigEvent = "can't relay set config event" + FrostFSFrostfsWorkerPool = "frostfs worker pool" + FrostFSFrostfsProcessorWorkerPoolDrained = "frostfs processor worker pool drained" + FrostFSNonAlphabetModeIgnoreDeposit = "non alphabet mode, ignore deposit" + FrostFSCantTransferAssetsToBalanceContract = "can't transfer assets to balance contract" + FrostFSDoubleMintEmissionDeclined = "double mint emission declined" + FrostFSCantGetGasBalanceOfTheNode = "can't get gas balance of the node" + FrostFSGasBalanceThresholdHasBeenReached = "gas balance threshold has been reached" + FrostFSCantTransferNativeGasToReceiver = "can't transfer native gas to receiver" + FrostFSNonAlphabetModeIgnoreWithdraw = "non alphabet mode, ignore withdraw" + FrostFSCantCreateLockAccount = "can't create lock account" + FrostFSCantLockAssetsForWithdraw = "can't lock assets for withdraw" + FrostFSNonAlphabetModeIgnoreCheque = "non alphabet mode, ignore cheque" + FrostFSCantTransferAssetsToFedContract = "can't transfer assets to fed contract" + GovernanceNewEvent = "new event" + GovernanceGovernanceWorkerPoolDrained = "governance worker pool drained" + GovernanceNonAlphabetModeIgnoreAlphabetSync = "non alphabet mode, 
ignore alphabet sync" + GovernanceCantFetchAlphabetListFromMainNet = "can't fetch alphabet list from main net" + GovernanceCantFetchAlphabetListFromSideChain = "can't fetch alphabet list from side chain" + GovernanceCantMergeAlphabetListsFromMainNetAndSideChain = "can't merge alphabet lists from main net and side chain" + GovernanceNoGovernanceUpdateAlphabetListHasNotBeenChanged = "no governance update, alphabet list has not been changed" + GovernanceAlphabetListHasBeenChangedStartingUpdate = "alphabet list has been changed, starting update" + GovernanceCantVoteForSideChainCommittee = "can't vote for side chain committee" + GovernanceFinishedAlphabetListUpdate = "finished alphabet list update" + GovernanceCantFetchInnerRingListFromSideChain = "can't fetch inner ring list from side chain" + GovernanceCantCreateNewInnerRingListWithNewAlphabetKeys = "can't create new inner ring list with new alphabet keys" + GovernanceUpdateOfTheInnerRingList = "update of the inner ring list" + GovernanceCantUpdateInnerRingListWithNewAlphabetKeys = "can't update inner ring list with new alphabet keys" + GovernanceCantUpdateListOfNotaryNodesInSideChain = "can't update list of notary nodes in side chain" + GovernanceCantUpdateListOfAlphabetNodesInFrostfsContract = "can't update list of alphabet nodes in frostfs contract" + NetmapNetmapWorkerPool = "netmap worker pool" + NetmapTick = "tick" + NetmapNetmapWorkerPoolDrained = "netmap worker pool drained" + NetmapNetmapCleanUpRoutineIsDisabled518 = "netmap clean up routine is disabled" + NetmapNonAlphabetModeIgnoreNewNetmapCleanupTick = "non alphabet mode, ignore new netmap cleanup tick" + NetmapCantDecodePublicKeyOfNetmapNode = "can't decode public key of netmap node" + NetmapVoteToRemoveNodeFromNetmap = "vote to remove node from netmap" + NetmapCantInvokeNetmapUpdateState = "can't invoke netmap.UpdateState" + NetmapCantIterateOnNetmapCleanerCache = "can't iterate on netmap cleaner cache" + NetmapCantGetEpochDuration = "can't get epoch duration" + NetmapCantGetTransactionHeight = "can't get transaction height" + NetmapCantResetEpochTimer = "can't reset epoch timer" + NetmapCantGetNetmapSnapshotToPerformCleanup = "can't get netmap snapshot to perform cleanup" + NetmapCantStartContainerSizeEstimation = "can't start container size estimation" + NetmapNonAlphabetModeIgnoreNewEpochTick = "non alphabet mode, ignore new epoch tick" + NetmapNextEpoch = "next epoch" + NetmapCantInvokeNetmapNewEpoch = "can't invoke netmap.NewEpoch" + NetmapNonAlphabetModeIgnoreNewPeerNotification = "non alphabet mode, ignore new peer notification" + NetmapNonhaltNotaryTransaction = "non-halt notary transaction" + NetmapCantParseNetworkMapCandidate = "can't parse network map candidate" + NetmapCouldNotVerifyAndUpdateInformationAboutNetworkMapCandidate = "could not verify and update information about network map candidate" + NetmapApprovingNetworkMapCandidate = "approving network map candidate" + NetmapCantInvokeNetmapAddPeer = "can't invoke netmap.AddPeer" + NetmapNonAlphabetModeIgnoreUpdatePeerNotification = "non alphabet mode, ignore update peer notification" + NetmapPreventSwitchingNodeToMaintenanceState = "prevent switching node to maintenance state" + NetmapCantInvokeNetmapUpdatePeer = "can't invoke netmap.UpdatePeer" + FrostFSIRInternalError = "internal error" + FrostFSIRCouldNotShutdownHTTPServer = "could not shutdown HTTP server" + FrostFSIRApplicationStopped = "application stopped" + FrostFSIRCouldntCreateRPCClientForEndpoint = "could not create RPC client for endpoint" + 
FrostFSIRCreatedRPCClientForEndpoint = "created RPC client for endpoint" + FrostFSIRReloadExtraWallets = "reload extra wallets" + FrostFSNodeStartListeningEndpoint = "start listening endpoint" + FrostFSNodeCouldNotReadCertificateFromFile = "could not read certificate from file" + FrostFSNodeCantListenGRPCEndpoint = "can't listen gRPC endpoint" + FrostFSNodeStopListeningGRPCEndpoint = "stop listening gRPC endpoint" + FrostFSNodeStoppingGRPCServer = "stopping gRPC server..." + FrostFSNodeGRPCCannotShutdownGracefullyForcingStop = "gRPC cannot shutdown gracefully, forcing stop" + FrostFSNodeGRPCServerStoppedSuccessfully = "gRPC server stopped successfully" + FrostFSNodeGRPCServerError = "gRPC server error" + FrostFSNodeGRPCReconnecting = "reconnecting gRPC server..." + FrostFSNodeGRPCReconnectedSuccessfully = "gRPC server reconnected successfully" + FrostFSNodeGRPCServerConfigNotFound = "gRPC server config not found" + FrostFSNodeGRPCReconnectFailed = "failed to reconnect gRPC server" + FrostFSNodeWaitingForAllProcessesToStop = "waiting for all processes to stop" + FrostFSNodeStartedLocalNodesMaintenance = "started local node's maintenance" + FrostFSNodeStoppedLocalNodesMaintenance = "stopped local node's maintenance" + FrostFSNodeFailedToAttachShardToEngine = "failed to attach shard to engine" + FrostFSNodeShardAttachedToEngine = "shard attached to engine" + FrostFSNodeClosingComponentsOfTheStorageEngine = "closing components of the storage engine..." + FrostFSNodeAccessPolicyEngineClosingFailure = "ape closing failure" + FrostFSNodeStorageEngineClosingFailure = "storage engine closing failure" + FrostFSNodePersistentRuleStorageDBPathIsNotSetInmemoryWillBeUsed = "persistent rule storage db path is not set: in-memory will be used" + FrostFSNodeAllComponentsOfTheStorageEngineClosedSuccessfully = "all components of the storage engine closed successfully" + FrostFSNodeBootstrappingWithTheMaintenanceState = "bootstrapping with the maintenance state" + FrostFSNodeBootstrappingWithOnlineState = "bootstrapping with online state" + FrostFSNodeTerminationSignalHasBeenReceivedStopping = "termination signal has been received, stopping..." + FrostFSNodeTerminationSignalProcessingIsComplete = "termination signal processing is complete" + FrostFSNodeInternalApplicationError = "internal application error" + FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete" + FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..." 
+ FrostFSNodeSIGHUPSkip = "node is not ready for reconfiguration, skipped SIGHUP" + FrostFSNodeShutdownSkip = "node is already shutting down, skipped shutdown" + FrostFSNodeShutdownWhenNotReady = "node is going to shut down when subsystems are still initializing" + FrostFSNodeConfigurationReading = "configuration reading" + FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation" + FrostFSNodeTracingConfigationUpdated = "tracing configuration updated" + FrostFSNodeStorageEngineConfigurationUpdate = "storage engine configuration update" + FrostFSNodePoolConfigurationUpdate = "adjust pool configuration" + FrostFSNodeUpdatedConfigurationApplying = "updated configuration applying" + FrostFSNodeConfigurationHasBeenReloadedSuccessfully = "configuration has been reloaded successfully" + FrostFSNodeReadNewlyCreatedContainerAfterTheNotification = "read newly created container after the notification" + FrostFSNodeContainerCreationEventsReceipt = "container creation event's receipt" + FrostFSNodeContainerRemovalEventsReceipt = "container removal event's receipt" + FrostFSNodeSaveUsedSpaceAnnouncementInContract = "save used space announcement in contract" + FrostFSNodeFailedToCalculateContainerSizeInStorageEngine = "failed to calculate container size in storage engine" + FrostFSNodeContainerSizeInStorageEngineCalculatedSuccessfully = "container size in storage engine calculated successfully" + FrostFSNodeNotificatorCouldNotListContainers = "notificator: could not list containers" + FrostFSNodeNotificatorCouldNotSelectObjectsFromContainer = "notificator: could not select objects from container" + FrostFSNodeNotificatorCouldNotProcessObject = "notificator: could not process object" + FrostFSNodeNotificatorFinishedProcessingObjectNotifications = "notificator: finished processing object notifications" + FrostFSNodeCouldNotWriteObjectNotification = "could not write object notification" + FrostFSNodeCouldNotGetMaxObjectSizeValue = "could not get max object size value" + FrostFSNodeCouldNotInhumeMarkRedundantCopyAsGarbage = "could not inhume mark redundant copy as garbage" + FrostFSNodeFailedInitTracing = "failed init tracing" + FrostFSNodeFailedShutdownTracing = "failed shutdown tracing" + FrostFSNodeFailedToCreateNeoRPCClient = "failed to create neo RPC client" + FrostFSNodeClosingMorphComponents = "closing morph components..."
+ FrostFSNodeFailedToSetGroupSignerScopeContinueWithGlobal = "failed to set group signer scope, continue with Global" + FrostFSNodeNotarySupport = "notary support" + FrostFSNodeMorphcacheTTLFetchedFromNetwork = "morph.cache_ttl fetched from network" + FrostFSNodeNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made" + FrostFSNodeCantGetLastProcessedSideChainBlockNumber = "can't get last processed side chain block number" + FrostFSNodeNewEpochEventFromSidechain = "new epoch event from sidechain" + FrostFSNodeNewBlock = "new block" + FrostFSNodeCantUpdatePersistentState = "can't update persistent state" + FrostFSNodeCantSendRebootstrapTx = "can't send re-bootstrap tx" + FrostFSNodeCouldNotUpdateNodeStateOnNewEpoch = "could not update node state on new epoch" + FrostFSNodeCouldNotMakeNotaryDeposit = "could not make notary deposit" + FrostFSNodeInitialNetworkState = "initial network state" + FrostFSNodeTreeServiceIsNotEnabledSkipInitialization = "tree service is not enabled, skip initialization" + FrostFSNodeCouldNotSynchronizeTreeService = "could not synchronize Tree Service" + FrostFSNodeRemovingAllTreesForContainer = "removing all trees for container" + FrostFSNodeContainerRemovalEventReceivedButTreesWerentRemoved = "container removal event received, but trees weren't removed" + FrostFSNodeCantListenGRPCEndpointControl = "can't listen gRPC endpoint (control)" + FrostFSNodePolicerIsDisabled = "policer is disabled" + CommonApplicationStarted = "application started" + ShardGCCollectingExpiredObjectsStarted = "collecting expired objects started" + ShardGCCollectingExpiredObjectsCompleted = "collecting expired objects completed" + ShardGCCollectingExpiredLocksStarted = "collecting expired locks started" + ShardGCCollectingExpiredLocksCompleted = "collecting expired locks completed" + ShardGCRemoveGarbageStarted = "garbage remove started" + ShardGCRemoveGarbageCompleted = "garbage remove completed" + EngineShardsEvacuationFailedToCount = "failed to get total objects count to evacuate" + EngineShardsEvacuationFailedToListObjects = "failed to list objects to evacuate" + EngineShardsEvacuationFailedToReadObject = "failed to read object to evacuate" + EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node" + ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked" + ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache" + FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap" + EngineCouldNotChangeShardModeToDisabled = "could not change shard mode to disabled" + NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap = "the node is already in candidate list with online state, skip initial bootstrap" + RPConnectionLost = "RPC connection lost, attempting reconnect" + RPCNodeSwitchFailure = "can't switch RPC node" + FSTreeCantReadFile = "can't read a file" + FSTreeCantUnmarshalObject = "can't unmarshal an object" + FSTreeCantFushObjectBlobstor = "can't flush an object to blobstor" + FSTreeCantUpdateID = "can't update object storage ID" + FSTreeCantDecodeDBObjectAddress = "can't decode object address from the DB" + PutSingleRedirectFailure = "failed to redirect PutSingle request" + StorageIDRetrievalFailure = "can't get storage ID from metabase" + ObjectRemovalFailureBlobStor = "can't remove object from blobStor" + CandidateStatusPriority = "candidate status is different from the netmap status, the former takes priority" + 
TombstoneExpirationParseFailure = "tombstone getter: could not parse tombstone expiration epoch" + FrostFSNodeCantUpdateObjectStorageID = "can't update object storage ID" + FrostFSNodeCantFlushObjectToBlobstor = "can't flush an object to blobstor" + FrostFSNodeCantDecodeObjectAddressFromDB = "can't decode object address from the DB" + FrostFSNodeCantUnmarshalObjectFromDB = "can't unmarshal an object from the DB" + RuntimeSoftMemoryLimitUpdated = "soft runtime memory limit value updated" + RuntimeSoftMemoryDefinedWithGOMEMLIMIT = "soft runtime memory defined with GOMEMLIMIT environment variable, config value skipped" + FailedToCountWritecacheItems = "failed to count writecache items" + AttemtToCloseAlreadyClosedBlobovnicza = "attempt to close an already closed blobovnicza" + FailedToGetContainerCounters = "failed to get container counters values" + FailedToRebuildBlobstore = "failed to rebuild blobstore" + BlobstoreRebuildStarted = "blobstore rebuild started" + BlobstoreRebuildCompletedSuccessfully = "blobstore rebuild completed successfully" + BlobstoreRebuildStopped = "blobstore rebuild stopped" + BlobovniczaTreeFixingFileExtensions = "fixing blobovnicza tree file extensions..." + BlobovniczaTreeFixingFileExtensionsCompletedSuccessfully = "fixing blobovnicza tree file extensions completed successfully" + BlobovniczaTreeFixingFileExtensionsFailed = "failed to fix blobovnicza tree file extensions" + BlobovniczaTreeFixingFileExtensionForFile = "fixing blobovnicza file extension..." + BlobovniczaTreeFixingFileExtensionCompletedSuccessfully = "fixing blobovnicza file extension completed successfully" + BlobovniczaTreeFixingFileExtensionFailed = "failed to fix blobovnicza file extension" + BlobstorRebuildFailedToRebuildStorages = "failed to rebuild storages" + BlobstorRebuildRebuildStoragesCompleted = "storages rebuild completed" + BlobovniczaTreeCollectingDBToRebuild = "collecting blobovniczas to rebuild..." + BlobovniczaTreeCollectingDBToRebuildFailed = "collecting blobovniczas to rebuild failed" + BlobovniczaTreeCollectingDBToRebuildSuccess = "collecting blobovniczas to rebuild completed successfully" + BlobovniczaTreeRebuildingBlobovnicza = "rebuilding blobovnicza..." + BlobovniczaTreeRebuildingBlobovniczaFailed = "rebuilding blobovnicza failed" + BlobovniczaTreeRebuildingBlobovniczaSuccess = "rebuilding blobovnicza completed successfully" + BlobovniczatreeCouldNotPutMoveInfoToSourceBlobovnicza = "could not put move info to source blobovnicza" + BlobovniczatreeCouldNotUpdateStorageID = "could not update storage ID" + BlobovniczatreeCouldNotDropMoveInfo = "could not drop move info from source blobovnicza" + BlobovniczatreeCouldNotDeleteFromSource = "could not delete object from source blobovnicza" + BlobovniczaTreeCompletingPreviousRebuild = "completing previous rebuild if failed..." + BlobovniczaTreeCompletedPreviousRebuildSuccess = "previous rebuild completed successfully" + BlobovniczaTreeCompletedPreviousRebuildFailed = "failed to complete previous rebuild" + BlobovniczatreeCouldNotCheckExistenceInSourceDB = "could not check object existence in source blobovnicza" + BlobovniczatreeCouldNotCheckExistenceInTargetDB = "could not check object existence in target blobovnicza" + BlobovniczatreeCouldNotGetObjectFromSourceDB = "could not get object from source blobovnicza" + BlobovniczatreeCouldNotPutObjectToTargetDB = "could not put object to target blobovnicza" + BlobovniczaSavingCountersToMeta = "saving counters to blobovnicza's meta..." 
+ BlobovniczaSavingCountersToMetaSuccess = "saving counters to blobovnicza's meta completed successfully" + BlobovniczaSavingCountersToMetaFailed = "saving counters to blobovnicza's meta failed" + ObjectRemovalFailureExistsInWritecache = "can't remove object: object must be flushed from writecache" + FailedToReportStatusToSystemd = "failed to report status to systemd" + ShardGCCollectingExpiredMetricsStarted = "collecting expired metrics started" + ShardGCCollectingExpiredMetricsCompleted = "collecting expired metrics completed" + ShardGCFailedToCollectZeroSizeContainers = "failed to collect zero-size containers" + ShardGCFailedToCollectZeroCountContainers = "failed to collect zero-count containers" + EngineFailedToCheckContainerAvailability = "failed to check container availability" + EngineFailedToGetContainerSize = "failed to get container size" + EngineFailedToDeleteContainerSize = "failed to delete container size" + EngineInterruptProcessingZeroSizeContainers = "interrupt processing zero-size containers" + EngineInterruptProcessingZeroCountContainers = "interrupt processing zero-count containers" + EngineFailedToGetContainerCounters = "failed to get container counters" + GetSvcV2FailedToParseNodeEndpoints = "failed to parse node endpoints" + GetSvcV2FailedToParseNodeExternalAddresses = "failed to parse node external addresses" + GetSvcV2FailedToGetRangeHashFromNode = "failed to get range hash from node" + GetSvcV2FailedToGetRangeHashFromAllOfContainerNodes = "failed to get range hash from all of container nodes" + FailedToUpdateShardID = "failed to update shard id" + EngineShardsEvacuationFailedToMoveTree = "failed to evacuate tree to other node" + EngineShardsEvacuationTreeEvacuatedLocal = "tree evacuated to local node" + EngineShardsEvacuationTreeEvacuatedRemote = "tree evacuated to other node" + EngineRefillFailedToGetObjectsCount = "failed to get blobstor objects count, no resync percent estimation is available" + BlobstoreFailedToGetFileinfo = "failed to get file info" + ECFailedToSendToContainerNode = "failed to send EC object to container node" + ECFailedToSaveECPart = "failed to save EC part" + FailedToParseAddressFromKey = "failed to parse address from key" + FailedToParseOwnerFromKey = "failed to parse owner from key" + FailedToParsePayloadHashFromKey = "failed to parse payload hash from key" + FailedToParseSplitIDFromKey = "failed to parse splitID from key" + FailedToParseAttributeValueFromKey = "failed to parse attribute value from key" + FailedToRunMetabaseGC = "failed to run badger GC on metabase" + PolicerNodeIsNotECObjectNode = "current node is not EC object node" + PolicerFailedToGetLocalECChunks = "failed to get local EC chunks" + PolicerMissingECChunk = "failed to find EC chunk on any of the nodes" + PolicerFailedToDecodeECChunkID = "failed to decode EC chunk ID" + PolicerDifferentObjectIDForTheSameECChunk = "different object IDs for the same EC chunk" + ReplicatorCouldNotGetObjectFromRemoteStorage = "could not get object from remote storage" + ReplicatorCouldNotPutObjectToLocalStorage = "could not put object to local storage" + PolicerCouldNotGetObjectFromNodeMoving = "could not get EC object from the node, moving current chunk to the node" + PolicerCouldNotRestoreObjectNotEnoughChunks = "could not restore EC object: not enough chunks" + PolicerFailedToRestoreObject = "failed to restore EC object" + PolicerCouldNotGetChunk = "could not get EC chunk" + PolicerCouldNotGetChunks = "could not get EC chunks" + ErrMetabaseConflict = "metabase conflict" + 
AuditEventLogRecord = "audit event log record" ) diff --git a/pkg/local_object_storage/blobstor/blobovniczatree/control.go b/pkg/local_object_storage/blobstor/blobovniczatree/control.go index 67e8c8f18..7470959a7 100644 --- a/pkg/local_object_storage/blobstor/blobovniczatree/control.go +++ b/pkg/local_object_storage/blobstor/blobovniczatree/control.go @@ -61,6 +61,7 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error { shBlz := b.getBlobovniczaWithoutCaching(p) blz, err := shBlz.Open() if err != nil { + b.log.Error("failed to open blobovnicza", zap.Error(err), zap.String("path", p)) return err } defer shBlz.Close() @@ -101,7 +102,6 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error { }) return false, nil }) - if err != nil { _ = eg.Wait() return err diff --git a/pkg/local_object_storage/engine/container.go b/pkg/local_object_storage/engine/container.go index e45f502ac..f76b0d8d8 100644 --- a/pkg/local_object_storage/engine/container.go +++ b/pkg/local_object_storage/engine/container.go @@ -5,7 +5,6 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" - "go.uber.org/zap" ) // ContainerSizePrm groups parameters of ContainerSize operation. @@ -41,56 +40,6 @@ func (r ListContainersRes) Containers() []cid.ID { return r.containers } -// ContainerSize returns the sum of estimation container sizes among all shards. -// -// Returns an error if executions are blocked (see BlockExecution). -func (e *StorageEngine) ContainerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) { - err = e.execIfNotBlocked(func() error { - res, err = e.containerSize(prm) - return err - }) - - return -} - -// ContainerSize calls ContainerSize method on engine to calculate sum of estimation container sizes among all shards. -func ContainerSize(e *StorageEngine, id cid.ID) (uint64, error) { - var prm ContainerSizePrm - - prm.SetContainerID(id) - - res, err := e.ContainerSize(prm) - if err != nil { - return 0, err - } - - return res.Size(), nil -} - -func (e *StorageEngine) containerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) { - if e.metrics != nil { - defer elapsed("EstimateContainerSize", e.metrics.AddMethodDuration)() - } - - e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) { - var csPrm shard.ContainerSizePrm - csPrm.SetContainerID(prm.cnr) - - csRes, err := sh.Shard.ContainerSize(csPrm) - if err != nil { - e.reportShardError(sh, "can't get container size", err, - zap.Stringer("container_id", prm.cnr)) - return false - } - - res.size += csRes.Size() - - return false - }) - - return -} - // ListContainers returns a unique container IDs presented in the engine objects. // // Returns an error if executions are blocked (see BlockExecution). 
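With the engine-level ContainerSize wrapper and its helpers removed above, container sizes are now summed by iterating shards at the call site (the engine/inhume.go hunk below shows the in-tree caller). A minimal sketch of the new call shape, assuming a plain slice of shards and omitting the engine's shard-error reporting; totalContainerSize is a hypothetical helper, not part of this patch:

// totalContainerSize sums one container's estimated size across shards.
func totalContainerSize(ctx context.Context, shards []*shard.Shard, id cid.ID) (uint64, error) {
	var prm shard.ContainerSizePrm
	prm.SetContainerID(id)

	var total uint64
	for _, sh := range shards {
		// shard.ContainerSize now takes a context, as in processZeroSizeContainers below
		res, err := sh.ContainerSize(ctx, prm)
		if err != nil {
			return 0, err
		}
		total += res.Size()
	}
	return total, nil
}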
diff --git a/pkg/local_object_storage/engine/control_test.go b/pkg/local_object_storage/engine/control_test.go index f0809883c..672a4eeb1 100644 --- a/pkg/local_object_storage/engine/control_test.go +++ b/pkg/local_object_storage/engine/control_test.go @@ -10,7 +10,6 @@ import ( "strconv" "sync/atomic" "testing" - "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor" @@ -24,7 +23,6 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger/test" cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" "github.com/stretchr/testify/require" - "go.etcd.io/bbolt" ) // TestInitializationFailure checks that shard is initialized and closed even if media @@ -53,10 +51,6 @@ func TestInitializationFailure(t *testing.T) { shard.WithBlobStorOptions( blobstor.WithStorages(storages)), shard.WithMetaBaseOptions( - meta.WithBoltDBOptions(&bbolt.Options{ - Timeout: 100 * time.Millisecond, - OpenFile: opts.openFileMetabase, - }), meta.WithPath(filepath.Join(t.TempDir(), "metabase")), meta.WithPermissions(0o700), meta.WithEpochState(epochState{})), @@ -83,6 +77,7 @@ func TestInitializationFailure(t *testing.T) { testEngineFailInitAndReload(t, false, shardOpts, beforeReload) }) t.Run("metabase", func(t *testing.T) { + t.Skip("badger doesn't support custom open file") var openFileMetabaseSucceed atomic.Bool openFileMetabase := func(p string, f int, mode fs.FileMode) (*os.File, error) { if openFileMetabaseSucceed.Load() { diff --git a/pkg/local_object_storage/engine/inhume.go b/pkg/local_object_storage/engine/inhume.go index 991305af0..71a191c42 100644 --- a/pkg/local_object_storage/engine/inhume.go +++ b/pkg/local_object_storage/engine/inhume.go @@ -277,7 +277,7 @@ func (e *StorageEngine) processExpiredLocks(ctx context.Context, epoch uint64, l func (e *StorageEngine) processDeletedLocks(ctx context.Context, lockers []oid.Address) { e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) { - sh.HandleDeletedLocks(lockers) + sh.HandleDeletedLocks(ctx, lockers) select { case <-ctx.Done(): @@ -317,7 +317,7 @@ func (e *StorageEngine) processZeroSizeContainers(ctx context.Context, ids []cid var drop []cid.ID for id := range idMap { prm.SetContainerID(id) - s, err := sh.ContainerSize(prm) + s, err := sh.ContainerSize(ctx, prm) if err != nil { e.log.Warn(logs.EngineFailedToGetContainerSize, zap.Stringer("container_id", id), zap.Error(err)) failed = true diff --git a/pkg/local_object_storage/metabase/badger.go b/pkg/local_object_storage/metabase/badger.go new file mode 100644 index 000000000..dff916bca --- /dev/null +++ b/pkg/local_object_storage/metabase/badger.go @@ -0,0 +1,137 @@ +package meta + +import ( + "bytes" + "context" + "time" + + "github.com/dgraph-io/badger/v4" +) + +const ( + // replace with lock by objectID ? + retryCount = 10 + retryTimeout = 5 * time.Millisecond +) + +// deleteByPrefix must be used only with small transactions. 
+func deleteByPrefix(ctx context.Context, tx *badger.Txn, prefix []byte) error { + for { + batch, err := selectByPrefixBatch(ctx, tx, prefix, batchSize) + if err != nil { + return err + } + for _, key := range batch { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if err := tx.Delete(key); err != nil { + return err + } + } + if len(batch) < batchSize { + return nil + } + } +} + +func (db *DB) deleteByPrefixBatched(ctx context.Context, prefix []byte) error { + wb := db.database.NewWriteBatch() + defer wb.Cancel() + + for { + batch, err := db.selectByPrefixBatchTxn(ctx, prefix, batchSize) + if err != nil { + return err + } + for _, key := range batch { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if err := wb.Delete(key); err != nil { + return err + } + } + if len(batch) < batchSize { + return wb.Flush() + } + } +} + +func (db *DB) selectByPrefixBatchTxn(ctx context.Context, prefix []byte, batchSize int) ([][]byte, error) { + txn := db.database.NewTransaction(false) + defer txn.Discard() + + return selectByPrefixBatch(ctx, txn, prefix, batchSize) +} + +func selectByPrefixBatch(ctx context.Context, tx *badger.Txn, prefix []byte, batchSize int) ([][]byte, error) { + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + }) + defer it.Close() + + var result [][]byte + for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + result = append(result, it.Item().KeyCopy(nil)) + if len(result) == batchSize { + return result, nil + } + } + return result, nil +} + +func selectByPrefixAndSeek(ctx context.Context, tx *badger.Txn, prefix, lastSeen []byte, withValues bool, batchSize int) ([]keyValue, error) { + opts := badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + } + if withValues { + opts.PrefetchValues = true + } + it := tx.NewIterator(opts) + defer it.Close() + + var result []keyValue + for it.Seek(lastSeen); it.ValidForPrefix(prefix); it.Next() { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + if bytes.Equal(lastSeen, it.Item().Key()) { + continue + } + var current keyValue + current.Key = it.Item().KeyCopy(nil) + if withValues { + var err error + current.Value, err = it.Item().ValueCopy(nil) + if err != nil { + return nil, err + } + } + result = append(result, current) + if len(result) == batchSize { + return result, nil + } + } + return result, nil +} + +func deleteByKey(tx *badger.Txn, key []byte) error { + return tx.Delete(key) +} diff --git a/pkg/local_object_storage/metabase/bucket.go b/pkg/local_object_storage/metabase/bucket.go new file mode 100644 index 000000000..47bb94a52 --- /dev/null +++ b/pkg/local_object_storage/metabase/bucket.go @@ -0,0 +1,106 @@ +package meta + +import ( + "math" + "sync" + + cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" +) + +type dbBucketDispatcher struct { + cond *sync.Cond + containerDispatchers map[cid.ID]*containerBucketDispatcher +} + +func newDBBucketDispatcher() *dbBucketDispatcher { + return &dbBucketDispatcher{ + cond: sync.NewCond(&sync.Mutex{}), + containerDispatchers: make(map[cid.ID]*containerBucketDispatcher), + } +} + +func (d *dbBucketDispatcher) BucketID(id cid.ID) (uint16, func()) { + d.cond.L.Lock() + defer d.cond.L.Unlock() + + cd := d.getOrCreateContainerDispatcher(id) + val, ok := cd.freeBucketID() + for !ok { + d.cond.Wait() + cd = 
d.getOrCreateContainerDispatcher(id) + val, ok = cd.freeBucketID() + } + return val, func() { + d.release(id, val) + } +} + +func (d *dbBucketDispatcher) release(id cid.ID, bucketID uint16) { + d.cond.L.Lock() + defer d.cond.L.Unlock() + + cd, ok := d.containerDispatchers[id] + if !ok { + panic("container bucket ID dispatcher not found") + } + notify := cd.full() + cd.release(bucketID) + if cd.empty() { + delete(d.containerDispatchers, id) + } + if notify { + d.cond.Broadcast() + } +} + +func (d *dbBucketDispatcher) getOrCreateContainerDispatcher(id cid.ID) *containerBucketDispatcher { + existed, found := d.containerDispatchers[id] + if found { + return existed + } + created := newContainerDispatcher() + d.containerDispatchers[id] = created + return created +} + +type containerBucketDispatcher struct { + free []uint16 + next uint16 + taken map[uint16]struct{} +} + +func newContainerDispatcher() *containerBucketDispatcher { + return &containerBucketDispatcher{ + taken: make(map[uint16]struct{}), + } +} + +func (d *containerBucketDispatcher) freeBucketID() (uint16, bool) { + if len(d.free) > 0 { + idx := len(d.free) - 1 + result := d.free[idx] + d.free = d.free[:idx] + d.taken[result] = struct{}{} + return result, true + } + if d.next == math.MaxUint16 { + return 0, false + } + v := d.next + d.next++ + d.taken[v] = struct{}{} + return v, true +} + +func (d *containerBucketDispatcher) release(bucketID uint16) { + delete(d.taken, bucketID) + d.free = append(d.free, bucketID) +} + +func (d *containerBucketDispatcher) empty() bool { + return len(d.taken) == 0 +} + +func (d *containerBucketDispatcher) full() bool { + return len(d.free) == 0 && len(d.taken) == math.MaxUint16 +} diff --git a/pkg/local_object_storage/metabase/children.go b/pkg/local_object_storage/metabase/children.go index acd367951..2ba95bfc3 100644 --- a/pkg/local_object_storage/metabase/children.go +++ b/pkg/local_object_storage/metabase/children.go @@ -7,7 +7,7 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -38,33 +38,28 @@ func (db *DB) GetChildren(ctx context.Context, addresses []oid.Address) (map[oid result := make(map[oid.Address][]oid.Address, len(addresses)) - buffer := make([]byte, bucketKeySize) - err := db.boltDB.View(func(tx *bbolt.Tx) error { + err := db.database.View(func(tx *badger.Txn) error { for _, addr := range addresses { if _, found := result[addr]; found { continue } - result[addr] = []oid.Address{} - bkt := tx.Bucket(parentBucketName(addr.Container(), buffer)) - if bkt == nil { - continue - } - - binObjIDs, err := decodeList(bkt.Get(objectKey(addr.Object(), buffer))) - if err != nil { - return err - } - - for _, binObjID := range binObjIDs { - var id oid.ID - if err = id.Decode(binObjID); err != nil { + for { + keys, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), batchSize) + if err != nil { return err } - var resultAddress oid.Address - resultAddress.SetContainer(addr.Container()) - resultAddress.SetObject(id) - result[addr] = append(result[addr], resultAddress) + + for _, key := range keys { + resultAddress, err := addressOfTargetFromParentKey(key) + if err != nil { + return err + } + result[addr] = append(result[addr], resultAddress) + } + if len(keys) < 
batchSize { + break + } } } return nil diff --git a/pkg/local_object_storage/metabase/containers.go b/pkg/local_object_storage/metabase/containers.go index 472b2affc..a8102d2fc 100644 --- a/pkg/local_object_storage/metabase/containers.go +++ b/pkg/local_object_storage/metabase/containers.go @@ -3,12 +3,20 @@ package meta import ( "context" "encoding/binary" + "fmt" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +const ( + containerSizeKeySize = 1 + cidSize + 2 + containerSizePrefixSize = 1 + cidSize ) func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) { @@ -30,8 +38,8 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) { return nil, ErrDegradedMode } - err = db.boltDB.View(func(tx *bbolt.Tx) error { - list, err = db.containers(tx) + err = db.database.View(func(tx *badger.Txn) error { + list, err = containers(tx) return err }) @@ -39,24 +47,28 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) { return list, metaerr.Wrap(err) } -func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) { +func containers(tx *badger.Txn) ([]cid.ID, error) { result := make([]cid.ID, 0) unique := make(map[string]struct{}) var cnr cid.ID - err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error { - if parseContainerID(&cnr, name, unique) { - result = append(result, cnr) - unique[string(name[1:bucketKeySize])] = struct{}{} - } - - return nil + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, }) + defer it.Close() - return result, err + for it.Seek(nil); it.Valid(); it.Next() { + name := it.Item().Key() + if parseContainerIDWithIgnore(&cnr, name, unique) { + result = append(result, cnr) + unique[string(name[1:containerSizePrefixSize])] = struct{}{} + } + } + + return result, nil } -func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) { +func (db *DB) ContainerSize(ctx context.Context, id cid.ID) (size uint64, err error) { db.modeMtx.RLock() defer db.modeMtx.RUnlock() @@ -64,58 +76,171 @@ func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) { return 0, ErrDegradedMode } - err = db.boltDB.View(func(tx *bbolt.Tx) error { - size, err = db.containerSize(tx, id) + result, err := db.containerSizesInternal(ctx, &id) + if err != nil { + return 0, metaerr.Wrap(err) + } + return result[id], nil +} - return err +func (db *DB) ContainerSizes(ctx context.Context) (map[cid.ID]uint64, error) { + db.modeMtx.RLock() + defer db.modeMtx.RUnlock() + + if db.mode.NoMetabase() { + return nil, ErrDegradedMode + } + + return db.containerSizesInternal(ctx, nil) +} + +// ZeroSizeContainers returns containers with size = 0. 
+func (db *DB) ZeroSizeContainers(ctx context.Context) ([]cid.ID, error) { + var ( + startedAt = time.Now() + success = false + ) + defer func() { + db.metrics.AddMethodDuration("ZeroSizeContainers", time.Since(startedAt), success) + }() + + ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroSizeContainers") + defer span.End() + + db.modeMtx.RLock() + defer db.modeMtx.RUnlock() + + sizes, err := db.containerSizesInternal(ctx, nil) + if err != nil { + return nil, err + } + var result []cid.ID + for id, size := range sizes { + if size == 0 { + result = append(result, id) + } + } + success = true + return result, nil +} + +func (db *DB) DeleteContainerSize(ctx context.Context, id cid.ID) error { + var ( + startedAt = time.Now() + success = false + ) + defer func() { + db.metrics.AddMethodDuration("DeleteContainerSize", time.Since(startedAt), success) + }() + + _, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerSize", + trace.WithAttributes( + attribute.Stringer("container_id", id), + )) + defer span.End() + + db.modeMtx.RLock() + defer db.modeMtx.RUnlock() + + if db.mode.NoMetabase() { + return ErrDegradedMode + } + + if db.mode.ReadOnly() { + return ErrReadOnlyMode + } + + err := db.deleteByPrefixBatched(ctx, containerSizeKeyPrefix(id)) + if err != nil { + return metaerr.Wrap(err) + } + success = true + return nil +} + +func (db *DB) containerSizesInternal(ctx context.Context, id *cid.ID) (map[cid.ID]uint64, error) { + prefix := []byte{containerSizePrefix} + if id != nil { + prefix = containerSizeKeyPrefix(*id) + } + result := make(map[cid.ID]int64) + err := db.database.View(func(tx *badger.Txn) error { + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + PrefetchValues: true, + }) + defer it.Close() + + for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + key := it.Item().Key() + var cnr cid.ID + if err := cnr.Decode(key[1:containerSizePrefixSize]); err != nil { + return fmt.Errorf("invalid container size key: %w", err) + } + + if err := it.Item().Value(func(val []byte) error { + value, ok := parseInt64Value(val) + if !ok { + return fmt.Errorf("invalid container size value for container %s", cnr) + } + result[cnr] += value + return nil + }); err != nil { + return err + } + } + return nil }) - - return size, metaerr.Wrap(err) -} - -func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) { - containerVolume := tx.Bucket(containerVolumeBucketName) - key := make([]byte, cidSize) - id.Encode(key) - - return parseContainerSize(containerVolume.Get(key)), nil -} - -func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool { - if len(name) != bucketKeySize { - return false - } - if _, ok := ignore[string(name[1:bucketKeySize])]; ok { - return false - } - return dst.Decode(name[1:bucketKeySize]) == nil -} - -func parseContainerSize(v []byte) uint64 { - if len(v) == 0 { - return 0 + if err != nil { + return nil, metaerr.Wrap(err) } - return binary.LittleEndian.Uint64(v) + return normilizeContainerSizes(result) } -func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) error { - containerVolume := tx.Bucket(containerVolumeBucketName) - key := make([]byte, cidSize) - id.Encode(key) +func normilizeContainerSizes(sizes map[cid.ID]int64) (map[cid.ID]uint64, error) { + result := make(map[cid.ID]uint64, len(sizes)) + for k, v := range sizes { + if v < 0 { + return nil, fmt.Errorf("invalid cumulative size for container %s", k) + } + result[k] = uint64(v) + } + return result, nil +}
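A note on the normalization step just above: the per-bucket size records are signed deltas, so a container whose buckets hold, say, +4096, +1024 and -2048 normalizes to 3072, while a cumulative sum below zero makes normilizeContainerSizes return an error instead of silently clamping to zero, which is what the deleted bbolt changeContainerSize used to do on underflow.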
- size := parseContainerSize(containerVolume.Get(key)) +func changeContainerSize(tx *badger.Txn, id cid.ID, delta int64, bucketID uint16) error { + key := containerSizeKey(id, bucketID) - if increase { - size += delta - } else if size > delta { - size -= delta - } else { - size = 0 + v, err := valueOrNil(tx, key) + if err != nil { + return err } - buf := make([]byte, 8) // consider using sync.Pool to decrease allocations - binary.LittleEndian.PutUint64(buf, size) + size, ok := parseInt64Value(v) + if !ok { + return fmt.Errorf("invalid container size value for container %s", id) + } - return containerVolume.Put(key, buf) + size += delta + value := marshalInt64(size) + return tx.Set(key, value) +} + +// containerSizeKeyPrefix returns containerSizePrefix_CID key prefix. +func containerSizeKeyPrefix(cnr cid.ID) []byte { + result := make([]byte, containerSizePrefixSize) + result[0] = containerSizePrefix + cnr.Encode(result[1:]) + return result +} + +// containerSizeKey returns containerSizePrefix_CID_bucketID key. +func containerSizeKey(cnr cid.ID, bucketID uint16) []byte { + result := make([]byte, containerSizeKeySize) + result[0] = containerSizePrefix + cnr.Encode(result[1:]) + binary.LittleEndian.PutUint16(result[containerSizePrefixSize:], bucketID) + return result } diff --git a/pkg/local_object_storage/metabase/containers_test.go b/pkg/local_object_storage/metabase/containers_test.go index 5d6788d7e..8627c50e1 100644 --- a/pkg/local_object_storage/metabase/containers_test.go +++ b/pkg/local_object_storage/metabase/containers_test.go @@ -151,7 +151,7 @@ func TestDB_ContainerSize(t *testing.T) { } for cnr, volume := range cids { - n, err := db.ContainerSize(cnr) + n, err := db.ContainerSize(context.Background(), cnr) require.NoError(t, err) require.Equal(t, volume, int(n)) } @@ -169,7 +169,7 @@ func TestDB_ContainerSize(t *testing.T) { volume -= int(obj.PayloadSize()) - n, err := db.ContainerSize(cnr) + n, err := db.ContainerSize(context.Background(), cnr) require.NoError(t, err) require.Equal(t, volume, int(n)) }
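containerSizeKey above fans a container's size counter out across per-writer buckets: a prefix byte, the container ID, then a little-endian uint16 bucket ID handed out by the dbBucketDispatcher from bucket.go, so concurrent badger transactions update disjoint keys rather than conflicting on a single one (badger's transactions are optimistic, hence the retry constants in badger.go and the ErrMetabaseConflict log message). A sketch of how a write path is expected to combine the dispatcher with changeContainerSize; the real wiring lives in the put/delete paths outside this hunk:

// inside package meta; cnr is the container being written,
// delta is the signed payload-size change
bucketID, release := db.bucketIDs.BucketID(cnr)
defer release() // hand the bucket ID back once the update is done

err := db.database.Update(func(tx *badger.Txn) error {
	return changeContainerSize(tx, cnr, delta, bucketID)
})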
diff --git a/pkg/local_object_storage/metabase/control.go b/pkg/local_object_storage/metabase/control.go index 891a1e9b2..367b8ca01 100644 --- a/pkg/local_object_storage/metabase/control.go +++ b/pkg/local_object_storage/metabase/control.go @@ -2,16 +2,18 @@ package meta import ( "context" - "errors" "fmt" + "math" "path/filepath" + "time" "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" + "github.com/dgraph-io/badger/v4/options" "go.uber.org/zap" ) @@ -21,23 +23,7 @@ var ErrDegradedMode = logicerr.New("metabase is in a degraded mode") // ErrReadOnlyMode is returned when metabase is in a read-only mode. var ErrReadOnlyMode = logicerr.New("metabase is in a read-only mode") -var ( - mStaticBuckets = map[string]struct{}{ - string(containerVolumeBucketName): {}, - string(containerCounterBucketName): {}, - string(graveyardBucketName): {}, - string(garbageBucketName): {}, - string(shardInfoBucket): {}, - string(bucketNameLocked): {}, - } - - // deprecatedBuckets buckets that are not used anymore. - deprecatedBuckets = [][]byte{ - toMoveItBucketName, - } -) - -// Open boltDB instance for metabase. +// Open metabase. func (db *DB) Open(_ context.Context, m mode.Mode) error { db.modeMtx.Lock() defer db.modeMtx.Unlock() @@ -47,149 +33,151 @@ if m.NoMetabase() { return nil } + return db.openDB(m) } func (db *DB) openDB(mode mode.Mode) error { - err := util.MkdirAllX(filepath.Dir(db.info.Path), db.info.Permission) + err := util.MkdirAllX(db.info.Path, db.info.Permission) if err != nil { return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err) } db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path)) - if db.boltOptions == nil { - opts := *bbolt.DefaultOptions - db.boltOptions = &opts - } - db.boltOptions.ReadOnly = mode.ReadOnly() - - return metaerr.Wrap(db.openBolt()) + return metaerr.Wrap(db.openBadger(mode.ReadOnly())) } -func (db *DB) openBolt() error { - var err error +func (db *DB) badgerOptions(readOnly bool) badger.Options { + opts := badger.DefaultOptions(db.info.Path) - db.boltDB, err = bbolt.Open(db.info.Path, db.info.Permission, db.boltOptions) - if err != nil { - return fmt.Errorf("can't open boltDB database: %w", err) + opts.BlockCacheSize = 0 // compression and encryption are disabled, so block cache should be disabled + opts.IndexCacheSize = db.indexCacheSize // 256MB by default, to avoid keeping all indices in memory + opts.Compression = options.None // no need to compress metabase values + opts.Logger = nil + opts.MetricsEnabled = false + opts.NumLevelZeroTablesStall = math.MaxInt // to avoid stalling on slow Level 0 compaction + opts.NumMemtables = db.memtablesCount + opts.NumCompactors = db.numCompactors + opts.SyncWrites = !db.noSync + opts.ValueLogMaxEntries = math.MaxUint32 // the default value log file size is 1GB, so a size limit is clearer than an entry count + opts.ValueThreshold = db.valueThreshold + opts.LmaxCompaction = true + opts.ReadOnly = readOnly + if db.verbose { + opts.Logger = &badgerLogger{l: db.log} } - db.boltDB.MaxBatchDelay = db.boltBatchDelay - db.boltDB.MaxBatchSize = db.boltBatchSize + opts.ValueLogFileSize = db.valueLogFileSize - db.log.Debug(logs.MetabaseOpenedBoltDBInstanceForMetabase) + return opts +} - db.log.Debug(logs.MetabaseCheckingMetabaseVersion) - return db.boltDB.View(func(tx *bbolt.Tx) error { - // The safest way to check if the metabase is fresh is to check if it has no buckets. - // However, shard info can be present. So here we check that the number of buckets is - // at most 1. - // Another thing to consider is that tests do not persist shard ID, we want to support - // this case too. - var n int - err := tx.ForEach(func([]byte, *bbolt.Bucket) error { - if n++; n >= 2 { // do not iterate a lot - return errBreakBucketForEach - } - return nil - }) +func (db *DB) openBadger(readOnly bool) error { + opts := db.badgerOptions(readOnly) - if err == errBreakBucketForEach { - db.initialized = true - err = nil + var err error + db.database, err = badger.Open(opts) + if err != nil { + return fmt.Errorf("can't open badger database: %w", err) + } + + if db.closed != nil { + close(db.closed) + db.wg.Wait() + db.closed = nil + } + + db.closed = make(chan struct{}) + db.wg.Add(1) + go db.collectGC() + + return db.database.View(func(txn *badger.Txn) error { + data, err := valueOrNil(txn, shardInfoKey(versionKey)) + if err != nil { + return err } - return err + db.initialized = len(data) > 0 + return nil }) } -// Init initializes metabase. It creates static (CID-independent) buckets in underlying BoltDB instance.
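A rough sizing note for the badgerOptions block above, assuming badger's default 64 MiB MemTableSize, which this patch does not override: 32 memtables admit about 2 GiB of mutable in-memory state per shard metabase before writes stall, on top of the 256 MiB index cache; the block cache stays at zero because neither compression nor encryption is enabled, and values of at least ValueThreshold (512) bytes land in value-log files capped by ValueLogFileSize rather than by entry count. For offline inspection the same directory can also be opened read-only; a minimal sketch using only stock badger options (the path is hypothetical):

opts := badger.DefaultOptions("/srv/frostfs/meta").
	WithReadOnly(true).
	WithCompression(options.None).
	WithIndexCacheSize(256 << 20)
db, err := badger.Open(opts)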
+func (db *DB) collectGC() { + defer db.wg.Done() + timer := time.NewTicker(db.gcInterval) + defer timer.Stop() + for { + select { + case <-db.closed: + return + case <-timer.C: + if err := db.database.RunValueLogGC(db.gcDiscardRatio); err == nil { + _ = db.database.RunValueLogGC(db.gcDiscardRatio) // see https://dgraph.io/docs/badger/get-started/#garbage-collection + } else { + db.log.Warn(logs.FailedToRunMetabaseGC, zap.Error(err), zap.String("path", db.info.Path)) + } + } + } +} + +// Init initializes metabase. // // Returns ErrOutdatedVersion if a database at the provided path is outdated. // // Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state, // use Reset. func (db *DB) Init() error { - return metaerr.Wrap(db.init(false)) + db.modeMtx.Lock() + defer db.modeMtx.Unlock() + + return metaerr.Wrap(db.init(context.TODO(), false)) } -// Reset resets metabase. Works similar to Init but cleans up all static buckets and -// removes all dynamic (CID-dependent) ones in non-blank BoltDB instances. -func (db *DB) Reset() error { - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() +func (db *DB) Init2(ctx context.Context) error { + db.modeMtx.Lock() + defer db.modeMtx.Unlock() + + return metaerr.Wrap(db.init(ctx, false)) +} + +// Reset resets metabase. Works similar to Init but cleans up all data records. +func (db *DB) Reset(ctx context.Context) error { + db.modeMtx.Lock() + defer db.modeMtx.Unlock() if db.mode.NoMetabase() { return ErrDegradedMode } - return metaerr.Wrap(db.init(true)) + return metaerr.Wrap(db.init(ctx, true)) } -func (db *DB) init(reset bool) error { +func (db *DB) init(ctx context.Context, reset bool) error { if db.mode.NoMetabase() || db.mode.ReadOnly() { return nil } - - return db.boltDB.Update(func(tx *bbolt.Tx) error { - var err error - if !reset { - // Normal open, check version and update if not initialized. - err := checkVersion(tx, db.initialized) - if err != nil { - return err - } - } - for k := range mStaticBuckets { - name := []byte(k) - if reset { - err := tx.DeleteBucket(name) - if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { - return fmt.Errorf("could not delete static bucket %s: %w", k, err) - } - } - - _, err := tx.CreateBucketIfNotExists(name) - if err != nil { - return fmt.Errorf("could not create static bucket %s: %w", k, err) - } - } - - for _, b := range deprecatedBuckets { - err := tx.DeleteBucket(b) - if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) { - return fmt.Errorf("could not delete deprecated bucket %s: %w", string(b), err) - } - } - - if !reset { // counters will be recalculated by refill metabase - err = syncCounter(tx, false) - if err != nil { - return fmt.Errorf("could not sync object counter: %w", err) - } - + if reset { + if err := db.database.DropAll(); err != nil { + return err + } + return db.database.Update(func(tx *badger.Txn) error { + return updateVersion(tx, version) + }) + } - bucketCursor := tx.Cursor() - name, _ := bucketCursor.First() - for name != nil { - if _, ok := mStaticBuckets[string(name)]; !ok { - if err := tx.DeleteBucket(name); err != nil { - return err - } - name, _ = bucketCursor.Seek(name) - continue - } - name, _ = bucketCursor.Next() - } - return updateVersion(tx, version) - }) + if err := db.checkVersion(); err != nil { + return err + } + + if err := db.syncCounter(ctx, false); err != nil { + return fmt.Errorf("could not sync object counter: %w", err) + } + + return nil } // SyncCounters forces to synchronize the object counters.
-func (db *DB) SyncCounters() error { - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() +func (db *DB) SyncCounters(ctx context.Context) error { + db.modeMtx.Lock() + defer db.modeMtx.Unlock() if db.mode.NoMetabase() { return ErrDegradedMode @@ -197,17 +185,27 @@ func (db *DB) SyncCounters() error { return ErrReadOnlyMode } - return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error { - return syncCounter(tx, true) - })) + return metaerr.Wrap(db.syncCounter(ctx, true)) } -// Close closes boltDB instance -// and reports metabase metric. +// Close closes metabase. func (db *DB) Close() error { + db.modeMtx.Lock() + defer db.modeMtx.Unlock() + + return db.close() +} + +func (db *DB) close() error { + if db.closed != nil { + close(db.closed) + db.wg.Wait() + db.closed = nil + } + var err error - if db.boltDB != nil { - err = db.close() + if db.database != nil { + err = metaerr.Wrap(db.database.Close()) } if err == nil { db.metrics.Close() @@ -215,10 +213,6 @@ func (db *DB) Close() error { return err } -func (db *DB) close() error { - return metaerr.Wrap(db.boltDB.Close()) -} - // Reload reloads part of the configuration. // It returns true iff database was reopened. // If a config option is invalid, it logs an error and returns nil. @@ -235,14 +229,14 @@ func (db *DB) Reload(opts ...Option) (bool, error) { defer db.modeMtx.Unlock() if db.mode.NoMetabase() || c.info.Path != "" && filepath.Clean(db.info.Path) != filepath.Clean(c.info.Path) { - if err := db.Close(); err != nil { + if err := db.close(); err != nil { return false, err } db.mode = mode.Disabled db.metrics.SetMode(mode.ComponentDisabled) db.info.Path = c.info.Path - if err := db.openBolt(); err != nil { + if err := db.openBadger(false); err != nil { return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err)) } diff --git a/pkg/local_object_storage/metabase/control_test.go b/pkg/local_object_storage/metabase/control_test.go index 0354a5eb6..7d31de96f 100644 --- a/pkg/local_object_storage/metabase/control_test.go +++ b/pkg/local_object_storage/metabase/control_test.go @@ -17,7 +17,7 @@ func TestReset(t *testing.T) { db := newDB(t) defer func() { require.NoError(t, db.Close()) }() - err := db.Reset() + err := db.Reset(context.Background()) require.NoError(t, err) obj := testutil.GenerateObject() @@ -47,7 +47,7 @@ func TestReset(t *testing.T) { assertExists(addr, true, nil) assertExists(addrToInhume, false, client.IsErrObjectAlreadyRemoved) - err = db.Reset() + err = db.Reset(context.Background()) require.NoError(t, err) assertExists(addr, false, nil) diff --git a/pkg/local_object_storage/metabase/counter.go b/pkg/local_object_storage/metabase/counter.go index 275099ff2..e75c8d71f 100644 --- a/pkg/local_object_storage/metabase/counter.go +++ b/pkg/local_object_storage/metabase/counter.go @@ -1,7 +1,6 @@ package meta import ( - "bytes" "context" "encoding/binary" "errors" @@ -13,29 +12,21 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) var ( - objectPhyCounterKey = []byte("phy_counter") - objectLogicCounterKey = []byte("logic_counter") - objectUserCounterKey = []byte("user_counter") + errInvalidKeyLenght = errors.New("invalid key length") + errInvalidKeyPrefix = errors.New("invalid key prefix") + errInvalidValueLenght = errors.New("invalid value 
length") + errInvalidContainerIDValue = errors.New("invalid container ID value") ) -var ( - errInvalidKeyLenght = errors.New("invalid key length") - errInvalidValueLenght = errors.New("invalid value length") -) - -type objectType uint8 - const ( - _ objectType = iota - phy - logical - user + containerObjectCountKeySize = 1 + cidSize + 2 + containerObjectCountPrefixSize = 1 + cidSize ) // ObjectCounters groups object counter @@ -50,12 +41,18 @@ func (o ObjectCounters) IsZero() bool { return o.Phy == 0 && o.Logic == 0 && o.User == 0 } +type objectCounterValue struct { + Logic int64 + Phy int64 + User int64 +} + // ObjectCounters returns object counters that metabase has // tracked since it was opened and initialized. // // Returns only the errors that do not allow reading counter -// in Bolt database. -func (db *DB) ObjectCounters() (cc ObjectCounters, err error) { +// in badger database. +func (db *DB) ObjectCounters(ctx context.Context) (ObjectCounters, error) { db.modeMtx.RLock() defer db.modeMtx.RUnlock() @@ -63,29 +60,22 @@ func (db *DB) ObjectCounters() (cc ObjectCounters, err error) { return ObjectCounters{}, ErrDegradedMode } - err = db.boltDB.View(func(tx *bbolt.Tx) error { - b := tx.Bucket(shardInfoBucket) - if b != nil { - data := b.Get(objectPhyCounterKey) - if len(data) == 8 { - cc.Phy = binary.LittleEndian.Uint64(data) - } - - data = b.Get(objectLogicCounterKey) - if len(data) == 8 { - cc.Logic = binary.LittleEndian.Uint64(data) - } - - data = b.Get(objectUserCounterKey) - if len(data) == 8 { - cc.User = binary.LittleEndian.Uint64(data) - } - } - - return nil + var cc map[cid.ID]ObjectCounters + err := db.database.View(func(tx *badger.Txn) error { + var err error + cc, err = containerObjectCounters(ctx, tx, nil) + return err }) - - return cc, metaerr.Wrap(err) + if err != nil { + return ObjectCounters{}, metaerr.Wrap(err) + } + var result ObjectCounters + for _, v := range cc { + result.Logic += v.Logic + result.Phy += v.Phy + result.User += v.User + } + return result, nil } type ContainerCounters struct { @@ -96,7 +86,7 @@ type ContainerCounters struct { // that metabase has tracked since it was opened and initialized. // // Returns only the errors that do not allow reading counter -// in Bolt database. +// in badger database. // // It is guaranteed that the ContainerCounters fields are not nil. 
func (db *DB) ContainerCounters(ctx context.Context) (ContainerCounters, error) { @@ -114,84 +104,16 @@ func (db *DB) ContainerCounters(ctx context.Context) (ContainerCounters, error) cc := ContainerCounters{ Counts: make(map[cid.ID]ObjectCounters), } - - lastKey := make([]byte, cidSize) - - // there is no limit for containers count, so use batching with cancellation - for { - select { - case <-ctx.Done(): - return cc, ctx.Err() - default: - } - - completed, err := db.containerCountersNextBatch(lastKey, func(id cid.ID, entity ObjectCounters) { - cc.Counts[id] = entity - }) - if err != nil { - return cc, err - } - if completed { - break - } - } - - success = true - return cc, nil -} - -func (db *DB) containerCountersNextBatch(lastKey []byte, f func(id cid.ID, entity ObjectCounters)) (bool, error) { - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() - - if db.mode.NoMetabase() { - return false, ErrDegradedMode - } - - counter := 0 - const batchSize = 1000 - - err := db.boltDB.View(func(tx *bbolt.Tx) error { - b := tx.Bucket(containerCounterBucketName) - if b == nil { - return ErrInterruptIterator - } - c := b.Cursor() - var key, value []byte - for key, value = c.Seek(lastKey); key != nil; key, value = c.Next() { - if bytes.Equal(lastKey, key) { - continue - } - copy(lastKey, key) - - cnrID, err := parseContainerCounterKey(key) - if err != nil { - return err - } - ent, err := parseContainerCounterValue(value) - if err != nil { - return err - } - f(cnrID, ent) - - counter++ - if counter == batchSize { - break - } - } - - if counter < batchSize { // last batch - return ErrInterruptIterator - } - return nil + err := db.database.View(func(tx *badger.Txn) error { + var err error + cc.Counts, err = containerObjectCounters(ctx, tx, nil) + return err }) if err != nil { - if errors.Is(err, ErrInterruptIterator) { - return true, nil - } - return false, metaerr.Wrap(err) + return ContainerCounters{}, metaerr.Wrap(err) } - return false, nil + success = true + return cc, nil } func (db *DB) ContainerCount(ctx context.Context, id cid.ID) (ObjectCounters, error) { @@ -213,144 +135,65 @@ func (db *DB) ContainerCount(ctx context.Context, id cid.ID) (ObjectCounters, er return ObjectCounters{}, ErrDegradedMode } - var result ObjectCounters - - err := db.boltDB.View(func(tx *bbolt.Tx) error { - b := tx.Bucket(containerCounterBucketName) - key := make([]byte, cidSize) - id.Encode(key) - v := b.Get(key) - if v == nil { - return nil - } + var cc map[cid.ID]ObjectCounters + err := db.database.View(func(tx *badger.Txn) error { var err error - result, err = parseContainerCounterValue(v) + cc, err = containerObjectCounters(ctx, tx, &id) return err }) - - return result, metaerr.Wrap(err) + if err != nil { + return ObjectCounters{}, metaerr.Wrap(err) + } + return cc[id], nil } -func (db *DB) incCounters(tx *bbolt.Tx, cnrID cid.ID, isUserObject bool) error { - b := tx.Bucket(shardInfoBucket) - if b == nil { - return db.incContainerObjectCounter(tx, cnrID, isUserObject) - } +func containerCounterKey(cnrID cid.ID, bucketID uint16) []byte { + result := make([]byte, containerObjectCountKeySize) + result[0] = containerCountersPrefix + cnrID.Encode(result[1:]) + binary.LittleEndian.PutUint16(result[containerObjectCountPrefixSize:], bucketID) + return result +} - if err := db.updateShardObjectCounterBucket(b, phy, 1, true); err != nil { - return fmt.Errorf("could not increase phy object counter: %w", err) - } - if err := db.updateShardObjectCounterBucket(b, logical, 1, true); err != nil { - return fmt.Errorf("could not increase 
logical object counter: %w", err) } if isUserObject { - if err := db.updateShardObjectCounterBucket(b, user, 1, true); err != nil { - return fmt.Errorf("could not increase user object counter: %w", err) - } + delta.User = 1 } - return db.incContainerObjectCounter(tx, cnrID, isUserObject) + return editContainerCounterValue(tx, cnrID, delta, bucketID) } -func (db *DB) updateShardObjectCounter(tx *bbolt.Tx, typ objectType, delta uint64, inc bool) error { - b := tx.Bucket(shardInfoBucket) - if b == nil { - return nil - } - - return db.updateShardObjectCounterBucket(b, typ, delta, inc) -} - -func (*DB) updateShardObjectCounterBucket(b *bbolt.Bucket, typ objectType, delta uint64, inc bool) error { - var counter uint64 - var counterKey []byte - - switch typ { - case phy: - counterKey = objectPhyCounterKey - case logical: - counterKey = objectLogicCounterKey - case user: - counterKey = objectUserCounterKey - default: - panic("unknown object type counter") - } - - data := b.Get(counterKey) - if len(data) == 8 { - counter = binary.LittleEndian.Uint64(data) - } - - if inc { - counter += delta - } else if counter <= delta { - counter = 0 - } else { - counter -= delta - } - - newCounter := make([]byte, 8) - binary.LittleEndian.PutUint64(newCounter, counter) - - return b.Put(counterKey, newCounter) -} - -func (db *DB) updateContainerCounter(tx *bbolt.Tx, delta map[cid.ID]ObjectCounters, inc bool) error { - b := tx.Bucket(containerCounterBucketName) - if b == nil { - return nil - } - - key := make([]byte, cidSize) +func updateContainerCounter(tx *badger.Txn, delta map[cid.ID]objectCounterValue, bucketIDs map[cid.ID]uint16) error { for cnrID, cnrDelta := range delta { - cnrID.Encode(key) - if err := db.editContainerCounterValue(b, key, cnrDelta, inc); err != nil { + bucketID, found := bucketIDs[cnrID] + if !found { + return fmt.Errorf("bucket ID not found for container %s", cnrID) + } + if err := editContainerCounterValue(tx, cnrID, cnrDelta, bucketID); err != nil { return err } } return nil } -func (*DB) editContainerCounterValue(b *bbolt.Bucket, key []byte, delta ObjectCounters, inc bool) error { - var entity ObjectCounters - var err error - data := b.Get(key) - if len(data) > 0 { - entity, err = parseContainerCounterValue(data) +func editContainerCounterValue(tx *badger.Txn, cnrID cid.ID, delta objectCounterValue, bucketID uint16) error { + key := containerCounterKey(cnrID, bucketID) + val, err := valueOrNil(tx, key) + if err != nil { + return err + } + setValue := delta + if val != nil { + existed, err := parseContainerCounterValue(val) if err != nil { return err } + setValue = mergeObjectCounterValues(setValue, existed) } - entity.Phy = nextValue(entity.Phy, delta.Phy, inc) - entity.Logic = nextValue(entity.Logic, delta.Logic, inc) - entity.User = nextValue(entity.User, delta.User, inc) - value := containerCounterValue(entity) - return b.Put(key, value) -} - -func nextValue(existed, delta uint64, inc bool) uint64 { - if inc { - existed += delta - } else if existed <= delta { - existed = 0 - } else { - existed -= delta - } - return existed -} - -func (db *DB) incContainerObjectCounter(tx *bbolt.Tx, cnrID cid.ID, isUserObject bool) error { - b := tx.Bucket(containerCounterBucketName) - if b == nil { - return nil - } - - key := make([]byte, cidSize) - cnrID.Encode(key) - c := ObjectCounters{Logic: 1, Phy: 1} - if isUserObject { - c.User = 1 - } - return
db.editContainerCounterValue(b, key, c, true) + return tx.Set(key, marshalContainerCounterValue(setValue)) } // syncCounter updates object counters according to metabase state: @@ -359,34 +202,34 @@ func (db *DB) incContainerObjectCounter(tx *bbolt.Tx, cnrID cid.ID, isUserObject // // Does nothing if counters are not empty and force is false. If force is // true, updates the counters anyway. -func syncCounter(tx *bbolt.Tx, force bool) error { - shardInfoB, err := createBucketLikelyExists(tx, shardInfoBucket) - if err != nil { - return fmt.Errorf("could not get shard info bucket: %w", err) - } - shardObjectCounterInitialized := len(shardInfoB.Get(objectPhyCounterKey)) == 8 && - len(shardInfoB.Get(objectLogicCounterKey)) == 8 && - len(shardInfoB.Get(objectUserCounterKey)) == 8 - containerObjectCounterInitialized := containerObjectCounterInitialized(tx) - if !force && shardObjectCounterInitialized && containerObjectCounterInitialized { - // the counters are already inited +func (db *DB) syncCounter(ctx context.Context, force bool) error { + if !force && db.containerObjectCounterInitialized(ctx) { return nil } - containerCounterB, err := createBucketLikelyExists(tx, containerCounterBucketName) + // drop existing counters + err := db.deleteByPrefixBatched(ctx, []byte{containerCountersPrefix}) if err != nil { - return fmt.Errorf("could not get container counter bucket: %w", err) + return err } + counters, err := db.getActualObjectCounters() + if err != nil { + return err + } + + return db.setObjectCounters(counters) +} + +func (db *DB) getActualObjectCounters() (map[cid.ID]ObjectCounters, error) { + tx := db.database.NewTransaction(false) + defer tx.Discard() + var addr oid.Address + var isAvailable bool counters := make(map[cid.ID]ObjectCounters) - graveyardBKT := tx.Bucket(graveyardBucketName) - garbageBKT := tx.Bucket(garbageBucketName) - key := make([]byte, addressKeySize) - var isAvailable bool - - err = iteratePhyObjects(tx, func(cnr cid.ID, objID oid.ID, obj *objectSDK.Object) error { + err := iteratePhyObjects(tx, func(cnr cid.ID, objID oid.ID, obj *objectSDK.Object) error { if v, ok := counters[cnr]; ok { v.Phy++ counters[cnr] = v @@ -400,9 +243,12 @@ func syncCounter(tx *bbolt.Tx, force bool) error { addr.SetObject(objID) isAvailable = false - // check if an object is available: not with GCMark - // and not covered with a tombstone - if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 { + st, err := inGraveyardWithKey(tx, addr) + if err != nil { + return err + } + + if st == 0 { if v, ok := counters[cnr]; ok { v.Logic++ counters[cnr] = v @@ -428,102 +274,30 @@ func syncCounter(tx *bbolt.Tx, force bool) error { return nil }) if err != nil { - return fmt.Errorf("could not iterate objects: %w", err) + return nil, fmt.Errorf("could not iterate objects: %w", err) } - - return setObjectCounters(counters, shardInfoB, containerCounterB) + return counters, nil } -func setObjectCounters(counters map[cid.ID]ObjectCounters, shardInfoB, containerCounterB *bbolt.Bucket) error { - var phyTotal uint64 - var logicTotal uint64 - var userTotal uint64 - key := make([]byte, cidSize) - for cnrID, count := range counters { - phyTotal += count.Phy - logicTotal += count.Logic - userTotal += count.User +func (db *DB) setObjectCounters(counters map[cid.ID]ObjectCounters) error { + wb := db.database.NewWriteBatch() + defer wb.Cancel() - cnrID.Encode(key) - value := containerCounterValue(count) - err := containerCounterB.Put(key, value) - if err != nil { - return fmt.Errorf("could not update
phy container object counter: %w", err) + for cnrID, count := range counters { + value := objectCounterValue{ + Logic: int64(count.Logic), + Phy: int64(count.Phy), + User: int64(count.User), + } + + // this function called by init or refill, so no other updates should happen + // so here bucketID = 0 can be used + key := containerCounterKey(cnrID, 0) + if err := wb.Set(key, marshalContainerCounterValue(value)); err != nil { + return err } } - phyData := make([]byte, 8) - binary.LittleEndian.PutUint64(phyData, phyTotal) - - err := shardInfoB.Put(objectPhyCounterKey, phyData) - if err != nil { - return fmt.Errorf("could not update phy object counter: %w", err) - } - - logData := make([]byte, 8) - binary.LittleEndian.PutUint64(logData, logicTotal) - - err = shardInfoB.Put(objectLogicCounterKey, logData) - if err != nil { - return fmt.Errorf("could not update logic object counter: %w", err) - } - - userData := make([]byte, 8) - binary.LittleEndian.PutUint64(userData, userTotal) - - err = shardInfoB.Put(objectUserCounterKey, userData) - if err != nil { - return fmt.Errorf("could not update user object counter: %w", err) - } - - return nil -} - -func containerCounterValue(entity ObjectCounters) []byte { - res := make([]byte, 24) - binary.LittleEndian.PutUint64(res, entity.Phy) - binary.LittleEndian.PutUint64(res[8:], entity.Logic) - binary.LittleEndian.PutUint64(res[16:], entity.User) - return res -} - -func parseContainerCounterKey(buf []byte) (cid.ID, error) { - if len(buf) != cidSize { - return cid.ID{}, errInvalidKeyLenght - } - var cnrID cid.ID - if err := cnrID.Decode(buf); err != nil { - return cid.ID{}, fmt.Errorf("failed to decode container ID: %w", err) - } - return cnrID, nil -} - -// parseContainerCounterValue return phy, logic values. -func parseContainerCounterValue(buf []byte) (ObjectCounters, error) { - if len(buf) != 24 { - return ObjectCounters{}, errInvalidValueLenght - } - return ObjectCounters{ - Phy: binary.LittleEndian.Uint64(buf), - Logic: binary.LittleEndian.Uint64(buf[8:16]), - User: binary.LittleEndian.Uint64(buf[16:]), - }, nil -} - -func containerObjectCounterInitialized(tx *bbolt.Tx) bool { - b := tx.Bucket(containerCounterBucketName) - if b == nil { - return false - } - k, v := b.Cursor().First() - if k == nil && v == nil { - return true - } - _, err := parseContainerCounterKey(k) - if err != nil { - return false - } - _, err = parseContainerCounterValue(v) - return err == nil + return wb.Flush() } func IsUserObject(obj *objectSDK.Object) bool { @@ -537,134 +311,6 @@ func IsUserObject(obj *objectSDK.Object) bool { return ech.Index() == 0 && (ech.ParentSplitID() == nil || ech.ParentSplitParentID() != nil) } -// ZeroSizeContainers returns containers with size = 0. 
-func (db *DB) ZeroSizeContainers(ctx context.Context) ([]cid.ID, error) { - var ( - startedAt = time.Now() - success = false - ) - defer func() { - db.metrics.AddMethodDuration("ZeroSizeContainers", time.Since(startedAt), success) - }() - - ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroSizeContainers") - defer span.End() - - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() - - var result []cid.ID - lastKey := make([]byte, cidSize) - - for { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - completed, err := db.containerSizesNextBatch(lastKey, func(contID cid.ID, size uint64) { - if size == 0 { - result = append(result, contID) - } - }) - if err != nil { - return nil, err - } - if completed { - break - } - } - - success = true - return result, nil -} - -func (db *DB) containerSizesNextBatch(lastKey []byte, f func(cid.ID, uint64)) (bool, error) { - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() - - if db.mode.NoMetabase() { - return false, ErrDegradedMode - } - - counter := 0 - const batchSize = 1000 - - err := db.boltDB.View(func(tx *bbolt.Tx) error { - b := tx.Bucket(containerVolumeBucketName) - c := b.Cursor() - var key, value []byte - for key, value = c.Seek(lastKey); key != nil; key, value = c.Next() { - if bytes.Equal(lastKey, key) { - continue - } - copy(lastKey, key) - - size := parseContainerSize(value) - var id cid.ID - if err := id.Decode(key); err != nil { - return err - } - f(id, size) - - counter++ - if counter == batchSize { - break - } - } - - if counter < batchSize { - return ErrInterruptIterator - } - return nil - }) - if err != nil { - if errors.Is(err, ErrInterruptIterator) { - return true, nil - } - return false, metaerr.Wrap(err) - } - return false, nil -} - -func (db *DB) DeleteContainerSize(ctx context.Context, id cid.ID) error { - var ( - startedAt = time.Now() - success = false - ) - defer func() { - db.metrics.AddMethodDuration("DeleteContainerSize", time.Since(startedAt), success) - }() - - _, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerSize", - trace.WithAttributes( - attribute.Stringer("container_id", id), - )) - defer span.End() - - db.modeMtx.RLock() - defer db.modeMtx.RUnlock() - - if db.mode.NoMetabase() { - return ErrDegradedMode - } - - if db.mode.ReadOnly() { - return ErrReadOnlyMode - } - - err := db.boltDB.Update(func(tx *bbolt.Tx) error { - b := tx.Bucket(containerVolumeBucketName) - - key := make([]byte, cidSize) - id.Encode(key) - return b.Delete(key) - }) - success = err == nil - return metaerr.Wrap(err) -} - // ZeroCountContainers returns containers with objects count = 0 in metabase. 
func (db *DB) ZeroCountContainers(ctx context.Context) ([]cid.ID, error) { var ( @@ -687,24 +333,18 @@ func (db *DB) ZeroCountContainers(ctx context.Context) ([]cid.ID, error) { var result []cid.ID - lastKey := make([]byte, cidSize) - for { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - completed, err := db.containerCountersNextBatch(lastKey, func(id cid.ID, entity ObjectCounters) { - if entity.IsZero() { - result = append(result, id) - } - }) - if err != nil { - return nil, metaerr.Wrap(err) - } - if completed { - break + var cc map[cid.ID]ObjectCounters + err := db.database.View(func(tx *badger.Txn) error { + var err error + cc, err = containerObjectCounters(ctx, tx, nil) + return err + }) + if err != nil { + return nil, metaerr.Wrap(err) + } + for cnrID, c := range cc { + if c.IsZero() { + result = append(result, cnrID) } } success = true @@ -737,13 +377,105 @@ func (db *DB) DeleteContainerCount(ctx context.Context, id cid.ID) error { return ErrReadOnlyMode } - err := db.boltDB.Update(func(tx *bbolt.Tx) error { - b := tx.Bucket(containerCounterBucketName) + prefix := make([]byte, containerObjectCountPrefixSize) + prefix[0] = containerCountersPrefix + id.Encode(prefix[1:]) - key := make([]byte, cidSize) - id.Encode(key) - return b.Delete(key) - }) - success = err == nil - return metaerr.Wrap(err) + err := db.deleteByPrefixBatched(ctx, prefix) + if err != nil { + return metaerr.Wrap(err) + } + success = true + return nil +} + +func (db *DB) containerObjectCounterInitialized(ctx context.Context) bool { + err := db.database.View(func(txn *badger.Txn) error { + _, e := containerObjectCounters(ctx, txn, nil) + return e + }) + return err == nil +} + +func containerObjectCounters(ctx context.Context, tx *badger.Txn, cnrID *cid.ID) (map[cid.ID]ObjectCounters, error) { + prefix := []byte{containerCountersPrefix} + if cnrID != nil { + buf := make([]byte, cidSize) + cnrID.Encode(buf) + prefix = append(prefix, buf...) + } + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + PrefetchValues: true, + }) + defer it.Close() + + counters := make(map[cid.ID]objectCounterValue) + for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + var cnrID cid.ID + if !parseContainerID(&cnrID, it.Item().Key()) { + return nil, errInvalidContainerIDValue + } + + if err := it.Item().Value(func(val []byte) error { + oc, err := parseContainerCounterValue(val) + if err != nil { + return err + } + counters[cnrID] = mergeObjectCounterValues(counters[cnrID], oc) + return nil + }); err != nil { + return nil, fmt.Errorf("invalid container object counter value: %w", err) + } + } + return normilizeObjectCounters(counters) +} + +// parseContainerCounterValue return phy, logic values. 
+func parseContainerCounterValue(buf []byte) (objectCounterValue, error) { + if len(buf) != 24 { + return objectCounterValue{}, errInvalidValueLenght + } + return objectCounterValue{ + Phy: int64(binary.LittleEndian.Uint64(buf[:8])), + Logic: int64(binary.LittleEndian.Uint64(buf[8:16])), + User: int64(binary.LittleEndian.Uint64(buf[16:])), + }, nil +} + +func marshalContainerCounterValue(v objectCounterValue) []byte { + buf := make([]byte, 24) + binary.LittleEndian.PutUint64(buf[:8], uint64(v.Phy)) + binary.LittleEndian.PutUint64(buf[8:16], uint64(v.Logic)) + binary.LittleEndian.PutUint64(buf[16:], uint64(v.User)) + return buf +} + +func mergeObjectCounterValues(lhs, rhs objectCounterValue) objectCounterValue { + lhs.Logic += rhs.Logic + lhs.Phy += rhs.Phy + lhs.User += rhs.User + return lhs +} + +func normilizeObjectCounters(values map[cid.ID]objectCounterValue) (map[cid.ID]ObjectCounters, error) { + result := make(map[cid.ID]ObjectCounters, len(values)) + for k, v := range values { + if v.Logic < 0 || v.Phy < 0 || v.User < 0 { + return nil, fmt.Errorf("invalid container object counter for container ID %s", k.EncodeToString()) + } + var oc ObjectCounters + oc.Logic = uint64(v.Logic) + oc.Phy = uint64(v.Phy) + oc.User = uint64(v.User) + result[k] = oc + } + return result, nil } diff --git a/pkg/local_object_storage/metabase/counter_test.go b/pkg/local_object_storage/metabase/counter_test.go index 1797fc0aa..e6c98203a 100644 --- a/pkg/local_object_storage/metabase/counter_test.go +++ b/pkg/local_object_storage/metabase/counter_test.go @@ -23,7 +23,7 @@ func TestCounters(t *testing.T) { t.Parallel() db := newDB(t) defer func() { require.NoError(t, db.Close()) }() - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Zero(t, c.Phy) require.Zero(t, c.Logic) @@ -59,7 +59,7 @@ func TestCounters(t *testing.T) { _, err := db.Put(context.Background(), prm) require.NoError(t, err) - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(i+1), c.Phy) @@ -90,13 +90,13 @@ func TestCounters(t *testing.T) { var prm meta.DeletePrm for i := objCount - 1; i >= 0; i-- { - prm.SetAddresses(objectcore.AddressOf(oo[i])) + prm.Address = objectcore.AddressOf(oo[i]) res, err := db.Delete(context.Background(), prm) require.NoError(t, err) - require.Equal(t, uint64(1), res.LogicCount()) + require.Equal(t, uint64(1), res.LogicCount) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(i), c.Phy) @@ -164,7 +164,7 @@ func TestCounters(t *testing.T) { require.Equal(t, uint64(len(inhumedObjs)), res.LogicInhumed()) require.Equal(t, uint64(len(inhumedObjs)), res.UserInhumed()) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(objCount), c.Phy) @@ -203,7 +203,7 @@ func TestCounters(t *testing.T) { require.NoError(t, putBig(db, o)) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(i+1), c.Phy) require.Equal(t, uint64(i+1), c.Logic) @@ -238,7 +238,7 @@ func TestCounters(t *testing.T) { addr := objectcore.AddressOf(o) require.NoError(t, metaDelete(db, addr)) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(objCount-i-1), c.Phy) require.Equal(t, uint64(objCount-i-1), c.Logic) 
@@ -302,7 +302,7 @@ func TestCounters(t *testing.T) { _, err := db.Inhume(context.Background(), prm) require.NoError(t, err) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(objCount), c.Phy) @@ -336,7 +336,7 @@ func TestDoublePut(t *testing.T) { require.NoError(t, err) require.True(t, pr.Inserted) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(1), c.Phy) @@ -352,7 +352,7 @@ func TestDoublePut(t *testing.T) { require.NoError(t, err) require.False(t, pr.Inserted) - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(1), c.Phy) @@ -395,7 +395,7 @@ func TestCounters_Expired(t *testing.T) { // 1. objects are available and counters are correct - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(objCount), c.Phy) require.Equal(t, uint64(objCount), c.Logic) @@ -416,7 +416,7 @@ func TestCounters_Expired(t *testing.T) { es.e = epoch + 2 - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(objCount), c.Phy) require.Equal(t, uint64(objCount), c.Logic) @@ -445,7 +445,7 @@ func TestCounters_Expired(t *testing.T) { require.Equal(t, uint64(1), inhumeRes.LogicInhumed()) require.Equal(t, uint64(1), inhumeRes.UserInhumed()) - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(len(oo)), c.Phy) @@ -472,12 +472,12 @@ func TestCounters_Expired(t *testing.T) { // that step they should be equal) var deletePrm meta.DeletePrm - deletePrm.SetAddresses(oo[0]) + deletePrm.Address = oo[0] deleteRes, err := db.Delete(context.Background(), deletePrm) require.NoError(t, err) - require.Zero(t, deleteRes.LogicCount()) - require.Zero(t, deleteRes.UserCount()) + require.Zero(t, deleteRes.LogicCount) + require.Zero(t, deleteRes.UserCount) if v, ok := exp[oo[0].Container()]; ok { v.Phy-- @@ -486,7 +486,7 @@ func TestCounters_Expired(t *testing.T) { oo = oo[1:] - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(len(oo)), c.Phy) require.Equal(t, uint64(len(oo)), c.Logic) @@ -501,12 +501,12 @@ func TestCounters_Expired(t *testing.T) { // service do) should decrease both counters despite the // expiration fact - deletePrm.SetAddresses(oo[0]) + deletePrm.Address = oo[0] deleteRes, err = db.Delete(context.Background(), deletePrm) require.NoError(t, err) - require.Equal(t, uint64(1), deleteRes.LogicCount()) - require.Equal(t, uint64(1), deleteRes.UserCount()) + require.Equal(t, uint64(1), deleteRes.LogicCount) + require.Equal(t, uint64(1), deleteRes.UserCount) if v, ok := exp[oo[0].Container()]; ok { v.Phy-- @@ -517,7 +517,7 @@ func TestCounters_Expired(t *testing.T) { oo = oo[1:] - c, err = db.ObjectCounters() + c, err = db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(len(oo)), c.Phy) require.Equal(t, uint64(len(oo)), c.Logic) @@ -548,7 +548,7 @@ func putObjs(t *testing.T, db *meta.DB, count int, withParent bool) []*objectSDK _, err = db.Put(context.Background(), prm) require.NoError(t, err) - c, err := db.ObjectCounters() + c, err := db.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, uint64(i+1), c.Phy) diff --git 
a/pkg/local_object_storage/metabase/db.go b/pkg/local_object_storage/metabase/db.go index 1f444a3ef..ae9193ec1 100644 --- a/pkg/local_object_storage/metabase/db.go +++ b/pkg/local_object_storage/metabase/db.go @@ -1,7 +1,6 @@ package meta import ( - "bytes" "encoding/binary" "encoding/hex" "io/fs" @@ -14,17 +13,11 @@ import ( v2object "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger" - objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + "github.com/dgraph-io/badger/v4" "github.com/mr-tron/base58" - "go.etcd.io/bbolt" "go.uber.org/zap" ) -type matcher struct { - matchSlow func(string, []byte, string) bool - matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error -} - // EpochState is an interface that provides access to the // current epoch number. type EpochState interface { @@ -39,28 +32,37 @@ type DB struct { modeMtx sync.RWMutex mode mode.Mode - matchers map[objectSDK.SearchMatchType]matcher - - boltDB *bbolt.DB - + database *badger.DB initialized bool + + bucketIDs *dbBucketDispatcher + + wg sync.WaitGroup + closed chan struct{} } // Option is an option of DB constructor. type Option func(*cfg) type cfg struct { - boltOptions *bbolt.Options // optional - - boltBatchSize int - boltBatchDelay time.Duration - info Info log *logger.Logger epochState EpochState metrics Metrics + + // badger options + + noSync bool + verbose bool + memtablesCount int + valueThreshold int64 + valueLogFileSize int64 + indexCacheSize int64 + numCompactors int + gcInterval time.Duration + gcDiscardRatio float64 } func defaultCfg() *cfg { @@ -68,10 +70,15 @@ func defaultCfg() *cfg { info: Info{ Permission: os.ModePerm, // 0777 }, - boltBatchDelay: bbolt.DefaultMaxBatchDelay, - boltBatchSize: bbolt.DefaultMaxBatchSize, - log: &logger.Logger{Logger: zap.L()}, - metrics: &noopMetrics{}, + log: &logger.Logger{Logger: zap.L()}, + metrics: &noopMetrics{}, + memtablesCount: 32, + valueThreshold: 512, + valueLogFileSize: 1<<30 - 1, + indexCacheSize: 256 << 20, + numCompactors: 8, + gcInterval: 10 * time.Minute, + gcDiscardRatio: 0.5, } } @@ -88,26 +95,9 @@ func New(opts ...Option) *DB { } return &DB{ - cfg: c, - matchers: map[objectSDK.SearchMatchType]matcher{ - objectSDK.MatchUnknown: { - matchSlow: unknownMatcher, - matchBucket: unknownMatcherBucket, - }, - objectSDK.MatchStringEqual: { - matchSlow: stringEqualMatcher, - matchBucket: stringEqualMatcherBucket, - }, - objectSDK.MatchStringNotEqual: { - matchSlow: stringNotEqualMatcher, - matchBucket: stringNotEqualMatcherBucket, - }, - objectSDK.MatchCommonPrefix: { - matchSlow: stringCommonPrefixMatcher, - matchBucket: stringCommonPrefixMatcherBucket, - }, - }, - mode: mode.Disabled, + cfg: c, + bucketIDs: newDBBucketDispatcher(), + mode: mode.Disabled, } } @@ -115,7 +105,7 @@ func stringifyValue(key string, objVal []byte) string { switch key { default: return string(objVal) - case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent: + case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent, v2object.FilterHeaderECParent: return base58.Encode(objVal) case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash: return hex.EncodeToString(objVal) @@ -146,7 +136,7 @@ func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) { switch key { default: return []byte(value), 
false, true - case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent: + case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent, v2object.FilterHeaderECParent: v, err := base58.Decode(value) return v, false, err == nil case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash: @@ -186,110 +176,18 @@ func stringEqualMatcher(key string, objVal []byte, filterVal string) bool { return stringifyValue(key, objVal) == filterVal } -func stringEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error { - // Ignore the second return value because we check for strict equality. - val, _, ok := destringifyValue(fKey, fValue, false) - if !ok { - return nil - } - if data := b.Get(val); data != nil { - return f(val, data) - } - if b.Bucket(val) != nil { - return f(val, nil) - } - return nil -} - func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool { return stringifyValue(key, objVal) != filterVal } -func stringNotEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error { - // Ignore the second return value because we check for strict inequality. - val, _, ok := destringifyValue(fKey, fValue, false) - return b.ForEach(func(k, v []byte) error { - if !ok || !bytes.Equal(val, k) { - return f(k, v) - } - return nil - }) -} - func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool { return strings.HasPrefix(stringifyValue(key, objVal), filterVal) } -func stringCommonPrefixMatcherBucket(b *bbolt.Bucket, fKey string, fVal string, f func([]byte, []byte) error) error { - val, checkLast, ok := destringifyValue(fKey, fVal, true) - if !ok { - return nil - } - - prefix := val - if checkLast { - prefix = val[:len(val)-1] - } - - if len(val) == 0 { - // empty common prefix, all the objects - // satisfy that filter - return b.ForEach(f) - } - - c := b.Cursor() - for k, v := c.Seek(val); bytes.HasPrefix(k, prefix); k, v = c.Next() { - if checkLast && (len(k) == len(prefix) || k[len(prefix)]>>4 != val[len(val)-1]) { - // If the last byte doesn't match, this means the prefix does no longer match, - // so we need to break here. - break - } - if err := f(k, v); err != nil { - return err - } - } - return nil -} - func unknownMatcher(_ string, _ []byte, _ string) bool { return false } -func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []byte) error) error { - return nil -} - -// bucketKeyHelper returns byte representation of val that is used as a key -// in boltDB. Useful for getting filter values from unique and list indexes. -func bucketKeyHelper(hdr string, val string) []byte { - switch hdr { - case v2object.FilterHeaderParent, v2object.FilterHeaderECParent: - v, err := base58.Decode(val) - if err != nil { - return nil - } - return v - case v2object.FilterHeaderPayloadHash: - v, err := hex.DecodeString(val) - if err != nil { - return nil - } - - return v - case v2object.FilterHeaderSplitID: - s := objectSDK.NewSplitID() - - err := s.Parse(val) - if err != nil { - return nil - } - - return s.ToV2() - default: - return []byte(val) - } -} - // SetLogger sets logger. It is used after the shard ID was generated to use it in logs. func (db *DB) SetLogger(l *logger.Logger) { db.log = l @@ -307,13 +205,6 @@ func WithLogger(l *logger.Logger) Option { } } -// WithBoltDBOptions returns option to specify BoltDB options. 
-func WithBoltDBOptions(opts *bbolt.Options) Option { - return func(c *cfg) { - c.boltOptions = opts - } -} - // WithPath returns option to set system path to Metabase. func WithPath(path string) Option { return func(c *cfg) { @@ -329,28 +220,6 @@ func WithPermissions(perm fs.FileMode) Option { } } -// WithMaxBatchSize returns option to specify maximum concurrent operations -// to be processed in a single transactions. -// This option is missing from `bbolt.Options` but is set right after DB is open. -func WithMaxBatchSize(s int) Option { - return func(c *cfg) { - if s != 0 { - c.boltBatchSize = s - } - } -} - -// WithMaxBatchDelay returns option to specify maximum time to wait before -// the batch of concurrent transactions is processed. -// This option is missing from `bbolt.Options` but is set right after DB is open. -func WithMaxBatchDelay(d time.Duration) Option { - return func(c *cfg) { - if d != 0 { - c.boltBatchDelay = d - } - } -} - // WithEpochState return option to specify a source of current epoch height. func WithEpochState(s EpochState) Option { return func(c *cfg) { @@ -364,3 +233,57 @@ func WithMetrics(m Metrics) Option { c.metrics = m } } + +func WithNoSync(v bool) Option { + return func(c *cfg) { + c.noSync = v + } +} + +func WithVerbose(v bool) Option { + return func(c *cfg) { + c.verbose = v + } +} + +func WithMemtablesCount(v int) Option { + return func(c *cfg) { + c.memtablesCount = v + } +} + +func WithValueThreshold(v int64) Option { + return func(c *cfg) { + c.valueThreshold = v + } +} + +func WithValueLogFileSize(v int64) Option { + return func(c *cfg) { + c.valueLogFileSize = v + } +} + +func WithIndexCacheSize(v int64) Option { + return func(c *cfg) { + c.indexCacheSize = v + } +} + +func WithNumCompactors(v int) Option { + return func(c *cfg) { + c.numCompactors = v + } +} + +func WithGCInterval(v time.Duration) Option { + return func(c *cfg) { + c.gcInterval = v + } +} + +func WithGCDiscardRatio(v float64) Option { + return func(c *cfg) { + c.gcDiscardRatio = v + } +} diff --git a/pkg/local_object_storage/metabase/delete.go b/pkg/local_object_storage/metabase/delete.go index 21b98fca1..3efd9c846 100644 --- a/pkg/local_object_storage/metabase/delete.go +++ b/pkg/local_object_storage/metabase/delete.go @@ -1,7 +1,6 @@ package meta import ( - "bytes" "context" "errors" "fmt" @@ -15,63 +14,23 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" + "github.com/dgraph-io/badger/v4" ) var errFailedToRemoveUniqueIndexes = errors.New("can't remove unique indexes") // DeletePrm groups the parameters of Delete operation. type DeletePrm struct { - addrs []oid.Address + Address oid.Address } // DeleteRes groups the resulting values of Delete operation. type DeleteRes struct { - phyCount uint64 - logicCount uint64 - userCount uint64 - phySize uint64 - logicSize uint64 - removedByCnrID map[cid.ID]ObjectCounters -} - -// LogicCount returns the number of removed logic -// objects. -func (d DeleteRes) LogicCount() uint64 { - return d.logicCount -} - -func (d DeleteRes) UserCount() uint64 { - return d.userCount -} - -// RemovedByCnrID returns the number of removed objects by container ID. 
-func (d DeleteRes) RemovedByCnrID() map[cid.ID]ObjectCounters { - return d.removedByCnrID -} - -// PhyCount returns the number of removed physical objects. -func (d DeleteRes) PhyCount() uint64 { - return d.phyCount -} - -// PhySize returns the size of removed physical objects. -func (d DeleteRes) PhySize() uint64 { - return d.phySize -} - -// LogicSize returns the size of removed logical objects. -func (d DeleteRes) LogicSize() uint64 { - return d.logicSize -} - -// SetAddresses is a Delete option to set the addresses of the objects to delete. -// -// Option is required. -func (p *DeletePrm) SetAddresses(addrs ...oid.Address) { - p.addrs = addrs + PhyCount uint64 + LogicCount uint64 + UserCount uint64 + PhySize uint64 + LogicSize uint64 } type referenceNumber struct { @@ -82,8 +41,6 @@ type referenceNumber struct { obj *objectSDK.Object } -type referenceCounter map[string]*referenceNumber - // Delete removed object records from metabase indexes. func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { var ( @@ -94,10 +51,7 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { db.metrics.AddMethodDuration("Delete", time.Since(startedAt), deleted) }() - _, span := tracing.StartSpanFromContext(ctx, "metabase.Delete", - trace.WithAttributes( - attribute.Int("addr_count", len(prm.addrs)), - )) + _, span := tracing.StartSpanFromContext(ctx, "metabase.Delete") defer span.End() db.modeMtx.RLock() @@ -109,370 +63,232 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { return DeleteRes{}, ErrReadOnlyMode } + bucketID, release := db.bucketIDs.BucketID(prm.Address.Container()) + defer release() + var err error var res DeleteRes - err = db.boltDB.Update(func(tx *bbolt.Tx) error { - res, err = db.deleteGroup(tx, prm.addrs) + err = db.database.Update(func(tx *badger.Txn) error { + res, err = db.deleteByAddress(ctx, tx, prm.Address, bucketID) return err }) if err == nil { deleted = true - for i := range prm.addrs { - storagelog.Write(db.log, - storagelog.AddressField(prm.addrs[i]), - storagelog.OpField("metabase DELETE")) - } + storagelog.Write(db.log, + storagelog.AddressField(prm.Address), + storagelog.OpField("metabase DELETE")) } return res, metaerr.Wrap(err) } // deleteGroup deletes object from the metabase. Handles removal of the // references of the split objects. 
-func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (DeleteRes, error) { - res := DeleteRes{ - removedByCnrID: make(map[cid.ID]ObjectCounters), - } - refCounter := make(referenceCounter, len(addrs)) +func (db *DB) deleteByAddress(ctx context.Context, tx *badger.Txn, addr oid.Address, bucketID uint16) (DeleteRes, error) { + refCounter := &referenceNumber{} currEpoch := db.epochState.CurrentEpoch() - - for i := range addrs { - r, err := db.delete(tx, addrs[i], refCounter, currEpoch) - if err != nil { - return DeleteRes{}, err - } - - applyDeleteSingleResult(r, &res, addrs, i) - } - - if err := db.updateCountersDelete(tx, res); err != nil { + res, err := db.delete(ctx, tx, addr, refCounter, currEpoch) + if err != nil { return DeleteRes{}, err } - for _, refNum := range refCounter { - if refNum.cur == refNum.all { - err := db.deleteObject(tx, refNum.obj, true) - if err != nil { - return DeleteRes{}, err - } - } + if err := db.updateCountersDelete(tx, addr.Container(), res, bucketID); err != nil { + return DeleteRes{}, err } + if refCounter.cur == refCounter.all { + err := db.deleteObject(ctx, tx, refCounter.obj, true) + if err != nil { + return DeleteRes{}, err + } + } return res, nil } -func (db *DB) updateCountersDelete(tx *bbolt.Tx, res DeleteRes) error { - if res.phyCount > 0 { - err := db.updateShardObjectCounter(tx, phy, res.phyCount, false) - if err != nil { - return fmt.Errorf("could not decrease phy object counter: %w", err) - } +func (db *DB) updateCountersDelete(tx *badger.Txn, cnrID cid.ID, res DeleteRes, bucketID uint16) error { + delta := map[cid.ID]objectCounterValue{ + cnrID: { + Logic: -1 * int64(res.LogicCount), + Phy: -1 * int64(res.PhyCount), + User: -1 * int64(res.UserCount), + }, } - - if res.logicCount > 0 { - err := db.updateShardObjectCounter(tx, logical, res.logicCount, false) - if err != nil { - return fmt.Errorf("could not decrease logical object counter: %w", err) - } + bucketIDs := map[cid.ID]uint16{ + cnrID: bucketID, } - - if res.userCount > 0 { - err := db.updateShardObjectCounter(tx, user, res.userCount, false) - if err != nil { - return fmt.Errorf("could not decrease user object counter: %w", err) - } - } - - if err := db.updateContainerCounter(tx, res.removedByCnrID, false); err != nil { + if err := updateContainerCounter(tx, delta, bucketIDs); err != nil { return fmt.Errorf("could not decrease container object counter: %w", err) } return nil } -func applyDeleteSingleResult(r deleteSingleResult, res *DeleteRes, addrs []oid.Address, i int) { - if r.Phy { - if v, ok := res.removedByCnrID[addrs[i].Container()]; ok { - v.Phy++ - res.removedByCnrID[addrs[i].Container()] = v - } else { - res.removedByCnrID[addrs[i].Container()] = ObjectCounters{ - Phy: 1, - } - } - - res.phyCount++ - res.phySize += r.Size - } - - if r.Logic { - if v, ok := res.removedByCnrID[addrs[i].Container()]; ok { - v.Logic++ - res.removedByCnrID[addrs[i].Container()] = v - } else { - res.removedByCnrID[addrs[i].Container()] = ObjectCounters{ - Logic: 1, - } - } - - res.logicCount++ - res.logicSize += r.Size - } - - if r.User { - if v, ok := res.removedByCnrID[addrs[i].Container()]; ok { - v.User++ - res.removedByCnrID[addrs[i].Container()] = v - } else { - res.removedByCnrID[addrs[i].Container()] = ObjectCounters{ - User: 1, - } - } - - res.userCount++ - } -} - -type deleteSingleResult struct { - Phy bool - Logic bool - User bool - Size uint64 -} - // delete removes object indexes from the metabase. Counts the references // of the object that is being removed. 
// Deleting a non-existent object is error-free. The returned DeleteRes
// reports the physical, logical and user counters of what was removed,
// together with the physical and logical payload sizes.
-func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (deleteSingleResult, error) {
-	key := make([]byte, addressKeySize)
-	addrKey := addressKey(addr, key)
-	garbageBKT := tx.Bucket(garbageBucketName)
-	graveyardBKT := tx.Bucket(graveyardBucketName)
-
-	removeAvailableObject := inGraveyardWithKey(addrKey, graveyardBKT, garbageBKT) == 0
+func (db *DB) delete(ctx context.Context, tx *badger.Txn, addr oid.Address, refCounter *referenceNumber, currEpoch uint64) (DeleteRes, error) {
+	status, err := inGraveyardWithKey(tx, addr)
+	if err != nil {
+		return DeleteRes{}, err
+	}
+	removeAvailableObject := status == 0

 	// unmarshal object, work only with physically stored (raw == true) objects
-	obj, err := db.get(tx, addr, key, false, true, currEpoch)
+	obj, err := get(ctx, tx, addr, false, true, currEpoch)
 	if err != nil {
 		if client.IsErrObjectNotFound(err) {
-			addrKey = addressKey(addr, key)
-			if garbageBKT != nil {
-				err := garbageBKT.Delete(addrKey)
-				if err != nil {
-					return deleteSingleResult{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
-				}
+			err := deleteFromGarbage(tx, addr)
+			if err != nil {
+				return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
 			}
-			return deleteSingleResult{}, nil
+			return DeleteRes{}, nil
 		}

 		var siErr *objectSDK.SplitInfoError
 		var ecErr *objectSDK.ECInfoError
 		if errors.As(err, &siErr) || errors.As(err, &ecErr) {
 			// if object is virtual (parent) then do nothing, it will be deleted with last child
 			// if object is erasure-coded it will be deleted with the last chunk presented on the shard
-			return deleteSingleResult{}, nil
+			return DeleteRes{}, nil
 		}

-		return deleteSingleResult{}, err
+		return DeleteRes{}, err
 	}

-	addrKey = addressKey(addr, key)
 	// remove record from the garbage bucket
-	if garbageBKT != nil {
-		err := garbageBKT.Delete(addrKey)
-		if err != nil {
-			return deleteSingleResult{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
-		}
+	err = deleteFromGarbage(tx, addr)
+	if err != nil {
+		return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
 	}

 	// if object is an only link to a parent, then remove parent
 	if parent := obj.Parent(); parent != nil {
 		parAddr := object.AddressOf(parent)
-		sParAddr := addressKey(parAddr, key)
-		k := string(sParAddr)
-
-		nRef, ok := refCounter[k]
-		if !ok {
-			nRef = &referenceNumber{
-				all:  parentLength(tx, parAddr),
-				addr: parAddr,
-				obj:  parent,
-			}
-
-			refCounter[k] = nRef
+		parentLen, err := parentLength(ctx, tx, parAddr)
+		if err != nil {
+			return DeleteRes{}, fmt.Errorf("failed to get parent count for object %s: %w", parAddr, err)
 		}
-
-		nRef.cur++
+		refCounter.addr = parAddr
+		refCounter.all = parentLen
+		refCounter.obj = parent
+		refCounter.cur = 1
 	}

 	isUserObject := IsUserObject(obj)

 	// remove object
-	err = db.deleteObject(tx, obj, false)
+	err = db.deleteObject(ctx, tx, obj, false)
 	if err != nil {
-		return deleteSingleResult{}, fmt.Errorf("could not remove object: %w", err)
+		return DeleteRes{}, fmt.Errorf("could not remove object: %w", err)
 	}

-	return deleteSingleResult{
-		Phy:   true,
-		Logic: removeAvailableObject,
-		User:  isUserObject && removeAvailableObject,
-		Size:
obj.PayloadSize(), - }, nil + var result DeleteRes + result.PhyCount = 1 + result.PhySize = obj.PayloadSize() + if removeAvailableObject { + result.LogicCount = 1 + result.LogicSize = obj.PayloadSize() + } + if removeAvailableObject && isUserObject { + result.UserCount = 1 + } + + return result, nil } func (db *DB) deleteObject( - tx *bbolt.Tx, + ctx context.Context, + tx *badger.Txn, obj *objectSDK.Object, isParent bool, ) error { - err := delUniqueIndexes(tx, obj, isParent) + err := delUniqueIndexes(ctx, tx, obj, isParent) if err != nil { return errFailedToRemoveUniqueIndexes } - err = updateListIndexes(tx, obj, delListIndexItem) + err = updateListIndexes(tx, obj, deleteByKey) if err != nil { return fmt.Errorf("can't remove list indexes: %w", err) } - err = updateFKBTIndexes(tx, obj, delFKBTIndexItem) + err = updateFKBTIndexes(tx, obj, deleteByKey) if err != nil { return fmt.Errorf("can't remove fake bucket tree indexes: %w", err) } if isParent { // remove record from the garbage bucket, because regular object deletion does nothing for virtual object - garbageBKT := tx.Bucket(garbageBucketName) - if garbageBKT != nil { - key := make([]byte, addressKeySize) - addrKey := addressKey(object.AddressOf(obj), key) - err := garbageBKT.Delete(addrKey) - if err != nil { - return fmt.Errorf("could not remove from garbage bucket: %w", err) - } + err := deleteFromGarbage(tx, object.AddressOf(obj)) + if err != nil { + return fmt.Errorf("could not remove from garbage bucket: %w", err) } } return nil } +func deleteFromGarbage(tx *badger.Txn, addr oid.Address) error { + return tx.Delete(garbageKey(addr.Container(), addr.Object())) +} + // parentLength returns amount of available children from parentid index. -func parentLength(tx *bbolt.Tx, addr oid.Address) int { - bucketName := make([]byte, bucketKeySize) - - bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:])) - if bkt == nil { - return 0 - } - - lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:]))) - if err != nil { - return 0 - } - - return len(lst) -} - -func delUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) { - bkt := tx.Bucket(item.name) - if bkt != nil { - _ = bkt.Delete(item.key) // ignore error, best effort there - } -} - -func delFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error { - bkt := tx.Bucket(item.name) - if bkt == nil { - return nil - } - - fkbtRoot := bkt.Bucket(item.key) - if fkbtRoot == nil { - return nil - } - - _ = fkbtRoot.Delete(item.val) // ignore error, best effort there - return nil -} - -func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error { - bkt := tx.Bucket(item.name) - if bkt == nil { - return nil - } - - lst, err := decodeList(bkt.Get(item.key)) - if err != nil || len(lst) == 0 { - return nil - } - - // remove element from the list - for i := range lst { - if bytes.Equal(item.val, lst[i]) { - copy(lst[i:], lst[i+1:]) - lst = lst[:len(lst)-1] +func parentLength(ctx context.Context, tx *badger.Txn, addr oid.Address) (int, error) { + var result int + prefix := parentKeyLongPrefix(addr.Container(), addr.Object()) + const batchSize = 1000 + for { + ids, err := selectByPrefixBatch(ctx, tx, prefix, batchSize) + if err != nil { + return 0, err + } + result += len(ids) + if len(ids) < batchSize { break } } - - // if list empty, remove the key from bucket - if len(lst) == 0 { - _ = bkt.Delete(item.key) // ignore error, best effort there - - return nil - } - - // if list is not empty, then update it - encodedLst, err := encodeList(lst) - if err != nil { - return nil // 
ignore error, best effort there - } - - _ = bkt.Put(item.key, encodedLst) // ignore error, best effort there - return nil + return result, nil } -func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error { - addr := object.AddressOf(obj) +func delParent(ctx context.Context, tx *badger.Txn, addr oid.Address) error { + prefix := parentKeyLongPrefix(addr.Container(), addr.Object()) + return deleteByPrefix(ctx, tx, prefix) +} - objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) - cnr := addr.Container() - bucketName := make([]byte, bucketKeySize) +func delUniqueIndexes(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, isParent bool) error { + addr := object.AddressOf(obj) // add value to primary unique bucket if !isParent { + var key []byte switch obj.Type() { case objectSDK.TypeRegular: - bucketName = primaryBucketName(cnr, bucketName) + key = primaryKey(addr.Container(), addr.Object()) case objectSDK.TypeTombstone: - bucketName = tombstoneBucketName(cnr, bucketName) + key = tombstoneKey(addr.Container(), addr.Object()) case objectSDK.TypeLock: - bucketName = bucketNameLockers(cnr, bucketName) + key = lockersKey(addr.Container(), addr.Object()) default: return ErrUnknownObjectType } - delUniqueIndexItem(tx, namedBucketItem{ - name: bucketName, - key: objKey, - }) + if err := tx.Delete(key); err != nil { + return err + } } else { - delUniqueIndexItem(tx, namedBucketItem{ - name: parentBucketName(cnr, bucketName), - key: objKey, - }) + if err := delParent(ctx, tx, addr); err != nil { + return err + } } - delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index - name: smallBucketName(cnr, bucketName), - key: objKey, - }) - delUniqueIndexItem(tx, namedBucketItem{ // remove from root index - name: rootBucketName(cnr, bucketName), - key: objKey, - }) - - return nil + if err := tx.Delete(smallKey(addr.Container(), addr.Object())); err != nil { + return err + } + if ecHead := obj.ECHeader(); ecHead != nil { + if err := tx.Delete(ecInfoKey(addr.Container(), ecHead.Parent(), addr.Object())); err != nil { + return err + } + } + return tx.Delete(rootKey(addr.Container(), addr.Object())) } diff --git a/pkg/local_object_storage/metabase/delete_test.go b/pkg/local_object_storage/metabase/delete_test.go index 2053874d0..020b4882e 100644 --- a/pkg/local_object_storage/metabase/delete_test.go +++ b/pkg/local_object_storage/metabase/delete_test.go @@ -92,8 +92,8 @@ func TestDeleteAllChildren(t *testing.T) { require.True(t, errors.As(err, &siErr)) // remove all children in single call - err = metaDelete(db, object.AddressOf(child1), object.AddressOf(child2)) - require.NoError(t, err) + require.NoError(t, metaDelete(db, object.AddressOf(child1))) + require.NoError(t, metaDelete(db, object.AddressOf(child2))) // parent should not be found now ex, err := metaExists(db, object.AddressOf(parent)) @@ -155,9 +155,11 @@ func TestDelete(t *testing.T) { require.NoError(t, db.IterateOverGarbage(context.Background(), iprm)) require.Equal(t, 10, len(addrs)) var deletePrm meta.DeletePrm - deletePrm.SetAddresses(addrs...) 
- _, err := db.Delete(context.Background(), deletePrm) - require.NoError(t, err) + for _, addr := range addrs { + deletePrm.Address = addr + _, err := db.Delete(context.Background(), deletePrm) + require.NoError(t, err) + } addrs = nil iprm.SetHandler(func(o meta.GarbageObject) error { @@ -190,7 +192,7 @@ func TestDeleteDropsGCMarkIfObjectNotFound(t *testing.T) { require.Equal(t, 1, garbageCount) var delPrm meta.DeletePrm - delPrm.SetAddresses(addr) + delPrm.Address = addr _, err = db.Delete(context.Background(), delPrm) require.NoError(t, err) @@ -199,9 +201,9 @@ func TestDeleteDropsGCMarkIfObjectNotFound(t *testing.T) { require.Equal(t, 0, garbageCount) } -func metaDelete(db *meta.DB, addrs ...oid.Address) error { +func metaDelete(db *meta.DB, addr oid.Address) error { var deletePrm meta.DeletePrm - deletePrm.SetAddresses(addrs...) + deletePrm.Address = addr _, err := db.Delete(context.Background(), deletePrm) return err diff --git a/pkg/local_object_storage/metabase/exists.go b/pkg/local_object_storage/metabase/exists.go index 153d92110..51f735111 100644 --- a/pkg/local_object_storage/metabase/exists.go +++ b/pkg/local_object_storage/metabase/exists.go @@ -1,19 +1,18 @@ package meta import ( + "bytes" "context" "fmt" "time" - objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status" - cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -81,8 +80,8 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err currEpoch := db.epochState.CurrentEpoch() - err = db.boltDB.View(func(tx *bbolt.Tx) error { - res.exists, res.locked, err = db.exists(tx, prm.addr, prm.paddr, currEpoch) + err = db.database.View(func(tx *badger.Txn) error { + res.exists, res.locked, err = exists(ctx, tx, prm.addr, prm.paddr, currEpoch) return err }) @@ -90,13 +89,20 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err return res, metaerr.Wrap(err) } -func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) { +func exists(ctx context.Context, tx *badger.Txn, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) { + status, err := objectStatus(ctx, tx, addr, currEpoch) + if err != nil { + return false, false, err + } var locked bool if !parent.Equals(oid.Address{}) { - locked = objectLocked(tx, parent.Container(), parent.Object()) + locked, err = objectLocked(ctx, tx, parent.Container(), parent.Object()) + if err != nil { + return false, false, err + } } // check graveyard and object expiration first - switch objectStatus(tx, addr, currEpoch) { + switch status { case 1: return false, locked, logicerr.Wrap(new(apistatus.ObjectNotFound)) case 2: @@ -105,32 +111,41 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo return false, locked, ErrObjectIsExpired } - objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) - - cnr := addr.Container() - key := make([]byte, bucketKeySize) - - // 
if graveyard is empty, then check if object exists in primary bucket
-	if inBucket(tx, primaryBucketName(cnr, key), objKey) {
+	v, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return false, false, err
+	}
+	if v != nil {
 		return true, locked, nil
 	}

-	// if primary bucket is empty, then check if object exists in parent bucket
-	if inBucket(tx, parentBucketName(cnr, key), objKey) {
-		splitInfo, err := getSplitInfo(tx, cnr, objKey)
+	children, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1) // try to find any child
+	if err != nil {
+		return false, false, err
+	}
+	if len(children) > 0 {
+		splitInfo, err := getSplitInfo(tx, addr)
 		if err != nil {
-			return false, locked, err
+			return false, false, err
 		}
-
 		return false, locked, logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
 	}
-	// if parent bucket is empty, then check if object exists in ec bucket
-	if data := getFromBucket(tx, ecInfoBucketName(cnr, key), objKey); len(data) != 0 {
-		return false, locked, getECInfoError(tx, cnr, data)
+
+	// if parent bucket is empty, then check if object exists with ec prefix
+	children, err = selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), 1) // try to find any child
+	if err != nil {
+		return false, locked, err
+	}
+	if len(children) > 0 {
+		return false, locked, getECInfoError(ctx, tx, addr)
 	}

-	// if parent bucket is empty, then check if object exists in typed buckets
-	return firstIrregularObjectType(tx, cnr, objKey) != objectSDK.TypeRegular, locked, nil
+	t, err := firstIrregularObjectType(tx, addr.Container(), addr.Object())
+	if err != nil {
+		return false, false, err
+	}
+
+	return t != objectSDK.TypeRegular, locked, nil
 }

 // objectStatus returns:
@@ -138,86 +153,68 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo
 // - 1 if object with GC mark;
 // - 2 if object is covered with tombstone;
 // - 3 if object is expired.
-func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 { +func objectStatus(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (uint8, error) { // locked object could not be removed/marked with GC/expired - if objectLocked(tx, addr.Container(), addr.Object()) { - return 0 + locked, err := objectLocked(ctx, tx, addr.Container(), addr.Object()) + if err != nil { + return 0, err + } + if locked { + return 0, nil } - // we check only if the object is expired in the current - // epoch since it is considered the only corner case: the - // GC is expected to collect all the objects that have - // expired previously for less than the one epoch duration + st, err := inGraveyardWithKey(tx, addr) + if err != nil { + return 0, err + } + if st > 0 { + return st, nil + } - expired := isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpoch, addr, currEpoch) - if !expired { - expired = isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpochNeoFS, addr, currEpoch) + expired, err := isExpired(ctx, tx, addr, currEpoch) + if err != nil { + return 0, err } if expired { - return 3 + return 3, nil } - - graveyardBkt := tx.Bucket(graveyardBucketName) - garbageBkt := tx.Bucket(garbageBucketName) - addrKey := addressKey(addr, make([]byte, addressKeySize)) - return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt) + return 0, nil } -func inGraveyardWithKey(addrKey []byte, graveyard, garbageBCK *bbolt.Bucket) uint8 { - if graveyard == nil { - // incorrect metabase state, does not make - // sense to check garbage bucket - return 0 +func inGraveyardWithKey(tx *badger.Txn, addr oid.Address) (uint8, error) { + v, err := valueOrNil(tx, graveyardKey(addr.Container(), addr.Object())) + if err != nil { + return 0, err + } + if v != nil { + return 2, nil + } + v, err = valueOrNil(tx, garbageKey(addr.Container(), addr.Object())) + if err != nil { + return 0, err + } + if v != nil { + return 1, nil } - val := graveyard.Get(addrKey) - if val == nil { - if garbageBCK == nil { - // incorrect node state - return 0 - } - - val = garbageBCK.Get(addrKey) - if val != nil { - // object has been marked with GC - return 1 - } - - // neither in the graveyard - // nor was marked with GC mark - return 0 - } - - // object in the graveyard - return 2 -} - -// inBucket checks if key is present in bucket . -func inBucket(tx *bbolt.Tx, name, key []byte) bool { - bkt := tx.Bucket(name) - if bkt == nil { - return false - } - - // using `get` as `exists`: https://github.com/boltdb/bolt/issues/321 - val := bkt.Get(key) - - return len(val) != 0 + return 0, nil } // getSplitInfo returns SplitInfo structure from root index. Returns error // if there is no `key` record in root index. 
-func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) { - bucketName := rootBucketName(cnr, make([]byte, bucketKeySize)) - rawSplitInfo := getFromBucket(tx, bucketName, key) - if len(rawSplitInfo) == 0 { +func getSplitInfo(tx *badger.Txn, addr oid.Address) (*objectSDK.SplitInfo, error) { + rawSplitInfo, err := valueOrNil(tx, rootKey(addr.Container(), addr.Object())) + if err != nil { + return nil, err + } + if len(rawSplitInfo) == 0 || bytes.Equal(zeroValue, rawSplitInfo) { return nil, ErrLackSplitInfo } splitInfo := objectSDK.NewSplitInfo() - err := splitInfo.Unmarshal(rawSplitInfo) + err = splitInfo.Unmarshal(rawSplitInfo) if err != nil { return nil, fmt.Errorf("can't unmarshal split info from root index: %w", err) } diff --git a/pkg/local_object_storage/metabase/expired.go b/pkg/local_object_storage/metabase/expired.go index aa2cb6f20..3b6dfc7c6 100644 --- a/pkg/local_object_storage/metabase/expired.go +++ b/pkg/local_object_storage/metabase/expired.go @@ -3,16 +3,13 @@ package meta import ( "context" "errors" - "fmt" "strconv" "time" - objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" - cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -44,44 +41,10 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A } result := make([]oid.Address, 0, len(addresses)) - containerIDToObjectIDs := make(map[cid.ID][]oid.ID) - for _, addr := range addresses { - containerIDToObjectIDs[addr.Container()] = append(containerIDToObjectIDs[addr.Container()], addr.Object()) - } - - err := db.boltDB.View(func(tx *bbolt.Tx) error { - for containerID, objectIDs := range containerIDToObjectIDs { - select { - case <-ctx.Done(): - return ErrInterruptIterator - default: - } - - expiredNeoFS, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpochNeoFS, epoch, containerID, objectIDs) - if err != nil { - return err - } - - expiredSys, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpoch, epoch, containerID, objectIDs) - if err != nil { - return err - } - - for _, o := range expiredNeoFS { - var a oid.Address - a.SetContainer(containerID) - a.SetObject(o) - result = append(result, a) - } - - for _, o := range expiredSys { - var a oid.Address - a.SetContainer(containerID) - a.SetObject(o) - result = append(result, a) - } - } - return nil + err := db.database.View(func(tx *badger.Txn) error { + var e error + result, e = selectExpiredObjects(ctx, tx, epoch, addresses) + return e }) if err != nil { return nil, metaerr.Wrap(err) @@ -90,76 +53,179 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A return result, nil } -func isExpiredWithAttribute(tx *bbolt.Tx, attr string, addr oid.Address, currEpoch uint64) bool { - // bucket with objects that have expiration attr - attrKey := make([]byte, bucketKeySize+len(attr)) - expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), attr, attrKey)) - if expirationBucket != nil { - // bucket that contains objects that expire in the current epoch - prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10))) - if prevEpochBkt != nil { - rawOID := objectKey(addr.Object(), 
make([]byte, objectKeySize))
-			if prevEpochBkt.Get(rawOID) != nil {
-				return true
-			}
+func isExpired(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (bool, error) {
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
+	}
+	it := tx.NewIterator(opts)
+	defer it.Close()
+
+	// Iteration goes in ascending order of expiration epoch.
+	// GC collects expired objects every epoch, so there should not be many items here.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return false, ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return false, err
+		}
+
+		if expEpoch >= currEpoch {
+			return false, nil // keys are ordered by epoch, so the remaining items are not expired yet.
+		}
+
+		curAddr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return false, err
+		}
+		if curAddr == addr {
+			return true, nil
 		}
 	}
-
-	return false
+	return false, nil
 }

-func selectExpiredObjectIDs(tx *bbolt.Tx, attr string, epoch uint64, containerID cid.ID, objectIDs []oid.ID) ([]oid.ID, error) {
-	result := make([]oid.ID, 0)
-	notResolved := make(map[oid.ID]struct{})
-	for _, oid := range objectIDs {
-		notResolved[oid] = struct{}{}
+func selectExpiredObjects(ctx context.Context, tx *badger.Txn, epoch uint64, objects []oid.Address) ([]oid.Address, error) {
+	result := make([]oid.Address, 0)
+	objMap := make(map[oid.Address]struct{})
+	for _, obj := range objects {
+		objMap[obj] = struct{}{}
 	}

-	expiredBuffer := make([]oid.ID, 0)
-	objectKeyBuffer := make([]byte, objectKeySize)
-
-	expirationBucketKey := make([]byte, bucketKeySize+len(attr))
-	expirationBucket := tx.Bucket(attributeBucketName(containerID, attr, expirationBucketKey))
-	if expirationBucket == nil {
-		return result, nil // all not expired
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
 	}
+	it := tx.NewIterator(opts)
+	defer it.Close()

-	err := expirationBucket.ForEach(func(epochExpBucketKey, _ []byte) error {
-		bucketExpiresAfter, err := strconv.ParseUint(string(epochExpBucketKey), 10, 64)
+	// Iteration goes in ascending order of expiration epoch.
+	// GC collects expired objects every epoch, so there should not be many items here.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
 		if err != nil {
-			return fmt.Errorf("could not parse expiration epoch: %w", err)
-		} else if bucketExpiresAfter >= epoch {
-			return nil
+			return nil, err
 		}

-		epochExpirationBucket := expirationBucket.Bucket(epochExpBucketKey)
-		if epochExpirationBucket == nil {
-			return nil
+		if expEpoch >= epoch {
+			return result, nil // keys are ordered by epoch, so the remaining items are not expired yet.
 		}
+
+		addr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return nil, err
+		}
+		if _, ok := objMap[addr]; ok {
+			result = append(result, addr)
 		}
-
-		for _, oid := range expiredBuffer {
-			delete(notResolved, oid)
-			result = append(result, oid)
 		}
-
-		if len(notResolved) == 0 {
-			return errBreakBucketForEach
-		}
-
-		return nil
-	})
-
-	if err != nil && !errors.Is(err, errBreakBucketForEach) {
-		return nil, err
 	}
-
 	return result, nil
 }
+
+// IterateExpired iterates over all objects in DB which are out of date
+// relative to epoch. Locked objects are not included (do not confuse
+// with objects of type LOCK).
+//
+// If h returns ErrInterruptIterator, nil is returned immediately.
+// Other errors of h are returned directly.
+func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
+	}()
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
+		trace.WithAttributes(
+			attribute.String("epoch", strconv.FormatUint(epoch, 10)),
+		))
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return ErrDegradedMode
+	}
+
+	err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		return iterateExpired(ctx, tx, epoch, h)
+	}))
+	success = err == nil
+	return err
+}
+
+func iterateExpired(ctx context.Context, tx *badger.Txn, epoch uint64, h ExpiredObjectHandler) error {
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
+	}
+	it := tx.NewIterator(opts)
+	defer it.Close()
+
+	// Iteration goes in ascending order of expiration epoch.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return err
+		}
+
+		if expEpoch >= epoch {
+			return nil // keys are ordered by epoch, so the remaining items are not expired yet.
+		}
+
+		addr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return err
+		}
+
+		// Ignore locked objects.
+		//
+		// To slightly optimize performance we could check only REGULAR objects
+		// (only they can be locked), but checking all types is more reliable.
+ isLocked, err := objectLocked(ctx, tx, addr.Container(), addr.Object()) + if err != nil { + return err + } + if isLocked { + continue + } + + objType, err := firstIrregularObjectType(tx, addr.Container(), addr.Object()) + if err != nil { + return err + } + + if err := h(&ExpiredObject{ + typ: objType, + addr: addr, + }); err != nil { + if errors.Is(err, ErrInterruptIterator) { + return nil + } + return err + } + } + return nil +} diff --git a/pkg/local_object_storage/metabase/expired_test.go b/pkg/local_object_storage/metabase/expired_test.go index bb98745ee..facf895b3 100644 --- a/pkg/local_object_storage/metabase/expired_test.go +++ b/pkg/local_object_storage/metabase/expired_test.go @@ -2,15 +2,54 @@ package meta_test import ( "context" + "strconv" "testing" + objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil" + meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase" cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" "github.com/stretchr/testify/require" ) +func TestExpiredIterationOrder(t *testing.T) { + db := newDB(t) + defer func() { require.NoError(t, db.Close()) }() + + containerID := cidtest.ID() + + for i := 1; i <= 1000; i++ { + obj := testutil.GenerateObject() + obj.SetContainerID(containerID) + setExpiration(obj, uint64(i)) + err := putBig(db, obj) + require.NoError(t, err) + } + + var previous uint64 + require.NoError(t, db.IterateExpired(context.Background(), 2000, func(eo *meta.ExpiredObject) error { + var prm meta.GetPrm + prm.SetAddress(eo.Address()) + obj, err := db.Get(context.Background(), prm) + require.NoError(t, err) + + var found bool + for _, attr := range obj.Header().Attributes() { + if attr.Key() == objectV2.SysAttributeExpEpoch { + found = true + epoch, err := strconv.ParseUint(attr.Value(), 10, 64) + require.NoError(t, err) + require.Equal(t, previous+1, epoch) + previous = epoch + } + } + require.True(t, found) + return nil + })) +} + func TestDB_SelectExpired(t *testing.T) { db := newDB(t) defer func() { require.NoError(t, db.Close()) }() diff --git a/pkg/local_object_storage/metabase/get.go b/pkg/local_object_storage/metabase/get.go index d9acd4ce2..a4109a07b 100644 --- a/pkg/local_object_storage/metabase/get.go +++ b/pkg/local_object_storage/metabase/get.go @@ -9,10 +9,9 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status" - cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -77,9 +76,8 @@ func (db *DB) Get(ctx context.Context, prm GetPrm) (res GetRes, err error) { currEpoch := db.epochState.CurrentEpoch() - err = db.boltDB.View(func(tx *bbolt.Tx) error { - key := make([]byte, addressKeySize) - res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch) + err = db.database.View(func(tx *badger.Txn) error { + res.hdr, err = get(ctx, tx, prm.addr, true, prm.raw, currEpoch) return err }) @@ -87,9 +85,13 @@ func (db *DB) Get(ctx 
context.Context, prm GetPrm) (res GetRes, err error) {
 	return res, metaerr.Wrap(err)
 }

-func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
+func get(ctx context.Context, tx *badger.Txn, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
 	if checkStatus {
-		switch objectStatus(tx, addr, currEpoch) {
+		st, err := objectStatus(ctx, tx, addr, currEpoch)
+		if err != nil {
+			return nil, err
+		}
+		switch st {
 		case 1:
 			return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
 		case 2:
@@ -98,78 +100,76 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw b
 			return nil, ErrObjectIsExpired
 		}
 	}
-
-	key = objectKey(addr.Object(), key)
-	cnr := addr.Container()
 	obj := objectSDK.New()
-	bucketName := make([]byte, bucketKeySize)

 	// check in primary index
-	data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}

-	data = getFromBucket(tx, ecInfoBucketName(cnr, bucketName), key)
-	if len(data) != 0 {
-		return nil, getECInfoError(tx, cnr, data)
+	children, err := selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), 1) // try to find any child
+	if err != nil {
+		return nil, err
+	}
+	if len(children) > 0 {
+		return nil, getECInfoError(ctx, tx, addr)
 	}

 	// if not found then check in tombstone index
-	data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err = valueOrNil(tx, tombstoneKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}

 	// if not found then check in locker index
-	data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err = valueOrNil(tx, lockersKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}

 	// if not found then check if object is a virtual
-	return getVirtualObject(tx, cnr, key, raw)
+	return getVirtualObject(ctx, tx, addr, raw)
 }

-func getFromBucket(tx *bbolt.Tx, name, key []byte) []byte {
-	bkt := tx.Bucket(name)
-	if bkt == nil {
-		return nil
-	}
-
-	return bkt.Get(key)
-}
-
-func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSDK.Object, error) {
+func getVirtualObject(ctx context.Context, tx *badger.Txn, addr oid.Address, raw bool) (*objectSDK.Object, error) {
 	if raw {
-		return nil, getSplitInfoError(tx, cnr, key)
+		return nil, getSplitInfoError(tx, addr)
 	}

-	bucketName := make([]byte, bucketKeySize)
-	parentBucket := tx.Bucket(parentBucketName(cnr, bucketName))
-	if parentBucket == nil {
-		return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
-	}
-
-	relativeLst, err := decodeList(parentBucket.Get(key))
+	binObjIDs, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1)
 	if err != nil {
 		return nil, err
 	}

-	if len(relativeLst) == 0 { // this should never happen though
+	if len(binObjIDs) == 0 { // this should never happen though
 		return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
 	}

-	// pick last item, for now there is not difference which address to pick
-	// but later list might be sorted so first or last value can be more
-	// prioritized to choose
-	virtualOID := relativeLst[len(relativeLst)-1]
-	data :=
getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID) - - child := objectSDK.New() - - err = child.Unmarshal(data) + phyObjAddr, err := addressOfTargetFromParentKey(binObjIDs[0]) if err != nil { + return nil, err + } + + data, err := valueOrNil(tx, primaryKey(phyObjAddr.Container(), phyObjAddr.Object())) + if err != nil { + return nil, err + } + + if data == nil { // this should never happen though #2 + return nil, logicerr.Wrap(new(apistatus.ObjectNotFound)) + } + child := objectSDK.New() + if err := child.Unmarshal(data); err != nil { return nil, fmt.Errorf("can't unmarshal child with parent: %w", err) } @@ -182,8 +182,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD return par, nil } -func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error { - splitInfo, err := getSplitInfo(tx, cnr, key) +func getSplitInfoError(tx *badger.Txn, addr oid.Address) error { + splitInfo, err := getSplitInfo(tx, addr) if err == nil { return logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo)) } @@ -191,27 +191,50 @@ func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error { return logicerr.Wrap(new(apistatus.ObjectNotFound)) } -func getECInfoError(tx *bbolt.Tx, cnr cid.ID, data []byte) error { - keys, err := decodeList(data) - if err != nil { - return err - } - ecInfo := objectSDK.NewECInfo() - for _, key := range keys { - // check in primary index - ojbData := getFromBucket(tx, primaryBucketName(cnr, make([]byte, bucketKeySize)), key) - if len(ojbData) != 0 { - obj := objectSDK.New() - if err := obj.Unmarshal(ojbData); err != nil { +func getECInfoError(ctx context.Context, tx *badger.Txn, addr oid.Address) error { + var chunkAddresses []oid.Address + for { + keys, err := selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), batchSize) + if err != nil { + return err + } + for _, key := range keys { + chunkAddress, err := addressOfChunkFromECInfoKey(key) + if err != nil { return err } - chunk := objectSDK.ECChunk{} - id, _ := obj.ID() - chunk.SetID(id) - chunk.Index = obj.ECHeader().Index() - chunk.Total = obj.ECHeader().Total() - ecInfo.AddChunk(chunk) + chunkAddresses = append(chunkAddresses, chunkAddress) } + if len(keys) < batchSize { + break + } + } + ecInfo := objectSDK.NewECInfo() + for _, chunkAddress := range chunkAddresses { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + objData, err := valueOrNil(tx, primaryKey(chunkAddress.Container(), chunkAddress.Object())) + if err != nil { + return err + } + if len(objData) == 0 { + continue + } + + obj := objectSDK.New() + if err := obj.Unmarshal(objData); err != nil { + return err + } + chunk := objectSDK.ECChunk{} + id, _ := obj.ID() + chunk.SetID(id) + chunk.Index = obj.ECHeader().Index() + chunk.Total = obj.ECHeader().Total() + ecInfo.AddChunk(chunk) } return logicerr.Wrap(objectSDK.NewECInfoError(ecInfo)) } diff --git a/pkg/local_object_storage/metabase/get_test.go b/pkg/local_object_storage/metabase/get_test.go index 247ddf9cd..01879bf4d 100644 --- a/pkg/local_object_storage/metabase/get_test.go +++ b/pkg/local_object_storage/metabase/get_test.go @@ -8,7 +8,6 @@ import ( "os" "runtime" "testing" - "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil" @@ -215,11 +214,8 @@ func BenchmarkGet(b *testing.B) { } func benchmarkGet(b *testing.B, numOfObj int) { - prepareDb := func(batchSize int) (*meta.DB, []oid.Address) { - db := newDB(b, - 
meta.WithMaxBatchSize(batchSize), - meta.WithMaxBatchDelay(10*time.Millisecond), - ) + prepareDb := func(_ int) (*meta.DB, []oid.Address) { + db := newDB(b) defer func() { require.NoError(b, db.Close()) }() addrs := make([]oid.Address, 0, numOfObj) diff --git a/pkg/local_object_storage/metabase/graveyard.go b/pkg/local_object_storage/metabase/graveyard.go index 80d40fb78..3e9d68d96 100644 --- a/pkg/local_object_storage/metabase/graveyard.go +++ b/pkg/local_object_storage/metabase/graveyard.go @@ -9,8 +9,9 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" + cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" ) // GarbageObject represents descriptor of the @@ -80,8 +81,8 @@ func (db *DB) IterateOverGarbage(ctx context.Context, p GarbageIterationPrm) err return ErrDegradedMode } - err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error { - return db.iterateDeletedObj(tx, gcHandler{p.h}, p.offset) + err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error { + return db.iterateDeletedObj(ctx, tx, gcHandler{p.h}, p.offset) })) success = err == nil return err @@ -160,8 +161,8 @@ func (db *DB) IterateOverGraveyard(ctx context.Context, p GraveyardIterationPrm) return ErrDegradedMode } - return metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error { - return db.iterateDeletedObj(tx, graveyardHandler{p.h}, p.offset) + return metaerr.Wrap(db.database.View(func(tx *badger.Txn) error { + return db.iterateDeletedObj(ctx, tx, graveyardHandler{p.h}, p.offset) })) } @@ -195,40 +196,53 @@ func (g graveyardHandler) handleKV(k, v []byte) error { return g.h(o) } -func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address) error { - var bkt *bbolt.Bucket +func (db *DB) iterateDeletedObj(ctx context.Context, tx *badger.Txn, h kvHandler, offset *oid.Address) error { + var prefix []byte switch t := h.(type) { case graveyardHandler: - bkt = tx.Bucket(graveyardBucketName) + prefix = []byte{graveyardPrefix} case gcHandler: - bkt = tx.Bucket(garbageBucketName) + prefix = []byte{garbagePrefix} default: panic(fmt.Sprintf("metabase: unknown iteration object hadler: %T", t)) } - - c := bkt.Cursor() - var k, v []byte - - if offset == nil { - k, v = c.First() - } else { - rawAddr := addressKey(*offset, make([]byte, addressKeySize)) - - k, v = c.Seek(rawAddr) - if bytes.Equal(k, rawAddr) { - // offset was found, move - // cursor to the next element - k, v = c.Next() - } + var seekKey []byte + if offset != nil { + cidBytes := make([]byte, cidSize) + offset.Container().Encode(cidBytes) + oidBytes := make([]byte, objectKeySize) + offset.Object().Encode(oidBytes) + seekKey = append(prefix, cidBytes...) + seekKey = append(seekKey, oidBytes...) 
}

-	for ; k != nil; k, v = c.Next() {
-		err := h.handleKV(k, v)
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         prefix,
+		PrefetchValues: true,
+	})
+	defer it.Close()
+
+	for it.Seek(seekKey); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		if bytes.Equal(it.Item().Key(), seekKey) {
+			continue
+		}
+
+		key := it.Item().KeyCopy(nil)
+		value, err := it.Item().ValueCopy(nil)
 		if err != nil {
+			return err
+		}
+		if err = h.handleKV(key, value); err != nil {
 			if errors.Is(err, ErrInterruptIterator) {
 				return nil
 			}
 			return err
 		}
 	}
 
@@ -237,7 +251,7 @@ func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address)
 }
 
 func garbageFromKV(k []byte) (res GarbageObject, err error) {
-	err = decodeAddressFromKey(&res.addr, k)
+	res.addr, err = addressFromGarbageKey(k)
 	if err != nil {
 		err = fmt.Errorf("could not parse address: %w", err)
 	}
@@ -246,15 +260,44 @@ func garbageFromKV(k []byte) (res GarbageObject, err error) {
 }
 
 func graveFromKV(k, v []byte) (res TombstonedObject, err error) {
-	if err = decodeAddressFromKey(&res.addr, k); err != nil {
+	res.addr, err = addressFromGraveyardKey(k)
+	if err != nil {
 		err = fmt.Errorf("decode tombstone target from key: %w", err)
-	} else if err = decodeAddressFromKey(&res.tomb, v); err != nil {
-		err = fmt.Errorf("decode tombstone address from value: %w", err)
+		return
+	}
+	res.tomb, err = decodeAddressFromGrave(v)
+	if err != nil {
+		err = fmt.Errorf("decode tombstone address from value: %w", err)
+		return
 	}
-
 	return
 }
 
+func encodeAddressToGrave(addr oid.Address) []byte {
+	value := make([]byte, cidSize+objectKeySize)
+	addr.Container().Encode(value)
+	addr.Object().Encode(value[cidSize:])
+	return value
+}
+
+func decodeAddressFromGrave(v []byte) (oid.Address, error) {
+	if len(v) != cidSize+objectKeySize {
+		return oid.Address{}, errInvalidValueLenght
+	}
+	var cont cid.ID
+	if err := cont.Decode(v[:cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(v[cidSize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
 // DropGraves deletes tombstoned objects from the
 // graveyard bucket.
// @@ -280,16 +323,9 @@ func (db *DB) DropGraves(ctx context.Context, tss []TombstonedObject) error { return ErrReadOnlyMode } - buf := make([]byte, addressKeySize) - - return db.boltDB.Update(func(tx *bbolt.Tx) error { - bkt := tx.Bucket(graveyardBucketName) - if bkt == nil { - return nil - } - + return db.database.Update(func(tx *badger.Txn) error { for _, ts := range tss { - err := bkt.Delete(addressKey(ts.Address(), buf)) + err := tx.Delete(graveyardKey(ts.Address().Container(), ts.Address().Object())) if err != nil { return err } diff --git a/pkg/local_object_storage/metabase/index_test.go b/pkg/local_object_storage/metabase/index_test.go deleted file mode 100644 index 45b9bc756..000000000 --- a/pkg/local_object_storage/metabase/index_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package meta - -import ( - "crypto/rand" - "math" - mrand "math/rand" - "testing" - "time" - - "github.com/nspcc-dev/neo-go/pkg/io" - "github.com/stretchr/testify/require" -) - -func Test_getVarUint(t *testing.T) { - data := make([]byte, 10) - for _, val := range []uint64{0, 0xfc, 0xfd, 0xfffe, 0xffff, 0xfffffffe, 0xffffffff, math.MaxUint64} { - expSize := io.PutVarUint(data, val) - actual, actSize, err := getVarUint(data) - require.NoError(t, err) - require.Equal(t, val, actual) - require.Equal(t, expSize, actSize, "value: %x", val) - - _, _, err = getVarUint(data[:expSize-1]) - require.Error(t, err) - } -} - -func Test_decodeList(t *testing.T) { - t.Run("empty", func(t *testing.T) { - lst, err := decodeList(nil) - require.NoError(t, err) - require.True(t, len(lst) == 0) - }) - t.Run("empty, 0 len", func(t *testing.T) { - lst, err := decodeList([]byte{0}) - require.NoError(t, err) - require.True(t, len(lst) == 0) - }) - t.Run("bad len", func(t *testing.T) { - _, err := decodeList([]byte{0xfe}) - require.Error(t, err) - }) - t.Run("random", func(t *testing.T) { - r := mrand.New(mrand.NewSource(time.Now().Unix())) - expected := make([][]byte, 20) - for i := range expected { - expected[i] = make([]byte, r.Uint32()%10) - rand.Read(expected[i]) - } - - data, err := encodeList(expected) - require.NoError(t, err) - - actual, err := decodeList(data) - require.NoError(t, err) - require.Equal(t, expected, actual) - - t.Run("unexpected EOF", func(t *testing.T) { - for i := 1; i < len(data)-1; i++ { - _, err := decodeList(data[:i]) - require.Error(t, err) - } - }) - }) -} diff --git a/pkg/local_object_storage/metabase/inhume.go b/pkg/local_object_storage/metabase/inhume.go index c265fb217..8b2ce330c 100644 --- a/pkg/local_object_storage/metabase/inhume.go +++ b/pkg/local_object_storage/metabase/inhume.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "slices" "time" storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log" @@ -15,7 +16,7 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" ) // InhumePrm encapsulates parameters for Inhume operation. 
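// Illustrative sketch, not part of the change set: the graveyard helpers
// added to graveyard.go above replace the old bbolt graveyard bucket with
// flat keys, assuming cidSize == objectKeySize == 32 as elsewhere in this
// patch:
//
//	key   = graveyardPrefix | container ID (32 B) | object ID (32 B)
//	value = tombstone container ID (32 B) | tombstone object ID (32 B)
//
// so the value round-trips through the new helpers:
//
//	v := encodeAddressToGrave(tomb)       // 64-byte value
//	got, err := decodeAddressFromGrave(v) // err == nil, got == tomb
//
// and DropGraves above removes a grave with a single tx.Delete of its key.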
@@ -180,9 +181,11 @@ func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) { res := InhumeRes{ inhumedByCnrID: make(map[cid.ID]ObjectCounters), } + bucketIDs, release := db.acquireBucketIDs(prm) + defer release() currEpoch := db.epochState.CurrentEpoch() - err := db.boltDB.Update(func(tx *bbolt.Tx) error { - return db.inhumeTx(tx, currEpoch, prm, &res) + err := db.database.Update(func(tx *badger.Txn) error { + return db.inhumeTx(ctx, tx, currEpoch, prm, bucketIDs, &res) }) success = err == nil if success { @@ -195,48 +198,56 @@ func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) { return res, metaerr.Wrap(err) } -func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes) error { - garbageBKT := tx.Bucket(garbageBucketName) - graveyardBKT := tx.Bucket(graveyardBucketName) +func (db *DB) acquireBucketIDs(prm InhumePrm) (map[cid.ID]uint16, func()) { + unique := make(map[cid.ID]struct{}) + for _, addr := range prm.target { + unique[addr.Container()] = struct{}{} + } + containers := make([]cid.ID, 0, len(unique)) + for contID := range unique { + containers = append(containers, contID) + } + slices.SortFunc(containers, func(lhs, rhs cid.ID) int { + return bytes.Compare(lhs[:], rhs[:]) + }) + result := make(map[cid.ID]uint16, len(unique)) + releases := make([]func(), len(unique)) - bkt, value, err := db.getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT, &prm) + for i, contID := range containers { + result[contID], releases[i] = db.bucketIDs.BucketID(contID) + } + return result, func() { + for i := range releases { + releases[len(releases)-i-1]() + } + } +} + +func (db *DB) inhumeTx(ctx context.Context, tx *badger.Txn, epoch uint64, prm InhumePrm, bucketIDs map[cid.ID]uint16, res *InhumeRes) error { + keyer, value, err := getInhumeTargetBucketAndValue(tx, prm) if err != nil { return err } - - buf := make([]byte, addressKeySize) for i := range prm.target { id := prm.target[i].Object() cnr := prm.target[i].Container() - - // prevent locked objects to be inhumed - if !prm.forceRemoval && objectLocked(tx, cnr, id) { - return new(apistatus.ObjectLocked) - } - var lockWasChecked bool - - // prevent lock objects to be inhumed - // if `Inhume` was called not with the - // `WithForceGCMark` option + var ecErr *objectSDK.ECInfoError if !prm.forceRemoval { - if isLockObject(tx, cnr, id) { - return ErrLockObjectRemoval + if err := checkNotLockerOrLocked(ctx, tx, cnr, id); err != nil { + return err } - lockWasChecked = true } - obj, err := db.get(tx, prm.target[i], buf, false, true, epoch) - targetKey := addressKey(prm.target[i], buf) - var ecErr *objectSDK.ECInfoError + obj, err := get(ctx, tx, prm.target[i], false, true, epoch) if err == nil { - err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, obj, res) + err = db.updateDeleteInfo(tx, prm.target[i], obj, bucketIDs, res) if err != nil { return err } } else if errors.As(err, &ecErr) { - err = db.inhumeECInfo(tx, epoch, prm.tomb, res, garbageBKT, graveyardBKT, ecErr.ECInfo(), cnr, bkt, value, targetKey) + err = db.inhumeECInfo(ctx, tx, epoch, keyer, value, res, ecErr.ECInfo(), cnr, bucketIDs) if err != nil { return err } @@ -244,18 +255,18 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes if prm.tomb != nil { var isTomb bool - isTomb, err = db.markAsGC(graveyardBKT, garbageBKT, targetKey) + isTomb, err = markAsGC(tx, prm.target[i]) if err != nil { return err } - if isTomb { continue } } // consider checking if target is already in 
graveyard?
-		err = bkt.Put(targetKey, value)
+		key := keyer(prm.target[i])
+		err = tx.Set(key, value)
 		if err != nil {
 			return err
 		}
@@ -268,22 +279,24 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes
 			// the LOCK type
 			continue
 		}
-
-		if isLockObject(tx, cnr, id) {
+		isLock, err := isLockObject(tx, cnr, id)
+		if err != nil {
+			return err
+		}
+		if isLock {
 			res.deletedLockObj = append(res.deletedLockObj, prm.target[i])
 		}
 		}
 	}
 
-	return db.applyInhumeResToCounters(tx, res)
+	return db.applyInhumeResToCounters(tx, bucketIDs, res)
 }
 
-func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *InhumeRes,
-	garbageBKT *bbolt.Bucket, graveyardBKT *bbolt.Bucket,
-	ecInfo *objectSDK.ECInfo, cnr cid.ID, targetBucket *bbolt.Bucket, value []byte, targetKey []byte,
+func (db *DB) inhumeECInfo(ctx context.Context, tx *badger.Txn, epoch uint64,
+	keyer func(addr oid.Address) []byte, value []byte,
+	res *InhumeRes, ecInfo *objectSDK.ECInfo, cnr cid.ID, bucketIDs map[cid.ID]uint16,
 ) error {
 	for _, chunk := range ecInfo.Chunks {
-		chunkBuf := make([]byte, addressKeySize)
 		var chunkAddr oid.Address
 		chunkAddr.SetContainer(cnr)
 		var chunkID oid.ID
@@ -292,22 +305,16 @@ func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *I
 			return err
 		}
 		chunkAddr.SetObject(chunkID)
-		chunkObj, err := db.get(tx, chunkAddr, chunkBuf, false, true, epoch)
+		chunkObj, err := get(ctx, tx, chunkAddr, false, true, epoch)
 		if err != nil {
 			return err
 		}
-		err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, chunkObj, res)
+		err = db.updateDeleteInfo(tx, chunkAddr, chunkObj, bucketIDs, res)
 		if err != nil {
 			return err
 		}
-		chunkKey := addressKey(chunkAddr, chunkBuf)
-		if tomb != nil {
-			_, err = db.markAsGC(graveyardBKT, garbageBKT, chunkKey)
-			if err != nil {
-				return err
-			}
-		}
-		err = targetBucket.Put(chunkKey, value)
+		key := keyer(chunkAddr)
+		err = tx.Set(key, value)
 		if err != nil {
 			return err
 		}
@@ -315,15 +322,38 @@ func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *I
 	return nil
 }
 
-func (db *DB) applyInhumeResToCounters(tx *bbolt.Tx, res *InhumeRes) error {
-	if err := db.updateShardObjectCounter(tx, logical, res.LogicInhumed(), false); err != nil {
+func checkNotLockerOrLocked(ctx context.Context, tx *badger.Txn, cnr cid.ID, id oid.ID) error {
+	// prevent locked objects from being inhumed
+	locked, err := objectLocked(ctx, tx, cnr, id)
+	if err != nil {
 		return err
 	}
-	if err := db.updateShardObjectCounter(tx, user, res.UserInhumed(), false); err != nil {
+	if locked {
+		return new(apistatus.ObjectLocked)
+	}
+	// prevent lock objects from being inhumed
+	// if `Inhume` was called without the
+	// `WithForceGCMark` option
+	isLock, err := isLockObject(tx, cnr, id)
+	if err != nil {
 		return err
 	}
+	if isLock {
+		return ErrLockObjectRemoval
+	}
+	return nil
+}
 
-	return db.updateContainerCounter(tx, res.inhumedByCnrID, false)
+func (db *DB) applyInhumeResToCounters(tx *badger.Txn, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
+	counters := make(map[cid.ID]objectCounterValue, len(res.inhumedByCnrID))
+	for contID, inhumed := range res.inhumedByCnrID {
+		counters[contID] = objectCounterValue{
+			Logic: -1 * int64(inhumed.Logic),
+			Phy:   -1 * int64(inhumed.Phy),
+			User:  -1 * int64(inhumed.User),
+		}
+	}
+	return updateContainerCounter(tx, counters, bucketIDs)
 }
 
 // getInhumeTargetBucketAndValue return target bucket to store inhume result and value that will be put in the bucket.
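// Illustrative sketch, not part of the change set: applyInhumeResToCounters
// above turns the accumulated inhume totals into negative deltas. Inhuming
// three physical objects in a container, two of them logical and one a user
// object, applies
//
//	objectCounterValue{Logic: -2, Phy: -3, User: -1}
//
// under that container's bucketID; updateContainerCounter (defined outside
// this excerpt) is assumed to merge such deltas with the counters written by
// Put.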
@@ -336,31 +366,33 @@ func (db *DB) applyInhumeResToCounters(tx *bbolt.Tx, res *InhumeRes) error { // 1. tombstone address if Inhume was called with // a Tombstone // 2. zeroValue if Inhume was called with a GC mark -func (db *DB) getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT *bbolt.Bucket, prm *InhumePrm) (targetBucket *bbolt.Bucket, value []byte, err error) { +func getInhumeTargetBucketAndValue(tx *badger.Txn, prm InhumePrm) (key func(addr oid.Address) []byte, value []byte, err error) { if prm.tomb != nil { - targetBucket = graveyardBKT - tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize)) - // it is forbidden to have a tomb-on-tomb in FrostFS, // so graveyard keys must not be addresses of tombstones - data := targetBucket.Get(tombKey) - if data != nil { - err := targetBucket.Delete(tombKey) + tombKey := graveyardKey(prm.tomb.Container(), prm.tomb.Object()) + v, err := valueOrNil(tx, tombKey) + if err != nil { + return nil, nil, err + } + if v != nil { + err := tx.Delete(tombKey) if err != nil { return nil, nil, fmt.Errorf("could not remove grave with tombstone key: %w", err) } } - value = tombKey - } else { - targetBucket = garbageBKT - value = zeroValue + return func(addr oid.Address) []byte { + return graveyardKey(addr.Container(), addr.Object()) + }, encodeAddressToGrave(*prm.tomb), nil } - return targetBucket, value, nil + return func(addr oid.Address) []byte { + return garbageKey(addr.Container(), addr.Object()) + }, zeroValue, nil } -func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool, error) { - targetIsTomb, err := isTomb(graveyardBKT, key) +func markAsGC(tx *badger.Txn, addr oid.Address) (bool, error) { + targetIsTomb, err := isTomb(tx, addr) if err != nil { return false, err } @@ -372,19 +404,27 @@ func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool // if tombstone appears object must be // additionally marked with GC - return false, garbageBKT.Put(key, zeroValue) + key := garbageKey(addr.Container(), addr.Object()) + return false, tx.Set(key, zeroValue) } -func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Bucket, targetKey []byte, cnr cid.ID, obj *objectSDK.Object, res *InhumeRes) error { - containerID, _ := obj.ContainerID() - if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 { - res.storeDeletionInfo(containerID, obj.PayloadSize(), IsUserObject(obj)) +func (db *DB) updateDeleteInfo(tx *badger.Txn, addr oid.Address, obj *objectSDK.Object, bucketIDs map[cid.ID]uint16, res *InhumeRes) error { + st, err := inGraveyardWithKey(tx, addr) + if err != nil { + return err + } + if st == 0 { + res.storeDeletionInfo(addr.Container(), obj.PayloadSize(), IsUserObject(obj)) } // if object is stored, and it is regular object then update bucket // with container size estimations if obj.Type() == objectSDK.TypeRegular { - err := changeContainerSize(tx, cnr, obj.PayloadSize(), false) + bucketID, found := bucketIDs[addr.Container()] + if !found { + panic("bucketID not found") + } + err := changeContainerSize(tx, addr.Container(), -1*int64(obj.PayloadSize()), bucketID) if err != nil { return err } @@ -392,25 +432,39 @@ func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Buc return nil } -func isTomb(graveyardBucket *bbolt.Bucket, key []byte) (bool, error) { +func isTomb(tx *badger.Txn, addr oid.Address) (bool, error) { targetIsTomb := false + expectedValue := make([]byte, cidSize+objectKeySize) + addr.Container().Encode(expectedValue) + 
addr.Object().Encode(expectedValue[cidSize:]) + + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: []byte{graveyardPrefix}, + PrefetchValues: true, + }) + defer it.Close() // iterate over graveyard and check if target address // is the address of tombstone in graveyard. - err := graveyardBucket.ForEach(func(_, v []byte) error { - // check if graveyard has record with key corresponding - // to tombstone address (at least one) - targetIsTomb = bytes.Equal(v, key) + // check if graveyard has record with key corresponding + // to tombstone address (at least one) + for it.Seek(nil); it.ValidForPrefix([]byte{graveyardPrefix}); it.Next() { + err := it.Item().Value(func(val []byte) error { + targetIsTomb = bytes.Equal(expectedValue, val) - if targetIsTomb { - // break bucket iterator - return errBreakBucketForEach + if targetIsTomb { + // break bucket iterator + return errBreakBucketForEach + } + return nil + }) + if err != nil { + if errors.Is(err, errBreakBucketForEach) { + return targetIsTomb, nil + } + return false, err } - - return nil - }) - if err != nil && !errors.Is(err, errBreakBucketForEach) { - return false, err } return targetIsTomb, nil } diff --git a/pkg/local_object_storage/metabase/iterators.go b/pkg/local_object_storage/metabase/iterators.go index 7b60b7d50..9f2bd99a7 100644 --- a/pkg/local_object_storage/metabase/iterators.go +++ b/pkg/local_object_storage/metabase/iterators.go @@ -3,21 +3,15 @@ package meta import ( "context" "errors" - "fmt" - "strconv" "time" - objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + "github.com/dgraph-io/badger/v4" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" ) // ExpiredObject is a descriptor of expired object from DB. @@ -44,99 +38,7 @@ type ExpiredObjectHandler func(*ExpiredObject) error // as a "break" keyword. var ErrInterruptIterator = logicerr.New("iterator is interrupted") -// IterateExpired iterates over all objects in DB which are out of date -// relative to epoch. Locked objects are not included (do not confuse -// with objects of type LOCK). -// -// If h returns ErrInterruptIterator, nil returns immediately. -// Returns other errors of h directly. 
-func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
-	var (
-		startedAt = time.Now()
-		success   = false
-	)
-	defer func() {
-		db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
-	}()
-	_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
-		trace.WithAttributes(
-			attribute.String("epoch", strconv.FormatUint(epoch, 10)),
-		))
-	defer span.End()
-
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return ErrDegradedMode
-	}
-
-	err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		return db.iterateExpired(tx, epoch, h)
-	}))
-	success = err == nil
-	return err
-}
-
-func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler) error {
-	err := tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
-		cidBytes := cidFromAttributeBucket(name, objectV2.SysAttributeExpEpoch)
-		if cidBytes == nil {
-			cidBytes = cidFromAttributeBucket(name, objectV2.SysAttributeExpEpochNeoFS)
-			if cidBytes == nil {
-				return nil
-			}
-		}
-
-		var cnrID cid.ID
-		err := cnrID.Decode(cidBytes)
-		if err != nil {
-			return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
-		}
-
-		return b.ForEachBucket(func(expKey []byte) error {
-			bktExpired := b.Bucket(expKey)
-			expiresAfter, err := strconv.ParseUint(string(expKey), 10, 64)
-			if err != nil {
-				return fmt.Errorf("could not parse expiration epoch: %w", err)
-			} else if expiresAfter >= epoch {
-				return nil
-			}
-
-			return bktExpired.ForEach(func(idKey, _ []byte) error {
-				var id oid.ID
-
-				err = id.Decode(idKey)
-				if err != nil {
-					return fmt.Errorf("could not parse ID of expired object: %w", err)
-				}
-
-				// Ignore locked objects.
-				//
-				// To slightly optimize performance we can check only REGULAR objects
-				// (only they can be locked), but it's more reliable.
-				if objectLocked(tx, cnrID, id) {
-					return nil
-				}
-
-				var addr oid.Address
-				addr.SetContainer(cnrID)
-				addr.SetObject(id)
-
-				return h(&ExpiredObject{
-					typ:  firstIrregularObjectType(tx, cnrID, idKey),
-					addr: addr,
-				})
-			})
-		})
-	})
-
-	if errors.Is(err, ErrInterruptIterator) {
-		err = nil
-	}
-
-	return err
-}
+var errInvalidAttributeKey = errors.New("invalid user attribute key")
 
 // IterateCoveredByTombstones iterates over all objects in DB which are covered
 // by tombstone with string address from tss.
Locked objects are not included @@ -164,69 +66,99 @@ func (db *DB) IterateCoveredByTombstones(ctx context.Context, tss map[string]oid return ErrDegradedMode } - return db.boltDB.View(func(tx *bbolt.Tx) error { - return db.iterateCoveredByTombstones(tx, tss, h) + return db.database.View(func(tx *badger.Txn) error { + return db.iterateCoveredByTombstones(ctx, tx, tss, h) }) } -func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Address, h func(oid.Address) error) error { - bktGraveyard := tx.Bucket(graveyardBucketName) +func (db *DB) iterateCoveredByTombstones(ctx context.Context, tx *badger.Txn, tss map[string]oid.Address, h func(oid.Address) error) error { + prefix := []byte{graveyardPrefix} + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + PrefetchValues: true, + }) + defer it.Close() - err := bktGraveyard.ForEach(func(k, v []byte) error { - var addr oid.Address - if err := decodeAddressFromKey(&addr, v); err != nil { + for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + var tombstoneAddress oid.Address + if err := it.Item().Value(func(val []byte) error { + var e error + tombstoneAddress, e = decodeAddressFromGrave(val) + return e + }); err != nil { return err } - if _, ok := tss[addr.EncodeToString()]; ok { - var addr oid.Address + if _, ok := tss[tombstoneAddress.EncodeToString()]; !ok { + continue + } - err := decodeAddressFromKey(&addr, k) - if err != nil { - return fmt.Errorf("could not parse address of the object under tombstone: %w", err) - } + var objectAddress oid.Address + var err error + objectAddress, err = addressFromGraveyardKey(it.Item().Key()) + if err != nil { + return err + } - if objectLocked(tx, addr.Container(), addr.Object()) { + isLocked, err := objectLocked(ctx, tx, objectAddress.Container(), objectAddress.Object()) + if err != nil { + return err + } + if isLocked { + continue + } + if err := h(objectAddress); err != nil { + if errors.Is(err, ErrInterruptIterator) { return nil } - - return h(addr) + return err } - - return nil - }) - - if errors.Is(err, ErrInterruptIterator) { - err = nil } - - return err + return nil } -func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID, *objectSDK.Object) error) error { - var cid cid.ID - var oid oid.ID - obj := objectSDK.New() +func iteratePhyObjects(tx *badger.Txn, f func(cid.ID, oid.ID, *objectSDK.Object) error) error { + if err := iteratePhyObjectsWithPrefix(tx, primaryPrefix, f); err != nil { + return err + } + if err := iteratePhyObjectsWithPrefix(tx, lockersPrefix, f); err != nil { + return err + } + if err := iteratePhyObjectsWithPrefix(tx, tombstonePrefix, f); err != nil { + return err + } + return nil +} - return tx.ForEach(func(name []byte, b *bbolt.Bucket) error { - b58CID, postfix := parseContainerIDWithPrefix(&cid, name) - if len(b58CID) == 0 { - return nil - } - - switch postfix { - case primaryPrefix, - lockersPrefix, - tombstonePrefix: - default: - return nil - } - - return b.ForEach(func(k, v []byte) error { - if oid.Decode(k) == nil && obj.Unmarshal(v) == nil { - return f(cid, oid, obj) - } - - return nil - }) +func iteratePhyObjectsWithPrefix(tx *badger.Txn, typePrefix byte, f func(cid.ID, oid.ID, *objectSDK.Object) error) error { + prefix := []byte{typePrefix} + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + PrefetchValues: true, }) + defer it.Close() + 
+ for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + addr, err := addressFromKey(typePrefix, it.Item().Key()) + if err != nil { + return err + } + obj := objectSDK.New() + if err := it.Item().Value(func(val []byte) error { + return obj.Unmarshal(val) + }); err != nil { + return err + } + if err := f(addr.Container(), addr.Object(), obj); err != nil { + return err + } + } + return nil } diff --git a/pkg/local_object_storage/metabase/list.go b/pkg/local_object_storage/metabase/list.go index b4326a92c..fcd8ec9ad 100644 --- a/pkg/local_object_storage/metabase/list.go +++ b/pkg/local_object_storage/metabase/list.go @@ -2,16 +2,17 @@ package meta import ( "context" + "fmt" + "slices" "time" objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" - cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -21,10 +22,33 @@ import ( // cursor. Use nil cursor object to start listing again. var ErrEndOfListing = logicerr.New("end of object listing") +type listPrefix struct { + prefix []byte + keyParser func(k []byte) (oid.Address, error) + objectType objectSDK.Type +} + +var listPrefixes = []listPrefix{ + { + prefix: []byte{primaryPrefix}, + keyParser: addressFromPrimaryKey, + objectType: objectSDK.TypeRegular, + }, + { + prefix: []byte{lockersPrefix}, + keyParser: addressFromLockersKey, + objectType: objectSDK.TypeLock, + }, + { + prefix: []byte{tombstonePrefix}, + keyParser: addressFromTombstoneKey, + objectType: objectSDK.TypeTombstone, + }, +} + // Cursor is a type for continuous object listing. type Cursor struct { - bucketName []byte - inBucketOffset []byte + lastKey []byte } // ListPrm contains parameters for ListWithCursor operation. 
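// Illustrative, self-contained sketch, not part of the change set: only the
// badger v4 API is used and the names are made up. The Cursor above works
// because every listed key starts with a one-byte type prefix, so
// cursor.lastKey[0] selects the prefix to resume in, and seeking past lastKey
// continues the scan, as listWithCursor does below:
package sketch

import "github.com/dgraph-io/badger/v4"

// keysAfter returns up to limit keys under prefix, strictly after lastSeen
// (nil lastSeen starts from the beginning).
func keysAfter(txn *badger.Txn, prefix, lastSeen []byte, limit int) [][]byte {
	it := txn.NewIterator(badger.IteratorOptions{Prefix: prefix})
	defer it.Close()

	var keys [][]byte
	for it.Seek(lastSeen); it.ValidForPrefix(prefix) && len(keys) < limit; it.Next() {
		if lastSeen != nil && string(it.Item().Key()) == string(lastSeen) {
			continue // do not return the cursor position itself
		}
		keys = append(keys, it.Item().KeyCopy(nil))
	}
	return keys
}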
@@ -89,173 +113,109 @@ func (db *DB) ListWithCursor(ctx context.Context, prm ListPrm) (res ListRes, err return res, ErrDegradedMode } - result := make([]objectcore.Info, 0, prm.count) + if prm.count == 0 { + return ListRes{}, ErrEndOfListing + } - err = db.boltDB.View(func(tx *bbolt.Tx) error { - res.addrList, res.cursor, err = db.listWithCursor(tx, result, prm.count, prm.cursor) + err = db.database.View(func(tx *badger.Txn) error { + res.addrList, res.cursor, err = db.listWithCursor(ctx, tx, prm.count, prm.cursor) return err }) success = err == nil return res, metaerr.Wrap(err) } -func (db *DB) listWithCursor(tx *bbolt.Tx, result []objectcore.Info, count int, cursor *Cursor) ([]objectcore.Info, *Cursor, error) { - threshold := cursor == nil // threshold is a flag to ignore cursor - var bucketName []byte - var err error - - c := tx.Cursor() - name, _ := c.First() - - if !threshold { - name, _ = c.Seek(cursor.bucketName) +func (db *DB) listWithCursor(ctx context.Context, tx *badger.Txn, count int, cursor *Cursor) ([]objectcore.Info, *Cursor, error) { + var prefix []byte + var lastSeen []byte + if cursor != nil { + prefix = []byte{cursor.lastKey[0]} + lastSeen = cursor.lastKey + } else { + prefix = listPrefixes[0].prefix } - var containerID cid.ID - var offset []byte - graveyardBkt := tx.Bucket(graveyardBucketName) - garbageBkt := tx.Bucket(garbageBucketName) - - rawAddr := make([]byte, cidSize, addressKeySize) - -loop: - for ; name != nil; name, _ = c.Next() { - cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name) - if cidRaw == nil { - continue - } - - var objType objectSDK.Type - - switch prefix { - case primaryPrefix: - objType = objectSDK.TypeRegular - case lockersPrefix: - objType = objectSDK.TypeLock - case tombstonePrefix: - objType = objectSDK.TypeTombstone - default: - continue - } - - bkt := tx.Bucket(name) - if bkt != nil { - copy(rawAddr, cidRaw) - result, offset, cursor, err = selectNFromBucket(bkt, objType, graveyardBkt, garbageBkt, rawAddr, containerID, - result, count, cursor, threshold) - if err != nil { - return nil, nil, err - } - } - bucketName = name - if len(result) >= count { - break loop - } - - // set threshold flag after first `selectNFromBucket` invocation - // first invocation must look for cursor object - threshold = true + idx := slices.IndexFunc(listPrefixes, func(e listPrefix) bool { + return e.prefix[0] == prefix[0] + }) + if idx < 0 { + return nil, nil, fmt.Errorf("invalid prefix value %d", prefix[0]) } - if offset != nil { - // new slice is much faster but less memory efficient - // we need to copy, because offset exists during bbolt tx - cursor.inBucketOffset = make([]byte, len(offset)) - copy(cursor.inBucketOffset, offset) + var next Cursor + result := make([]objectcore.Info, 0, count) + for ; idx < len(listPrefixes); idx++ { + indexResult, lastIndexSeen, err := listByPrefix(ctx, tx, lastSeen, idx, count-len(result)) + if err != nil { + return nil, nil, err + } + result = append(result, indexResult...) 
+ if len(lastIndexSeen) > 0 { + next.lastKey = lastIndexSeen + } + if len(result) == count { + return result, &next, nil + } + lastSeen = nil } - if len(result) == 0 { return nil, nil, ErrEndOfListing } - - // new slice is much faster but less memory efficient - // we need to copy, because bucketName exists during bbolt tx - cursor.bucketName = make([]byte, len(bucketName)) - copy(cursor.bucketName, bucketName) - - return result, cursor, nil + return result, &next, nil } -// selectNFromBucket similar to selectAllFromBucket but uses cursor to find -// object to start selecting from. Ignores inhumed objects. -func selectNFromBucket(bkt *bbolt.Bucket, // main bucket - objType objectSDK.Type, // type of the objects stored in the main bucket - graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets - cidRaw []byte, // container ID prefix, optimization - cnt cid.ID, // container ID - to []objectcore.Info, // listing result - limit int, // stop listing at `limit` items in result - cursor *Cursor, // start from cursor object - threshold bool, // ignore cursor and start immediately -) ([]objectcore.Info, []byte, *Cursor, error) { - if cursor == nil { - cursor = new(Cursor) - } - - count := len(to) - c := bkt.Cursor() - k, v := c.First() - - offset := cursor.inBucketOffset - - if !threshold { - c.Seek(offset) - k, v = c.Next() // we are looking for objects _after_ the cursor - } - - for ; k != nil; k, v = c.Next() { - if count >= limit { - break +func listByPrefix(ctx context.Context, tx *badger.Txn, lastSeen []byte, idx int, count int) ([]objectcore.Info, []byte, error) { + var result []objectcore.Info + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, listPrefixes[idx].prefix, lastSeen, listPrefixes[idx].objectType == objectSDK.TypeRegular, count-len(result)) + if err != nil { + return nil, nil, err } - - var obj oid.ID - if err := obj.Decode(k); err != nil { - break + if len(kvs) == 0 { + return result, lastSeen, nil } - - offset = k - if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 { - continue - } - - var isLinkingObj bool - var ecInfo *objectcore.ECInfo - if objType == objectSDK.TypeRegular { - var o objectSDK.Object - if err := o.Unmarshal(v); err != nil { - return nil, nil, nil, err + for _, kv := range kvs { + lastSeen = kv.Key + addr, err := listPrefixes[idx].keyParser(kv.Key) + if err != nil { + return nil, nil, err } - isLinkingObj = isLinkObject(&o) - ecHeader := o.ECHeader() - if ecHeader != nil { - ecInfo = &objectcore.ECInfo{ - ParentID: ecHeader.Parent(), - Index: ecHeader.Index(), - Total: ecHeader.Total(), + st, err := inGraveyardWithKey(tx, addr) + if err != nil { + return nil, nil, err + } + if st > 0 { + continue + } + + var isLinkingObj bool + var ecInfo *objectcore.ECInfo + if listPrefixes[idx].objectType == objectSDK.TypeRegular { + var o objectSDK.Object + if err := o.Unmarshal(kv.Value); err != nil { + return nil, nil, err + } + isLinkingObj = isLinkObject(&o) + ecHeader := o.ECHeader() + if ecHeader != nil { + ecInfo = &objectcore.ECInfo{ + ParentID: ecHeader.Parent(), + Index: ecHeader.Index(), + Total: ecHeader.Total(), + } } } + + result = append(result, objectcore.Info{ + Address: addr, + Type: listPrefixes[idx].objectType, + IsLinkingObject: isLinkingObj, + ECInfo: ecInfo, + }) + + if len(result) == count { + return result, lastSeen, nil + } } - - var a oid.Address - a.SetContainer(cnt) - a.SetObject(obj) - to = append(to, objectcore.Info{Address: a, Type: objType, IsLinkingObject: isLinkingObj, ECInfo: ecInfo}) - count++ } - - 
return to, offset, cursor, nil -} - -func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) { - if len(name) < bucketKeySize { - return nil, 0 - } - - rawID := name[1:bucketKeySize] - - if err := containerID.Decode(rawID); err != nil { - return nil, 0 - } - - return rawID, name[0] } diff --git a/pkg/local_object_storage/metabase/list_test.go b/pkg/local_object_storage/metabase/list_test.go index a92e2eff4..bb830a919 100644 --- a/pkg/local_object_storage/metabase/list_test.go +++ b/pkg/local_object_storage/metabase/list_test.go @@ -12,7 +12,6 @@ import ( objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test" "github.com/stretchr/testify/require" - "go.etcd.io/bbolt" ) func BenchmarkListWithCursor(b *testing.B) { @@ -29,9 +28,7 @@ func BenchmarkListWithCursor(b *testing.B) { } func listWithCursorPrepareDB(b *testing.B) *meta.DB { - db := newDB(b, meta.WithMaxBatchSize(1), meta.WithBoltDBOptions(&bbolt.Options{ - NoSync: true, - })) // faster single-thread generation + db := newDB(b) defer func() { require.NoError(b, db.Close()) }() obj := testutil.GenerateObject() @@ -147,7 +144,7 @@ func TestLisObjectsWithCursor(t *testing.T) { } _, _, err = metaListWithCursor(db, uint32(countPerReq), cursor) - require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d", countPerReq, cursor) + require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d, cursor:%v", countPerReq, cursor) require.ElementsMatch(t, expected, got, "count:%d", countPerReq) } }) diff --git a/pkg/local_object_storage/metabase/lock.go b/pkg/local_object_storage/metabase/lock.go index 732ba426d..f873d3551 100644 --- a/pkg/local_object_storage/metabase/lock.go +++ b/pkg/local_object_storage/metabase/lock.go @@ -1,9 +1,7 @@ package meta import ( - "bytes" "context" - "fmt" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" @@ -13,23 +11,16 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) -var bucketNameLocked = []byte{lockedPrefix} - type keyValue struct { Key []byte Value []byte } -// returns name of the bucket with objects of type LOCK for specified container. -func bucketNameLockers(idCnr cid.ID, key []byte) []byte { - return bucketName(idCnr, lockersPrefix, key) -} - // Lock marks objects as locked with another object. All objects are from the // specified container. // @@ -66,66 +57,43 @@ func (db *DB) Lock(ctx context.Context, cnr cid.ID, locker oid.ID, locked []oid. panic("empty locked list") } - err := db.lockInternal(locked, cnr, locker) + err := db.database.Update(func(txn *badger.Txn) error { + return lockInternal(txn, locked, cnr, locker) + }) success = err == nil return err } -func (db *DB) lockInternal(locked []oid.ID, cnr cid.ID, locker oid.ID) error { - bucketKeysLocked := make([][]byte, len(locked)) - for i := range locked { - bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize)) +func lockInternal(tx *badger.Txn, locked []oid.ID, cnr cid.ID, locker oid.ID) error { + t, err := firstIrregularObjectType(tx, cnr, locked...) 
+ if err != nil { + return err + } + if t != objectSDK.TypeRegular { + return logicerr.Wrap(new(apistatus.LockNonRegularObject)) } - key := make([]byte, cidSize) - return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error { - if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) != objectSDK.TypeRegular { - return logicerr.Wrap(new(apistatus.LockNonRegularObject)) - } - - bucketLocked := tx.Bucket(bucketNameLocked) - - cnr.Encode(key) - bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key) + for _, objID := range locked { + key := lockedKey(cnr, objID, locker) + v, err := valueOrNil(tx, key) if err != nil { - return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err) + return err + } + if v != nil { + // already locked by locker + continue } - keyLocker := objectKey(locker, key) - var exLockers [][]byte - var updLockers []byte - - loop: - for i := range bucketKeysLocked { - exLockers, err = decodeList(bucketLockedContainer.Get(bucketKeysLocked[i])) - if err != nil { - return fmt.Errorf("decode list of object lockers: %w", err) - } - - for i := range exLockers { - if bytes.Equal(exLockers[i], keyLocker) { - continue loop - } - } - - updLockers, err = encodeList(append(exLockers, keyLocker)) - if err != nil { - return fmt.Errorf("encode list of object lockers: %w", err) - } - - err = bucketLockedContainer.Put(bucketKeysLocked[i], updLockers) - if err != nil { - return fmt.Errorf("update list of object lockers: %w", err) - } + if err := tx.Set(key, zeroValue); err != nil { + return err } - - return nil - })) + } + return nil } // FreeLockedBy unlocks all objects in DB which are locked by lockers. // Returns slice of unlocked object ID's or an error. -func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) { +func (db *DB) FreeLockedBy(ctx context.Context, lockers []oid.Address) ([]oid.Address, error) { var ( startedAt = time.Now() success = false @@ -143,9 +111,9 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) { var unlockedObjects []oid.Address - if err := db.boltDB.Update(func(tx *bbolt.Tx) error { + if err := db.database.Update(func(tx *badger.Txn) error { for i := range lockers { - unlocked, err := freePotentialLocks(tx, lockers[i].Container(), lockers[i].Object()) + unlocked, err := freePotentialLocks(ctx, tx, lockers[i]) if err != nil { return err } @@ -161,42 +129,38 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) { } // checks if specified object is locked in the specified container. -func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool { - bucketLocked := tx.Bucket(bucketNameLocked) - if bucketLocked != nil { - key := make([]byte, cidSize) - idCnr.Encode(key) - bucketLockedContainer := bucketLocked.Bucket(key) - if bucketLockedContainer != nil { - return bucketLockedContainer.Get(objectKey(idObj, key)) != nil - } - } +func objectLocked(ctx context.Context, tx *badger.Txn, idCnr cid.ID, idObj oid.ID) (bool, error) { + prefix := lockedKeyLongPrefix(idCnr, idObj) - return false + items, err := selectByPrefixBatch(ctx, tx, prefix, 1) + if err != nil { + return false, err + } + return len(items) > 0, nil } // return `LOCK` id's if specified object is locked in the specified container. 
-func getLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) { +func getLocked(ctx context.Context, tx *badger.Txn, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) { + prefix := lockedKeyLongPrefix(idCnr, idObj) + var lockers []oid.ID - bucketLocked := tx.Bucket(bucketNameLocked) - if bucketLocked != nil { - key := make([]byte, cidSize) - idCnr.Encode(key) - bucketLockedContainer := bucketLocked.Bucket(key) - if bucketLockedContainer != nil { - binObjIDs, err := decodeList(bucketLockedContainer.Get(objectKey(idObj, key))) + for { + items, err := selectByPrefixBatch(ctx, tx, prefix, batchSize) + if err != nil { + return nil, err + } + for _, it := range items { + id, err := lockerObjectIDFromLockedKey(it) if err != nil { - return nil, fmt.Errorf("decode list of object lockers: %w", err) - } - for _, binObjID := range binObjIDs { - var id oid.ID - if err = id.Decode(binObjID); err != nil { - return nil, err - } - lockers = append(lockers, id) + return nil, err } + lockers = append(lockers, id) + } + if len(items) < batchSize { + break } } + return lockers, nil } @@ -206,95 +170,64 @@ func getLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) { // Operation is very resource-intensive, which is caused by the admissibility // of multiple locks. Also, if we knew what objects are locked, it would be // possible to speed up the execution. -func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) ([]oid.Address, error) { +func freePotentialLocks(ctx context.Context, tx *badger.Txn, locker oid.Address) ([]oid.Address, error) { var unlockedObjects []oid.Address - bucketLocked := tx.Bucket(bucketNameLocked) - if bucketLocked == nil { - return unlockedObjects, nil - } - key := make([]byte, cidSize) - idCnr.Encode(key) - - bucketLockedContainer := bucketLocked.Bucket(key) - if bucketLockedContainer == nil { - return unlockedObjects, nil - } - - keyLocker := objectKey(locker, key) - updates := make([]keyValue, 0) - err := bucketLockedContainer.ForEach(func(k, v []byte) error { - keyLockers, err := decodeList(v) - if err != nil { - return fmt.Errorf("decode list of lockers in locked bucket: %w", err) - } - - for i := range keyLockers { - if bytes.Equal(keyLockers[i], keyLocker) { - if len(keyLockers) == 1 { - updates = append(updates, keyValue{ - Key: k, - Value: nil, - }) - - var id oid.ID - err = id.Decode(k) - if err != nil { - return fmt.Errorf("decode unlocked object id error: %w", err) - } - - var addr oid.Address - addr.SetContainer(idCnr) - addr.SetObject(id) - - unlockedObjects = append(unlockedObjects, addr) - } else { - // exclude locker - keyLockers = append(keyLockers[:i], keyLockers[i+1:]...) 
- - v, err = encodeList(keyLockers) - if err != nil { - return fmt.Errorf("encode updated list of lockers: %w", err) - } - - updates = append(updates, keyValue{ - Key: k, - Value: v, - }) - } - - return nil - } - } - - return nil - }) + locked, err := lockedObjects(tx, locker) if err != nil { return nil, err } - if err = applyBucketUpdates(bucketLockedContainer, updates); err != nil { - return nil, err + for _, lockedObject := range locked { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + if err := tx.Delete(lockedKey(locker.Container(), lockedObject, locker.Object())); err != nil { + return nil, err + } + isLocked, err := objectLocked(ctx, tx, locker.Container(), lockedObject) + if err != nil { + return nil, err + } + if !isLocked { // deleted locker was the last one + var addr oid.Address + addr.SetContainer(locker.Container()) + addr.SetObject(lockedObject) + unlockedObjects = append(unlockedObjects, addr) + } } return unlockedObjects, nil } -func applyBucketUpdates(bucket *bbolt.Bucket, updates []keyValue) error { - for _, update := range updates { - if update.Value == nil { - err := bucket.Delete(update.Key) - if err != nil { - return fmt.Errorf("delete locked object record from locked bucket: %w", err) - } - } else { - err := bucket.Put(update.Key, update.Value) - if err != nil { - return fmt.Errorf("update list of lockers: %w", err) - } +func lockedObjects(tx *badger.Txn, locker oid.Address) ([]oid.ID, error) { + var lockedByLocker []oid.ID + + prefix := lockedKeyShortPrefix(locker.Container()) + it := tx.NewIterator(badger.IteratorOptions{ + PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize, + Prefix: prefix, + }) + defer it.Close() + + for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() { + currentLockerObjID, err := lockerObjectIDFromLockedKey(it.Item().Key()) + if err != nil { + return nil, err } + if !currentLockerObjID.Equals(locker.Object()) { + continue + } + currentObjectID, err := objectIDFromLockedKey(it.Item().Key()) + if err != nil { + return nil, err + } + lockedByLocker = append(lockedByLocker, currentObjectID) } - return nil + return lockedByLocker, nil } // IsLockedPrm groups the parameters of IsLocked operation. 
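// Illustrative sketch, not part of the change set: the exact key layout lives
// in util.go, outside this excerpt, so the byte order below is inferred from
// the helper names used above:
//
//	lockedKey(cnr, locked, locker)   = lockedPrefix | CID | locked OID | locker OID
//	lockedKeyLongPrefix(cnr, locked) = lockedPrefix | CID | locked OID
//	lockedKeyShortPrefix(cnr)        = lockedPrefix | CID
//
// One key per (locked object, locker) pair replaces bbolt's encoded locker
// lists: objectLocked is a one-item scan of the long prefix, getLocked
// collects locker IDs under it, and freePotentialLocks deletes the pair key,
// then re-checks the prefix to learn whether the last lock is gone.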
@@ -343,9 +276,10 @@ func (db *DB) IsLocked(ctx context.Context, prm IsLockedPrm) (res IsLockedRes, e if db.mode.NoMetabase() { return res, ErrDegradedMode } - err = metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error { - res.locked = objectLocked(tx, prm.addr.Container(), prm.addr.Object()) - return nil + err = metaerr.Wrap(db.database.View(func(tx *badger.Txn) error { + var e error + res.locked, e = objectLocked(ctx, tx, prm.addr.Container(), prm.addr.Object()) + return e })) success = err == nil return res, err @@ -376,8 +310,8 @@ func (db *DB) GetLocked(ctx context.Context, addr oid.Address) (res []oid.ID, er if db.mode.NoMetabase() { return res, ErrDegradedMode } - err = metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error { - res, err = getLocked(tx, addr.Container(), addr.Object()) + err = metaerr.Wrap(db.database.View(func(tx *badger.Txn) error { + res, err = getLocked(ctx, tx, addr.Container(), addr.Object()) return nil })) success = err == nil diff --git a/pkg/local_object_storage/metabase/lock_test.go b/pkg/local_object_storage/metabase/lock_test.go index 2d7bfc1cc..afa31fba8 100644 --- a/pkg/local_object_storage/metabase/lock_test.go +++ b/pkg/local_object_storage/metabase/lock_test.go @@ -117,7 +117,7 @@ func TestDB_Lock(t *testing.T) { require.Len(t, res.DeletedLockObjects(), 1) require.Equal(t, objectcore.AddressOf(lockObj), res.DeletedLockObjects()[0]) - _, err = db.FreeLockedBy([]oid.Address{lockAddr}) + _, err = db.FreeLockedBy(context.Background(), []oid.Address{lockAddr}) require.NoError(t, err) inhumePrm.SetAddresses(objAddr) @@ -148,7 +148,7 @@ func TestDB_Lock(t *testing.T) { // unlock just objects that were locked by // just removed locker - _, err = db.FreeLockedBy([]oid.Address{res.DeletedLockObjects()[0]}) + _, err = db.FreeLockedBy(context.Background(), []oid.Address{res.DeletedLockObjects()[0]}) require.NoError(t, err) // removing objects after unlock diff --git a/pkg/local_object_storage/metabase/logger.go b/pkg/local_object_storage/metabase/logger.go new file mode 100644 index 000000000..4a75a6e59 --- /dev/null +++ b/pkg/local_object_storage/metabase/logger.go @@ -0,0 +1,32 @@ +package meta + +import ( + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger" + "github.com/dgraph-io/badger/v4" +) + +var _ badger.Logger = (*badgerLogger)(nil) + +type badgerLogger struct { + l *logger.Logger +} + +// Debugf implements badger.Logger. +func (d *badgerLogger) Debugf(msg string, args ...interface{}) { + d.l.Sugar().Debugf(msg, args...) +} + +// Errorf implements badger.Logger. +func (d *badgerLogger) Errorf(msg string, args ...interface{}) { + d.l.Sugar().Errorf(msg, args...) +} + +// Infof implements badger.Logger. +func (d *badgerLogger) Infof(msg string, args ...interface{}) { + d.l.Sugar().Infof(msg, args...) +} + +// Warningf implements badger.Logger. +func (d *badgerLogger) Warningf(msg string, args ...interface{}) { + d.l.Sugar().Warnf(msg, args...) 
+}
diff --git a/pkg/local_object_storage/metabase/mode.go b/pkg/local_object_storage/metabase/mode.go
index 2032ed6b2..6f09227de 100644
--- a/pkg/local_object_storage/metabase/mode.go
+++ b/pkg/local_object_storage/metabase/mode.go
@@ -1,8 +1,10 @@
 package meta
 
 import (
+	"context"
 	"fmt"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
 )
 
@@ -17,17 +19,17 @@ func (db *DB) SetMode(m mode.Mode) error {
 	}
 
 	if !db.mode.NoMetabase() {
-		if err := db.Close(); err != nil {
+		if err := db.close(); err != nil {
 			return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
 		}
 	}
 
 	if m.NoMetabase() {
-		db.boltDB = nil
+		db.database = nil
 	} else {
 		err := db.openDB(m)
 		if err == nil && !m.ReadOnly() {
-			err = db.Init()
+			err = metaerr.Wrap(db.init(context.TODO(), false))
 		}
 		if err != nil {
 			return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
diff --git a/pkg/local_object_storage/metabase/mode_test.go b/pkg/local_object_storage/metabase/mode_test.go
index 1b9f60055..7286a9db3 100644
--- a/pkg/local_object_storage/metabase/mode_test.go
+++ b/pkg/local_object_storage/metabase/mode_test.go
@@ -24,14 +24,14 @@ func Test_Mode(t *testing.T) {
 	}...)
 
 	require.NoError(t, bdb.Open(context.Background(), mode.DegradedReadOnly))
-	require.Nil(t, bdb.boltDB)
+	require.Nil(t, bdb.database)
 	require.NoError(t, bdb.Init())
-	require.Nil(t, bdb.boltDB)
+	require.Nil(t, bdb.database)
 	require.NoError(t, bdb.Close())
 
 	require.NoError(t, bdb.Open(context.Background(), mode.Degraded))
-	require.Nil(t, bdb.boltDB)
+	require.Nil(t, bdb.database)
 	require.NoError(t, bdb.Init())
-	require.Nil(t, bdb.boltDB)
+	require.Nil(t, bdb.database)
 	require.NoError(t, bdb.Close())
 }
diff --git a/pkg/local_object_storage/metabase/parse.go b/pkg/local_object_storage/metabase/parse.go
new file mode 100644
index 000000000..8c8bd7add
--- /dev/null
+++ b/pkg/local_object_storage/metabase/parse.go
@@ -0,0 +1,62 @@
+package meta
+
+import (
+	"encoding/binary"
+	"errors"
+
+	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
+	"github.com/dgraph-io/badger/v4"
+)
+
+// valueOrNil returns the value, or nil if the key is not found.
+// The returned value must be used only inside the transaction.
+func valueOrNil(tx *badger.Txn, key []byte) ([]byte, error) { + i, err := tx.Get(key) + if err != nil { + if errors.Is(err, badger.ErrKeyNotFound) { + return nil, nil + } + return nil, err + } + var value []byte + if err := i.Value(func(val []byte) error { + value = val + return nil + }); err != nil { + return nil, err + } + return value, nil +} + +func parseInt64Value(v []byte) (int64, bool) { + if len(v) == 0 { + return 0, true + } + if len(v) != 8 { + return 0, false + } + return int64(binary.LittleEndian.Uint64(v)), true +} + +func marshalInt64(v int64) []byte { + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, uint64(v)) + return buf +} + +func parseContainerIDWithIgnore(dst *cid.ID, name []byte, ignore map[string]struct{}) bool { + if len(name) < bucketKeySize { + return false + } + if _, ok := ignore[string(name[1:bucketKeySize])]; ok { + return false + } + return dst.Decode(name[1:bucketKeySize]) == nil +} + +func parseContainerID(dst *cid.ID, name []byte) bool { + if len(name) < bucketKeySize { + return false + } + return dst.Decode(name[1:bucketKeySize]) == nil +} diff --git a/pkg/local_object_storage/metabase/put.go b/pkg/local_object_storage/metabase/put.go index ceb79758f..345ac4690 100644 --- a/pkg/local_object_storage/metabase/put.go +++ b/pkg/local_object_storage/metabase/put.go @@ -1,13 +1,15 @@ package meta import ( + "bytes" "context" - "encoding/binary" "errors" "fmt" - gio "io" + "strconv" "time" + objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object" + "git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs" objectCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" @@ -16,18 +18,11 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "github.com/nspcc-dev/neo-go/pkg/io" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) -type ( - namedBucketItem struct { - name, key, val []byte - } -) - // PutPrm groups the parameters of Put operation. type PutPrm struct { obj *objectSDK.Object @@ -54,6 +49,8 @@ var ( ErrUnknownObjectType = errors.New("unknown object type") ErrIncorrectSplitInfoUpdate = errors.New("updating split info on object without it") ErrIncorrectRootObject = errors.New("invalid root object") + + errInvalidUserAttributeKeyFormat = errors.New("invalid user attribute key format") ) // Put saves object header in metabase. Object payload expected to be cut. 
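// Illustrative, self-contained sketch, not part of the change set: only the
// badger v4 API is used and readCounter is a made-up name. It combines the
// valueOrNil and parseInt64Value patterns above, where a missing key is data
// ("no counter yet") rather than an error:
package sketch

import (
	"encoding/binary"
	"errors"

	"github.com/dgraph-io/badger/v4"
)

// readCounter reads a little-endian int64 counter, treating an absent key as zero.
func readCounter(txn *badger.Txn, key []byte) (int64, error) {
	item, err := txn.Get(key)
	if errors.Is(err, badger.ErrKeyNotFound) {
		return 0, nil // absent counter reads as zero
	}
	if err != nil {
		return 0, err
	}
	var v int64
	err = item.Value(func(val []byte) error { // val is valid only inside this callback
		if len(val) != 8 {
			return errors.New("invalid counter value length")
		}
		v = int64(binary.LittleEndian.Uint64(val))
		return nil
	})
	return v, err
}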
@@ -85,12 +82,28 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) { } currEpoch := db.epochState.CurrentEpoch() + cnr, ok := prm.obj.ContainerID() + if !ok { + return PutRes{}, errors.New("missing container in object") + } - err = db.boltDB.Batch(func(tx *bbolt.Tx) error { - var e error - res, e = db.put(tx, prm.obj, prm.id, nil, currEpoch) - return e - }) + bucketID, release := db.bucketIDs.BucketID(cnr) + defer release() + + const retryCount = 10 + for i := 0; i < retryCount; i++ { + err = db.database.Update(func(tx *badger.Txn) error { + var e error + res, e = db.put(ctx, tx, prm.obj, prm.id, nil, currEpoch, bucketID) + return e + }) + if errors.Is(err, badger.ErrConflict) { + db.log.Warn(logs.ErrMetabaseConflict) + time.Sleep(retryTimeout) + continue + } + break + } if err == nil { success = true storagelog.Write(db.log, @@ -101,11 +114,14 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) { return res, metaerr.Wrap(err) } -func (db *DB) put(tx *bbolt.Tx, +func (db *DB) put( + ctx context.Context, + tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, currEpoch uint64, + bucketID uint16, ) (PutRes, error) { cnr, ok := obj.ContainerID() if !ok { @@ -114,7 +130,7 @@ func (db *DB) put(tx *bbolt.Tx, isParent := si != nil - exists, _, err := db.exists(tx, objectCore.AddressOf(obj), oid.Address{}, currEpoch) + exists, _, err := exists(ctx, tx, objectCore.AddressOf(obj), oid.Address{}, currEpoch) var splitInfoError *objectSDK.SplitInfoError if errors.As(err, &splitInfoError) { @@ -127,37 +143,38 @@ func (db *DB) put(tx *bbolt.Tx, return PutRes{}, db.updateObj(tx, obj, id, si, isParent) } - return PutRes{Inserted: true}, db.insertObject(tx, obj, id, si, isParent, cnr, currEpoch) + return PutRes{Inserted: true}, db.insertObject(ctx, tx, obj, id, si, isParent, cnr, currEpoch, bucketID) } -func (db *DB) updateObj(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error { +func (db *DB) updateObj(tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error { + addr := objectCore.AddressOf(obj) // most right child and split header overlap parent so we have to // check if object exists to not overwrite it twice // When storage engine moves objects between different sub-storages, // it calls metabase.Put method with new storage ID, thus triggering this code. 
if !isParent && id != nil { - return setStorageID(tx, objectCore.AddressOf(obj), id, true) + return setStorageID(tx, addr, id, true) } // when storage already has last object in split hierarchy and there is // a linking object to put (or vice versa), we should update split info // with object ids of these objects if isParent { - return updateSplitInfo(tx, objectCore.AddressOf(obj), si) + return updateSplitInfo(tx, addr.Container(), addr.Object(), si) } return nil } -func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64) error { +func (db *DB) insertObject(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64, bucketID uint16) error { if par := obj.Parent(); par != nil && !isParent { // limit depth by two parentSI, err := splitInfoFromObject(obj) if err != nil { return err } - _, err = db.put(tx, par, id, parentSI, currEpoch) + _, err = db.put(ctx, tx, par, id, parentSI, currEpoch, bucketID) if err != nil { return err } @@ -173,21 +190,21 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o return fmt.Errorf("can't put list indexes: %w", err) } - err = updateFKBTIndexes(tx, obj, putFKBTIndexItem) + err = updateFKBTIndexes(tx, obj, putListIndexItem) if err != nil { return fmt.Errorf("can't put fake bucket tree indexes: %w", err) } // update container volume size estimation if obj.Type() == objectSDK.TypeRegular && !isParent { - err = changeContainerSize(tx, cnr, obj.PayloadSize(), true) + err = changeContainerSize(tx, cnr, int64(obj.PayloadSize()), bucketID) if err != nil { return err } } if !isParent { - if err = db.incCounters(tx, cnr, IsUserObject(obj)); err != nil { + if err = incCounters(tx, cnr, IsUserObject(obj), bucketID); err != nil { return err } } @@ -196,26 +213,24 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o } func putUniqueIndexes( - tx *bbolt.Tx, + tx *badger.Txn, obj *objectSDK.Object, si *objectSDK.SplitInfo, id []byte, ) error { isParent := si != nil addr := objectCore.AddressOf(obj) - cnr := addr.Container() - objKey := objectKey(addr.Object(), make([]byte, objectKeySize)) - bucketName := make([]byte, bucketKeySize) // add value to primary unique bucket if !isParent { + var key []byte switch obj.Type() { case objectSDK.TypeRegular: - bucketName = primaryBucketName(cnr, bucketName) + key = primaryKey(addr.Container(), addr.Object()) case objectSDK.TypeTombstone: - bucketName = tombstoneBucketName(cnr, bucketName) + key = tombstoneKey(addr.Container(), addr.Object()) case objectSDK.TypeLock: - bucketName = bucketNameLockers(cnr, bucketName) + key = lockersKey(addr.Container(), addr.Object()) default: return ErrUnknownObjectType } @@ -225,11 +240,7 @@ func putUniqueIndexes( return fmt.Errorf("can't marshal object header: %w", err) } - err = putUniqueIndexItem(tx, namedBucketItem{ - name: bucketName, - key: objKey, - val: rawObject, - }) + err = tx.Set(key, rawObject) if err != nil { return err } @@ -244,6 +255,7 @@ func putUniqueIndexes( // index root object if obj.Type() == objectSDK.TypeRegular && !obj.HasParent() { + objID := addr.Object() if ecHead := obj.ECHeader(); ecHead != nil { parentID := ecHead.Parent() if ecHead.ParentSplitID() != nil { @@ -258,53 +270,23 @@ func putUniqueIndexes( parentID = *parentSplitParentID } - objKey = objectKey(parentID, objKey) + objID = parentID } - return updateSplitInfoIndex(tx, objKey, cnr, 
bucketName, si) + return updateSplitInfo(tx, addr.Container(), objID, si) } return nil } -func updateSplitInfoIndex(tx *bbolt.Tx, objKey []byte, cnr cid.ID, bucketName []byte, si *objectSDK.SplitInfo) error { - return updateUniqueIndexItem(tx, namedBucketItem{ - name: rootBucketName(cnr, bucketName), - key: objKey, - }, func(old, _ []byte) ([]byte, error) { - switch { - case si == nil && old == nil: - return []byte{}, nil - case si == nil: - return old, nil - case old == nil: - return si.Marshal() - default: - oldSI := objectSDK.NewSplitInfo() - if err := oldSI.Unmarshal(old); err != nil { - return nil, err - } - si = util.MergeSplitInfo(si, oldSI) - return si.Marshal() - } - }) -} +type updateIndexItemFunc = func(tx *badger.Txn, key []byte) error -type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error - -func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error { +func updateListIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error { idObj, _ := obj.ID() cnr, _ := obj.ContainerID() - objKey := objectKey(idObj, make([]byte, objectKeySize)) - bucketName := make([]byte, bucketKeySize) - cs, _ := obj.PayloadChecksum() // index payload hashes - err := f(tx, namedBucketItem{ - name: payloadHashBucketName(cnr, bucketName), - key: cs.Value(), - val: objKey, - }) + err := f(tx, payloadHashKey(cnr, idObj, cs.Value())) if err != nil { return err } @@ -313,11 +295,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun // index parent ids if ok { - err := f(tx, namedBucketItem{ - name: parentBucketName(cnr, bucketName), - key: objectKey(idParent, make([]byte, objectKeySize)), - val: objKey, - }) + err := f(tx, parentKey(cnr, idParent, idObj)) if err != nil { return err } @@ -325,33 +303,35 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun // index split ids if obj.SplitID() != nil { - err := f(tx, namedBucketItem{ - name: splitBucketName(cnr, bucketName), - key: obj.SplitID().ToV2(), - val: objKey, - }) + err := f(tx, splitKey(cnr, idObj, obj.SplitID().ToV2())) if err != nil { return err } } + for _, attr := range obj.Attributes() { + if attr.Key() != objectV2.SysAttributeExpEpochNeoFS && attr.Key() != objectV2.SysAttributeExpEpoch { + continue + } + expEpoch, err := strconv.ParseUint(attr.Value(), 10, 64) + if err != nil { + return errInvalidUserAttributeKeyFormat + } + err = f(tx, expiredKey(cnr, idObj, expEpoch)) + if err != nil { + return err + } + break + } + if ech := obj.ECHeader(); ech != nil { - err := f(tx, namedBucketItem{ - name: ecInfoBucketName(cnr, bucketName), - key: objectKey(ech.Parent(), make([]byte, objectKeySize)), - val: objKey, - }) + err := f(tx, ecInfoKey(cnr, ech.Parent(), idObj)) if err != nil { return err } if ech.ParentSplitID() != nil { - objKey := objectKey(ech.Parent(), make([]byte, objectKeySize)) - err := f(tx, namedBucketItem{ - name: splitBucketName(cnr, bucketName), - key: ech.ParentSplitID().ToV2(), - val: objKey, - }) + err := f(tx, splitKey(cnr, ech.Parent(), ech.ParentSplitID().ToV2())) if err != nil { return err } @@ -361,17 +341,10 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun return nil } -func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error { +func updateFKBTIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error { id, _ := obj.ID() cnr, _ := obj.ContainerID() - objKey := objectKey(id, make([]byte, objectKeySize)) - - key := 
make([]byte, bucketKeySize) - err := f(tx, namedBucketItem{ - name: ownerBucketName(cnr, key), - key: []byte(obj.OwnerID().EncodeToString()), - val: objKey, - }) + err := f(tx, ownerKey(cnr, id, []byte(obj.OwnerID().EncodeToString()))) if err != nil { return err } @@ -379,19 +352,14 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun var attrs []objectSDK.Attribute if obj.ECHeader() != nil { attrs = obj.ECHeader().ParentAttributes() - objKey = objectKey(obj.ECHeader().Parent(), make([]byte, objectKeySize)) + id = obj.ECHeader().Parent() } else { attrs = obj.Attributes() } // user specified attributes for i := range attrs { - key = attributeBucketName(cnr, attrs[i].Key(), key) - err := f(tx, namedBucketItem{ - name: key, - key: []byte(attrs[i].Value()), - val: objKey, - }) + err := f(tx, attributeKey(cnr, id, attrs[i].Key(), attrs[i].Value())) if err != nil { return err } @@ -400,161 +368,42 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun return nil } -type bucketContainer interface { - Bucket([]byte) *bbolt.Bucket - CreateBucket([]byte) (*bbolt.Bucket, error) - CreateBucketIfNotExists([]byte) (*bbolt.Bucket, error) -} - -func createBucketLikelyExists[T bucketContainer](tx T, name []byte) (*bbolt.Bucket, error) { - if bkt := tx.Bucket(name); bkt != nil { - return bkt, nil - } - return tx.CreateBucket(name) -} - -func updateUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem, update func(oldData, newData []byte) ([]byte, error)) error { - bkt, err := createBucketLikelyExists(tx, item.name) - if err != nil { - return fmt.Errorf("can't create index %v: %w", item.name, err) - } - - data, err := update(bkt.Get(item.key), item.val) - if err != nil { - return err - } - return bkt.Put(item.key, data) -} - -func putUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) error { - return updateUniqueIndexItem(tx, item, func(_, val []byte) ([]byte, error) { return val, nil }) -} - -func putFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error { - bkt, err := createBucketLikelyExists(tx, item.name) - if err != nil { - return fmt.Errorf("can't create index %v: %w", item.name, err) - } - - fkbtRoot, err := createBucketLikelyExists(bkt, item.key) - if err != nil { - return fmt.Errorf("can't create fake bucket tree index %v: %w", item.key, err) - } - - return fkbtRoot.Put(item.val, zeroValue) -} - -func putListIndexItem(tx *bbolt.Tx, item namedBucketItem) error { - bkt, err := createBucketLikelyExists(tx, item.name) - if err != nil { - return fmt.Errorf("can't create index %v: %w", item.name, err) - } - - lst, err := decodeList(bkt.Get(item.key)) - if err != nil { - return fmt.Errorf("can't decode leaf list %v: %w", item.key, err) - } - - lst = append(lst, item.val) - - encodedLst, err := encodeList(lst) - if err != nil { - return fmt.Errorf("can't encode leaf list %v: %w", item.key, err) - } - - return bkt.Put(item.key, encodedLst) -} - -// encodeList decodes list of bytes into a single blog for list bucket indexes. -func encodeList(lst [][]byte) ([]byte, error) { - w := io.NewBufBinWriter() - w.WriteVarUint(uint64(len(lst))) - for i := range lst { - w.WriteVarBytes(lst[i]) - } - if w.Err != nil { - return nil, w.Err - } - return w.Bytes(), nil -} - -// decodeList decodes blob into the list of bytes from list bucket index. 
-func decodeList(data []byte) (lst [][]byte, err error) {
-	if len(data) == 0 {
-		return nil, nil
-	}
-
-	var offset uint64
-	size, n, err := getVarUint(data)
-	if err != nil {
-		return nil, err
-	}
-
-	offset += uint64(n)
-	lst = make([][]byte, size, size+1)
-	for i := range lst {
-		sz, n, err := getVarUint(data[offset:])
-		if err != nil {
-			return nil, err
-		}
-		offset += uint64(n)
-
-		next := offset + sz
-		if uint64(len(data)) < next {
-			return nil, gio.ErrUnexpectedEOF
-		}
-		lst[i] = data[offset:next]
-		offset = next
-	}
-	return lst, nil
-}
-
-func getVarUint(data []byte) (uint64, int, error) {
-	if len(data) == 0 {
-		return 0, 0, gio.ErrUnexpectedEOF
-	}
-
-	switch b := data[0]; b {
-	case 0xfd:
-		if len(data) < 3 {
-			return 0, 1, gio.ErrUnexpectedEOF
-		}
-		return uint64(binary.LittleEndian.Uint16(data[1:])), 3, nil
-	case 0xfe:
-		if len(data) < 5 {
-			return 0, 1, gio.ErrUnexpectedEOF
-		}
-		return uint64(binary.LittleEndian.Uint32(data[1:])), 5, nil
-	case 0xff:
-		if len(data) < 9 {
-			return 0, 1, gio.ErrUnexpectedEOF
-		}
-		return binary.LittleEndian.Uint64(data[1:]), 9, nil
-	default:
-		return uint64(b), 1, nil
-	}
-}
-
-// setStorageID for existing objects if they were moved from one
-// storage location to another.
-func setStorageID(tx *bbolt.Tx, addr oid.Address, id []byte, override bool) error {
-	key := make([]byte, bucketKeySize)
-	bkt, err := createBucketLikelyExists(tx, smallBucketName(addr.Container(), key))
-	if err != nil {
-		return err
-	}
-	key = objectKey(addr.Object(), key)
-	if override || bkt.Get(key) == nil {
-		return bkt.Put(key, id)
-	}
-	return nil
+func putListIndexItem(tx *badger.Txn, key []byte) error {
+	return tx.Set(key, zeroValue)
 }
 
 // updateSplitInfo updates split info for existing objects when the storage
 // holds extra information about the last or the linking object in a split hierarchy.
-func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error {
-	objKey := objectKey(addr.Object(), make([]byte, bucketKeySize))
-	return updateSplitInfoIndex(tx, objKey, addr.Container(), make([]byte, bucketKeySize), from)
+func updateSplitInfo(tx *badger.Txn, cnr cid.ID, obj oid.ID, si *objectSDK.SplitInfo) error {
+	key := rootKey(cnr, obj)
+	existed, err := valueOrNil(tx, key)
+	if err != nil {
+		return err
+	}
+
+	switch {
+	case si == nil && existed == nil:
+		return tx.Set(key, zeroValue)
+	case si == nil:
+		return nil
+	case existed == nil || bytes.Equal(existed, zeroValue):
+		siBytes, err := si.Marshal()
+		if err != nil {
+			return err
+		}
+		return tx.Set(key, siBytes)
+	default:
+		existedSI := objectSDK.NewSplitInfo()
+		if err := existedSI.Unmarshal(existed); err != nil {
+			return err
+		}
+		si = util.MergeSplitInfo(si, existedSI)
+		siBytes, err := si.Marshal()
+		if err != nil {
+			return err
+		}
+		return tx.Set(key, siBytes)
+	}
 }
 
 // splitInfoFromObject returns split info based on last or linking object.
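[Editor's note: with badger there are no nested buckets; every index entry written above is a single flat key (prefix, container ID, discriminators, object ID), and what used to be a bucket scan becomes a prefix iteration. The patch batches such scans through `selectByPrefixAndSeek`, provided elsewhere in the patch; a simplified, non-batched sketch of the same idea:]

```go
package main

import "github.com/dgraph-io/badger/v4"

// iteratePrefix visits every key that starts with prefix. Index entries in
// this schema carry a dummy value, so value prefetching is disabled.
func iteratePrefix(tx *badger.Txn, prefix []byte, f func(key []byte) error) error {
	opts := badger.DefaultIteratorOptions
	opts.Prefix = prefix
	opts.PrefetchValues = false
	it := tx.NewIterator(opts)
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		// KeyCopy is required when the key is retained after Next is called.
		if err := f(it.Item().KeyCopy(nil)); err != nil {
			return err
		}
	}
	return nil
}
```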
diff --git a/pkg/local_object_storage/metabase/put_test.go b/pkg/local_object_storage/metabase/put_test.go index 84e4029f2..fb6385b6b 100644 --- a/pkg/local_object_storage/metabase/put_test.go +++ b/pkg/local_object_storage/metabase/put_test.go @@ -2,11 +2,9 @@ package meta_test import ( "context" - "runtime" "strconv" "sync/atomic" "testing" - "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil" @@ -43,9 +41,7 @@ func prepareObjects(n int) []*objectSDK.Object { func BenchmarkPut(b *testing.B) { b.Run("parallel", func(b *testing.B) { - db := newDB(b, - meta.WithMaxBatchDelay(time.Millisecond*10), - meta.WithMaxBatchSize(runtime.NumCPU())) + db := newDB(b) defer func() { require.NoError(b, db.Close()) }() // Ensure the benchmark is bound by CPU and not waiting batch-delay time. b.SetParallelism(1) @@ -65,9 +61,7 @@ func BenchmarkPut(b *testing.B) { }) }) b.Run("sequential", func(b *testing.B) { - db := newDB(b, - meta.WithMaxBatchDelay(time.Millisecond*10), - meta.WithMaxBatchSize(1)) + db := newDB(b) defer func() { require.NoError(b, db.Close()) }() var index atomic.Int64 index.Store(-1) diff --git a/pkg/local_object_storage/metabase/reset_test.go b/pkg/local_object_storage/metabase/reset_test.go index 66f5eefc6..5a8182030 100644 --- a/pkg/local_object_storage/metabase/reset_test.go +++ b/pkg/local_object_storage/metabase/reset_test.go @@ -8,8 +8,8 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" + "github.com/dgraph-io/badger/v4" "github.com/stretchr/testify/require" - "go.etcd.io/bbolt" ) type epochState struct{ e uint64 } @@ -42,16 +42,15 @@ func TestResetDropsContainerBuckets(t *testing.T) { require.NoError(t, err) } - require.NoError(t, db.Reset()) + require.NoError(t, db.Reset(context.Background())) - var bucketCount int - require.NoError(t, db.boltDB.Update(func(tx *bbolt.Tx) error { - return tx.ForEach(func(name []byte, b *bbolt.Bucket) error { - _, exists := mStaticBuckets[string(name)] - require.True(t, exists, "unexpected bucket:"+string(name)) - bucketCount++ - return nil - }) + require.NoError(t, db.database.Update(func(tx *badger.Txn) error { + it := tx.NewIterator(badger.DefaultIteratorOptions) + defer it.Close() + + for it.Seek(nil); it.Valid(); it.Next() { + require.Equal(t, byte(shardInfoPrefix), it.Item().Key()[0], "unexpected prefix: %d", it.Item().Key()[0]) + } + return nil })) - require.Equal(t, len(mStaticBuckets), bucketCount) } diff --git a/pkg/local_object_storage/metabase/select.go b/pkg/local_object_storage/metabase/select.go index 3a4d7a227..8bcb9bca3 100644 --- a/pkg/local_object_storage/metabase/select.go +++ b/pkg/local_object_storage/metabase/select.go @@ -1,6 +1,7 @@ package meta import ( + "bytes" "context" "encoding/binary" "errors" @@ -15,12 +16,15 @@ import ( cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" + "github.com/mr-tron/base58" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) +const batchSize = 1000 + type ( // filterGroup is a structure that have search filters grouped by access // method. 
We have fast filters that look for indexes and do not unmarshal
@@ -91,14 +95,14 @@ func (db *DB) Select(ctx context.Context, prm SelectPrm) (res SelectRes, err err
 	currEpoch := db.epochState.CurrentEpoch()
 
-	return res, metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch)
+	return res, metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		res.addrList, err = db.selectObjects(ctx, tx, prm.cnr, prm.filters, currEpoch)
 		success = err == nil
 		return err
 	}))
 }
 
-func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) {
+func (db *DB) selectObjects(ctx context.Context, tx *badger.Txn, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) {
 	group, err := groupFilters(fs)
 	if err != nil {
 		return nil, err
@@ -119,10 +123,10 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
 	if len(group.fastFilters) == 0 {
 		expLen = 1
 
-		db.selectAll(tx, cnr, mAddr)
+		db.selectAll(ctx, tx, cnr, mAddr)
 	} else {
 		for i := range group.fastFilters {
-			db.selectFastFilter(tx, cnr, group.fastFilters[i], mAddr, i)
+			db.selectFastFilter(ctx, tx, cnr, group.fastFilters[i], mAddr, i, currEpoch)
 		}
 	}
@@ -133,21 +137,20 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
 			continue // ignore objects with unmatched fast filters
 		}
 
-		var id oid.ID
-		err = id.Decode([]byte(a))
-		if err != nil {
+		var addr oid.Address
+		if err := addr.DecodeString(a); err != nil {
 			return nil, err
 		}
-		var addr oid.Address
-		addr.SetContainer(cnr)
-		addr.SetObject(id)
-
-		if objectStatus(tx, addr, currEpoch) > 0 {
+		st, err := objectStatus(ctx, tx, addr, currEpoch)
+		if err != nil {
+			return nil, err
+		}
+		if st > 0 {
 			continue // ignore removed objects
 		}
 
-		if !db.matchSlowFilters(tx, addr, group.slowFilters, currEpoch) {
+		if !db.matchSlowFilters(ctx, tx, addr, group.slowFilters, currEpoch) {
 			continue // ignore objects with unmatched slow filters
 		}
@@ -158,101 +161,137 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
 }
 
 // selectAll adds to resulting cache all available objects in metabase.
-func (db *DB) selectAll(tx *bbolt.Tx, cnr cid.ID, to map[string]int) {
-	bucketName := make([]byte, bucketKeySize)
-	selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, 0)
-	selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, 0)
-	selectAllFromBucket(tx, parentBucketName(cnr, bucketName), to, 0)
-	selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, 0)
+func (db *DB) selectAll(ctx context.Context, tx *badger.Txn, cnr cid.ID, to map[string]int) {
+	db.selectAllWithPrefix(ctx, tx, primaryKeyPrefix(cnr), addressFromPrimaryKey, to, 0)
+	db.selectAllWithPrefix(ctx, tx, tombstoneKeyPrefix(cnr), addressFromTombstoneKey, to, 0)
+	db.selectAllWithPrefix(ctx, tx, parentKeyShortPrefix(cnr), addressOfParentFromParentKey, to, 0)
+	db.selectAllWithPrefix(ctx, tx, lockersKeyPrefix(cnr), addressFromLockersKey, to, 0)
 }
 
 // selectAllWithPrefix goes through all keys with the given prefix and adds
 // the parsed object addresses to the resulting cache.
-func selectAllFromBucket(tx *bbolt.Tx, name []byte, to map[string]int, fNum int) {
-	bkt := tx.Bucket(name)
-	if bkt == nil {
-		return
+func (db *DB) selectAllWithPrefix(ctx context.Context, tx *badger.Txn, prefix []byte, keyParser func(key []byte) (oid.Address, error), to map[string]int, fNum int) {
+	db.selectWithPrefix(ctx, tx, prefix, keyParser, func(oid.Address) bool { return true }, to, fNum)
+}
+
+func (db *DB) selectWithPrefix(ctx context.Context, tx *badger.Txn, prefix []byte, keyParser func([]byte) (oid.Address, error), condition func(oid.Address) bool, to map[string]int, fNum int) {
+	var lastSeen []byte
+	for {
+		kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+		if err != nil {
+			db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+				zap.ByteString("prefix", prefix),
+				zap.Error(err),
+			)
+			return
+		}
+		for _, kv := range kvs {
+			lastSeen = kv.Key
+			addr, err := keyParser(kv.Key)
+			if err != nil {
+				db.log.Debug(logs.FailedToParseAddressFromKey,
+					zap.ByteString("key", kv.Key),
+					zap.Error(err),
+				)
+				continue
+			}
+			if condition(addr) {
+				markAddressInCache(to, fNum, addr.EncodeToString())
+			}
+		}
+		if len(kvs) < batchSize {
+			break
+		}
 	}
-
-	_ = bkt.ForEach(func(k, _ []byte) error {
-		markAddressInCache(to, fNum, string(k))
-
-		return nil
-	})
 }
 
 // selectFastFilter makes fast optimized checks for well known buckets or
 // looking through user attribute buckets otherwise.
 func (db *DB) selectFastFilter(
-	tx *bbolt.Tx,
+	ctx context.Context,
+	tx *badger.Txn,
 	cnr cid.ID, // container we search on
 	f objectSDK.SearchFilter, // fast filter
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
+	currEpoch uint64,
 ) {
-	currEpoch := db.epochState.CurrentEpoch()
-	bucketName := make([]byte, bucketKeySize)
 	switch f.Header() {
 	case v2object.FilterHeaderObjectID:
-		db.selectObjectID(tx, f, cnr, to, fNum, currEpoch)
+		db.selectObjectID(ctx, tx, f, cnr, to, fNum, currEpoch)
 	case v2object.FilterHeaderOwnerID:
-		bucketName := ownerBucketName(cnr, bucketName)
-		db.selectFromFKBT(tx, bucketName, f, to, fNum)
+		db.selectOwnerID(ctx, tx, f, cnr, to, fNum)
 	case v2object.FilterHeaderPayloadHash:
-		bucketName := payloadHashBucketName(cnr, bucketName)
-		db.selectFromList(tx, bucketName, f, to, fNum)
+		db.selectPayloadHash(ctx, tx, f, cnr, to, fNum)
 	case v2object.FilterHeaderObjectType:
-		for _, bucketName := range bucketNamesForType(cnr, f.Operation(), f.Value()) {
-			selectAllFromBucket(tx, bucketName, to, fNum)
+		for _, prefix := range prefixesForType(cnr, f.Operation(), f.Value()) {
+			db.selectAllWithPrefix(ctx, tx, prefix.prefix, prefix.keyParser, to, fNum)
 		}
 	case v2object.FilterHeaderParent:
-		bucketName := parentBucketName(cnr, bucketName)
-		db.selectFromList(tx, bucketName, f, to, fNum)
+		db.selectParent(ctx, tx, f, cnr, to, fNum)
 	case v2object.FilterHeaderSplitID:
-		bucketName := splitBucketName(cnr, bucketName)
-		db.selectFromList(tx, bucketName, f, to, fNum)
+		db.selectSplitID(ctx, tx, f, cnr, to, fNum)
 	case v2object.FilterHeaderECParent:
-		bucketName := ecInfoBucketName(cnr, bucketName)
-		db.selectFromList(tx, bucketName, f, to, fNum)
+		db.selectECParent(ctx, tx, f, cnr, to, fNum)
 	case v2object.FilterPropertyRoot:
-		selectAllFromBucket(tx, rootBucketName(cnr, bucketName), to, fNum)
+		db.selectAllWithPrefix(ctx, tx, rootKeyPrefix(cnr), addressFromRootKey, to, fNum)
 	case v2object.FilterPropertyPhy:
-		selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum)
-		selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum)
-		selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum)
+		db.selectAllWithPrefix(ctx, tx, primaryKeyPrefix(cnr), addressFromPrimaryKey, to, fNum)
+		db.selectAllWithPrefix(ctx, tx, tombstoneKeyPrefix(cnr), addressFromTombstoneKey, to, fNum)
+		db.selectAllWithPrefix(ctx, tx, lockersKeyPrefix(cnr), addressFromLockersKey, to, fNum)
 	default: // user attribute
-		bucketName := attributeBucketName(cnr, f.Header(), bucketName)
-
 		if f.Operation() == objectSDK.MatchNotPresent {
-			selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum)
+			db.selectWithoutAttribute(ctx, tx, cnr, f.Header(), to, fNum)
 		} else {
-			db.selectFromFKBT(tx, bucketName, f, to, fNum)
+			db.selectByAttribute(ctx, tx, cnr, f, to, fNum)
 		}
 	}
 }
 
-var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{
-	v2object.TypeRegular.String():   {primaryBucketName, parentBucketName},
-	v2object.TypeTombstone.String(): {tombstoneBucketName},
-	v2object.TypeLock.String():      {bucketNameLockers},
+type prefixer struct {
+	prefixer  func(cid.ID) []byte
+	keyParser func(key []byte) (oid.Address, error)
 }
 
-func allBucketNames(cnr cid.ID) (names [][]byte) {
-	for _, fns := range mBucketNaming {
-		for _, fn := range fns {
-			names = append(names, fn(cnr, make([]byte, bucketKeySize)))
-		}
-	}
-
-	return
+type prefixWithKeyParser struct {
+	prefix    []byte
+	keyParser func(key []byte) (oid.Address, error)
 }
 
-func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (names [][]byte) {
+var typeToPrefix = map[string][]prefixer{
+	v2object.TypeRegular.String(): {
+		prefixer{
+			prefixer:  primaryKeyPrefix,
+			keyParser: addressFromPrimaryKey,
+		},
+		prefixer{
+			prefixer:  parentKeyShortPrefix,
+			keyParser: addressOfParentFromParentKey,
+		},
+	},
+	v2object.TypeTombstone.String(): {
+		prefixer{
+			prefixer:  tombstoneKeyPrefix,
+			keyParser: addressFromTombstoneKey,
+		},
+	},
+	v2object.TypeLock.String(): {
+		prefixer{
+			prefixer:  lockersKeyPrefix,
+			keyParser: addressFromLockersKey,
+		},
+	},
+}
+
+func prefixesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (prefixes []prefixWithKeyParser) {
 	appendNames := func(key string) {
-		fns, ok := mBucketNaming[key]
+		prefixers, ok := typeToPrefix[key]
 		if ok {
-			for _, fn := range fns {
-				names = append(names, fn(cnr, make([]byte, bucketKeySize)))
+			for _, prefixer := range prefixers {
+				prefixes = append(prefixes, prefixWithKeyParser{
+					prefix:    prefixer.prefixer(cnr),
+					keyParser: prefixer.keyParser,
+				})
 			}
 		}
 	}
@@ -260,7 +299,7 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
 	switch mType {
 	default:
 	case objectSDK.MatchStringNotEqual:
-		for key := range mBucketNaming {
+		for key := range typeToPrefix {
 			if key != typeVal {
 				appendNames(key)
 			}
@@ -268,7 +307,7 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
 	case objectSDK.MatchStringEqual:
 		appendNames(typeVal)
 	case objectSDK.MatchCommonPrefix:
-		for key := range mBucketNaming {
+		for key := range typeToPrefix {
 			if strings.HasPrefix(key, typeVal) {
 				appendNames(key)
 			}
@@ -280,145 +319,158 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
 
 // selectByAttribute looks into the attribute index to find the list of
 // addresses to add to the resulting cache.
-func (db *DB) selectFromFKBT(
-	tx *bbolt.Tx,
-	name []byte, // fkbt root bucket name
+func (db *DB) selectByAttribute(
+	ctx context.Context,
+	tx *badger.Txn,
+	cnr cid.ID,
 	f objectSDK.SearchFilter, // filter for operation and value
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) { //
-	matchFunc, ok := db.matchers[f.Operation()]
-	if !ok {
+	var prefix []byte
+	var condition func([]byte) bool
+	switch f.Operation() {
+	default:
 		db.log.Debug(logs.MetabaseMissingMatcher, zap.Uint32("operation", uint32(f.Operation())))
-		return
-	}
-
-	fkbtRoot := tx.Bucket(name)
-	if fkbtRoot == nil {
 		return
-	}
-
-	err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error {
-		fkbtLeaf := fkbtRoot.Bucket(k)
-		if fkbtLeaf == nil {
-			return nil
+	case objectSDK.MatchUnknown:
+		return
+	case objectSDK.MatchStringEqual, objectSDK.MatchCommonPrefix:
+		prefix = attributeKeyPrefix(cnr, f.Header(), f.Value())
+		condition = func([]byte) bool { return true }
+	case objectSDK.MatchStringNotEqual:
+		prefix = attributeKeyPrefix(cnr, f.Header(), "")
+		fromRequestValue := []byte(f.Value())
+		condition = func(fromDBValue []byte) bool {
+			return !bytes.Equal(fromDBValue, fromRequestValue)
 		}
+	}
 
-		return fkbtLeaf.ForEach(func(k, _ []byte) error {
-			markAddressInCache(to, fNum, string(k))
+	var lastSeen []byte
+	for {
+		kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+		if err != nil {
+			db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+				zap.ByteString("prefix", prefix),
+				zap.Error(err),
+			)
+			return
+		}
+		for _, kv := range kvs {
+			lastSeen = kv.Key
+			attrValue, err := attributeValueFromAttributeKey(kv.Key, f.Header())
+			if err != nil {
+				db.log.Debug(logs.FailedToParseAttributeValueFromKey,
+					zap.ByteString("key", kv.Key),
+					zap.Error(err),
+				)
+				continue
+			}
 
-			return nil
-		})
-	})
-	if err != nil {
-		db.log.Debug(logs.MetabaseErrorInFKBTSelection, zap.String("error", err.Error()))
+			if condition(attrValue) {
+				addr, err := addressFromAttributeKey(kv.Key, f.Header())
+				if err != nil {
+					db.log.Debug(logs.FailedToParseAddressFromKey,
+						zap.ByteString("key", kv.Key),
+						zap.Error(err),
+					)
+					continue
+				}
+				markAddressInCache(to, fNum, addr.EncodeToString())
+			}
+		}
+		if len(kvs) < batchSize {
+			break
+		}
 	}
 }
 
 // selectWithoutAttribute looks through all primary index prefixes to find
 // addresses of objects that lack the given attribute and adds them to the
 // resulting cache.
-func selectOutsideFKBT(
-	tx *bbolt.Tx,
-	incl [][]byte, // buckets
-	name []byte, // fkbt root bucket name
+func (db *DB) selectWithoutAttribute(
+	ctx context.Context,
+	tx *badger.Txn,
+	cnr cid.ID,
+	attributeKey string, // attribute key to filter by absence
 	to map[string]int, // resulting cache
 	fNum int, // index of filter
 ) {
-	mExcl := make(map[string]struct{})
-
-	bktExcl := tx.Bucket(name)
-	if bktExcl != nil {
-		_ = bktExcl.ForEachBucket(func(k []byte) error {
-			exclBktLeaf := bktExcl.Bucket(k)
-			return exclBktLeaf.ForEach(func(k, _ []byte) error {
-				mExcl[string(k)] = struct{}{}
-
-				return nil
-			})
-		})
-	}
-
-	for i := range incl {
-		bktIncl := tx.Bucket(incl[i])
-		if bktIncl == nil {
-			continue
+	for _, prefixers := range typeToPrefix {
+		for _, prefixer := range prefixers {
+			db.selectWithoutAttributeForPrefixer(ctx, prefixer, cnr, tx, attributeKey, to, fNum)
 		}
-
-		_ = bktIncl.ForEach(func(k, _ []byte) error {
-			if _, ok := mExcl[string(k)]; !ok {
-				markAddressInCache(to, fNum, string(k))
-			}
-
-			return nil
-		})
 	}
 }
 
-// selectFromList looks into index to find list of addresses to add in
-// resulting cache.
-func (db *DB) selectFromList(
-	tx *bbolt.Tx,
-	name []byte, // list root bucket name
-	f objectSDK.SearchFilter, // filter for operation and value
-	to map[string]int, // resulting cache
-	fNum int, // index of filter
-) { //
-	bkt := tx.Bucket(name)
-	if bkt == nil {
-		return
-	}
-
-	var (
-		lst [][]byte
-		err error
-	)
-
-	switch op := f.Operation(); op {
-	case objectSDK.MatchStringEqual:
-		lst, err = decodeList(bkt.Get(bucketKeyHelper(f.Header(), f.Value())))
+func (db *DB) selectWithoutAttributeForPrefixer(ctx context.Context, prefixer prefixer, cnr cid.ID, tx *badger.Txn, attributeKey string, to map[string]int, fNum int) {
+	prefix := prefixer.prefixer(cnr)
+	var lastSeen []byte
+	for {
+		kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, true, batchSize)
 		if err != nil {
-			db.log.Debug(logs.MetabaseCantDecodeListBucketLeaf, zap.String("error", err.Error()))
+			db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+				zap.ByteString("prefix", prefix),
+				zap.Error(err),
+			)
 			return
 		}
-	default:
-		fMatch, ok := db.matchers[op]
-		if !ok {
-			db.log.Debug(logs.MetabaseUnknownOperation, zap.Uint32("operation", uint32(op)))
-
-			return
-		}
-
-		if err = fMatch.matchBucket(bkt, f.Header(), f.Value(), func(_, val []byte) error {
-			l, err := decodeList(val)
+		for _, kv := range kvs {
+			lastSeen = kv.Key
+			addr, err := prefixer.keyParser(kv.Key)
 			if err != nil {
-				db.log.Debug(logs.MetabaseCantDecodeListBucketLeaf,
-					zap.String("error", err.Error()),
+				db.log.Debug(logs.FailedToParseAddressFromKey,
+					zap.ByteString("key", kv.Key),
+					zap.Error(err),
 				)
-
-				return err
+				continue
 			}
 
-			lst = append(lst, l...)
+			obj := objectSDK.New()
+			if err := obj.Unmarshal(kv.Value); err != nil {
+				db.log.Debug(logs.ShardCouldNotUnmarshalObject, zap.Stringer("address", addr), zap.Error(err))
+				continue
+			}
 
-			return nil
-		}); err != nil {
-			db.log.Debug(logs.MetabaseCantIterateOverTheBucket,
-				zap.String("error", err.Error()),
-			)
-
-			return
+			var hasAttribute bool
+			for _, attr := range obj.Attributes() {
+				if attr.Key() == attributeKey {
+					hasAttribute = true
+					break
+				}
+			}
+			if hasAttribute {
+				continue
+			}
+			markAddressInCache(to, fNum, addr.EncodeToString())
+		}
+		if len(kvs) < batchSize {
+			break
 		}
-	}
-
-	for i := range lst {
-		markAddressInCache(to, fNum, string(lst[i]))
 	}
 }
 
-// selectObjectID processes objectID filter with in-place optimizations.
+type matcher struct { + matchSlow func(string, []byte, string) bool +} + +var matchers map[objectSDK.SearchMatchType]matcher = map[objectSDK.SearchMatchType]matcher{ + objectSDK.MatchUnknown: { + matchSlow: unknownMatcher, + }, + objectSDK.MatchStringEqual: { + matchSlow: stringEqualMatcher, + }, + objectSDK.MatchStringNotEqual: { + matchSlow: stringNotEqualMatcher, + }, + objectSDK.MatchCommonPrefix: { + matchSlow: stringCommonPrefixMatcher, + }, +} + func (db *DB) selectObjectID( - tx *bbolt.Tx, + ctx context.Context, + tx *badger.Txn, f objectSDK.SearchFilter, cnr cid.ID, to map[string]int, // resulting cache @@ -431,67 +483,439 @@ func (db *DB) selectObjectID( addr.SetObject(id) var splitInfoError *objectSDK.SplitInfoError - ok, _, err := db.exists(tx, addr, oid.Address{}, currEpoch) + ok, _, err := exists(ctx, tx, addr, oid.Address{}, currEpoch) if (err == nil && ok) || errors.As(err, &splitInfoError) { - raw := make([]byte, objectKeySize) - id.Encode(raw) - markAddressInCache(to, fNum, string(raw)) + markAddressInCache(to, fNum, addr.EncodeToString()) } } + var condition func(oid.Address) bool switch op := f.Operation(); op { case objectSDK.MatchStringEqual: var id oid.ID - if err := id.DecodeString(f.Value()); err == nil { - appendOID(id) + if err := id.DecodeString(f.Value()); err != nil { + return + } + appendOID(id) + return + case objectSDK.MatchUnknown: + return + case objectSDK.MatchStringNotEqual: + var id oid.ID + if err := id.DecodeString(f.Value()); err != nil { + return + } + condition = func(a oid.Address) bool { + return !a.Container().Equals(cnr) || !a.Object().Equals(id) + } + case objectSDK.MatchCommonPrefix: + condition = func(a oid.Address) bool { + return a.Container().Equals(cnr) && strings.HasPrefix( + a.Object().EncodeToString(), + f.Value(), + ) } default: - fMatch, ok := db.matchers[op] - if !ok { - db.log.Debug(logs.MetabaseUnknownOperation, - zap.Uint32("operation", uint32(f.Operation())), - ) + db.log.Debug(logs.MetabaseUnknownOperation, + zap.Uint32("operation", uint32(f.Operation())), + ) + return + } + for _, prefix := range prefixesForType(cnr, objectSDK.MatchStringNotEqual, "") { + db.selectWithPrefix(ctx, tx, prefix.prefix, prefix.keyParser, condition, to, fNum) + } +} + +func (db *DB) selectOwnerID(ctx context.Context, tx *badger.Txn, f objectSDK.SearchFilter, cnr cid.ID, to map[string]int, fNum int) { + var condition func([]byte) bool + var prefix []byte + switch op := f.Operation(); op { + case objectSDK.MatchCommonPrefix, objectSDK.MatchStringEqual: + prefix = ownerKeyLongPrefix(cnr, []byte(f.Value())) + condition = func([]byte) bool { return true } + case objectSDK.MatchUnknown: + return + case objectSDK.MatchStringNotEqual: + prefix = ownerKeyShortPrefix(cnr) + ownerID := []byte(f.Value()) + condition = func(fromDB []byte) bool { return !bytes.Equal(fromDB, ownerID) } + default: + db.log.Debug(logs.MetabaseUnknownOperation, + zap.Uint32("operation", uint32(f.Operation())), + ) + return + } + + var lastSeen []byte + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize) + if err != nil { + db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix, + zap.ByteString("prefix", prefix), + zap.Error(err), + ) return } - for _, bucketName := range bucketNamesForType(cnr, objectSDK.MatchStringNotEqual, "") { - // copy-paste from DB.selectAllFrom - bkt := tx.Bucket(bucketName) - if bkt == nil { - return - } - - err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, _ []byte) error { - var id oid.ID - if err := 
id.Decode(k); err == nil { - appendOID(id) - } - return nil - }) + for _, kv := range kvs { + lastSeen = kv.Key + owner, err := ownerFromOwnerKey(kv.Key) if err != nil { - db.log.Debug(logs.MetabaseCouldNotIterateOverTheBuckets, - zap.String("error", err.Error()), + db.log.Debug(logs.FailedToParseOwnerFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), ) + continue } + if condition(owner) { + addr, err := addressFromOwnerKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + markAddressInCache(to, fNum, addr.EncodeToString()) + } + } + if len(kvs) < batchSize { + break + } + } +} + +func (db *DB) selectPayloadHash(ctx context.Context, tx *badger.Txn, f objectSDK.SearchFilter, cnr cid.ID, to map[string]int, fNum int) { + var condition func([]byte) bool + var prefix []byte + switch op := f.Operation(); op { + case objectSDK.MatchUnknown: + return + case objectSDK.MatchCommonPrefix: + value, checkLast, ok := destringifyValue(f.Header(), f.Value(), true) + if !ok { + return + } + prefixValue := value + if checkLast { + prefixValue = value[:len(value)-1] + } + if len(value) == 0 { + condition = func([]byte) bool { return true } + prefix = payloadHashKeyShortPrefix(cnr) + } else { + prefix = payloadHashKeyLongPrefix(cnr, prefixValue) + condition = func(fromDB []byte) bool { + if checkLast && (len(fromDB) == len(prefixValue) || fromDB[len(prefixValue)]>>4 != value[len(value)-1]) { + return false + } + return true + } + } + case objectSDK.MatchStringEqual: + value, _, ok := destringifyValue(f.Header(), f.Value(), false) + if !ok { + return + } + prefix = payloadHashKeyLongPrefix(cnr, value) + condition = func([]byte) bool { return true } + case objectSDK.MatchStringNotEqual: + value, _, ok := destringifyValue(f.Header(), f.Value(), false) + prefix = payloadHashKeyShortPrefix(cnr) + condition = func(fromDB []byte) bool { return !ok || !bytes.Equal(fromDB, value) } + default: + db.log.Debug(logs.MetabaseUnknownOperation, zap.Uint32("operation", uint32(f.Operation()))) + return + } + + var lastSeen []byte + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize) + if err != nil { + db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix, + zap.ByteString("prefix", prefix), + zap.Error(err), + ) + return + } + for _, kv := range kvs { + lastSeen = kv.Key + hash, err := payloadHashFromPayloadHashKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParsePayloadHashFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + if condition(hash) { + addr, err := addressFromPayloadHashKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + markAddressInCache(to, fNum, addr.EncodeToString()) + } + } + if len(kvs) < batchSize { + break + } + } +} + +func (db *DB) selectParent( + ctx context.Context, + tx *badger.Txn, + f objectSDK.SearchFilter, + cnr cid.ID, + to map[string]int, // resulting cache + fNum int, // index of filter +) { + var condition func(oid.ID) bool + var prefix []byte + switch op := f.Operation(); op { + case objectSDK.MatchStringEqual: + var parentObjID oid.ID + if err := parentObjID.DecodeString(f.Value()); err != nil { + return + } + prefix = parentKeyLongPrefix(cnr, parentObjID) + condition = func(oid.ID) bool { return true } + case objectSDK.MatchCommonPrefix: + v, err := base58.Decode(f.Value()) + if err != nil { + return + } + prefix = 
append(parentKeyShortPrefix(cnr), v...) + condition = func(oid.ID) bool { return true } + case objectSDK.MatchUnknown: + return + case objectSDK.MatchStringNotEqual: + var parentObjID oid.ID + if err := parentObjID.DecodeString(f.Value()); err != nil { + return + } + + prefix = parentKeyShortPrefix(cnr) + condition = func(parentFromDB oid.ID) bool { return !parentFromDB.Equals(parentObjID) } + default: + db.log.Debug(logs.MetabaseUnknownOperation, + zap.Uint32("operation", uint32(f.Operation())), + ) + return + } + + var lastSeen []byte + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize) + if err != nil { + db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix, + zap.ByteString("prefix", prefix), + zap.Error(err), + ) + return + } + + for _, kv := range kvs { + lastSeen = kv.Key + parentAddr, err := addressOfParentFromParentKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + if condition(parentAddr.Object()) { + targetAddr, err := addressOfTargetFromParentKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + markAddressInCache(to, fNum, targetAddr.EncodeToString()) + } + } + if len(kvs) < batchSize { + break + } + } +} + +func (db *DB) selectECParent( + ctx context.Context, + tx *badger.Txn, + f objectSDK.SearchFilter, + cnr cid.ID, + to map[string]int, // resulting cache + fNum int, // index of filter +) { + var condition func(oid.ID) bool + var prefix []byte + switch op := f.Operation(); op { + case objectSDK.MatchStringEqual: + var ecParentObjID oid.ID + if err := ecParentObjID.DecodeString(f.Value()); err != nil { + return + } + prefix = ecInfoLongKeyPrefix(cnr, ecParentObjID) + condition = func(oid.ID) bool { return true } + case objectSDK.MatchCommonPrefix: + v, err := base58.Decode(f.Value()) + if err != nil { + return + } + prefix = append(ecInfoShortKeyPrefix(cnr), v...) 
+ condition = func(oid.ID) bool { return true } + case objectSDK.MatchUnknown: + return + case objectSDK.MatchStringNotEqual: + var ecParentObjID oid.ID + if err := ecParentObjID.DecodeString(f.Value()); err != nil { + return + } + + prefix = ecInfoShortKeyPrefix(cnr) + condition = func(parentFromDB oid.ID) bool { return !parentFromDB.Equals(ecParentObjID) } + default: + db.log.Debug(logs.MetabaseUnknownOperation, + zap.Uint32("operation", uint32(f.Operation())), + ) + return + } + + var lastSeen []byte + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize) + if err != nil { + db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix, + zap.ByteString("prefix", prefix), + zap.Error(err), + ) + return + } + + for _, kv := range kvs { + lastSeen = kv.Key + ecParentAddr, err := addressOfParentFromECInfoKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + if condition(ecParentAddr.Object()) { + chunkAddr, err := addressOfChunkFromECInfoKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + markAddressInCache(to, fNum, chunkAddr.EncodeToString()) + } + } + if len(kvs) < batchSize { + break + } + } +} + +func (db *DB) selectSplitID( + ctx context.Context, + tx *badger.Txn, + f objectSDK.SearchFilter, + cnr cid.ID, + to map[string]int, // resulting cache + fNum int, // index of filter +) { + var condition func([]byte) bool + var prefix []byte + switch op := f.Operation(); op { + case objectSDK.MatchStringEqual: + s := objectSDK.NewSplitID() + err := s.Parse(f.Value()) + if err != nil { + return + } + prefix = splitKeyLongPrefix(cnr, s.ToV2()) + condition = func([]byte) bool { return true } + case objectSDK.MatchCommonPrefix: + prefix = splitKeyLongPrefix(cnr, []byte(f.Value())) + condition = func([]byte) bool { return true } + case objectSDK.MatchUnknown: + return + case objectSDK.MatchStringNotEqual: + prefix = splitKeyShortPrefix(cnr) + splitIDFromRequest := []byte(f.Value()) + condition = func(splitIDFromDB []byte) bool { return !bytes.Equal(splitIDFromRequest, splitIDFromDB) } + default: + db.log.Debug(logs.MetabaseUnknownOperation, + zap.Uint32("operation", uint32(f.Operation())), + ) + return + } + + var lastSeen []byte + for { + kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize) + if err != nil { + db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix, + zap.ByteString("prefix", prefix), + zap.Error(err), + ) + return + } + + for _, kv := range kvs { + lastSeen = kv.Key + splitID, err := splitIDFromSplitKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseSplitIDFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + if condition(splitID) { + addr, err := addressFromSplitKey(kv.Key) + if err != nil { + db.log.Debug(logs.FailedToParseAddressFromKey, + zap.ByteString("key", kv.Key), + zap.Error(err), + ) + continue + } + markAddressInCache(to, fNum, addr.EncodeToString()) + } + } + if len(kvs) < batchSize { + break } } } // matchSlowFilters return true if object header is matched by all slow filters. 
-func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f objectSDK.SearchFilters, currEpoch uint64) bool { +func (db *DB) matchSlowFilters(ctx context.Context, tx *badger.Txn, addr oid.Address, f objectSDK.SearchFilters, currEpoch uint64) bool { if len(f) == 0 { return true } - buf := make([]byte, addressKeySize) - obj, err := db.get(tx, addr, buf, true, false, currEpoch) + obj, err := get(ctx, tx, addr, true, false, currEpoch) if err != nil { return false } for i := range f { - matchFunc, ok := db.matchers[f[i].Operation()] + matchFunc, ok := matchers[f[i].Operation()] if !ok { return false } diff --git a/pkg/local_object_storage/metabase/shard_id.go b/pkg/local_object_storage/metabase/shard_id.go index 88446494e..3e39d1696 100644 --- a/pkg/local_object_storage/metabase/shard_id.go +++ b/pkg/local_object_storage/metabase/shard_id.go @@ -8,13 +8,10 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" metamode "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" ) -var ( - shardInfoBucket = []byte{shardInfoPrefix} - shardIDKey = []byte("id") -) +var shardIDKey = []byte("id") // GetShardID sets metabase operation mode // and reads shard id from db. @@ -36,7 +33,7 @@ func (db *DB) GetShardID(mode metamode.Mode) ([]byte, error) { id, err := db.readShardID() - if cErr := db.close(); cErr != nil { + if cErr := metaerr.Wrap(db.database.Close()); cErr != nil { err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr)) } @@ -47,11 +44,12 @@ func (db *DB) GetShardID(mode metamode.Mode) ([]byte, error) { // If id is missing, returns nil, nil. func (db *DB) readShardID() ([]byte, error) { var id []byte - err := db.boltDB.View(func(tx *bbolt.Tx) error { - b := tx.Bucket(shardInfoBucket) - if b != nil { - id = bytes.Clone(b.Get(shardIDKey)) + err := db.database.View(func(tx *badger.Txn) error { + v, err := valueOrNil(tx, shardInfoKey(shardIDKey)) + if err != nil { + return err } + id = bytes.Clone(v) return nil }) return id, metaerr.Wrap(err) @@ -77,7 +75,7 @@ func (db *DB) SetShardID(id []byte, mode metamode.Mode) error { db.metrics.SetMode(metamode.ConvertToComponentModeDegraded(mode)) } - if cErr := db.close(); cErr != nil { + if cErr := metaerr.Wrap(db.database.Close()); cErr != nil { err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr)) } @@ -86,11 +84,7 @@ func (db *DB) SetShardID(id []byte, mode metamode.Mode) error { // writeShardID writes shard id to db. 
func (db *DB) writeShardID(id []byte) error { - return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error { - b, err := tx.CreateBucketIfNotExists(shardInfoBucket) - if err != nil { - return err - } - return b.Put(shardIDKey, id) + return metaerr.Wrap(db.database.Update(func(tx *badger.Txn) error { + return tx.Set(shardInfoKey(shardIDKey), id) })) } diff --git a/pkg/local_object_storage/metabase/storage_id.go b/pkg/local_object_storage/metabase/storage_id.go index 6d620b41a..d6744abbb 100644 --- a/pkg/local_object_storage/metabase/storage_id.go +++ b/pkg/local_object_storage/metabase/storage_id.go @@ -3,12 +3,13 @@ package meta import ( "bytes" "context" + "errors" "time" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" ) @@ -57,7 +58,7 @@ func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes return res, ErrDegradedMode } - err = db.boltDB.View(func(tx *bbolt.Tx) error { + err = db.database.View(func(tx *badger.Txn) error { res.id, err = db.storageID(tx, prm.addr) return err @@ -66,14 +67,11 @@ func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes return res, metaerr.Wrap(err) } -func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) { - key := make([]byte, bucketKeySize) - smallBucket := tx.Bucket(smallBucketName(addr.Container(), key)) - if smallBucket == nil { - return nil, nil +func (db *DB) storageID(tx *badger.Txn, addr oid.Address) ([]byte, error) { + storageID, err := valueOrNil(tx, smallKey(addr.Container(), addr.Object())) + if err != nil { + return nil, err } - - storageID := smallBucket.Get(objectKey(addr.Object(), key)) if storageID == nil { return nil, nil } @@ -126,9 +124,33 @@ func (db *DB) UpdateStorageID(ctx context.Context, prm UpdateStorageIDPrm) (res return res, ErrReadOnlyMode } - err = db.boltDB.Batch(func(tx *bbolt.Tx) error { - return setStorageID(tx, prm.addr, prm.id, true) - }) + for i := 0; i < retryCount; i++ { + err = db.database.Update(func(tx *badger.Txn) error { + return setStorageID(tx, prm.addr, prm.id, true) + }) + if errors.Is(err, badger.ErrConflict) { + time.Sleep(retryTimeout) + continue + } + break + } success = err == nil return res, metaerr.Wrap(err) } + +// setStorageID for existing objects if they were moved from one +// storage location to another. 
+func setStorageID(tx *badger.Txn, addr oid.Address, id []byte, override bool) error {
+	key := smallKey(addr.Container(), addr.Object())
+	if override {
+		return tx.Set(key, id)
+	}
+	v, err := valueOrNil(tx, key)
+	if err != nil {
+		return err
+	}
+	if v == nil {
+		return tx.Set(key, id)
+	}
+	return nil
+}
diff --git a/pkg/local_object_storage/metabase/util.go b/pkg/local_object_storage/metabase/util.go
index 9249ae49b..b38637a2a 100644
--- a/pkg/local_object_storage/metabase/util.go
+++ b/pkg/local_object_storage/metabase/util.go
@@ -3,31 +3,16 @@ package meta
 import (
 	"bytes"
 	"crypto/sha256"
-	"errors"
+	"encoding/binary"
+	"fmt"
 
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 )
 
-var (
-	// graveyardBucketName stores rows with the objects that have been
-	// covered with Tombstone objects. That objects should not be returned
-	// from the node and should not be accepted by the node from other
-	// nodes.
-	graveyardBucketName = []byte{graveyardPrefix}
-	// garbageBucketName stores rows with the objects that should be physically
-	// deleted by the node (Garbage Collector routine).
-	garbageBucketName = []byte{garbagePrefix}
-	toMoveItBucketName = []byte{toMoveItPrefix}
-	containerVolumeBucketName = []byte{containerVolumePrefix}
-	containerCounterBucketName = []byte{containerCountersPrefix}
-
-	zeroValue = []byte{0xFF}
-
-	errInvalidLength = errors.New("invalid length")
-)
+var zeroValue = []byte{0xFF}
 
 // Prefix bytes for database keys. All ids and addresses are encoded in binary
 // unless specified otherwise.
@@ -42,13 +27,13 @@ const (
 	// Key: object address
 	// Value: dummy value
 	garbagePrefix
-	// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
+	// _ Previously used for the bucket containing IDs of objects that are candidates for moving
 	// to another shard.
-	toMoveItPrefix
-	// containerVolumePrefix is used for storing container size estimations.
+	_
+	// containerSizePrefix is used for storing container size estimations.
 	// Key: container ID
 	// Value: container size in bytes as little-endian uint64
-	containerVolumePrefix
+	containerSizePrefix
 	// lockedPrefix is used for storing locked objects information.
 	// Key: container ID
 	// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
@@ -124,6 +109,9 @@ const (
 	// Key: container ID + type
 	// Value: Object id
 	ecInfoPrefix
+
+	// expiredPrefix is used to store expiration info.
+	expiredPrefix
 )
 
 const (
@@ -133,139 +121,589 @@ const (
 	addressKeySize = cidSize + objectKeySize
 )
 
-func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
-	key[0] = prefix
-	cnr.Encode(key[1:])
-	return key[:bucketKeySize]
+func keyPrefix(cnr cid.ID, prefix byte) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = prefix
+	cnr.Encode(result[1:])
+	return result
 }
 
-// primaryBucketName returns <CID>.
-func primaryBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, primaryPrefix, key)
+func keyObject(prefix byte, cnr cid.ID, objID oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize)
+	result[0] = prefix
+	cnr.Encode(result[1:])
+	objID.Encode(result[1+cidSize:])
+	return result
 }
 
-// tombstoneBucketName returns <CID>_TS.
-func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, tombstonePrefix, key)
+func addressFromKey(prefix byte, key []byte) (oid.Address, error) {
+	if len(key) != 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != prefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cont cid.ID
+	if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[1+cidSize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
 }
 
-// smallBucketName returns <CID>_small.
-func smallBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, smallPrefix, key)
+// primaryKeyPrefix returns primaryPrefix_<CID>.
+func primaryKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, primaryPrefix)
 }
 
-// attributeBucketName returns <CID>_attr_<attributeKey>.
-func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
-	key[0] = userAttributePrefix
-	cnr.Encode(key[1:])
-	return append(key[:bucketKeySize], attributeKey...)
+func primaryKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(primaryPrefix, cnr, objID)
 }
 
-// returns <CID> from attributeBucketName result, nil otherwise.
-func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
-	if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
-		return nil
+func addressFromPrimaryKey(v []byte) (oid.Address, error) {
+	return addressFromKey(primaryPrefix, v)
+}
+
+// tombstoneKeyPrefix returns tombstonePrefix_<CID>.
+func tombstoneKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, tombstonePrefix)
+}
+
+func tombstoneKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(tombstonePrefix, cnr, objID)
+}
+
+func addressFromTombstoneKey(v []byte) (oid.Address, error) {
+	return addressFromKey(tombstonePrefix, v)
+}
+
+func garbageKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(garbagePrefix, cnr, objID)
+}
+
+func addressFromGarbageKey(v []byte) (oid.Address, error) {
+	return addressFromKey(garbagePrefix, v)
+}
+
+func graveyardKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(graveyardPrefix, cnr, objID)
+}
+
+func addressFromGraveyardKey(v []byte) (oid.Address, error) {
+	return addressFromKey(graveyardPrefix, v)
+}
+
+func smallKey(cnr cid.ID, obj oid.ID) []byte {
+	return keyObject(smallPrefix, cnr, obj)
+}
+
+// attributeKeyPrefix returns userAttributePrefix_<attributeKey>_<CID>_<attributeValue>.
+func attributeKeyPrefix(cnr cid.ID, attributeKey, attributeValue string) []byte {
+	result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue))
+	result[0] = userAttributePrefix
+	copy(result[1:], []byte(attributeKey))
+	cnr.Encode(result[1+len(attributeKey):])
+	copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
+	return result
+}
+
+// attributeKey returns userAttributePrefix_<attributeKey>_<CID>_<attributeValue>_<OID>.
+func attributeKey(cnr cid.ID, objID oid.ID, attributeKey, attributeValue string) []byte {
+	result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue)+objectKeySize)
+	result[0] = userAttributePrefix
+	copy(result[1:], []byte(attributeKey))
+	cnr.Encode(result[1+len(attributeKey):])
+	copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
+	objID.Encode(result[1+cidSize+len(attributeKey)+len(attributeValue):])
+	return result
+}
+
+// returns attributeValue from attributeKey result, or an error otherwise.
+func attributeValueFromAttributeKey(key []byte, attributeKey string) ([]byte, error) {
+	if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if key[0] != userAttributePrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
+		return nil, errInvalidAttributeKey
 	}
-	return val[1:bucketKeySize]
+	return key[1+len(attributeKey)+cidSize : len(key)-objectKeySize], nil
 }
 
-// payloadHashBucketName returns <CID>_payloadhash.
-func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, payloadHashPrefix, key)
-}
-
-// rootBucketName returns <CID>_root.
-func rootBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, rootPrefix, key)
-}
-
-// ownerBucketName returns <CID>_ownerid.
-func ownerBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, ownerPrefix, key)
-}
-
-// parentBucketName returns <CID>_parent.
-func parentBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, parentPrefix, key)
-}
-
-// splitBucketName returns <CID>_splitid.
-func splitBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, splitPrefix, key)
-}
-
-// ecInfoBucketName returns <CID>_ecinfo.
-func ecInfoBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, ecInfoPrefix, key)
-}
-
-// addressKey returns key for K-V tables when key is a whole address.
-func addressKey(addr oid.Address, key []byte) []byte {
-	addr.Container().Encode(key)
-	addr.Object().Encode(key[cidSize:])
-	return key[:addressKeySize]
-}
-
-// parses object address formed by addressKey.
-func decodeAddressFromKey(dst *oid.Address, k []byte) error {
-	if len(k) != addressKeySize {
-		return errInvalidLength
+func addressFromAttributeKey(key []byte, attributeKey string) (oid.Address, error) {
+	if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
 	}
+	if key[0] != userAttributePrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
+		return oid.Address{}, errInvalidAttributeKey
+	}
+	var cnrID cid.ID
+	if err := cnrID.Decode(key[1+len(attributeKey) : 1+len(attributeKey)+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var objID oid.ID
+	if err := objID.Decode(key[len(key)-objectKeySize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cnrID)
+	result.SetObject(objID)
+	return result, nil
+}
 
+// payloadHashKeyLongPrefix returns payloadHashPrefix_<CID>_hash.
+func payloadHashKeyLongPrefix(cnr cid.ID, hash []byte) []byte {
+	result := make([]byte, 1+cidSize+len(hash))
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], hash)
+	return result
+}
+
+// payloadHashKeyShortPrefix returns payloadHashPrefix_<CID>.
+// payloadHashKeyShortPrefix returns payloadHashPrefix_<CID>.
+func payloadHashKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// payloadHashKey returns payloadHashPrefix_<CID>_hash_<objectID>.
+func payloadHashKey(cnr cid.ID, obj oid.ID, hash []byte) []byte {
+	result := make([]byte, 1+cidSize+len(hash)+objectKeySize)
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], hash)
+	obj.Encode(result[1+cidSize+len(hash):])
+	return result
+}
+
+func addressFromPayloadHashKey(k []byte) (oid.Address, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if k[0] != payloadHashPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
 	var cnr cid.ID
-	if err := cnr.Decode(k[:cidSize]); err != nil {
-		return err
+	if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
 	}
 	var obj oid.ID
-	if err := obj.Decode(k[cidSize:]); err != nil {
-		return err
+	if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
 	}
-	dst.SetObject(obj)
-	dst.SetContainer(cnr)
-	return nil
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
 }
 
-// objectKey returns key for K-V tables when key is an object id.
-func objectKey(obj oid.ID, key []byte) []byte {
-	obj.Encode(key)
-	return key[:objectKeySize]
+func payloadHashFromPayloadHashKey(k []byte) ([]byte, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if k[0] != payloadHashPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+
+	return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
+}
+
+// rootKeyPrefix returns rootPrefix_<CID>.
+func rootKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, rootPrefix)
+}
+
+func rootKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(rootPrefix, cnr, objID)
+}
+
+func addressFromRootKey(key []byte) (oid.Address, error) {
+	return addressFromKey(rootPrefix, key)
+}
+
+// ownerKey returns ownerPrefix_<CID>_owner_<objectID>.
+func ownerKey(cnr cid.ID, obj oid.ID, owner []byte) []byte {
+	result := make([]byte, 1+cidSize+len(owner)+objectKeySize)
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], owner)
+	obj.Encode(result[1+cidSize+len(owner):])
+	return result
+}
+
+// ownerKeyShortPrefix returns ownerPrefix_<CID>.
+func ownerKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
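+// Usage sketch (hypothetical caller, for illustration only): with these
+// helpers, listing the objects of a single owner becomes a plain badger
+// prefix scan, e.g.
+//
+//	opts := badger.DefaultIteratorOptions
+//	opts.Prefix = ownerKeyLongPrefix(cnr, owner) // defined below
+//	it := txn.NewIterator(opts)
+//	defer it.Close()
+//	for it.Rewind(); it.Valid(); it.Next() {
+//		addr, err := addressFromOwnerKey(it.Item().KeyCopy(nil))
+//		// ...
+//	}
+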
+// ownerKeyLongPrefix returns ownerPrefix_<CID>_owner.
+func ownerKeyLongPrefix(cnr cid.ID, owner []byte) []byte {
+	result := make([]byte, 1+cidSize+len(owner))
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], owner)
+	return result
+}
+
+func addressFromOwnerKey(k []byte) (oid.Address, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if k[0] != ownerPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+
+	var obj oid.ID
+	if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
+
+func ownerFromOwnerKey(k []byte) ([]byte, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if k[0] != ownerPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
+}
+
+// ecInfoLongKeyPrefix returns ecInfoPrefix_<CID>_<parent object ID>.
+func ecInfoLongKeyPrefix(cnr cid.ID, parent oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize)
+	result[0] = ecInfoPrefix
+	cnr.Encode(result[1:])
+	parent.Encode(result[1+cidSize:])
+	return result
+}
+
+// ecInfoShortKeyPrefix returns ecInfoPrefix_<CID>.
+func ecInfoShortKeyPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = ecInfoPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+func ecInfoKey(cnr cid.ID, parent, chunk oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
+	result[0] = ecInfoPrefix
+	cnr.Encode(result[1:])
+	parent.Encode(result[1+cidSize:])
+	chunk.Encode(result[1+cidSize+objectKeySize:])
+	return result
+}
+
+func addressOfParentFromECInfoKey(key []byte) (oid.Address, error) {
+	return addressFromKey(ecInfoPrefix, key[:1+cidSize+objectKeySize])
+}
+
+func addressOfChunkFromECInfoKey(key []byte) (oid.Address, error) {
+	if len(key) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != ecInfoPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cont cid.ID
+	if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
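+// Example (hypothetical IDs): an EC parent P split into chunks C1..Cn in
+// container X is stored as one key ecInfoPrefix|X|P|Ci per chunk, so a scan
+// with ecInfoLongKeyPrefix(X, P) enumerates every chunk of P, while
+// ecInfoShortKeyPrefix(X) walks all EC relations in the container.
+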
+// parentKeyShortPrefix returns parentPrefix_<CID>.
+func parentKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+func addressOfParentFromParentKey(key []byte) (oid.Address, error) {
+	return addressFromKey(parentPrefix, key[:1+cidSize+objectKeySize])
+}
+
+func addressOfTargetFromParentKey(key []byte) (oid.Address, error) {
+	if len(key) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != parentPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cont cid.ID
+	if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
+// parentKeyLongPrefix returns parentPrefix_<CID>_<parent object ID>.
+func parentKeyLongPrefix(cnr cid.ID, parentObj oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	parentObj.Encode(result[1+cidSize:])
+	return result
+}
+
+func parentKey(cnr cid.ID, parentObj, obj oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	parentObj.Encode(result[1+cidSize:])
+	obj.Encode(result[1+cidSize+objectKeySize:])
+	return result
+}
+
+// splitKeyLongPrefix returns splitPrefix_<CID>_splitID.
+func splitKeyLongPrefix(cnr cid.ID, splitID []byte) []byte {
+	result := make([]byte, 1+cidSize+len(splitID))
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], splitID)
+	return result
+}
+
+// splitKeyShortPrefix returns splitPrefix_<CID>.
+func splitKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// splitKey returns splitPrefix_<CID>_splitID_<objectID>.
+func splitKey(cnr cid.ID, obj oid.ID, splitID []byte) []byte {
+	result := make([]byte, 1+cidSize+len(splitID)+objectKeySize)
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], splitID)
+	obj.Encode(result[1+cidSize+len(splitID):])
+	return result
+}
+
+func addressFromSplitKey(key []byte) (oid.Address, error) {
+	if len(key) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != splitPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+
+	var obj oid.ID
+	if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
+
+func splitIDFromSplitKey(key []byte) ([]byte, error) {
+	if len(key) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if key[0] != splitPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+
+	return bytes.Clone(key[1+cidSize : len(key)-objectKeySize]), nil
+}
+
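+// Example (hypothetical split): members of one split chain share the prefix
+// splitPrefix|CID|splitID, so splitKeyLongPrefix(cnr, splitID) scans a
+// single chain, splitKeyShortPrefix(cnr) scans every chain in the container,
+// and the member object ID is recovered from the fixed-size tail of each key.
+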
+// lockersKeyPrefix returns the prefix of keys of LOCK objects for the specified container.
+func lockersKeyPrefix(idCnr cid.ID) []byte {
+	return keyPrefix(idCnr, lockersPrefix)
+}
+
+func lockersKey(cnrID cid.ID, objID oid.ID) []byte {
+	return keyObject(lockersPrefix, cnrID, objID)
+}
+
+func addressFromLockersKey(v []byte) (oid.Address, error) {
+	return addressFromKey(lockersPrefix, v)
+}
+
+// lockedKeyLongPrefix returns lockedPrefix_<CID>_<objectID>.
+func lockedKeyLongPrefix(cnrID cid.ID, objID oid.ID) []byte {
+	prefix := make([]byte, 1+cidSize+objectKeySize)
+	prefix[0] = lockedPrefix
+	cnrID.Encode(prefix[1:])
+	objID.Encode(prefix[1+cidSize:])
+	return prefix
+}
+
+// lockedKeyShortPrefix returns lockedPrefix_<CID>.
+func lockedKeyShortPrefix(cnrID cid.ID) []byte {
+	prefix := make([]byte, 1+cidSize)
+	prefix[0] = lockedPrefix
+	cnrID.Encode(prefix[1:])
+	return prefix
+}
+
+// lockedKey returns lockedPrefix_<CID>_<objectID>_<lockerObjectID>.
+func lockedKey(cnrID cid.ID, objID, lockerObjID oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
+	result[0] = lockedPrefix
+	cnrID.Encode(result[1:])
+	objID.Encode(result[1+cidSize:])
+	lockerObjID.Encode(result[1+cidSize+objectKeySize:])
+	return result
+}
+
+func lockerObjectIDFromLockedKey(k []byte) (oid.ID, error) {
+	if len(k) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.ID{}, errInvalidKeyLenght
+	}
+	if k[0] != lockedPrefix {
+		return oid.ID{}, errInvalidKeyPrefix
+	}
+	var result oid.ID
+	if err := result.Decode(k[1+cidSize+objectKeySize:]); err != nil {
+		return oid.ID{}, fmt.Errorf("failed to decode locker object ID: %w", err)
+	}
+	return result, nil
+}
+
+func objectIDFromLockedKey(k []byte) (oid.ID, error) {
+	if len(k) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.ID{}, errInvalidKeyLenght
+	}
+	if k[0] != lockedPrefix {
+		return oid.ID{}, errInvalidKeyPrefix
+	}
+	var result oid.ID
+	if err := result.Decode(k[1+cidSize : 1+cidSize+objectKeySize]); err != nil {
+		return oid.ID{}, fmt.Errorf("failed to decode locked object ID: %w", err)
+	}
+	return result, nil
 }
 
 // if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular.
 //
 // firstIrregularObjectType(tx, cnr, obj) usage allows getting object type.
-func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type {
-	if len(objs) == 0 {
-		panic("empty object list in firstIrregularObjectType")
-	}
-
-	var keys [2][1 + cidSize]byte
-
-	irregularTypeBuckets := [...]struct {
-		typ  objectSDK.Type
-		name []byte
-	}{
-		{objectSDK.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
-		{objectSDK.TypeLock, bucketNameLockers(idCnr, keys[1][:])},
-	}
-
-	for i := range objs {
-		for j := range irregularTypeBuckets {
-			if inBucket(tx, irregularTypeBuckets[j].name, objs[i]) {
-				return irregularTypeBuckets[j].typ
-			}
+func firstIrregularObjectType(tx *badger.Txn, idCnr cid.ID, objs ...oid.ID) (objectSDK.Type, error) {
+	for _, objID := range objs {
+		key := tombstoneKey(idCnr, objID)
+		v, err := valueOrNil(tx, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeTombstone, nil
+		}
+		key = lockersKey(idCnr, objID)
+		v, err = valueOrNil(tx, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeLock, nil
 		}
 	}
 
-	return objectSDK.TypeRegular
+	return objectSDK.TypeRegular, nil
 }
 
 // return true if provided object is of LOCK type.
-func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool { - return inBucket(tx, - bucketNameLockers(idCnr, make([]byte, bucketKeySize)), - objectKey(obj, make([]byte, objectKeySize))) +func isLockObject(tx *badger.Txn, idCnr cid.ID, obj oid.ID) (bool, error) { + key := lockersKey(idCnr, obj) + v, err := valueOrNil(tx, key) + if err != nil { + return false, err + } + return v != nil, nil +} + +func expiredKey(cnr cid.ID, obj oid.ID, epoch uint64) []byte { + result := make([]byte, 1+8+cidSize+objectKeySize) + result[0] = expiredPrefix + // BigEndian is important for iteration order + binary.BigEndian.PutUint64(result[1:1+8], epoch) + cnr.Encode(result[1+8 : 1+8+cidSize]) + obj.Encode(result[1+8+cidSize:]) + return result +} + +func expirationEpochFromExpiredKey(key []byte) (uint64, error) { + if len(key) != 1+8+cidSize+objectKeySize { + return 0, errInvalidKeyLenght + } + if key[0] != expiredPrefix { + return 0, errInvalidKeyPrefix + } + // BigEndian is important for iteration order + return binary.BigEndian.Uint64(key[1 : 1+8]), nil +} + +func addressFromExpiredKey(key []byte) (oid.Address, error) { + if len(key) != 1+8+cidSize+objectKeySize { + return oid.Address{}, errInvalidKeyLenght + } + if key[0] != expiredPrefix { + return oid.Address{}, errInvalidKeyPrefix + } + var cnr cid.ID + if err := cnr.Decode(key[1+8 : 1+8+cidSize]); err != nil { + return oid.Address{}, err + } + + var obj oid.ID + if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil { + return oid.Address{}, err + } + + var result oid.Address + result.SetObject(obj) + result.SetContainer(cnr) + return result, nil } diff --git a/pkg/local_object_storage/metabase/version.go b/pkg/local_object_storage/metabase/version.go index 5748b64ee..69d933ad6 100644 --- a/pkg/local_object_storage/metabase/version.go +++ b/pkg/local_object_storage/metabase/version.go @@ -5,7 +5,7 @@ import ( "fmt" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr" - "go.etcd.io/bbolt" + "github.com/dgraph-io/badger/v4" ) // version contains current metabase version. @@ -18,12 +18,14 @@ var versionKey = []byte("version") // the current code version. 
var ErrOutdatedVersion = logicerr.New("invalid version, resynchronization is required")
 
-func checkVersion(tx *bbolt.Tx, initialized bool) error {
-	var knownVersion bool
+func (db *DB) checkVersion() error {
+	return db.database.Update(func(txn *badger.Txn) error {
+		var knownVersion bool
 
-	b := tx.Bucket(shardInfoBucket)
-	if b != nil {
-		data := b.Get(versionKey)
+		data, err := valueOrNil(txn, shardInfoKey(versionKey))
+		if err != nil {
+			return err
+		}
 
 		if len(data) == 8 {
 			knownVersion = true
@@ -32,30 +34,32 @@
 				return fmt.Errorf("%w: expected=%d, stored=%d", ErrOutdatedVersion, version, stored)
 			}
 		}
-	}
 
-	if !initialized {
-		// new database, write version
-		return updateVersion(tx, version)
-	} else if !knownVersion {
-		// db is initialized but no version
-		// has been found; that could happen
-		// if the db is corrupted or the version
-		// is <2 (is outdated and requires resync
-		// anyway)
-		return ErrOutdatedVersion
-	}
+		if !db.initialized {
+			// new database, write version
+			return updateVersion(txn, version)
+		} else if !knownVersion {
+			// db is initialized but no version
+			// has been found; that could happen
+			// if the db is corrupted or the version
+			// is <2 (is outdated and requires resync
+			// anyway)
+			return ErrOutdatedVersion
+		}
 
-	return nil
+		return nil
+	})
 }
 
-func updateVersion(tx *bbolt.Tx, version uint64) error {
+// shardInfoKey prepends shardInfoPrefix to the given key; the result must be
+// one byte longer than the input so the prefix does not overwrite key data.
+func shardInfoKey(key []byte) []byte {
+	result := make([]byte, 1+len(key))
+	result[0] = shardInfoPrefix
+	copy(result[1:], key)
+	return result
+}
+
+func updateVersion(tx *badger.Txn, version uint64) error {
 	data := make([]byte, 8)
 	binary.LittleEndian.PutUint64(data, version)
-
-	b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
-	if err != nil {
-		return fmt.Errorf("can't create auxiliary bucket: %w", err)
-	}
-	return b.Put(versionKey, data)
+	return tx.Set(shardInfoKey(versionKey), data)
 }
diff --git a/pkg/local_object_storage/metabase/version_test.go b/pkg/local_object_storage/metabase/version_test.go
index b2af428ff..a7fe04517 100644
--- a/pkg/local_object_storage/metabase/version_test.go
+++ b/pkg/local_object_storage/metabase/version_test.go
@@ -9,8 +9,8 @@ import (
 	"testing"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
+	"github.com/dgraph-io/badger/v4"
 	"github.com/stretchr/testify/require"
-	"go.etcd.io/bbolt"
 )
 
 type epochStateImpl struct{}
@@ -27,19 +27,20 @@ func TestVersion(t *testing.T) {
 			WithPermissions(0o600), WithEpochState(epochStateImpl{}))
 	}
 	check := func(t *testing.T, db *DB) {
-		require.NoError(t, db.boltDB.View(func(tx *bbolt.Tx) error {
-			b := tx.Bucket(shardInfoBucket)
-			if b == nil {
-				return errors.New("shard info bucket not found")
+		require.NoError(t, db.database.View(func(tx *badger.Txn) error {
+			item, err := tx.Get(shardInfoKey(versionKey))
+			if err != nil {
+				return err
 			}
-			data := b.Get(versionKey)
-			if len(data) != 8 {
-				return errors.New("invalid version data")
-			}
-			if stored := binary.LittleEndian.Uint64(data); stored != version {
-				return fmt.Errorf("invalid version: %d != %d", stored, version)
-			}
-			return nil
+			return item.Value(func(val []byte) error {
+				if len(val) != 8 {
+					return errors.New("invalid version data")
+				}
+				if stored := binary.LittleEndian.Uint64(val); stored != version {
+					return fmt.Errorf("invalid version: %d != %d", stored, version)
+				}
+				return nil
+			})
 		}))
 	}
 	t.Run("simple", func(t *testing.T) {
@@ -68,7 +69,7 @@ func TestVersion(t *testing.T) {
 	t.Run("invalid version", func(t *testing.T) {
 		db :=
newDB(t) require.NoError(t, db.Open(context.Background(), mode.ReadWrite)) - require.NoError(t, db.boltDB.Update(func(tx *bbolt.Tx) error { + require.NoError(t, db.database.Update(func(tx *badger.Txn) error { return updateVersion(tx, version+1) })) require.NoError(t, db.Close()) @@ -79,7 +80,7 @@ func TestVersion(t *testing.T) { t.Run("reset", func(t *testing.T) { require.NoError(t, db.Open(context.Background(), mode.ReadWrite)) - require.NoError(t, db.Reset()) + require.NoError(t, db.Reset(context.Background())) check(t, db) require.NoError(t, db.Close()) }) diff --git a/pkg/local_object_storage/shard/container.go b/pkg/local_object_storage/shard/container.go index 364649b50..e719f7814 100644 --- a/pkg/local_object_storage/shard/container.go +++ b/pkg/local_object_storage/shard/container.go @@ -26,7 +26,7 @@ func (r ContainerSizeRes) Size() uint64 { return r.size } -func (s *Shard) ContainerSize(prm ContainerSizePrm) (ContainerSizeRes, error) { +func (s *Shard) ContainerSize(ctx context.Context, prm ContainerSizePrm) (ContainerSizeRes, error) { s.m.RLock() defer s.m.RUnlock() @@ -34,7 +34,7 @@ func (s *Shard) ContainerSize(prm ContainerSizePrm) (ContainerSizeRes, error) { return ContainerSizeRes{}, ErrDegradedMode } - size, err := s.metaBase.ContainerSize(prm.cnr) + size, err := s.metaBase.ContainerSize(ctx, prm.cnr) if err != nil { return ContainerSizeRes{}, fmt.Errorf("could not get container size: %w", err) } diff --git a/pkg/local_object_storage/shard/control.go b/pkg/local_object_storage/shard/control.go index 90d7afdd4..53b4ce038 100644 --- a/pkg/local_object_storage/shard/control.go +++ b/pkg/local_object_storage/shard/control.go @@ -194,17 +194,19 @@ func (s *Shard) refillMetabase(ctx context.Context) error { } }() - err := s.metaBase.Reset() + err := s.metaBase.Reset(ctx) if err != nil { return fmt.Errorf("could not reset metabase: %w", err) } + s.log.Warn("counting objects...") withCount := true totalObjects, err := s.blobStor.ObjectsCount(ctx) if err != nil { s.log.Warn(logs.EngineRefillFailedToGetObjectsCount, zap.Error(err)) withCount = false } + s.log.Warn("counting objects completed") eg, egCtx := errgroup.WithContext(ctx) if s.cfg.refillMetabaseWorkersCount > 0 { @@ -213,6 +215,7 @@ func (s *Shard) refillMetabase(ctx context.Context) error { var completedCount uint64 var metricGuard sync.Mutex + s.log.Warn("iterating objects") itErr := blobstor.IterateBinaryObjects(egCtx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error { eg.Go(func() error { var success bool @@ -248,7 +251,7 @@ func (s *Shard) refillMetabase(ctx context.Context) error { return fmt.Errorf("could not put objects to the meta: %w", err) } - err = s.metaBase.SyncCounters() + err = s.metaBase.SyncCounters(ctx) if err != nil { return fmt.Errorf("could not sync object counters: %w", err) } @@ -410,7 +413,7 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error { // config after the node was updated. 
err = s.refillMetabase(ctx) } else { - err = s.metaBase.Init() + err = s.metaBase.Init2(ctx) } if err != nil { s.log.Error(logs.ShardCantInitializeMetabaseMoveToADegradedreadonlyMode, zap.Error(err)) diff --git a/pkg/local_object_storage/shard/control_test.go b/pkg/local_object_storage/shard/control_test.go index 44fee1636..75ea14ec7 100644 --- a/pkg/local_object_storage/shard/control_test.go +++ b/pkg/local_object_storage/shard/control_test.go @@ -3,7 +3,6 @@ package shard import ( "context" "fmt" - "io/fs" "math" "os" "path/filepath" @@ -28,7 +27,6 @@ import ( oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test" objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test" "github.com/stretchr/testify/require" - "go.etcd.io/bbolt" ) type objAddr struct { @@ -37,6 +35,7 @@ type objAddr struct { } func TestShardOpen(t *testing.T) { + t.Skip("badger does not support open file") t.Parallel() dir := t.TempDir() @@ -49,13 +48,15 @@ func TestShardOpen(t *testing.T) { )) var allowedMode atomic.Int64 - openFileMetabase := func(p string, f int, perm fs.FileMode) (*os.File, error) { - const modeMask = os.O_RDONLY | os.O_RDWR | os.O_WRONLY - if int64(f&modeMask) == allowedMode.Load() { - return os.OpenFile(p, f, perm) + /* + openFileMetabase := func(p string, f int, perm fs.FileMode) (*os.File, error) { + const modeMask = os.O_RDONLY | os.O_RDWR | os.O_WRONLY + if int64(f&modeMask) == allowedMode.Load() { + return os.OpenFile(p, f, perm) + } + return nil, fs.ErrPermission } - return nil, fs.ErrPermission - } + */ wcOpts := []writecache.Option{ writecache.WithPath(filepath.Join(dir, "wc")), @@ -72,7 +73,7 @@ func TestShardOpen(t *testing.T) { WithMetaBaseOptions( meta.WithPath(metaPath), meta.WithEpochState(epochState{}), - meta.WithBoltDBOptions(&bbolt.Options{OpenFile: openFileMetabase}), + // meta.WithBoltDBOptions(&bbolt.Options{OpenFile: openFileMetabase}), ), WithPiloramaOptions( pilorama.WithPath(filepath.Join(dir, "pilorama"))), @@ -352,7 +353,7 @@ func TestRefillMetabase(t *testing.T) { checkTombMembers(true) checkLocked(t, cnrLocked, locked) - c, err := sh.metaBase.ObjectCounters() + c, err := sh.metaBase.ObjectCounters(context.Background()) require.NoError(t, err) phyBefore := c.Phy @@ -388,7 +389,7 @@ func TestRefillMetabase(t *testing.T) { err = sh.refillMetabase(context.Background()) require.NoError(t, err) - c, err = sh.metaBase.ObjectCounters() + c, err = sh.metaBase.ObjectCounters(context.Background()) require.NoError(t, err) require.Equal(t, phyBefore, c.Phy) diff --git a/pkg/local_object_storage/shard/count.go b/pkg/local_object_storage/shard/count.go index b3bc6a30b..c34193376 100644 --- a/pkg/local_object_storage/shard/count.go +++ b/pkg/local_object_storage/shard/count.go @@ -23,7 +23,7 @@ func (s *Shard) LogicalObjectsCount(ctx context.Context) (uint64, error) { return 0, ErrDegradedMode } - cc, err := s.metaBase.ObjectCounters() + cc, err := s.metaBase.ObjectCounters(ctx) if err != nil { return 0, err } diff --git a/pkg/local_object_storage/shard/delete.go b/pkg/local_object_storage/shard/delete.go index 44f6c6b48..8caf588de 100644 --- a/pkg/local_object_storage/shard/delete.go +++ b/pkg/local_object_storage/shard/delete.go @@ -10,6 +10,7 @@ import ( tracingPkg "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/tracing" "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client" + cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" oid 
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -134,19 +135,25 @@ func (s *Shard) deleteFromBlobstor(ctx context.Context, addr oid.Address) error } func (s *Shard) deleteFromMetabase(ctx context.Context, addr oid.Address) error { - var delPrm meta.DeletePrm - delPrm.SetAddresses(addr) - - res, err := s.metaBase.Delete(ctx, delPrm) + res, err := s.metaBase.Delete(ctx, meta.DeletePrm{ + Address: addr, + }) if err != nil { return err } - s.decObjectCounterBy(physical, res.PhyCount()) - s.decObjectCounterBy(logical, res.LogicCount()) - s.decObjectCounterBy(user, res.UserCount()) - s.decContainerObjectCounter(res.RemovedByCnrID()) - s.addToContainerSize(addr.Container().EncodeToString(), -int64(res.LogicSize())) - s.addToPayloadSize(-int64(res.PhySize())) + s.decObjectCounterBy(physical, res.PhyCount) + s.decObjectCounterBy(logical, res.LogicCount) + s.decObjectCounterBy(user, res.UserCount) + containerCounter := map[cid.ID]meta.ObjectCounters{ + addr.Container(): { + Logic: res.LogicCount, + Phy: res.PhyCount, + User: res.UserCount, + }, + } + s.decContainerObjectCounter(containerCounter) + s.addToContainerSize(addr.Container().EncodeToString(), -int64(res.LogicSize)) + s.addToPayloadSize(-int64(res.PhySize)) return nil } diff --git a/pkg/local_object_storage/shard/gc.go b/pkg/local_object_storage/shard/gc.go index d605746e8..18d7aecff 100644 --- a/pkg/local_object_storage/shard/gc.go +++ b/pkg/local_object_storage/shard/gc.go @@ -678,7 +678,7 @@ func (s *Shard) HandleExpiredLocks(ctx context.Context, epoch uint64, lockers [] if s.GetMode().NoMetabase() { return } - unlocked, err := s.metaBase.FreeLockedBy(lockers) + unlocked, err := s.metaBase.FreeLockedBy(ctx, lockers) if err != nil { s.log.Warn(logs.ShardFailureToUnlockObjects, zap.String("error", err.Error()), @@ -730,12 +730,12 @@ func (s *Shard) inhumeUnlockedIfExpired(ctx context.Context, epoch uint64, unloc } // HandleDeletedLocks unlocks all objects which were locked by lockers. 
-func (s *Shard) HandleDeletedLocks(lockers []oid.Address) { +func (s *Shard) HandleDeletedLocks(ctx context.Context, lockers []oid.Address) { if s.GetMode().NoMetabase() { return } - _, err := s.metaBase.FreeLockedBy(lockers) + _, err := s.metaBase.FreeLockedBy(ctx, lockers) if err != nil { s.log.Warn(logs.ShardFailureToUnlockObjects, zap.String("error", err.Error()), diff --git a/pkg/local_object_storage/shard/gc_internal_test.go b/pkg/local_object_storage/shard/gc_internal_test.go index 3993593ad..a3a14fc78 100644 --- a/pkg/local_object_storage/shard/gc_internal_test.go +++ b/pkg/local_object_storage/shard/gc_internal_test.go @@ -61,8 +61,8 @@ func Test_ObjectNotFoundIfNotDeletedFromMetabase(t *testing.T) { meta.WithEpochState(epochState{}), ), WithPiloramaOptions(pilorama.WithPath(filepath.Join(rootPath, "pilorama"))), - WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) { - sh.HandleDeletedLocks(addresses) + WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) { + sh.HandleDeletedLocks(ctx, addresses) }), WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) { sh.HandleExpiredLocks(ctx, epoch, a) diff --git a/pkg/local_object_storage/shard/id.go b/pkg/local_object_storage/shard/id.go index 2fe68d270..04d053376 100644 --- a/pkg/local_object_storage/shard/id.go +++ b/pkg/local_object_storage/shard/id.go @@ -35,7 +35,7 @@ func (s *Shard) UpdateID() (err error) { var idFromMetabase []byte modeDegraded := s.GetMode().NoMetabase() if !modeDegraded { - if idFromMetabase, err = s.metaBase.GetShardID(mode.ReadOnly); err != nil { + if idFromMetabase, err = s.metaBase.GetShardID(mode.ReadWrite); err != nil { err = fmt.Errorf("failed to read shard id from metabase: %w", err) } } diff --git a/pkg/local_object_storage/shard/lock_test.go b/pkg/local_object_storage/shard/lock_test.go index 9ce95feb1..9a7afd2fd 100644 --- a/pkg/local_object_storage/shard/lock_test.go +++ b/pkg/local_object_storage/shard/lock_test.go @@ -53,8 +53,8 @@ func TestShard_Lock(t *testing.T) { meta.WithPath(filepath.Join(rootPath, "meta")), meta.WithEpochState(epochState{}), ), - WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) { - sh.HandleDeletedLocks(addresses) + WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) { + sh.HandleDeletedLocks(ctx, addresses) }), } diff --git a/pkg/local_object_storage/shard/shard.go b/pkg/local_object_storage/shard/shard.go index 94f22feb5..a574e7510 100644 --- a/pkg/local_object_storage/shard/shard.go +++ b/pkg/local_object_storage/shard/shard.go @@ -439,7 +439,7 @@ func (s *Shard) updateMetrics(ctx context.Context) { return } - cc, err := s.metaBase.ObjectCounters() + cc, err := s.metaBase.ObjectCounters(ctx) if err != nil { s.log.Warn(logs.ShardMetaObjectCounterRead, zap.Error(err), @@ -452,23 +452,16 @@ func (s *Shard) updateMetrics(ctx context.Context) { s.setObjectCounterBy(logical, cc.Logic) s.setObjectCounterBy(user, cc.User) - cnrList, err := s.metaBase.Containers(ctx) + cnrList, err := s.metaBase.ContainerSizes(ctx) if err != nil { - s.log.Warn(logs.ShardMetaCantReadContainerList, zap.Error(err)) + s.log.Warn(logs.ShardMetaCantReadContainerSize, zap.Error(err)) return } var totalPayload uint64 - for i := range cnrList { - size, err := s.metaBase.ContainerSize(cnrList[i]) - if err != nil { - s.log.Warn(logs.ShardMetaCantReadContainerSize, - zap.String("cid", cnrList[i].EncodeToString()), - zap.Error(err)) - continue - } - 
s.addToContainerSize(cnrList[i].EncodeToString(), int64(size)) + for cnrID, size := range cnrList { + s.addToContainerSize(cnrID.EncodeToString(), int64(size)) totalPayload += size } diff --git a/pkg/local_object_storage/shard/shard_test.go b/pkg/local_object_storage/shard/shard_test.go index 73ba2e82b..f9ee34488 100644 --- a/pkg/local_object_storage/shard/shard_test.go +++ b/pkg/local_object_storage/shard/shard_test.go @@ -89,8 +89,8 @@ func newCustomShard(t testing.TB, enableWriteCache bool, o shardOptions) *Shard WithPiloramaOptions(pilorama.WithPath(filepath.Join(o.rootPath, "pilorama"))), WithWriteCache(enableWriteCache), WithWriteCacheOptions(o.wcOpts), - WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) { - sh.HandleDeletedLocks(addresses) + WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) { + sh.HandleDeletedLocks(ctx, addresses) }), WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) { sh.HandleExpiredLocks(ctx, epoch, a)