forked from TrueCloudLab/frostfs-node
[#1099] metabase: Change engine to badger
Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
parent
bbe95dac8b
commit
d8c9159853
64 changed files with 4322 additions and 3500 deletions
|
@ -1,13 +1,10 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
common "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-lens/internal"
|
||||
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"github.com/spf13/cobra"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -38,10 +35,6 @@ func init() {
|
|||
func openMeta(cmd *cobra.Command) *meta.DB {
|
||||
db := meta.New(
|
||||
meta.WithPath(vPath),
|
||||
meta.WithBoltDBOptions(&bbolt.Options{
|
||||
ReadOnly: true,
|
||||
Timeout: 100 * time.Millisecond,
|
||||
}),
|
||||
meta.WithEpochState(epochState{}),
|
||||
)
|
||||
common.ExitOnErr(cmd, common.Errf("could not open metabase: %w", db.Open(cmd.Context(), mode.ReadOnly)))
|
||||
|
|
|
@ -76,7 +76,6 @@ import (
|
|||
"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
|
||||
neogoutil "github.com/nspcc-dev/neo-go/pkg/util"
|
||||
"github.com/panjf2000/ants/v2"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
"google.golang.org/grpc"
|
||||
|
@ -130,8 +129,15 @@ type shardCfg struct {
|
|||
metaCfg struct {
|
||||
path string
|
||||
perm fs.FileMode
|
||||
maxBatchSize int
|
||||
maxBatchDelay time.Duration
|
||||
noSync bool
|
||||
verbose bool
|
||||
memtablesCount int
|
||||
valueThreshold int64
|
||||
valueLogFileSize int64
|
||||
indexCacheSize int64
|
||||
numCompactors int
|
||||
gcInterval time.Duration
|
||||
gcDiscardRatio float64
|
||||
}
|
||||
|
||||
subStorages []subStorageCfg
|
||||
|
@ -341,9 +347,16 @@ func (a *applicationConfiguration) setMetabaseConfig(newConfig *shardCfg, oldCon
|
|||
m := &newConfig.metaCfg
|
||||
|
||||
m.path = metabaseCfg.Path()
|
||||
m.perm = metabaseCfg.BoltDB().Perm()
|
||||
m.maxBatchDelay = metabaseCfg.BoltDB().MaxBatchDelay()
|
||||
m.maxBatchSize = metabaseCfg.BoltDB().MaxBatchSize()
|
||||
m.perm = metabaseCfg.Perm()
|
||||
m.noSync = metabaseCfg.NoSync()
|
||||
m.verbose = metabaseCfg.Verbose()
|
||||
m.memtablesCount = metabaseCfg.MemtablesCount()
|
||||
m.valueThreshold = metabaseCfg.ValueThreshold()
|
||||
m.valueLogFileSize = metabaseCfg.ValueLogFileSize()
|
||||
m.indexCacheSize = metabaseCfg.IndexCacheSize()
|
||||
m.numCompactors = metabaseCfg.CompactorsCount()
|
||||
m.gcDiscardRatio = float64(metabaseCfg.GCDiscardPercent()) / 100.0
|
||||
m.gcInterval = metabaseCfg.GCInterval()
|
||||
}
|
||||
|
||||
func (a *applicationConfiguration) setGCConfig(newConfig *shardCfg, oldConfig *shardconfig.Config) {
|
||||
|
@ -980,13 +993,17 @@ func (c *cfg) getShardOpts(ctx context.Context, shCfg shardCfg) shardOptsWithID
|
|||
mbOptions := []meta.Option{
|
||||
meta.WithPath(shCfg.metaCfg.path),
|
||||
meta.WithPermissions(shCfg.metaCfg.perm),
|
||||
meta.WithMaxBatchSize(shCfg.metaCfg.maxBatchSize),
|
||||
meta.WithMaxBatchDelay(shCfg.metaCfg.maxBatchDelay),
|
||||
meta.WithBoltDBOptions(&bbolt.Options{
|
||||
Timeout: 100 * time.Millisecond,
|
||||
}),
|
||||
meta.WithLogger(c.log),
|
||||
meta.WithEpochState(c.cfgNetmap.state),
|
||||
meta.WithNoSync(shCfg.metaCfg.noSync),
|
||||
meta.WithVerbose(shCfg.metaCfg.verbose),
|
||||
meta.WithMemtablesCount(shCfg.metaCfg.memtablesCount),
|
||||
meta.WithValueThreshold(shCfg.metaCfg.valueThreshold),
|
||||
meta.WithValueLogFileSize(shCfg.metaCfg.valueLogFileSize),
|
||||
meta.WithIndexCacheSize(shCfg.metaCfg.indexCacheSize),
|
||||
meta.WithNumCompactors(shCfg.metaCfg.numCompactors),
|
||||
meta.WithGCInterval(shCfg.metaCfg.gcInterval),
|
||||
meta.WithGCDiscardRatio(shCfg.metaCfg.gcDiscardRatio),
|
||||
}
|
||||
if c.metricsCollector != nil {
|
||||
mbOptions = append(mbOptions, meta.WithMetrics(lsmetrics.NewMetabaseMetrics(shCfg.metaCfg.path, c.metricsCollector.MetabaseMetrics())))
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
blobovniczaconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/blobstor/blobovnicza"
|
||||
fstreeconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/blobstor/fstree"
|
||||
gcconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/gc"
|
||||
metabaseconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/metabase"
|
||||
piloramaconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/pilorama"
|
||||
configtest "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/test"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
|
@ -80,9 +81,16 @@ func TestEngineSection(t *testing.T) {
|
|||
require.EqualValues(t, 3221225472, wc.SizeLimit())
|
||||
|
||||
require.Equal(t, "tmp/0/meta", meta.Path())
|
||||
require.Equal(t, fs.FileMode(0o644), meta.BoltDB().Perm())
|
||||
require.Equal(t, 100, meta.BoltDB().MaxBatchSize())
|
||||
require.Equal(t, 10*time.Millisecond, meta.BoltDB().MaxBatchDelay())
|
||||
require.Equal(t, fs.FileMode(0o644), meta.Perm())
|
||||
require.Equal(t, true, meta.NoSync())
|
||||
require.Equal(t, true, meta.Verbose())
|
||||
require.Equal(t, 10, meta.MemtablesCount())
|
||||
require.Equal(t, int64(1024), meta.ValueThreshold())
|
||||
require.Equal(t, int64(512*1024*1024), meta.ValueLogFileSize())
|
||||
require.Equal(t, int64(10*1024*1024), meta.IndexCacheSize())
|
||||
require.Equal(t, 10, meta.CompactorsCount())
|
||||
require.Equal(t, 20*time.Minute, meta.GCInterval())
|
||||
require.Equal(t, 60, meta.GCDiscardPercent())
|
||||
|
||||
require.Equal(t, true, sc.Compress())
|
||||
require.Equal(t, []string{"audio/*", "video/*"}, sc.UncompressableContentTypes())
|
||||
|
@ -137,9 +145,16 @@ func TestEngineSection(t *testing.T) {
|
|||
require.EqualValues(t, 4294967296, wc.SizeLimit())
|
||||
|
||||
require.Equal(t, "tmp/1/meta", meta.Path())
|
||||
require.Equal(t, fs.FileMode(0o644), meta.BoltDB().Perm())
|
||||
require.Equal(t, 200, meta.BoltDB().MaxBatchSize())
|
||||
require.Equal(t, 20*time.Millisecond, meta.BoltDB().MaxBatchDelay())
|
||||
require.Equal(t, fs.FileMode(0o644), meta.Perm())
|
||||
require.Equal(t, false, meta.NoSync())
|
||||
require.Equal(t, false, meta.Verbose())
|
||||
require.Equal(t, metabaseconfig.MemtablesCountDefault, meta.MemtablesCount())
|
||||
require.Equal(t, int64(metabaseconfig.ValueThresholdDefault), meta.ValueThreshold())
|
||||
require.Equal(t, int64(metabaseconfig.ValueLogFileSizeDefault), meta.ValueLogFileSize())
|
||||
require.Equal(t, int64(metabaseconfig.IndexCacheSizeDefault), meta.IndexCacheSize())
|
||||
require.Equal(t, metabaseconfig.CompactorsCountDefault, meta.CompactorsCount())
|
||||
require.Equal(t, metabaseconfig.GCIntervalDefault, meta.GCInterval())
|
||||
require.Equal(t, metabaseconfig.GCDiscardPercentDefault, meta.GCDiscardPercent())
|
||||
|
||||
require.Equal(t, false, sc.Compress())
|
||||
require.Equal(t, []string(nil), sc.UncompressableContentTypes())
|
||||
|
|
|
@ -1,8 +1,21 @@
|
|||
package metabaseconfig
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config"
|
||||
boltdbconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/engine/shard/boltdb"
|
||||
)
|
||||
|
||||
const (
|
||||
PermDefault = 0o660
|
||||
MemtablesCountDefault = 32
|
||||
ValueThresholdDefault = 512
|
||||
ValueLogFileSizeDefault = 1<<30 - 1
|
||||
IndexCacheSizeDefault = 256 << 20
|
||||
CompactorsCountDefault = 8
|
||||
GCIntervalDefault = 10 * time.Minute
|
||||
GCDiscardPercentDefault = 50
|
||||
)
|
||||
|
||||
// Config is a wrapper over the config section
|
||||
|
@ -30,7 +43,109 @@ func (x *Config) Path() string {
|
|||
return p
|
||||
}
|
||||
|
||||
// BoltDB returns config instance for querying bolt db specific parameters.
|
||||
func (x *Config) BoltDB() *boltdbconfig.Config {
|
||||
return (*boltdbconfig.Config)(x)
|
||||
// Perm returns the value of "perm" config parameter as a fs.FileMode.
|
||||
//
|
||||
// Returns PermDefault if the value is not a positive number.
|
||||
func (x *Config) Perm() fs.FileMode {
|
||||
p := config.UintSafe(
|
||||
(*config.Config)(x),
|
||||
"perm",
|
||||
)
|
||||
|
||||
if p == 0 {
|
||||
p = PermDefault
|
||||
}
|
||||
|
||||
return fs.FileMode(p)
|
||||
}
|
||||
|
||||
// NoSync returns the value of "no_sync" config parameter.
|
||||
//
|
||||
// Returns false if the value is not a boolean.
|
||||
func (x *Config) NoSync() bool {
|
||||
return config.BoolSafe((*config.Config)(x), "no_sync")
|
||||
}
|
||||
|
||||
// Verbose returns the value of "verbose" config parameter.
|
||||
//
|
||||
// Returns false if the value is not a boolean.
|
||||
func (x *Config) Verbose() bool {
|
||||
return config.BoolSafe((*config.Config)(x), "verbose")
|
||||
}
|
||||
|
||||
// MemtablesCount returns the value of "memtables_count" config parameter.
|
||||
//
|
||||
// Returns MemtablesCountDefault if the value is not a positive number.
|
||||
func (x *Config) MemtablesCount() int {
|
||||
s := int(config.IntSafe((*config.Config)(x), "memtables_count"))
|
||||
if s <= 0 {
|
||||
s = MemtablesCountDefault
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// ValueThreshold returns the value of "value_threshold" config parameter.
|
||||
//
|
||||
// Returns ValueThresholdDefault if the value is not a positive number.
|
||||
func (x *Config) ValueThreshold() int64 {
|
||||
s := config.SizeInBytesSafe((*config.Config)(x), "value_threshold")
|
||||
if s <= 0 {
|
||||
s = ValueThresholdDefault
|
||||
}
|
||||
return int64(s)
|
||||
}
|
||||
|
||||
// ValueLogFileSize returns the value of "valuelog_file_size" config parameter.
|
||||
//
|
||||
// Returns ValueLogFileSizeDefault if the value is not a positive number.
|
||||
func (x *Config) ValueLogFileSize() int64 {
|
||||
s := config.SizeInBytesSafe((*config.Config)(x), "valuelog_file_size")
|
||||
if s <= 0 {
|
||||
s = ValueLogFileSizeDefault
|
||||
}
|
||||
return int64(s)
|
||||
}
|
||||
|
||||
// IndexCacheSize returns the value of "index_cache_size" config parameter.
|
||||
//
|
||||
// Returns IndexCacheSizeDefault if the value is not a positive number.
|
||||
func (x *Config) IndexCacheSize() int64 {
|
||||
s := config.SizeInBytesSafe((*config.Config)(x), "index_cache_size")
|
||||
if s <= 0 {
|
||||
s = IndexCacheSizeDefault
|
||||
}
|
||||
return int64(s)
|
||||
}
|
||||
|
||||
// CompactorsCount returns the value of "compactors_count" config parameter.
|
||||
//
|
||||
// Returns CompactorsCountDefault if the value is not a positive number.
|
||||
func (x *Config) CompactorsCount() int {
|
||||
s := int(config.IntSafe((*config.Config)(x), "compactors_count"))
|
||||
if s <= 0 {
|
||||
s = CompactorsCountDefault
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// GCInterval returns the value of "gc_interval" config parameter.
|
||||
//
|
||||
// Returns GCIntervalDefault if the value is not a positive number.
|
||||
func (x *Config) GCInterval() time.Duration {
|
||||
s := config.DurationSafe((*config.Config)(x), "gc_interval")
|
||||
if s <= 0 {
|
||||
s = GCIntervalDefault
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// GCDiscardPercent returns the value of "gc_discard_percent" config parameter.
|
||||
//
|
||||
// Returns GCDiscardPercentDefault if the value is not a number in range (0;100].
|
||||
func (x *Config) GCDiscardPercent() int {
|
||||
s := int(config.IntSafe((*config.Config)(x), "gc_discard_percent"))
|
||||
if s <= 0 || s > 100 {
|
||||
s = GCDiscardPercentDefault
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
|
|
@ -108,8 +108,15 @@ FROSTFS_STORAGE_SHARD_0_WRITECACHE_CAPACITY=3221225472
|
|||
### Metabase config
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_PATH=tmp/0/meta
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_PERM=0644
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_SIZE=100
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_MAX_BATCH_DELAY=10ms
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_NO_SYNC=TRUE
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_VERBOSE=TRUE
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_MEMTABLES_COUNT=10
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_VALUE_THRESHOLD=1024
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_VALUELOG_FILE_SIZE=512mb
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_INDEX_CACHE_SIZE=10mb
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_COMPACTORS_COUNT=10
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_GC_INTERVAL=20m
|
||||
FROSTFS_STORAGE_SHARD_0_METABASE_GC_DISCARD_PERCENT=60
|
||||
### Blobstor config
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESS=true
|
||||
FROSTFS_STORAGE_SHARD_0_COMPRESSION_EXCLUDE_CONTENT_TYPES="audio/* video/*"
|
||||
|
@ -164,8 +171,6 @@ FROSTFS_STORAGE_SHARD_1_WRITECACHE_CAPACITY=4294967296
|
|||
### Metabase config
|
||||
FROSTFS_STORAGE_SHARD_1_METABASE_PATH=tmp/1/meta
|
||||
FROSTFS_STORAGE_SHARD_1_METABASE_PERM=0644
|
||||
FROSTFS_STORAGE_SHARD_1_METABASE_MAX_BATCH_SIZE=200
|
||||
FROSTFS_STORAGE_SHARD_1_METABASE_MAX_BATCH_DELAY=20ms
|
||||
### Blobstor config
|
||||
FROSTFS_STORAGE_SHARD_1_COMPRESS=false
|
||||
FROSTFS_STORAGE_SHARD_1_SMALL_OBJECT_SIZE=102400
|
||||
|
|
|
@ -153,8 +153,15 @@
|
|||
"metabase": {
|
||||
"path": "tmp/0/meta",
|
||||
"perm": "0644",
|
||||
"max_batch_size": 100,
|
||||
"max_batch_delay": "10ms"
|
||||
"no_sync": true,
|
||||
"verbose": true,
|
||||
"memtables_count": 10,
|
||||
"value_threshold": 1024,
|
||||
"valuelog_file_size": "512mb",
|
||||
"index_cache_size": "10mb",
|
||||
"compactors_count": 10,
|
||||
"gc_interval": "20m",
|
||||
"gc_discard_percent": 60
|
||||
},
|
||||
"compress": true,
|
||||
"compression_exclude_content_types": [
|
||||
|
@ -212,9 +219,7 @@
|
|||
},
|
||||
"metabase": {
|
||||
"path": "tmp/1/meta",
|
||||
"perm": "0644",
|
||||
"max_batch_size": 200,
|
||||
"max_batch_delay": "20ms"
|
||||
"perm": "0644"
|
||||
},
|
||||
"compress": false,
|
||||
"small_object_size": 102400,
|
||||
|
|
|
@ -131,8 +131,6 @@ storage:
|
|||
|
||||
metabase:
|
||||
perm: 0644 # permissions for metabase files(directories: +x for current user and group)
|
||||
max_batch_size: 200
|
||||
max_batch_delay: 20ms
|
||||
|
||||
pilorama:
|
||||
max_batch_delay: 5ms # maximum delay for a batch of operations to be executed
|
||||
|
@ -175,8 +173,15 @@ storage:
|
|||
|
||||
metabase:
|
||||
path: tmp/0/meta # metabase path
|
||||
max_batch_size: 100
|
||||
max_batch_delay: 10ms
|
||||
no_sync: true
|
||||
verbose: true
|
||||
memtables_count: 10
|
||||
value_threshold: 1024
|
||||
valuelog_file_size: 512mb
|
||||
index_cache_size: 10mb
|
||||
compactors_count: 10
|
||||
gc_interval: 20m
|
||||
gc_discard_percent: 60
|
||||
|
||||
compress: true # turn on/off zstd(level 3) compression of stored objects
|
||||
compression_exclude_content_types:
|
||||
|
|
|
@ -270,16 +270,30 @@ gc:
|
|||
metabase:
|
||||
path: /path/to/meta.db
|
||||
perm: 0644
|
||||
max_batch_size: 200
|
||||
max_batch_delay: 20ms
|
||||
no_sync: true
|
||||
verbose: true
|
||||
memtables_count: 10
|
||||
value_threshold: 1024
|
||||
valuelog_file_size: 512mb
|
||||
index_cache_size: 10mb
|
||||
compactors_count: 10
|
||||
gc_interval: 20m
|
||||
gc_discard_percent: 60
|
||||
```
|
||||
|
||||
| Parameter | Type | Default value | Description |
|
||||
|-------------------|------------|---------------|------------------------------------------------------------------------|
|
||||
| -------------------- | ---------- | ------------- | ----------------------------------------------------------------------------------------------------------------- |
|
||||
| `path` | `string` | | Path to the metabase file. |
|
||||
| `perm` | file mode | `0660` | Permissions to set for the database file. |
|
||||
| `max_batch_size` | `int` | `1000` | Maximum amount of write operations to perform in a single transaction. |
|
||||
| `max_batch_delay` | `duration` | `10ms` | Maximum delay before a batch starts. |
|
||||
| `no_sync` | `bool` | `false` | If `true` then metabase storage engine doesn't flush all changes to disk after each operation. |
|
||||
| `verbose` | `bool` | `false` | If `true` then metabase storage engine's logs will be passed to logger. |
|
||||
| `memtables_count` | `int` | `32` | Maximum number of tables to keep in memory before stalling. |
|
||||
| `value_threshold` | `size` | `512B` | Threshold used to decide whether a value is stored directly in the LSM tree or separately in the log value files. |
|
||||
| `valuelog_file_size` | `size` | `1GB` | Maximum size of a single value log file. |
|
||||
| `index_cache_size` | `size` | `256MB` | Maximum size of memory used by table indices. |
|
||||
| `compactors_count` | `int` | `64` | Number of compaction workers to run concurrently. |
|
||||
| `gc_interval` | `duration` | `10m` | Time interval between value log garbage collection runs. |
|
||||
| `gc_discard_percent` | `int` | `50` | Value log file will be rewritten if more than `gc_discard_percent` of the space can be discarded. |
|
||||
|
||||
### `writecache` subsection
|
||||
|
||||
|
|
10
go.mod
10
go.mod
|
@ -15,6 +15,7 @@ require (
|
|||
git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02
|
||||
github.com/cheggaaa/pb v1.0.29
|
||||
github.com/chzyer/readline v1.5.1
|
||||
github.com/dgraph-io/badger/v4 v4.2.0
|
||||
github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568
|
||||
github.com/go-pkgz/expirable-cache/v3 v3.0.0
|
||||
github.com/google/uuid v1.6.0
|
||||
|
@ -67,11 +68,18 @@ require (
|
|||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/davidmz/go-pageant v1.0.2 // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect
|
||||
github.com/dgraph-io/ristretto v0.1.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.0 // indirect
|
||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||
github.com/go-fed/httpsig v1.1.0 // indirect
|
||||
github.com/go-logr/logr v1.4.1 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/glog v1.2.0 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/flatbuffers v1.12.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.1 // indirect
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.0 // indirect
|
||||
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.1 // indirect
|
||||
|
@ -99,6 +107,7 @@ require (
|
|||
github.com/nspcc-dev/neo-go/pkg/interop v0.0.0-20240521091047-78685785716d // indirect
|
||||
github.com/nspcc-dev/rfc6979 v0.2.1 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.1.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
github.com/prometheus/client_model v0.5.0 // indirect
|
||||
github.com/prometheus/common v0.48.0 // indirect
|
||||
|
@ -111,6 +120,7 @@ require (
|
|||
github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect
|
||||
github.com/twmb/murmur3 v1.1.8 // indirect
|
||||
github.com/urfave/cli v1.22.14 // indirect
|
||||
go.opencensus.io v0.24.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.22.0 // indirect
|
||||
|
|
BIN
go.sum
BIN
go.sum
Binary file not shown.
|
@ -87,6 +87,7 @@ const (
|
|||
PersistentCouldNotGetSessionFromPersistentStorage = "could not get session from persistent storage"
|
||||
PersistentCouldNotDeleteSToken = "could not delete token"
|
||||
PersistentCouldNotCleanUpExpiredTokens = "could not clean up expired tokens"
|
||||
ControllerReportIsAlreadyStarted = "report is already started"
|
||||
TombstoneCouldNotGetTheTombstoneTheSource = "tombstone getter: could not get the tombstone the source"
|
||||
DeleteNoSplitInfoObjectIsPHY = "no split info, object is PHY"
|
||||
DeleteAssemblingChain = "assembling chain..."
|
||||
|
@ -134,13 +135,37 @@ const (
|
|||
UtilCouldNotPushTaskToWorkerPool = "could not push task to worker pool"
|
||||
V2CantCheckIfRequestFromInnerRing = "can't check if request from inner ring"
|
||||
V2CantCheckIfRequestFromContainerNode = "can't check if request from container node"
|
||||
NatsNatsConnectionWasLost = "nats: connection was lost"
|
||||
NatsNatsReconnectedToTheServer = "nats: reconnected to the server"
|
||||
NatsNatsClosingConnectionAsTheContextIsDone = "nats: closing connection as the context is done"
|
||||
NatsConnectedToEndpoint = "nats: successfully connected to endpoint"
|
||||
ControllerStartingToAnnounceTheValuesOfTheMetrics = "starting to announce the values of the metrics"
|
||||
ControllerCouldNotInitializeIteratorOverLocallyCollectedMetrics = "could not initialize iterator over locally collected metrics"
|
||||
ControllerCouldNotInitializeAnnouncementAccumulator = "could not initialize announcement accumulator"
|
||||
ControllerIteratorOverLocallyCollectedMetricsAborted = "iterator over locally collected metrics aborted"
|
||||
ControllerCouldNotFinishWritingLocalAnnouncements = "could not finish writing local announcements"
|
||||
ControllerTrustAnnouncementSuccessfullyFinished = "trust announcement successfully finished"
|
||||
ControllerAnnouncementIsAlreadyStarted = "announcement is already started"
|
||||
ControllerAnnouncementSuccessfullyInterrupted = "announcement successfully interrupted"
|
||||
ControllerAnnouncementIsNotStartedOrAlreadyInterrupted = "announcement is not started or already interrupted"
|
||||
ControllerCouldNotInitializeIteratorOverLocallyAccumulatedAnnouncements = "could not initialize iterator over locally accumulated announcements"
|
||||
ControllerCouldNotInitializeResultTarget = "could not initialize result target"
|
||||
ControllerIteratorOverLocalAnnouncementsAborted = "iterator over local announcements aborted"
|
||||
ControllerCouldNotFinishWritingLoadEstimations = "could not finish writing load estimations"
|
||||
RouteCouldNotInitializeWriterProvider = "could not initialize writer provider"
|
||||
RouteCouldNotInitializeWriter = "could not initialize writer"
|
||||
RouteCouldNotPutTheValue = "could not put the value"
|
||||
RouteCouldNotCloseRemoteServerWriter = "could not close remote server writer"
|
||||
ClientCouldNotRestoreBlockSubscriptionAfterRPCSwitch = "could not restore block subscription after RPC switch"
|
||||
ClientCouldNotRestoreNotificationSubscriptionAfterRPCSwitch = "could not restore notification subscription after RPC switch"
|
||||
ClientCouldNotRestoreNotaryNotificationSubscriptionAfterRPCSwitch = "could not restore notary notification subscription after RPC switch"
|
||||
ClientCouldNotEstablishConnectionToTheSwitchedRPCNode = "could not establish connection to the switched RPC node"
|
||||
ClientConnectionToTheNewRPCNodeHasBeenEstablished = "connection to the new RPC node has been established"
|
||||
ClientSwitchingToTheNextRPCNode = "switching to the next RPC node"
|
||||
ClientCouldNotEstablishConnectionToAnyRPCNode = "could not establish connection to any RPC node"
|
||||
ClientCouldNotCreateClientToTheHigherPriorityNode = "could not create client to the higher priority node"
|
||||
ClientSwitchedToTheHigherPriorityRPC = "switched to the higher priority RPC"
|
||||
ClientCouldNotRestoreSideChainSubscriptionsUsingNode = "could not restore side chain subscriptions using node"
|
||||
ClientNotaryDepositHasAlreadyBeenMade = "notary deposit has already been made"
|
||||
ClientNotaryDepositInvoke = "notary deposit invoke"
|
||||
ClientNotaryRequestWithPreparedMainTXInvoked = "notary request with prepared main TX invoked"
|
||||
|
@ -182,6 +207,12 @@ const (
|
|||
EventIgnoreNilNotaryEventHandler = "ignore nil notary event handler"
|
||||
EventIgnoreHandlerOfNotaryEventWoParser = "ignore handler of notary event w/o parser"
|
||||
EventIgnoreNilBlockHandler = "ignore nil block handler"
|
||||
SubscriberRemoteNotificationChannelHasBeenClosed = "remote notification channel has been closed"
|
||||
SubscriberCantCastNotifyEventValueToTheNotifyStruct = "can't cast notify event value to the notify struct"
|
||||
SubscriberNewNotificationEventFromSidechain = "new notification event from sidechain"
|
||||
SubscriberCantCastBlockEventValueToBlock = "can't cast block event value to block"
|
||||
SubscriberCantCastNotifyEventValueToTheNotaryRequestStruct = "can't cast notify event value to the notary request struct"
|
||||
SubscriberUnsupportedNotificationFromTheChain = "unsupported notification from the chain"
|
||||
StorageOperation = "local object storage operation"
|
||||
BlobovniczaCreatingDirectoryForBoltDB = "creating directory for BoltDB"
|
||||
BlobovniczaOpeningBoltDB = "opening BoltDB"
|
||||
|
@ -225,17 +256,20 @@ const (
|
|||
EngineFinishedWithErrorShardsEvacuation = "shards evacuation finished with error"
|
||||
EngineObjectIsMovedToAnotherShard = "object is moved to another shard"
|
||||
MetabaseMissingMatcher = "missing matcher"
|
||||
MetabaseErrorInFKBTSelection = "error in FKBT selection"
|
||||
MetabaseCantDecodeListBucketLeaf = "can't decode list bucket leaf"
|
||||
MetabaseUnknownOperation = "unknown operation"
|
||||
MetabaseCantIterateOverTheBucket = "can't iterate over the bucket"
|
||||
MetabaseCouldNotIterateOverTheBuckets = "could not iterate over the buckets"
|
||||
MetabaseCouldNotIterateOverThePrefix = "could not iterate over the prefix"
|
||||
MetabaseCreatedDirectoryForMetabase = "created directory for Metabase"
|
||||
MetabaseOpenedBoltDBInstanceForMetabase = "opened boltDB instance for Metabase"
|
||||
MetabaseCheckingMetabaseVersion = "checking metabase version"
|
||||
ShardCantSelectAllObjects = "can't select all objects"
|
||||
ShardSettingShardMode = "setting shard mode"
|
||||
ShardShardModeSetSuccessfully = "shard mode set successfully"
|
||||
ShardCouldNotMarkObjectForShardRelocationInMetabase = "could not mark object for shard relocation in metabase"
|
||||
ShardCantDeleteObjectFromWriteCache = "can't delete object from write cache"
|
||||
ShardCantGetStorageIDFromMetabase = "can't get storage ID from metabase"
|
||||
ShardCantRemoveObjectFromBlobStor = "can't remove object from blobStor"
|
||||
ShardFetchingObjectWithoutMeta = "fetching object without meta"
|
||||
ShardObjectIsMissingInWritecache = "object is missing in write-cache"
|
||||
ShardFailedToFetchObjectFromWritecache = "failed to fetch object from write-cache"
|
||||
|
@ -250,6 +284,7 @@ const (
|
|||
ShardCouldNotCloseShardComponent = "could not close shard component"
|
||||
ShardCantOpenMetabaseMoveToADegradedMode = "can't open metabase, move to a degraded mode"
|
||||
ShardCantInitializeMetabaseMoveToADegradedreadonlyMode = "can't initialize metabase, move to a degraded-read-only mode"
|
||||
ShardTryingToRestoreReadwriteMode = "trying to restore read-write mode"
|
||||
ShardStopEventListenerByClosedEventChannel = "stop event listener by closed `event` channel"
|
||||
ShardStopEventListenerByClosedStopChannel = "stop event listener by closed `stop` channel"
|
||||
ShardStopEventListenerByContext = "stop event listener by context"
|
||||
|
@ -275,17 +310,33 @@ const (
|
|||
ShardCouldNotMarkObjectToDeleteInMetabase = "could not mark object to delete in metabase"
|
||||
WritecacheTriedToFlushItemsFromWritecache = "tried to flush items from write-cache"
|
||||
WritecacheWaitingForChannelsToFlush = "waiting for channels to flush"
|
||||
WritecacheFillingFlushMarksForObjectsInFSTree = "filling flush marks for objects in FSTree"
|
||||
WritecacheFinishedUpdatingFSTreeFlushMarks = "finished updating FSTree flush marks"
|
||||
WritecacheFillingFlushMarksForObjectsInDatabase = "filling flush marks for objects in database"
|
||||
WritecacheFinishedUpdatingFlushMarks = "finished updating flush marks"
|
||||
WritecacheCantRemoveObjectsFromTheDatabase = "can't remove objects from the database"
|
||||
WritecacheCantRemoveObjectFromWritecache = "can't remove object from write-cache"
|
||||
BlobovniczatreeCouldNotGetObjectFromLevel = "could not get object from level"
|
||||
BlobovniczatreeCouldNotReadPayloadRangeFromOpenedBlobovnicza = "could not read payload range from opened blobovnicza"
|
||||
BlobovniczatreeCouldNotReadPayloadRangeFromActiveBlobovnicza = "could not read payload range from active blobovnicza"
|
||||
BlobovniczatreeCouldNotCloseBlobovnicza = "could not close Blobovnicza"
|
||||
BlobovniczatreeBlobovniczaSuccessfullyClosedOnEvict = "blobovnicza successfully closed on evict"
|
||||
BlobovniczatreeUpdatingActiveBlobovnicza = "updating active blobovnicza..."
|
||||
BlobovniczatreeActiveBlobovniczaSuccessfullyUpdated = "active blobovnicza successfully updated"
|
||||
BlobovniczatreeBlobovniczaSuccessfullyActivated = "blobovnicza successfully activated"
|
||||
BlobovniczatreeCouldNotRemoveObjectFromLevel = "could not remove object from level"
|
||||
BlobovniczatreeCouldNotRemoveObjectFromOpenedBlobovnicza = "could not remove object from opened blobovnicza"
|
||||
BlobovniczatreeCouldNotRemoveObjectFromActiveBlobovnicza = "could not remove object from active blobovnicza"
|
||||
BlobovniczatreeCouldNotGetActiveBlobovnicza = "could not get active blobovnicza"
|
||||
BlobovniczatreeBlobovniczaOverflowed = "blobovnicza overflowed"
|
||||
BlobovniczatreeCouldNotUpdateActiveBlobovnicza = "could not update active blobovnicza"
|
||||
BlobovniczatreeCouldNotPutObjectToActiveBlobovnicza = "could not put object to active blobovnicza"
|
||||
BlobovniczatreeCouldNotReadObjectFromOpenedBlobovnicza = "could not read object from opened blobovnicza"
|
||||
BlobovniczatreeCouldNotGetObjectFromActiveBlobovnicza = "could not get object from active blobovnicza"
|
||||
BlobovniczatreeInitializingBlobovniczas = "initializing Blobovnicza's"
|
||||
BlobovniczatreeReadonlyModeSkipBlobovniczasInitialization = "read-only mode, skip blobovniczas initialization..."
|
||||
BlobovniczatreeBlobovniczaSuccessfullyInitializedClosing = "blobovnicza successfully initialized, closing..."
|
||||
BlobovniczatreeCouldNotCloseActiveBlobovnicza = "could not close active blobovnicza"
|
||||
AlphabetTick = "tick"
|
||||
AlphabetAlphabetProcessorWorkerPoolDrained = "alphabet processor worker pool drained"
|
||||
AlphabetNonAlphabetModeIgnoreGasEmissionEvent = "non alphabet mode, ignore gas emission event"
|
||||
|
@ -313,6 +364,9 @@ const (
|
|||
ContainerNonAlphabetModeIgnoreSetEACL = "non alphabet mode, ignore set EACL"
|
||||
ContainerSetEACLCheckFailed = "set EACL check failed"
|
||||
ContainerCouldNotApproveSetEACL = "could not approve set EACL"
|
||||
FrostFSNonAlphabetModeIgnoreBind = "non alphabet mode, ignore bind"
|
||||
FrostFSInvalidManageKeyEvent = "invalid manage key event"
|
||||
FrostFSCouldNotDecodeScriptHashFromBytes = "could not decode script hash from bytes"
|
||||
FrostFSNonAlphabetModeIgnoreConfig = "non alphabet mode, ignore config"
|
||||
FrostFSCantRelaySetConfigEvent = "can't relay set config event"
|
||||
FrostFSFrostfsWorkerPool = "frostfs worker pool"
|
||||
|
@ -405,9 +459,9 @@ const (
|
|||
FrostFSNodeInternalApplicationError = "internal application error"
|
||||
FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete"
|
||||
FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..."
|
||||
FrostFSNodeSIGHUPSkip = "node is not ready for reconfiguration, skipped SIGHUP"
|
||||
FrostFSNodeShutdownSkip = "node is already shutting down, skipped shutdown"
|
||||
FrostFSNodeShutdownWhenNotReady = "node is going to shut down when subsystems are still initializing"
|
||||
FrostFSNodeSIGHUPSkip = "node not ready for reconfiguration, skipped SIGHUP"
|
||||
FrostFSNodeShutdownSkip = "node already is going to shutting down, skipped shutdown"
|
||||
FrostFSNodeShutdownWhenNotReady = "node is going to shutting down when subsystems still initializing"
|
||||
FrostFSNodeConfigurationReading = "configuration reading"
|
||||
FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation"
|
||||
FrostFSNodeTracingConfigationUpdated = "tracing configation updated"
|
||||
|
@ -418,6 +472,9 @@ const (
|
|||
FrostFSNodeReadNewlyCreatedContainerAfterTheNotification = "read newly created container after the notification"
|
||||
FrostFSNodeContainerCreationEventsReceipt = "container creation event's receipt"
|
||||
FrostFSNodeContainerRemovalEventsReceipt = "container removal event's receipt"
|
||||
FrostFSNodeSaveUsedSpaceAnnouncementInContract = "save used space announcement in contract"
|
||||
FrostFSNodeFailedToCalculateContainerSizeInStorageEngine = "failed to calculate container size in storage engine"
|
||||
FrostFSNodeContainerSizeInStorageEngineCalculatedSuccessfully = "container size in storage engine calculated successfully"
|
||||
FrostFSNodeNotificatorCouldNotListContainers = "notificator: could not list containers"
|
||||
FrostFSNodeNotificatorCouldNotSelectObjectsFromContainer = "notificator: could not select objects from container"
|
||||
FrostFSNodeNotificatorCouldNotProcessObject = "notificator: could not process object"
|
||||
|
@ -459,10 +516,13 @@ const (
|
|||
EngineShardsEvacuationFailedToReadObject = "failed to read object to evacuate"
|
||||
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
|
||||
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
|
||||
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
|
||||
FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
|
||||
EngineCouldNotChangeShardModeToDisabled = "could not change shard mode to disabled"
|
||||
NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap = "the node is already in candidate list with online state, skip initial bootstrap"
|
||||
RPConnectionLost = "RPC connection lost, attempting reconnect"
|
||||
RPCNodeSwitchFailure = "can't switch RPC node"
|
||||
FSTreeCantReadFile = "can't read a file"
|
||||
FSTreeCantUnmarshalObject = "can't unmarshal an object"
|
||||
FSTreeCantFushObjectBlobstor = "can't flush an object to blobstor"
|
||||
FSTreeCantUpdateID = "can't update object storage ID"
|
||||
|
@ -472,8 +532,13 @@ const (
|
|||
ObjectRemovalFailureBlobStor = "can't remove object from blobStor"
|
||||
CandidateStatusPriority = "candidate status is different from the netmap status, the former takes priority"
|
||||
TombstoneExpirationParseFailure = "tombstone getter: could not parse tombstone expiration epoch"
|
||||
FrostFSNodeCantUpdateObjectStorageID = "can't update object storage ID"
|
||||
FrostFSNodeCantFlushObjectToBlobstor = "can't flush an object to blobstor"
|
||||
FrostFSNodeCantDecodeObjectAddressFromDB = "can't decode object address from the DB"
|
||||
FrostFSNodeCantUnmarshalObjectFromDB = "can't unmarshal an object from the DB"
|
||||
RuntimeSoftMemoryLimitUpdated = "soft runtime memory limit value updated"
|
||||
RuntimeSoftMemoryDefinedWithGOMEMLIMIT = "soft runtime memory defined with GOMEMLIMIT environment variable, config value skipped"
|
||||
FailedToCountWritecacheItems = "failed to count writecache items"
|
||||
AttemtToCloseAlreadyClosedBlobovnicza = "attempt to close an already closed blobovnicza"
|
||||
FailedToGetContainerCounters = "failed to get container counters values"
|
||||
FailedToRebuildBlobstore = "failed to rebuild blobstore"
|
||||
|
@ -501,7 +566,9 @@ const (
|
|||
BlobovniczaTreeCompletingPreviousRebuild = "completing previous rebuild if failed..."
|
||||
BlobovniczaTreeCompletedPreviousRebuildSuccess = "previous rebuild completed successfully"
|
||||
BlobovniczaTreeCompletedPreviousRebuildFailed = "failed to complete previous rebuild"
|
||||
BlobovniczatreeCouldNotCheckExistenceInSourceDB = "could not check object existence in source blobovnicza"
|
||||
BlobovniczatreeCouldNotCheckExistenceInTargetDB = "could not check object existence in target blobovnicza"
|
||||
BlobovniczatreeCouldNotGetObjectFromSourceDB = "could not get object from source blobovnicza"
|
||||
BlobovniczatreeCouldNotPutObjectToTargetDB = "could not put object to target blobovnicza"
|
||||
BlobovniczaSavingCountersToMeta = "saving counters to blobovnicza's meta..."
|
||||
BlobovniczaSavingCountersToMetaSuccess = "saving counters to blobovnicza's meta completed successfully"
|
||||
|
@ -527,8 +594,15 @@ const (
|
|||
EngineShardsEvacuationTreeEvacuatedLocal = "tree evacuated to local node"
|
||||
EngineShardsEvacuationTreeEvacuatedRemote = "tree evacuated to other node"
|
||||
EngineRefillFailedToGetObjectsCount = "failed to get blobstor objects count, no resync percent estimation is available"
|
||||
BlobstoreFailedToGetFileinfo = "failed to get file info"
|
||||
ECFailedToSendToContainerNode = "failed to send EC object to container node"
|
||||
ECFailedToSaveECPart = "failed to save EC part"
|
||||
FailedToParseAddressFromKey = "failed to parse address from key"
|
||||
FailedToParseOwnerFromKey = "failed to parse owner from key"
|
||||
FailedToParsePayloadHashFromKey = "failed to parse payload hash from key"
|
||||
FailedToParseSplitIDFromKey = "failed to parse splitID from key"
|
||||
FailedToParseAttributeValueFromKey = "failed to parse attribute value from key"
|
||||
FailedToRunMetabaseGC = "failed to run badger GC on metabase"
|
||||
PolicerNodeIsNotECObjectNode = "current node is not EC object node"
|
||||
PolicerFailedToGetLocalECChunks = "failed to get local EC chunks"
|
||||
PolicerMissingECChunk = "failed to find EC chunk on any of the nodes"
|
||||
|
@ -541,5 +615,6 @@ const (
|
|||
PolicerFailedToRestoreObject = "failed to restore EC object"
|
||||
PolicerCouldNotGetChunk = "could not get EC chunk"
|
||||
PolicerCouldNotGetChunks = "could not get EC chunks"
|
||||
ErrMetabaseConflict = "metabase conflict"
|
||||
AuditEventLogRecord = "audit event log record"
|
||||
)
|
||||
|
|
|
@ -61,6 +61,7 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error {
|
|||
shBlz := b.getBlobovniczaWithoutCaching(p)
|
||||
blz, err := shBlz.Open()
|
||||
if err != nil {
|
||||
b.log.Error("failed to open blobovnicza", zap.Error(err), zap.String("path", p))
|
||||
return err
|
||||
}
|
||||
defer shBlz.Close()
|
||||
|
@ -101,7 +102,6 @@ func (b *Blobovniczas) initializeDBs(ctx context.Context) error {
|
|||
})
|
||||
return false, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
_ = eg.Wait()
|
||||
return err
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// ContainerSizePrm groups parameters of ContainerSize operation.
|
||||
|
@ -41,56 +40,6 @@ func (r ListContainersRes) Containers() []cid.ID {
|
|||
return r.containers
|
||||
}
|
||||
|
||||
// ContainerSize returns the sum of estimation container sizes among all shards.
|
||||
//
|
||||
// Returns an error if executions are blocked (see BlockExecution).
|
||||
func (e *StorageEngine) ContainerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) {
|
||||
err = e.execIfNotBlocked(func() error {
|
||||
res, err = e.containerSize(prm)
|
||||
return err
|
||||
})
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ContainerSize calls ContainerSize method on engine to calculate sum of estimation container sizes among all shards.
|
||||
func ContainerSize(e *StorageEngine, id cid.ID) (uint64, error) {
|
||||
var prm ContainerSizePrm
|
||||
|
||||
prm.SetContainerID(id)
|
||||
|
||||
res, err := e.ContainerSize(prm)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return res.Size(), nil
|
||||
}
|
||||
|
||||
func (e *StorageEngine) containerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) {
|
||||
if e.metrics != nil {
|
||||
defer elapsed("EstimateContainerSize", e.metrics.AddMethodDuration)()
|
||||
}
|
||||
|
||||
e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) {
|
||||
var csPrm shard.ContainerSizePrm
|
||||
csPrm.SetContainerID(prm.cnr)
|
||||
|
||||
csRes, err := sh.Shard.ContainerSize(csPrm)
|
||||
if err != nil {
|
||||
e.reportShardError(sh, "can't get container size", err,
|
||||
zap.Stringer("container_id", prm.cnr))
|
||||
return false
|
||||
}
|
||||
|
||||
res.size += csRes.Size()
|
||||
|
||||
return false
|
||||
})
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ListContainers returns a unique container IDs presented in the engine objects.
|
||||
//
|
||||
// Returns an error if executions are blocked (see BlockExecution).
|
||||
|
|
|
@ -10,7 +10,6 @@ import (
|
|||
"strconv"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
|
||||
|
@ -24,7 +23,6 @@ import (
|
|||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger/test"
|
||||
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// TestInitializationFailure checks that shard is initialized and closed even if media
|
||||
|
@ -53,10 +51,6 @@ func TestInitializationFailure(t *testing.T) {
|
|||
shard.WithBlobStorOptions(
|
||||
blobstor.WithStorages(storages)),
|
||||
shard.WithMetaBaseOptions(
|
||||
meta.WithBoltDBOptions(&bbolt.Options{
|
||||
Timeout: 100 * time.Millisecond,
|
||||
OpenFile: opts.openFileMetabase,
|
||||
}),
|
||||
meta.WithPath(filepath.Join(t.TempDir(), "metabase")),
|
||||
meta.WithPermissions(0o700),
|
||||
meta.WithEpochState(epochState{})),
|
||||
|
@ -83,6 +77,7 @@ func TestInitializationFailure(t *testing.T) {
|
|||
testEngineFailInitAndReload(t, false, shardOpts, beforeReload)
|
||||
})
|
||||
t.Run("metabase", func(t *testing.T) {
|
||||
t.Skip("badger doesn't support custom open file")
|
||||
var openFileMetabaseSucceed atomic.Bool
|
||||
openFileMetabase := func(p string, f int, mode fs.FileMode) (*os.File, error) {
|
||||
if openFileMetabaseSucceed.Load() {
|
||||
|
|
|
@ -277,7 +277,7 @@ func (e *StorageEngine) processExpiredLocks(ctx context.Context, epoch uint64, l
|
|||
|
||||
func (e *StorageEngine) processDeletedLocks(ctx context.Context, lockers []oid.Address) {
|
||||
e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) {
|
||||
sh.HandleDeletedLocks(lockers)
|
||||
sh.HandleDeletedLocks(ctx, lockers)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
@ -317,7 +317,7 @@ func (e *StorageEngine) processZeroSizeContainers(ctx context.Context, ids []cid
|
|||
var drop []cid.ID
|
||||
for id := range idMap {
|
||||
prm.SetContainerID(id)
|
||||
s, err := sh.ContainerSize(prm)
|
||||
s, err := sh.ContainerSize(ctx, prm)
|
||||
if err != nil {
|
||||
e.log.Warn(logs.EngineFailedToGetContainerSize, zap.Stringer("container_id", id), zap.Error(err))
|
||||
failed = true
|
||||
|
|
137
pkg/local_object_storage/metabase/badger.go
Normal file
137
pkg/local_object_storage/metabase/badger.go
Normal file
|
@ -0,0 +1,137 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
const (
|
||||
// replace with lock by objectID ?
|
||||
retryCount = 10
|
||||
retryTimeout = 5 * time.Millisecond
|
||||
)
|
||||
|
||||
// deleteByPrefix must be used only with small transactions.
|
||||
func deleteByPrefix(ctx context.Context, tx *badger.Txn, prefix []byte) error {
|
||||
for {
|
||||
batch, err := selectByPrefixBatch(ctx, tx, prefix, batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, key := range batch {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if err := tx.Delete(key); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if len(batch) < batchSize {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (db *DB) deleteByPrefixBatched(ctx context.Context, prefix []byte) error {
|
||||
wb := db.database.NewWriteBatch()
|
||||
defer wb.Cancel()
|
||||
|
||||
for {
|
||||
batch, err := db.selectByPrefixBatchTxn(ctx, prefix, batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, key := range batch {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if err := wb.Delete(key); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if len(batch) < batchSize {
|
||||
return wb.Flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (db *DB) selectByPrefixBatchTxn(ctx context.Context, prefix []byte, batchSize int) ([][]byte, error) {
|
||||
txn := db.database.NewTransaction(false)
|
||||
defer txn.Discard()
|
||||
|
||||
return selectByPrefixBatch(ctx, txn, prefix, batchSize)
|
||||
}
|
||||
|
||||
func selectByPrefixBatch(ctx context.Context, tx *badger.Txn, prefix []byte, batchSize int) ([][]byte, error) {
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
var result [][]byte
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
result = append(result, it.Item().KeyCopy(nil))
|
||||
if len(result) == batchSize {
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func selectByPrefixAndSeek(ctx context.Context, tx *badger.Txn, prefix, lastSeen []byte, withValues bool, batchSize int) ([]keyValue, error) {
|
||||
opts := badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
}
|
||||
if withValues {
|
||||
opts.PrefetchValues = true
|
||||
}
|
||||
it := tx.NewIterator(opts)
|
||||
defer it.Close()
|
||||
|
||||
var result []keyValue
|
||||
for it.Seek(lastSeen); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
if bytes.Equal(lastSeen, it.Item().Key()) {
|
||||
continue
|
||||
}
|
||||
var current keyValue
|
||||
current.Key = it.Item().KeyCopy(nil)
|
||||
if withValues {
|
||||
var err error
|
||||
current.Value, err = it.Item().ValueCopy(nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
result = append(result, current)
|
||||
if len(result) == batchSize {
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func deleteByKey(tx *badger.Txn, key []byte) error {
|
||||
return tx.Delete(key)
|
||||
}
|
106
pkg/local_object_storage/metabase/bucket.go
Normal file
106
pkg/local_object_storage/metabase/bucket.go
Normal file
|
@ -0,0 +1,106 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
)
|
||||
|
||||
type dbBucketDispatcher struct {
|
||||
cond *sync.Cond
|
||||
containerDispatchers map[cid.ID]*containerBucketDispatcher
|
||||
}
|
||||
|
||||
func newDBBucketDispatcher() *dbBucketDispatcher {
|
||||
return &dbBucketDispatcher{
|
||||
cond: sync.NewCond(&sync.Mutex{}),
|
||||
containerDispatchers: make(map[cid.ID]*containerBucketDispatcher),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *dbBucketDispatcher) BucketID(id cid.ID) (uint16, func()) {
|
||||
d.cond.L.Lock()
|
||||
defer d.cond.L.Unlock()
|
||||
|
||||
cd := d.getOrCreateContainerDispatcher(id)
|
||||
val, ok := cd.freeBucketID()
|
||||
for !ok {
|
||||
d.cond.Wait()
|
||||
cd = d.getOrCreateContainerDispatcher(id)
|
||||
val, ok = cd.freeBucketID()
|
||||
}
|
||||
return val, func() {
|
||||
d.release(id, val)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *dbBucketDispatcher) release(id cid.ID, bucketID uint16) {
|
||||
d.cond.L.Lock()
|
||||
defer d.cond.L.Unlock()
|
||||
|
||||
cd, ok := d.containerDispatchers[id]
|
||||
if !ok {
|
||||
panic("container bucket ID dispatcher not found")
|
||||
}
|
||||
notify := cd.full()
|
||||
cd.release(bucketID)
|
||||
if cd.empty() {
|
||||
delete(d.containerDispatchers, id)
|
||||
}
|
||||
if notify {
|
||||
d.cond.Broadcast()
|
||||
}
|
||||
}
|
||||
|
||||
func (d *dbBucketDispatcher) getOrCreateContainerDispatcher(id cid.ID) *containerBucketDispatcher {
|
||||
existed, found := d.containerDispatchers[id]
|
||||
if found {
|
||||
return existed
|
||||
}
|
||||
created := newContainerDispatcher()
|
||||
d.containerDispatchers[id] = created
|
||||
return created
|
||||
}
|
||||
|
||||
type containerBucketDispatcher struct {
|
||||
free []uint16
|
||||
next uint16
|
||||
taken map[uint16]struct{}
|
||||
}
|
||||
|
||||
func newContainerDispatcher() *containerBucketDispatcher {
|
||||
return &containerBucketDispatcher{
|
||||
taken: make(map[uint16]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *containerBucketDispatcher) freeBucketID() (uint16, bool) {
|
||||
if len(d.free) > 0 {
|
||||
idx := len(d.free) - 1
|
||||
result := d.free[idx]
|
||||
d.free = d.free[:idx]
|
||||
d.taken[result] = struct{}{}
|
||||
return result, true
|
||||
}
|
||||
if d.next == math.MaxUint16 {
|
||||
return 0, false
|
||||
}
|
||||
v := d.next
|
||||
d.next++
|
||||
d.taken[v] = struct{}{}
|
||||
return v, true
|
||||
}
|
||||
|
||||
func (d *containerBucketDispatcher) release(bucketID uint16) {
|
||||
delete(d.taken, bucketID)
|
||||
d.free = append(d.free, bucketID)
|
||||
}
|
||||
|
||||
func (d *containerBucketDispatcher) empty() bool {
|
||||
return len(d.taken) == 0
|
||||
}
|
||||
|
||||
func (d *containerBucketDispatcher) full() bool {
|
||||
return len(d.free) == 0 && len(d.taken) == math.MaxUint16
|
||||
}
|
|
@ -7,7 +7,7 @@ import (
|
|||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -38,34 +38,29 @@ func (db *DB) GetChildren(ctx context.Context, addresses []oid.Address) (map[oid
|
|||
|
||||
result := make(map[oid.Address][]oid.Address, len(addresses))
|
||||
|
||||
buffer := make([]byte, bucketKeySize)
|
||||
err := db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
for _, addr := range addresses {
|
||||
if _, found := result[addr]; found {
|
||||
continue
|
||||
}
|
||||
|
||||
result[addr] = []oid.Address{}
|
||||
bkt := tx.Bucket(parentBucketName(addr.Container(), buffer))
|
||||
if bkt == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
binObjIDs, err := decodeList(bkt.Get(objectKey(addr.Object(), buffer)))
|
||||
for {
|
||||
keys, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, binObjID := range binObjIDs {
|
||||
var id oid.ID
|
||||
if err = id.Decode(binObjID); err != nil {
|
||||
for _, key := range keys {
|
||||
resultAddress, err := addressOfTargetFromParentKey(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var resultAddress oid.Address
|
||||
resultAddress.SetContainer(addr.Container())
|
||||
resultAddress.SetObject(id)
|
||||
result[addr] = append(result[addr], resultAddress)
|
||||
}
|
||||
if len(keys) < batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
|
|
@ -3,12 +3,20 @@ package meta
|
|||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
const (
|
||||
containerSizeKeySize = 1 + cidSize + 2
|
||||
containerSizePrefixSize = 1 + cidSize
|
||||
)
|
||||
|
||||
func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
|
||||
|
@ -30,8 +38,8 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
|
|||
return nil, ErrDegradedMode
|
||||
}
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
list, err = db.containers(tx)
|
||||
err = db.database.View(func(tx *badger.Txn) error {
|
||||
list, err = containers(tx)
|
||||
|
||||
return err
|
||||
})
|
||||
|
@ -39,24 +47,28 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
|
|||
return list, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) {
|
||||
func containers(tx *badger.Txn) ([]cid.ID, error) {
|
||||
result := make([]cid.ID, 0)
|
||||
unique := make(map[string]struct{})
|
||||
var cnr cid.ID
|
||||
|
||||
err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error {
|
||||
if parseContainerID(&cnr, name, unique) {
|
||||
result = append(result, cnr)
|
||||
unique[string(name[1:bucketKeySize])] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
return result, err
|
||||
for it.Seek(nil); it.Valid(); it.Next() {
|
||||
name := it.Item().Key()
|
||||
if parseContainerIDWithIgnore(&cnr, name, unique) {
|
||||
result = append(result, cnr)
|
||||
unique[string(name[1:containerSizePrefixSize])] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (db *DB) ContainerSize(ctx context.Context, id cid.ID) (size uint64, err error) {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
|
@ -64,58 +76,171 @@ func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) {
|
|||
return 0, ErrDegradedMode
|
||||
}
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
size, err = db.containerSize(tx, id)
|
||||
result, err := db.containerSizesInternal(ctx, &id)
|
||||
if err != nil {
|
||||
return 0, metaerr.Wrap(err)
|
||||
}
|
||||
return result[id], nil
|
||||
}
|
||||
|
||||
return err
|
||||
func (db *DB) ContainerSizes(ctx context.Context) (map[cid.ID]uint64, error) {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return nil, ErrDegradedMode
|
||||
}
|
||||
|
||||
return db.containerSizesInternal(ctx, nil)
|
||||
}
|
||||
|
||||
// ZeroSizeContainers returns containers with size = 0.
|
||||
func (db *DB) ZeroSizeContainers(ctx context.Context) ([]cid.ID, error) {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
)
|
||||
defer func() {
|
||||
db.metrics.AddMethodDuration("ZeroSizeContainers", time.Since(startedAt), success)
|
||||
}()
|
||||
|
||||
ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroSizeContainers")
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
sizes, err := db.containerSizesInternal(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result []cid.ID
|
||||
for id, size := range sizes {
|
||||
if size == 0 {
|
||||
result = append(result, id)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (db *DB) DeleteContainerSize(ctx context.Context, id cid.ID) error {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
)
|
||||
defer func() {
|
||||
db.metrics.AddMethodDuration("DeleteContainerSize", time.Since(startedAt), success)
|
||||
}()
|
||||
|
||||
_, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerSize",
|
||||
trace.WithAttributes(
|
||||
attribute.Stringer("container_id", id),
|
||||
))
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
if db.mode.ReadOnly() {
|
||||
return ErrReadOnlyMode
|
||||
}
|
||||
|
||||
return metaerr.Wrap(db.deleteByPrefixBatched(ctx, containerSizeKeyPrefix(id)))
|
||||
}
|
||||
|
||||
func (db *DB) containerSizesInternal(ctx context.Context, id *cid.ID) (map[cid.ID]uint64, error) {
|
||||
prefix := []byte{containerSizePrefix}
|
||||
if id != nil {
|
||||
prefix = containerSizeKeyPrefix(*id)
|
||||
}
|
||||
result := make(map[cid.ID]int64)
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
PrefetchValues: true,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
return size, metaerr.Wrap(err)
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) {
|
||||
containerVolume := tx.Bucket(containerVolumeBucketName)
|
||||
key := make([]byte, cidSize)
|
||||
id.Encode(key)
|
||||
|
||||
return parseContainerSize(containerVolume.Get(key)), nil
|
||||
key := it.Item().Key()
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(key[1:containerSizePrefixSize]); err != nil {
|
||||
return fmt.Errorf("invalid container size key: %w", err)
|
||||
}
|
||||
|
||||
func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
|
||||
if len(name) != bucketKeySize {
|
||||
return false
|
||||
if err := it.Item().Value(func(val []byte) error {
|
||||
value, ok := parseInt64Value(val)
|
||||
if !ok {
|
||||
return fmt.Errorf("invalid container size value for container %s", cnr)
|
||||
}
|
||||
if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
|
||||
return false
|
||||
result[cnr] += value
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return dst.Decode(name[1:bucketKeySize]) == nil
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func parseContainerSize(v []byte) uint64 {
|
||||
if len(v) == 0 {
|
||||
return 0
|
||||
return normilizeContainerSizes(result)
|
||||
}
|
||||
|
||||
return binary.LittleEndian.Uint64(v)
|
||||
func normilizeContainerSizes(sizes map[cid.ID]int64) (map[cid.ID]uint64, error) {
|
||||
result := make(map[cid.ID]uint64, len(sizes))
|
||||
for k, v := range sizes {
|
||||
if v < 0 {
|
||||
return nil, fmt.Errorf("invalid cumulative size for container %s", k)
|
||||
}
|
||||
result[k] = uint64(v)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) error {
|
||||
containerVolume := tx.Bucket(containerVolumeBucketName)
|
||||
key := make([]byte, cidSize)
|
||||
id.Encode(key)
|
||||
func changeContainerSize(tx *badger.Txn, id cid.ID, delta int64, bucketID uint16) error {
|
||||
key := containerSizeKey(id, bucketID)
|
||||
|
||||
size := parseContainerSize(containerVolume.Get(key))
|
||||
v, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
size, ok := parseInt64Value(v)
|
||||
if !ok {
|
||||
return fmt.Errorf("invalid container size value for container %s", id)
|
||||
}
|
||||
|
||||
if increase {
|
||||
size += delta
|
||||
} else if size > delta {
|
||||
size -= delta
|
||||
} else {
|
||||
size = 0
|
||||
value := marshalInt64(size)
|
||||
return tx.Set(key, value)
|
||||
}
|
||||
|
||||
buf := make([]byte, 8) // consider using sync.Pool to decrease allocations
|
||||
binary.LittleEndian.PutUint64(buf, size)
|
||||
|
||||
return containerVolume.Put(key, buf)
|
||||
// containerSizeKeyPrefix returns containerSizePrefix_CID key prefix.
|
||||
func containerSizeKeyPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, containerSizePrefixSize)
|
||||
result[0] = containerSizePrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
// containerSizeKey returns containerVolumePrefix_CID_bucketID key.
|
||||
func containerSizeKey(cnr cid.ID, bucketID uint16) []byte {
|
||||
result := make([]byte, containerSizeKeySize)
|
||||
result[0] = containerSizePrefix
|
||||
cnr.Encode(result[1:])
|
||||
binary.LittleEndian.PutUint16(result[containerSizePrefixSize:], bucketID)
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -151,7 +151,7 @@ func TestDB_ContainerSize(t *testing.T) {
|
|||
}
|
||||
|
||||
for cnr, volume := range cids {
|
||||
n, err := db.ContainerSize(cnr)
|
||||
n, err := db.ContainerSize(context.Background(), cnr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, volume, int(n))
|
||||
}
|
||||
|
@ -169,7 +169,7 @@ func TestDB_ContainerSize(t *testing.T) {
|
|||
|
||||
volume -= int(obj.PayloadSize())
|
||||
|
||||
n, err := db.ContainerSize(cnr)
|
||||
n, err := db.ContainerSize(context.Background(), cnr)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, volume, int(n))
|
||||
}
|
||||
|
|
|
@ -2,16 +2,18 @@ package meta
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"github.com/dgraph-io/badger/v4/options"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
|
@ -21,23 +23,7 @@ var ErrDegradedMode = logicerr.New("metabase is in a degraded mode")
|
|||
// ErrReadOnlyMode is returned when metabase is in a read-only mode.
|
||||
var ErrReadOnlyMode = logicerr.New("metabase is in a read-only mode")
|
||||
|
||||
var (
|
||||
mStaticBuckets = map[string]struct{}{
|
||||
string(containerVolumeBucketName): {},
|
||||
string(containerCounterBucketName): {},
|
||||
string(graveyardBucketName): {},
|
||||
string(garbageBucketName): {},
|
||||
string(shardInfoBucket): {},
|
||||
string(bucketNameLocked): {},
|
||||
}
|
||||
|
||||
// deprecatedBuckets buckets that are not used anymore.
|
||||
deprecatedBuckets = [][]byte{
|
||||
toMoveItBucketName,
|
||||
}
|
||||
)
|
||||
|
||||
// Open boltDB instance for metabase.
|
||||
// Open metabase.
|
||||
func (db *DB) Open(_ context.Context, m mode.Mode) error {
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
@ -47,149 +33,151 @@ func (db *DB) Open(_ context.Context, m mode.Mode) error {
|
|||
if m.NoMetabase() {
|
||||
return nil
|
||||
}
|
||||
|
||||
return db.openDB(m)
|
||||
}
|
||||
|
||||
func (db *DB) openDB(mode mode.Mode) error {
|
||||
err := util.MkdirAllX(filepath.Dir(db.info.Path), db.info.Permission)
|
||||
err := util.MkdirAllX(db.info.Path, db.info.Permission)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err)
|
||||
}
|
||||
|
||||
db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path))
|
||||
|
||||
if db.boltOptions == nil {
|
||||
opts := *bbolt.DefaultOptions
|
||||
db.boltOptions = &opts
|
||||
}
|
||||
db.boltOptions.ReadOnly = mode.ReadOnly()
|
||||
|
||||
return metaerr.Wrap(db.openBolt())
|
||||
return metaerr.Wrap(db.openBadger(mode.ReadOnly()))
|
||||
}
|
||||
|
||||
func (db *DB) openBolt() error {
|
||||
func (db *DB) badgerOptions(readOnly bool) badger.Options {
|
||||
opts := badger.DefaultOptions(db.info.Path)
|
||||
|
||||
opts.BlockCacheSize = 0 // compression and encryption are disabled, so block cache should be disabled
|
||||
opts.IndexCacheSize = db.indexCacheSize // 256MB, to not to keep all indicies in memory
|
||||
opts.Compression = options.None // no need to compress metabase values
|
||||
opts.Logger = nil
|
||||
opts.MetricsEnabled = false
|
||||
opts.NumLevelZeroTablesStall = math.MaxInt // to not to stall because of Level0 slow compaction
|
||||
opts.NumMemtables = db.memtablesCount
|
||||
opts.NumCompactors = db.numCompactors
|
||||
opts.SyncWrites = !db.noSync
|
||||
opts.ValueLogMaxEntries = math.MaxUint32 // default vLog file size is 1GB, so size is more clear than entries count
|
||||
opts.ValueThreshold = db.valueThreshold
|
||||
opts.LmaxCompaction = true
|
||||
opts.ReadOnly = readOnly
|
||||
if db.verbose {
|
||||
opts.Logger = &badgerLogger{l: db.log}
|
||||
}
|
||||
opts.ValueLogFileSize = db.valueLogFileSize
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
func (db *DB) openBadger(readOnly bool) error {
|
||||
opts := db.badgerOptions(readOnly)
|
||||
|
||||
var err error
|
||||
|
||||
db.boltDB, err = bbolt.Open(db.info.Path, db.info.Permission, db.boltOptions)
|
||||
db.database, err = badger.Open(opts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't open boltDB database: %w", err)
|
||||
return fmt.Errorf("can't open badger database: %w", err)
|
||||
}
|
||||
db.boltDB.MaxBatchDelay = db.boltBatchDelay
|
||||
db.boltDB.MaxBatchSize = db.boltBatchSize
|
||||
|
||||
db.log.Debug(logs.MetabaseOpenedBoltDBInstanceForMetabase)
|
||||
|
||||
db.log.Debug(logs.MetabaseCheckingMetabaseVersion)
|
||||
return db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
// The safest way to check if the metabase is fresh is to check if it has no buckets.
|
||||
// However, shard info can be present. So here we check that the number of buckets is
|
||||
// at most 1.
|
||||
// Another thing to consider is that tests do not persist shard ID, we want to support
|
||||
// this case too.
|
||||
var n int
|
||||
err := tx.ForEach(func([]byte, *bbolt.Bucket) error {
|
||||
if n++; n >= 2 { // do not iterate a lot
|
||||
return errBreakBucketForEach
|
||||
if db.closed != nil {
|
||||
close(db.closed)
|
||||
db.wg.Wait()
|
||||
db.closed = nil
|
||||
}
|
||||
|
||||
db.closed = make(chan struct{})
|
||||
db.wg.Add(1)
|
||||
go db.collectGC()
|
||||
|
||||
return db.database.View(func(txn *badger.Txn) error {
|
||||
data, err := valueOrNil(txn, shardInfoKey(versionKey))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
db.initialized = len(data) > 0
|
||||
return nil
|
||||
})
|
||||
|
||||
if err == errBreakBucketForEach {
|
||||
db.initialized = true
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
// Init initializes metabase. It creates static (CID-independent) buckets in underlying BoltDB instance.
|
||||
func (db *DB) collectGC() {
|
||||
defer db.wg.Done()
|
||||
timer := time.NewTicker(db.gcInterval)
|
||||
for {
|
||||
select {
|
||||
case <-db.closed:
|
||||
return
|
||||
case <-timer.C:
|
||||
if err := db.database.RunValueLogGC(db.gcDiscardRatio); err == nil {
|
||||
_ = db.database.RunValueLogGC(db.gcDiscardRatio) // see https://dgraph.io/docs/badger/get-started/#garbage-collection
|
||||
} else {
|
||||
db.log.Warn(logs.FailedToRunMetabaseGC, zap.Error(err), zap.String("path", db.info.Path))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes metabase.
|
||||
//
|
||||
// Returns ErrOutdatedVersion if a database at the provided path is outdated.
|
||||
//
|
||||
// Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state,
|
||||
// use Reset.
|
||||
func (db *DB) Init() error {
|
||||
return metaerr.Wrap(db.init(false))
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
||||
return metaerr.Wrap(db.init(context.TODO(), false))
|
||||
}
|
||||
|
||||
// Reset resets metabase. Works similar to Init but cleans up all static buckets and
|
||||
// removes all dynamic (CID-dependent) ones in non-blank BoltDB instances.
|
||||
func (db *DB) Reset() error {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
func (db *DB) Init2(ctx context.Context) error {
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
||||
return metaerr.Wrap(db.init(ctx, false))
|
||||
}
|
||||
|
||||
// Reset resets metabase. Works similar to Init but cleans up all data records.
|
||||
func (db *DB) Reset(ctx context.Context) error {
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
return metaerr.Wrap(db.init(true))
|
||||
return metaerr.Wrap(db.init(ctx, true))
|
||||
}
|
||||
|
||||
func (db *DB) init(reset bool) error {
|
||||
func (db *DB) init(ctx context.Context, reset bool) error {
|
||||
if db.mode.NoMetabase() || db.mode.ReadOnly() {
|
||||
return nil
|
||||
}
|
||||
if reset {
|
||||
if err := db.database.DropAll(); err != nil {
|
||||
return nil
|
||||
}
|
||||
return db.database.Update(func(tx *badger.Txn) error {
|
||||
return updateVersion(tx, version)
|
||||
})
|
||||
}
|
||||
|
||||
return db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
var err error
|
||||
if !reset {
|
||||
// Normal open, check version and update if not initialized.
|
||||
err := checkVersion(tx, db.initialized)
|
||||
if err != nil {
|
||||
if err := db.checkVersion(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for k := range mStaticBuckets {
|
||||
name := []byte(k)
|
||||
if reset {
|
||||
err := tx.DeleteBucket(name)
|
||||
if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) {
|
||||
return fmt.Errorf("could not delete static bucket %s: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
_, err := tx.CreateBucketIfNotExists(name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create static bucket %s: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range deprecatedBuckets {
|
||||
err := tx.DeleteBucket(b)
|
||||
if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) {
|
||||
return fmt.Errorf("could not delete deprecated bucket %s: %w", string(b), err)
|
||||
}
|
||||
}
|
||||
|
||||
if !reset { // counters will be recalculated by refill metabase
|
||||
err = syncCounter(tx, false)
|
||||
if err != nil {
|
||||
if err := db.syncCounter(ctx, false); err != nil {
|
||||
return fmt.Errorf("could not sync object counter: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
bucketCursor := tx.Cursor()
|
||||
name, _ := bucketCursor.First()
|
||||
for name != nil {
|
||||
if _, ok := mStaticBuckets[string(name)]; !ok {
|
||||
if err := tx.DeleteBucket(name); err != nil {
|
||||
return err
|
||||
}
|
||||
name, _ = bucketCursor.Seek(name)
|
||||
continue
|
||||
}
|
||||
name, _ = bucketCursor.Next()
|
||||
}
|
||||
return updateVersion(tx, version)
|
||||
})
|
||||
}
|
||||
|
||||
// SyncCounters forces to synchronize the object counters.
|
||||
func (db *DB) SyncCounters() error {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
func (db *DB) SyncCounters(ctx context.Context) error {
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return ErrDegradedMode
|
||||
|
@ -197,17 +185,27 @@ func (db *DB) SyncCounters() error {
|
|||
return ErrReadOnlyMode
|
||||
}
|
||||
|
||||
return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
return syncCounter(tx, true)
|
||||
}))
|
||||
return metaerr.Wrap(db.syncCounter(ctx, true))
|
||||
}
|
||||
|
||||
// Close closes boltDB instance
|
||||
// and reports metabase metric.
|
||||
// Close closes metabase.
|
||||
func (db *DB) Close() error {
|
||||
db.modeMtx.Lock()
|
||||
defer db.modeMtx.Unlock()
|
||||
|
||||
return db.close()
|
||||
}
|
||||
|
||||
func (db *DB) close() error {
|
||||
if db.closed != nil {
|
||||
close(db.closed)
|
||||
db.wg.Wait()
|
||||
db.closed = nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if db.boltDB != nil {
|
||||
err = db.close()
|
||||
if db.database != nil {
|
||||
err = metaerr.Wrap(db.database.Close())
|
||||
}
|
||||
if err == nil {
|
||||
db.metrics.Close()
|
||||
|
@ -215,10 +213,6 @@ func (db *DB) Close() error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (db *DB) close() error {
|
||||
return metaerr.Wrap(db.boltDB.Close())
|
||||
}
|
||||
|
||||
// Reload reloads part of the configuration.
|
||||
// It returns true iff database was reopened.
|
||||
// If a config option is invalid, it logs an error and returns nil.
|
||||
|
@ -235,14 +229,14 @@ func (db *DB) Reload(opts ...Option) (bool, error) {
|
|||
defer db.modeMtx.Unlock()
|
||||
|
||||
if db.mode.NoMetabase() || c.info.Path != "" && filepath.Clean(db.info.Path) != filepath.Clean(c.info.Path) {
|
||||
if err := db.Close(); err != nil {
|
||||
if err := db.close(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
db.mode = mode.Disabled
|
||||
db.metrics.SetMode(mode.ComponentDisabled)
|
||||
db.info.Path = c.info.Path
|
||||
if err := db.openBolt(); err != nil {
|
||||
if err := db.openBadger(false); err != nil {
|
||||
return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err))
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ func TestReset(t *testing.T) {
|
|||
db := newDB(t)
|
||||
defer func() { require.NoError(t, db.Close()) }()
|
||||
|
||||
err := db.Reset()
|
||||
err := db.Reset(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
obj := testutil.GenerateObject()
|
||||
|
@ -47,7 +47,7 @@ func TestReset(t *testing.T) {
|
|||
assertExists(addr, true, nil)
|
||||
assertExists(addrToInhume, false, client.IsErrObjectAlreadyRemoved)
|
||||
|
||||
err = db.Reset()
|
||||
err = db.Reset(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
assertExists(addr, false, nil)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
|
@ -13,29 +12,21 @@ import (
|
|||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
var (
|
||||
objectPhyCounterKey = []byte("phy_counter")
|
||||
objectLogicCounterKey = []byte("logic_counter")
|
||||
objectUserCounterKey = []byte("user_counter")
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidKeyLenght = errors.New("invalid key length")
|
||||
errInvalidKeyPrefix = errors.New("invalid key prefix")
|
||||
errInvalidValueLenght = errors.New("invalid value length")
|
||||
errInvalidContainerIDValue = errors.New("invalid container ID value")
|
||||
)
|
||||
|
||||
type objectType uint8
|
||||
|
||||
const (
|
||||
_ objectType = iota
|
||||
phy
|
||||
logical
|
||||
user
|
||||
containerObjectCountKeySize = 1 + cidSize + 2
|
||||
containerObjectCountPrefixSize = 1 + cidSize
|
||||
)
|
||||
|
||||
// ObjectCounters groups object counter
|
||||
|
@ -50,12 +41,18 @@ func (o ObjectCounters) IsZero() bool {
|
|||
return o.Phy == 0 && o.Logic == 0 && o.User == 0
|
||||
}
|
||||
|
||||
type objectCounterValue struct {
|
||||
Logic int64
|
||||
Phy int64
|
||||
User int64
|
||||
}
|
||||
|
||||
// ObjectCounters returns object counters that metabase has
|
||||
// tracked since it was opened and initialized.
|
||||
//
|
||||
// Returns only the errors that do not allow reading counter
|
||||
// in Bolt database.
|
||||
func (db *DB) ObjectCounters() (cc ObjectCounters, err error) {
|
||||
// in badger database.
|
||||
func (db *DB) ObjectCounters(ctx context.Context) (ObjectCounters, error) {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
|
@ -63,29 +60,22 @@ func (db *DB) ObjectCounters() (cc ObjectCounters, err error) {
|
|||
return ObjectCounters{}, ErrDegradedMode
|
||||
}
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b != nil {
|
||||
data := b.Get(objectPhyCounterKey)
|
||||
if len(data) == 8 {
|
||||
cc.Phy = binary.LittleEndian.Uint64(data)
|
||||
}
|
||||
|
||||
data = b.Get(objectLogicCounterKey)
|
||||
if len(data) == 8 {
|
||||
cc.Logic = binary.LittleEndian.Uint64(data)
|
||||
}
|
||||
|
||||
data = b.Get(objectUserCounterKey)
|
||||
if len(data) == 8 {
|
||||
cc.User = binary.LittleEndian.Uint64(data)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
var cc map[cid.ID]ObjectCounters
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
var err error
|
||||
cc, err = containerObjectCounters(ctx, tx, nil)
|
||||
return err
|
||||
})
|
||||
|
||||
return cc, metaerr.Wrap(err)
|
||||
if err != nil {
|
||||
return ObjectCounters{}, metaerr.Wrap(err)
|
||||
}
|
||||
var result ObjectCounters
|
||||
for _, v := range cc {
|
||||
result.Logic += v.Logic
|
||||
result.Phy += v.Phy
|
||||
result.User += v.User
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type ContainerCounters struct {
|
||||
|
@ -96,7 +86,7 @@ type ContainerCounters struct {
|
|||
// that metabase has tracked since it was opened and initialized.
|
||||
//
|
||||
// Returns only the errors that do not allow reading counter
|
||||
// in Bolt database.
|
||||
// in badger database.
|
||||
//
|
||||
// It is guaranteed that the ContainerCounters fields are not nil.
|
||||
func (db *DB) ContainerCounters(ctx context.Context) (ContainerCounters, error) {
|
||||
|
@ -114,86 +104,18 @@ func (db *DB) ContainerCounters(ctx context.Context) (ContainerCounters, error)
|
|||
cc := ContainerCounters{
|
||||
Counts: make(map[cid.ID]ObjectCounters),
|
||||
}
|
||||
|
||||
lastKey := make([]byte, cidSize)
|
||||
|
||||
// there is no limit for containers count, so use batching with cancellation
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return cc, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
completed, err := db.containerCountersNextBatch(lastKey, func(id cid.ID, entity ObjectCounters) {
|
||||
cc.Counts[id] = entity
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
var err error
|
||||
cc.Counts, err = containerObjectCounters(ctx, tx, nil)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return cc, err
|
||||
return ContainerCounters{}, metaerr.Wrap(err)
|
||||
}
|
||||
if completed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
success = true
|
||||
return cc, nil
|
||||
}
|
||||
|
||||
func (db *DB) containerCountersNextBatch(lastKey []byte, f func(id cid.ID, entity ObjectCounters)) (bool, error) {
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return false, ErrDegradedMode
|
||||
}
|
||||
|
||||
counter := 0
|
||||
const batchSize = 1000
|
||||
|
||||
err := db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
if b == nil {
|
||||
return ErrInterruptIterator
|
||||
}
|
||||
c := b.Cursor()
|
||||
var key, value []byte
|
||||
for key, value = c.Seek(lastKey); key != nil; key, value = c.Next() {
|
||||
if bytes.Equal(lastKey, key) {
|
||||
continue
|
||||
}
|
||||
copy(lastKey, key)
|
||||
|
||||
cnrID, err := parseContainerCounterKey(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ent, err := parseContainerCounterValue(value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f(cnrID, ent)
|
||||
|
||||
counter++
|
||||
if counter == batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if counter < batchSize { // last batch
|
||||
return ErrInterruptIterator
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
return true, nil
|
||||
}
|
||||
return false, metaerr.Wrap(err)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (db *DB) ContainerCount(ctx context.Context, id cid.ID) (ObjectCounters, error) {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
|
@ -213,144 +135,65 @@ func (db *DB) ContainerCount(ctx context.Context, id cid.ID) (ObjectCounters, er
|
|||
return ObjectCounters{}, ErrDegradedMode
|
||||
}
|
||||
|
||||
var result ObjectCounters
|
||||
|
||||
err := db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
key := make([]byte, cidSize)
|
||||
id.Encode(key)
|
||||
v := b.Get(key)
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
var cc map[cid.ID]ObjectCounters
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
var err error
|
||||
result, err = parseContainerCounterValue(v)
|
||||
cc, err = containerObjectCounters(ctx, tx, &id)
|
||||
return err
|
||||
})
|
||||
|
||||
return result, metaerr.Wrap(err)
|
||||
if err != nil {
|
||||
return ObjectCounters{}, metaerr.Wrap(err)
|
||||
}
|
||||
return cc[id], nil
|
||||
}
|
||||
|
||||
func (db *DB) incCounters(tx *bbolt.Tx, cnrID cid.ID, isUserObject bool) error {
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b == nil {
|
||||
return db.incContainerObjectCounter(tx, cnrID, isUserObject)
|
||||
func containerCounterKey(cnrID cid.ID, bucketID uint16) []byte {
|
||||
result := make([]byte, containerObjectCountKeySize)
|
||||
result[0] = containerCountersPrefix
|
||||
cnrID.Encode(result[1:])
|
||||
binary.LittleEndian.PutUint16(result[containerObjectCountPrefixSize:], bucketID)
|
||||
return result
|
||||
}
|
||||
|
||||
if err := db.updateShardObjectCounterBucket(b, phy, 1, true); err != nil {
|
||||
return fmt.Errorf("could not increase phy object counter: %w", err)
|
||||
}
|
||||
if err := db.updateShardObjectCounterBucket(b, logical, 1, true); err != nil {
|
||||
return fmt.Errorf("could not increase logical object counter: %w", err)
|
||||
func incCounters(tx *badger.Txn, cnrID cid.ID, isUserObject bool, bucketID uint16) error {
|
||||
delta := objectCounterValue{
|
||||
Logic: 1,
|
||||
Phy: 1,
|
||||
}
|
||||
if isUserObject {
|
||||
if err := db.updateShardObjectCounterBucket(b, user, 1, true); err != nil {
|
||||
return fmt.Errorf("could not increase user object counter: %w", err)
|
||||
delta.User = 1
|
||||
}
|
||||
}
|
||||
return db.incContainerObjectCounter(tx, cnrID, isUserObject)
|
||||
return editContainerCounterValue(tx, cnrID, delta, bucketID)
|
||||
}
|
||||
|
||||
func (db *DB) updateShardObjectCounter(tx *bbolt.Tx, typ objectType, delta uint64, inc bool) error {
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return db.updateShardObjectCounterBucket(b, typ, delta, inc)
|
||||
}
|
||||
|
||||
func (*DB) updateShardObjectCounterBucket(b *bbolt.Bucket, typ objectType, delta uint64, inc bool) error {
|
||||
var counter uint64
|
||||
var counterKey []byte
|
||||
|
||||
switch typ {
|
||||
case phy:
|
||||
counterKey = objectPhyCounterKey
|
||||
case logical:
|
||||
counterKey = objectLogicCounterKey
|
||||
case user:
|
||||
counterKey = objectUserCounterKey
|
||||
default:
|
||||
panic("unknown object type counter")
|
||||
}
|
||||
|
||||
data := b.Get(counterKey)
|
||||
if len(data) == 8 {
|
||||
counter = binary.LittleEndian.Uint64(data)
|
||||
}
|
||||
|
||||
if inc {
|
||||
counter += delta
|
||||
} else if counter <= delta {
|
||||
counter = 0
|
||||
} else {
|
||||
counter -= delta
|
||||
}
|
||||
|
||||
newCounter := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(newCounter, counter)
|
||||
|
||||
return b.Put(counterKey, newCounter)
|
||||
}
|
||||
|
||||
func (db *DB) updateContainerCounter(tx *bbolt.Tx, delta map[cid.ID]ObjectCounters, inc bool) error {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
key := make([]byte, cidSize)
|
||||
func updateContainerCounter(tx *badger.Txn, delta map[cid.ID]objectCounterValue, bucketIDs map[cid.ID]uint16) error {
|
||||
for cnrID, cnrDelta := range delta {
|
||||
cnrID.Encode(key)
|
||||
if err := db.editContainerCounterValue(b, key, cnrDelta, inc); err != nil {
|
||||
bucketID, found := bucketIDs[cnrID]
|
||||
if !found {
|
||||
return fmt.Errorf("bucket ID not found for container %s", cnrID)
|
||||
}
|
||||
if err := editContainerCounterValue(tx, cnrID, cnrDelta, bucketID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*DB) editContainerCounterValue(b *bbolt.Bucket, key []byte, delta ObjectCounters, inc bool) error {
|
||||
var entity ObjectCounters
|
||||
var err error
|
||||
data := b.Get(key)
|
||||
if len(data) > 0 {
|
||||
entity, err = parseContainerCounterValue(data)
|
||||
func editContainerCounterValue(tx *badger.Txn, cnrID cid.ID, delta objectCounterValue, bucketID uint16) error {
|
||||
key := containerCounterKey(cnrID, bucketID)
|
||||
val, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
setValue := delta
|
||||
if val != nil {
|
||||
exited, err := parseContainerCounterValue(val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entity.Phy = nextValue(entity.Phy, delta.Phy, inc)
|
||||
entity.Logic = nextValue(entity.Logic, delta.Logic, inc)
|
||||
entity.User = nextValue(entity.User, delta.User, inc)
|
||||
value := containerCounterValue(entity)
|
||||
return b.Put(key, value)
|
||||
setValue = mergeObjectCounterValues(setValue, exited)
|
||||
}
|
||||
|
||||
func nextValue(existed, delta uint64, inc bool) uint64 {
|
||||
if inc {
|
||||
existed += delta
|
||||
} else if existed <= delta {
|
||||
existed = 0
|
||||
} else {
|
||||
existed -= delta
|
||||
}
|
||||
return existed
|
||||
}
|
||||
|
||||
func (db *DB) incContainerObjectCounter(tx *bbolt.Tx, cnrID cid.ID, isUserObject bool) error {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
key := make([]byte, cidSize)
|
||||
cnrID.Encode(key)
|
||||
c := ObjectCounters{Logic: 1, Phy: 1}
|
||||
if isUserObject {
|
||||
c.User = 1
|
||||
}
|
||||
return db.editContainerCounterValue(b, key, c, true)
|
||||
return tx.Set(key, marshalContainerCounterValue(setValue))
|
||||
}
|
||||
|
||||
// syncCounter updates object counters according to metabase state:
|
||||
|
@ -359,34 +202,34 @@ func (db *DB) incContainerObjectCounter(tx *bbolt.Tx, cnrID cid.ID, isUserObject
|
|||
//
|
||||
// Does nothing if counters are not empty and force is false. If force is
|
||||
// true, updates the counters anyway.
|
||||
func syncCounter(tx *bbolt.Tx, force bool) error {
|
||||
shardInfoB, err := createBucketLikelyExists(tx, shardInfoBucket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not get shard info bucket: %w", err)
|
||||
}
|
||||
shardObjectCounterInitialized := len(shardInfoB.Get(objectPhyCounterKey)) == 8 &&
|
||||
len(shardInfoB.Get(objectLogicCounterKey)) == 8 &&
|
||||
len(shardInfoB.Get(objectUserCounterKey)) == 8
|
||||
containerObjectCounterInitialized := containerObjectCounterInitialized(tx)
|
||||
if !force && shardObjectCounterInitialized && containerObjectCounterInitialized {
|
||||
// the counters are already inited
|
||||
func (db *DB) syncCounter(ctx context.Context, force bool) error {
|
||||
if !force && db.containerObjectCounterInitialized(ctx) {
|
||||
return nil
|
||||
}
|
||||
|
||||
containerCounterB, err := createBucketLikelyExists(tx, containerCounterBucketName)
|
||||
// drop existed counters
|
||||
err := db.deleteByPrefixBatched(ctx, []byte{containerCountersPrefix})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not get container counter bucket: %w", err)
|
||||
return err
|
||||
}
|
||||
|
||||
counters, err := db.getActualObjectCounters()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return db.setObjectCounters(counters)
|
||||
}
|
||||
|
||||
func (db *DB) getActualObjectCounters() (map[cid.ID]ObjectCounters, error) {
|
||||
tx := db.database.NewTransaction(false)
|
||||
defer tx.Discard()
|
||||
|
||||
var addr oid.Address
|
||||
var isAvailable bool
|
||||
counters := make(map[cid.ID]ObjectCounters)
|
||||
|
||||
graveyardBKT := tx.Bucket(graveyardBucketName)
|
||||
garbageBKT := tx.Bucket(garbageBucketName)
|
||||
key := make([]byte, addressKeySize)
|
||||
var isAvailable bool
|
||||
|
||||
err = iteratePhyObjects(tx, func(cnr cid.ID, objID oid.ID, obj *objectSDK.Object) error {
|
||||
err := iteratePhyObjects(tx, func(cnr cid.ID, objID oid.ID, obj *objectSDK.Object) error {
|
||||
if v, ok := counters[cnr]; ok {
|
||||
v.Phy++
|
||||
counters[cnr] = v
|
||||
|
@ -400,9 +243,12 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
|
|||
addr.SetObject(objID)
|
||||
isAvailable = false
|
||||
|
||||
// check if an object is available: not with GCMark
|
||||
// and not covered with a tombstone
|
||||
if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 {
|
||||
st, err := inGraveyardWithKey(tx, addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if st == 0 {
|
||||
if v, ok := counters[cnr]; ok {
|
||||
v.Logic++
|
||||
counters[cnr] = v
|
||||
|
@ -428,102 +274,30 @@ func syncCounter(tx *bbolt.Tx, force bool) error {
|
|||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not iterate objects: %w", err)
|
||||
return nil, fmt.Errorf("could not iterate objects: %w", err)
|
||||
}
|
||||
return counters, nil
|
||||
}
|
||||
|
||||
return setObjectCounters(counters, shardInfoB, containerCounterB)
|
||||
}
|
||||
func (db *DB) setObjectCounters(counters map[cid.ID]ObjectCounters) error {
|
||||
wb := db.database.NewWriteBatch()
|
||||
defer wb.Cancel()
|
||||
|
||||
func setObjectCounters(counters map[cid.ID]ObjectCounters, shardInfoB, containerCounterB *bbolt.Bucket) error {
|
||||
var phyTotal uint64
|
||||
var logicTotal uint64
|
||||
var userTotal uint64
|
||||
key := make([]byte, cidSize)
|
||||
for cnrID, count := range counters {
|
||||
phyTotal += count.Phy
|
||||
logicTotal += count.Logic
|
||||
userTotal += count.User
|
||||
|
||||
cnrID.Encode(key)
|
||||
value := containerCounterValue(count)
|
||||
err := containerCounterB.Put(key, value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not update phy container object counter: %w", err)
|
||||
}
|
||||
}
|
||||
phyData := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(phyData, phyTotal)
|
||||
|
||||
err := shardInfoB.Put(objectPhyCounterKey, phyData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not update phy object counter: %w", err)
|
||||
value := objectCounterValue{
|
||||
Logic: int64(count.Logic),
|
||||
Phy: int64(count.Phy),
|
||||
User: int64(count.User),
|
||||
}
|
||||
|
||||
logData := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(logData, logicTotal)
|
||||
|
||||
err = shardInfoB.Put(objectLogicCounterKey, logData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not update logic object counter: %w", err)
|
||||
// this function called by init or refill, so no other updates should happen
|
||||
// so here bucketID = 0 can be used
|
||||
key := containerCounterKey(cnrID, 0)
|
||||
if err := wb.Set(key, marshalContainerCounterValue(value)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
userData := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(userData, userTotal)
|
||||
|
||||
err = shardInfoB.Put(objectUserCounterKey, userData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not update user object counter: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func containerCounterValue(entity ObjectCounters) []byte {
|
||||
res := make([]byte, 24)
|
||||
binary.LittleEndian.PutUint64(res, entity.Phy)
|
||||
binary.LittleEndian.PutUint64(res[8:], entity.Logic)
|
||||
binary.LittleEndian.PutUint64(res[16:], entity.User)
|
||||
return res
|
||||
}
|
||||
|
||||
func parseContainerCounterKey(buf []byte) (cid.ID, error) {
|
||||
if len(buf) != cidSize {
|
||||
return cid.ID{}, errInvalidKeyLenght
|
||||
}
|
||||
var cnrID cid.ID
|
||||
if err := cnrID.Decode(buf); err != nil {
|
||||
return cid.ID{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
return cnrID, nil
|
||||
}
|
||||
|
||||
// parseContainerCounterValue return phy, logic values.
|
||||
func parseContainerCounterValue(buf []byte) (ObjectCounters, error) {
|
||||
if len(buf) != 24 {
|
||||
return ObjectCounters{}, errInvalidValueLenght
|
||||
}
|
||||
return ObjectCounters{
|
||||
Phy: binary.LittleEndian.Uint64(buf),
|
||||
Logic: binary.LittleEndian.Uint64(buf[8:16]),
|
||||
User: binary.LittleEndian.Uint64(buf[16:]),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func containerObjectCounterInitialized(tx *bbolt.Tx) bool {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
if b == nil {
|
||||
return false
|
||||
}
|
||||
k, v := b.Cursor().First()
|
||||
if k == nil && v == nil {
|
||||
return true
|
||||
}
|
||||
_, err := parseContainerCounterKey(k)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, err = parseContainerCounterValue(v)
|
||||
return err == nil
|
||||
return wb.Flush()
|
||||
}
|
||||
|
||||
func IsUserObject(obj *objectSDK.Object) bool {
|
||||
|
@ -537,134 +311,6 @@ func IsUserObject(obj *objectSDK.Object) bool {
|
|||
return ech.Index() == 0 && (ech.ParentSplitID() == nil || ech.ParentSplitParentID() != nil)
|
||||
}
|
||||
|
||||
// ZeroSizeContainers returns containers with size = 0.
|
||||
func (db *DB) ZeroSizeContainers(ctx context.Context) ([]cid.ID, error) {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
)
|
||||
defer func() {
|
||||
db.metrics.AddMethodDuration("ZeroSizeContainers", time.Since(startedAt), success)
|
||||
}()
|
||||
|
||||
ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroSizeContainers")
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
var result []cid.ID
|
||||
lastKey := make([]byte, cidSize)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
completed, err := db.containerSizesNextBatch(lastKey, func(contID cid.ID, size uint64) {
|
||||
if size == 0 {
|
||||
result = append(result, contID)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if completed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
success = true
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// containerSizesNextBatch reads up to batchSize container-size records
// starting after lastKey and passes each (container ID, size) pair to f.
// lastKey is updated in place to the last visited key so subsequent calls
// continue where this one stopped. The returned bool reports whether the
// iteration is complete (fewer than batchSize records were left).
func (db *DB) containerSizesNextBatch(lastKey []byte, f func(cid.ID, uint64)) (bool, error) {
	db.modeMtx.RLock()
	defer db.modeMtx.RUnlock()

	if db.mode.NoMetabase() {
		return false, ErrDegradedMode
	}

	counter := 0
	const batchSize = 1000

	err := db.boltDB.View(func(tx *bbolt.Tx) error {
		b := tx.Bucket(containerVolumeBucketName)
		c := b.Cursor()
		var key, value []byte
		for key, value = c.Seek(lastKey); key != nil; key, value = c.Next() {
			// Seek lands on lastKey itself (already processed in the
			// previous batch) — skip it.
			if bytes.Equal(lastKey, key) {
				continue
			}
			// Remember position for the next batch.
			copy(lastKey, key)

			size := parseContainerSize(value)
			var id cid.ID
			if err := id.Decode(key); err != nil {
				return err
			}
			f(id, size)

			counter++
			if counter == batchSize {
				break
			}
		}

		// Fewer records than a full batch means the keyspace is
		// exhausted; ErrInterruptIterator signals completion to the
		// caller (it is not a real error).
		if counter < batchSize {
			return ErrInterruptIterator
		}
		return nil
	})
	if err != nil {
		if errors.Is(err, ErrInterruptIterator) {
			return true, nil
		}
		return false, metaerr.Wrap(err)
	}
	return false, nil
}
|
||||
|
||||
// DeleteContainerSize removes the size record of the container with the
// given ID from the container-volume bucket.
//
// Returns ErrDegradedMode if the metabase is unavailable and
// ErrReadOnlyMode if it is opened read-only.
func (db *DB) DeleteContainerSize(ctx context.Context, id cid.ID) error {
	var (
		startedAt = time.Now()
		success   = false
	)
	defer func() {
		db.metrics.AddMethodDuration("DeleteContainerSize", time.Since(startedAt), success)
	}()

	_, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerSize",
		trace.WithAttributes(
			attribute.Stringer("container_id", id),
		))
	defer span.End()

	db.modeMtx.RLock()
	defer db.modeMtx.RUnlock()

	if db.mode.NoMetabase() {
		return ErrDegradedMode
	}

	if db.mode.ReadOnly() {
		return ErrReadOnlyMode
	}

	err := db.boltDB.Update(func(tx *bbolt.Tx) error {
		b := tx.Bucket(containerVolumeBucketName)

		// The bucket is keyed by the raw container ID bytes.
		key := make([]byte, cidSize)
		id.Encode(key)
		return b.Delete(key)
	})
	success = err == nil
	return metaerr.Wrap(err)
}
|
||||
|
||||
// ZeroCountContainers returns containers with objects count = 0 in metabase.
|
||||
func (db *DB) ZeroCountContainers(ctx context.Context) ([]cid.ID, error) {
|
||||
var (
|
||||
|
@ -687,24 +333,18 @@ func (db *DB) ZeroCountContainers(ctx context.Context) ([]cid.ID, error) {
|
|||
|
||||
var result []cid.ID
|
||||
|
||||
lastKey := make([]byte, cidSize)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
completed, err := db.containerCountersNextBatch(lastKey, func(id cid.ID, entity ObjectCounters) {
|
||||
if entity.IsZero() {
|
||||
result = append(result, id)
|
||||
}
|
||||
var cc map[cid.ID]ObjectCounters
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
var err error
|
||||
cc, err = containerObjectCounters(ctx, tx, nil)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, metaerr.Wrap(err)
|
||||
}
|
||||
if completed {
|
||||
break
|
||||
for cnrID, c := range cc {
|
||||
if c.IsZero() {
|
||||
result = append(result, cnrID)
|
||||
}
|
||||
}
|
||||
success = true
|
||||
|
@ -737,13 +377,105 @@ func (db *DB) DeleteContainerCount(ctx context.Context, id cid.ID) error {
|
|||
return ErrReadOnlyMode
|
||||
}
|
||||
|
||||
err := db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(containerCounterBucketName)
|
||||
prefix := make([]byte, containerObjectCountPrefixSize)
|
||||
prefix[0] = containerCountersPrefix
|
||||
id.Encode(prefix[1:])
|
||||
|
||||
key := make([]byte, cidSize)
|
||||
id.Encode(key)
|
||||
return b.Delete(key)
|
||||
})
|
||||
success = err == nil
|
||||
err := db.deleteByPrefixBatched(ctx, prefix)
|
||||
if err != nil {
|
||||
return metaerr.Wrap(err)
|
||||
}
|
||||
success = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (db *DB) containerObjectCounterInitialized(ctx context.Context) bool {
|
||||
err := db.database.View(func(txn *badger.Txn) error {
|
||||
_, e := containerObjectCounters(ctx, txn, nil)
|
||||
return e
|
||||
})
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// containerObjectCounters collects per-container object counters from the
// badger transaction tx. If cnrID is nil, records of every container are
// read; otherwise the key prefix is narrowed to the given container.
// Multiple counter records may exist for the same container — their values
// are summed via mergeObjectCounterValues before being converted to
// unsigned ObjectCounters (a negative sum is reported as an error).
func containerObjectCounters(ctx context.Context, tx *badger.Txn, cnrID *cid.ID) (map[cid.ID]ObjectCounters, error) {
	prefix := []byte{containerCountersPrefix}
	if cnrID != nil {
		buf := make([]byte, cidSize)
		cnrID.Encode(buf)
		prefix = append(prefix, buf...)
	}
	it := tx.NewIterator(badger.IteratorOptions{
		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
		Prefix:         prefix,
		PrefetchValues: true,
	})
	defer it.Close()

	counters := make(map[cid.ID]objectCounterValue)
	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
		// Allow cancellation between records: the scan may cover the
		// whole counters keyspace.
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		// NOTE: shadows the cnrID parameter; holds the container ID
		// parsed from the current key.
		var cnrID cid.ID
		if !parseContainerID(&cnrID, it.Item().Key()) {
			return nil, errInvalidContainerIDValue
		}

		if err := it.Item().Value(func(val []byte) error {
			oc, err := parseContainerCounterValue(val)
			if err != nil {
				return err
			}
			// Accumulate deltas of all records belonging to this container.
			counters[cnrID] = mergeObjectCounterValues(counters[cnrID], oc)
			return nil
		}); err != nil {
			return nil, fmt.Errorf("invalid container object counter value: %w", err)
		}
	}
	return normilizeObjectCounters(counters)
}
|
||||
|
||||
// parseContainerCounterValue decodes a counter record into phy, logic and
// user values. The value must be exactly 24 bytes: three little-endian
// 64-bit integers (phy, logic, user) in that order.
func parseContainerCounterValue(buf []byte) (objectCounterValue, error) {
	if len(buf) != 24 {
		return objectCounterValue{}, errInvalidValueLenght
	}
	return objectCounterValue{
		Phy:   int64(binary.LittleEndian.Uint64(buf[:8])),
		Logic: int64(binary.LittleEndian.Uint64(buf[8:16])),
		User:  int64(binary.LittleEndian.Uint64(buf[16:])),
	}, nil
}
|
||||
|
||||
func marshalContainerCounterValue(v objectCounterValue) []byte {
|
||||
buf := make([]byte, 24)
|
||||
binary.LittleEndian.PutUint64(buf[:8], uint64(v.Phy))
|
||||
binary.LittleEndian.PutUint64(buf[8:16], uint64(v.Logic))
|
||||
binary.LittleEndian.PutUint64(buf[16:], uint64(v.User))
|
||||
return buf
|
||||
}
|
||||
|
||||
func mergeObjectCounterValues(lhs, rhs objectCounterValue) objectCounterValue {
|
||||
lhs.Logic += rhs.Logic
|
||||
lhs.Phy += rhs.Phy
|
||||
lhs.User += rhs.User
|
||||
return lhs
|
||||
}
|
||||
|
||||
func normilizeObjectCounters(values map[cid.ID]objectCounterValue) (map[cid.ID]ObjectCounters, error) {
|
||||
result := make(map[cid.ID]ObjectCounters, len(values))
|
||||
for k, v := range values {
|
||||
if v.Logic < 0 || v.Phy < 0 || v.User < 0 {
|
||||
return nil, fmt.Errorf("invalid container object counter for container ID %s", k.EncodeToString())
|
||||
}
|
||||
var oc ObjectCounters
|
||||
oc.Logic = uint64(v.Logic)
|
||||
oc.Phy = uint64(v.Phy)
|
||||
oc.User = uint64(v.User)
|
||||
result[k] = oc
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ func TestCounters(t *testing.T) {
|
|||
t.Parallel()
|
||||
db := newDB(t)
|
||||
defer func() { require.NoError(t, db.Close()) }()
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Zero(t, c.Phy)
|
||||
require.Zero(t, c.Logic)
|
||||
|
@ -59,7 +59,7 @@ func TestCounters(t *testing.T) {
|
|||
_, err := db.Put(context.Background(), prm)
|
||||
require.NoError(t, err)
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(i+1), c.Phy)
|
||||
|
@ -90,13 +90,13 @@ func TestCounters(t *testing.T) {
|
|||
|
||||
var prm meta.DeletePrm
|
||||
for i := objCount - 1; i >= 0; i-- {
|
||||
prm.SetAddresses(objectcore.AddressOf(oo[i]))
|
||||
prm.Address = objectcore.AddressOf(oo[i])
|
||||
|
||||
res, err := db.Delete(context.Background(), prm)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(1), res.LogicCount())
|
||||
require.Equal(t, uint64(1), res.LogicCount)
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(i), c.Phy)
|
||||
|
@ -164,7 +164,7 @@ func TestCounters(t *testing.T) {
|
|||
require.Equal(t, uint64(len(inhumedObjs)), res.LogicInhumed())
|
||||
require.Equal(t, uint64(len(inhumedObjs)), res.UserInhumed())
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(objCount), c.Phy)
|
||||
|
@ -203,7 +203,7 @@ func TestCounters(t *testing.T) {
|
|||
|
||||
require.NoError(t, putBig(db, o))
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(i+1), c.Phy)
|
||||
require.Equal(t, uint64(i+1), c.Logic)
|
||||
|
@ -238,7 +238,7 @@ func TestCounters(t *testing.T) {
|
|||
addr := objectcore.AddressOf(o)
|
||||
require.NoError(t, metaDelete(db, addr))
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(objCount-i-1), c.Phy)
|
||||
require.Equal(t, uint64(objCount-i-1), c.Logic)
|
||||
|
@ -302,7 +302,7 @@ func TestCounters(t *testing.T) {
|
|||
_, err := db.Inhume(context.Background(), prm)
|
||||
require.NoError(t, err)
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(objCount), c.Phy)
|
||||
|
@ -336,7 +336,7 @@ func TestDoublePut(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
require.True(t, pr.Inserted)
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(1), c.Phy)
|
||||
|
@ -352,7 +352,7 @@ func TestDoublePut(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
require.False(t, pr.Inserted)
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(1), c.Phy)
|
||||
|
@ -395,7 +395,7 @@ func TestCounters_Expired(t *testing.T) {
|
|||
|
||||
// 1. objects are available and counters are correct
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(objCount), c.Phy)
|
||||
require.Equal(t, uint64(objCount), c.Logic)
|
||||
|
@ -416,7 +416,7 @@ func TestCounters_Expired(t *testing.T) {
|
|||
|
||||
es.e = epoch + 2
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(objCount), c.Phy)
|
||||
require.Equal(t, uint64(objCount), c.Logic)
|
||||
|
@ -445,7 +445,7 @@ func TestCounters_Expired(t *testing.T) {
|
|||
require.Equal(t, uint64(1), inhumeRes.LogicInhumed())
|
||||
require.Equal(t, uint64(1), inhumeRes.UserInhumed())
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(len(oo)), c.Phy)
|
||||
|
@ -472,12 +472,12 @@ func TestCounters_Expired(t *testing.T) {
|
|||
// that step they should be equal)
|
||||
|
||||
var deletePrm meta.DeletePrm
|
||||
deletePrm.SetAddresses(oo[0])
|
||||
deletePrm.Address = oo[0]
|
||||
|
||||
deleteRes, err := db.Delete(context.Background(), deletePrm)
|
||||
require.NoError(t, err)
|
||||
require.Zero(t, deleteRes.LogicCount())
|
||||
require.Zero(t, deleteRes.UserCount())
|
||||
require.Zero(t, deleteRes.LogicCount)
|
||||
require.Zero(t, deleteRes.UserCount)
|
||||
|
||||
if v, ok := exp[oo[0].Container()]; ok {
|
||||
v.Phy--
|
||||
|
@ -486,7 +486,7 @@ func TestCounters_Expired(t *testing.T) {
|
|||
|
||||
oo = oo[1:]
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(len(oo)), c.Phy)
|
||||
require.Equal(t, uint64(len(oo)), c.Logic)
|
||||
|
@ -501,12 +501,12 @@ func TestCounters_Expired(t *testing.T) {
|
|||
// service do) should decrease both counters despite the
|
||||
// expiration fact
|
||||
|
||||
deletePrm.SetAddresses(oo[0])
|
||||
deletePrm.Address = oo[0]
|
||||
|
||||
deleteRes, err = db.Delete(context.Background(), deletePrm)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(1), deleteRes.LogicCount())
|
||||
require.Equal(t, uint64(1), deleteRes.UserCount())
|
||||
require.Equal(t, uint64(1), deleteRes.LogicCount)
|
||||
require.Equal(t, uint64(1), deleteRes.UserCount)
|
||||
|
||||
if v, ok := exp[oo[0].Container()]; ok {
|
||||
v.Phy--
|
||||
|
@ -517,7 +517,7 @@ func TestCounters_Expired(t *testing.T) {
|
|||
|
||||
oo = oo[1:]
|
||||
|
||||
c, err = db.ObjectCounters()
|
||||
c, err = db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uint64(len(oo)), c.Phy)
|
||||
require.Equal(t, uint64(len(oo)), c.Logic)
|
||||
|
@ -548,7 +548,7 @@ func putObjs(t *testing.T, db *meta.DB, count int, withParent bool) []*objectSDK
|
|||
_, err = db.Put(context.Background(), prm)
|
||||
require.NoError(t, err)
|
||||
|
||||
c, err := db.ObjectCounters()
|
||||
c, err := db.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, uint64(i+1), c.Phy)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"io/fs"
|
||||
|
@ -14,17 +13,11 @@ import (
|
|||
v2object "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"github.com/mr-tron/base58"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type matcher struct {
|
||||
matchSlow func(string, []byte, string) bool
|
||||
matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error
|
||||
}
|
||||
|
||||
// EpochState is an interface that provides access to the
|
||||
// current epoch number.
|
||||
type EpochState interface {
|
||||
|
@ -39,28 +32,37 @@ type DB struct {
|
|||
modeMtx sync.RWMutex
|
||||
mode mode.Mode
|
||||
|
||||
matchers map[objectSDK.SearchMatchType]matcher
|
||||
|
||||
boltDB *bbolt.DB
|
||||
|
||||
database *badger.DB
|
||||
initialized bool
|
||||
|
||||
bucketIDs *dbBucketDispatcher
|
||||
|
||||
wg sync.WaitGroup
|
||||
closed chan struct{}
|
||||
}
|
||||
|
||||
// Option is an option of DB constructor.
|
||||
type Option func(*cfg)
|
||||
|
||||
type cfg struct {
|
||||
boltOptions *bbolt.Options // optional
|
||||
|
||||
boltBatchSize int
|
||||
boltBatchDelay time.Duration
|
||||
|
||||
info Info
|
||||
|
||||
log *logger.Logger
|
||||
|
||||
epochState EpochState
|
||||
metrics Metrics
|
||||
|
||||
// badger options
|
||||
|
||||
noSync bool
|
||||
verbose bool
|
||||
memtablesCount int
|
||||
valueThreshold int64
|
||||
valueLogFileSize int64
|
||||
indexCacheSize int64
|
||||
numCompactors int
|
||||
gcInterval time.Duration
|
||||
gcDiscardRatio float64
|
||||
}
|
||||
|
||||
func defaultCfg() *cfg {
|
||||
|
@ -68,10 +70,15 @@ func defaultCfg() *cfg {
|
|||
info: Info{
|
||||
Permission: os.ModePerm, // 0777
|
||||
},
|
||||
boltBatchDelay: bbolt.DefaultMaxBatchDelay,
|
||||
boltBatchSize: bbolt.DefaultMaxBatchSize,
|
||||
log: &logger.Logger{Logger: zap.L()},
|
||||
metrics: &noopMetrics{},
|
||||
memtablesCount: 32,
|
||||
valueThreshold: 512,
|
||||
valueLogFileSize: 1<<30 - 1,
|
||||
indexCacheSize: 256 << 20,
|
||||
numCompactors: 8,
|
||||
gcInterval: 10 * time.Minute,
|
||||
gcDiscardRatio: 0.5,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -89,24 +96,7 @@ func New(opts ...Option) *DB {
|
|||
|
||||
return &DB{
|
||||
cfg: c,
|
||||
matchers: map[objectSDK.SearchMatchType]matcher{
|
||||
objectSDK.MatchUnknown: {
|
||||
matchSlow: unknownMatcher,
|
||||
matchBucket: unknownMatcherBucket,
|
||||
},
|
||||
objectSDK.MatchStringEqual: {
|
||||
matchSlow: stringEqualMatcher,
|
||||
matchBucket: stringEqualMatcherBucket,
|
||||
},
|
||||
objectSDK.MatchStringNotEqual: {
|
||||
matchSlow: stringNotEqualMatcher,
|
||||
matchBucket: stringNotEqualMatcherBucket,
|
||||
},
|
||||
objectSDK.MatchCommonPrefix: {
|
||||
matchSlow: stringCommonPrefixMatcher,
|
||||
matchBucket: stringCommonPrefixMatcherBucket,
|
||||
},
|
||||
},
|
||||
bucketIDs: newDBBucketDispatcher(),
|
||||
mode: mode.Disabled,
|
||||
}
|
||||
}
|
||||
|
@ -115,7 +105,7 @@ func stringifyValue(key string, objVal []byte) string {
|
|||
switch key {
|
||||
default:
|
||||
return string(objVal)
|
||||
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
|
||||
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent, v2object.FilterHeaderECParent:
|
||||
return base58.Encode(objVal)
|
||||
case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
|
||||
return hex.EncodeToString(objVal)
|
||||
|
@ -146,7 +136,7 @@ func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) {
|
|||
switch key {
|
||||
default:
|
||||
return []byte(value), false, true
|
||||
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent:
|
||||
case v2object.FilterHeaderObjectID, v2object.FilterHeaderContainerID, v2object.FilterHeaderParent, v2object.FilterHeaderECParent:
|
||||
v, err := base58.Decode(value)
|
||||
return v, false, err == nil
|
||||
case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
|
||||
|
@ -186,110 +176,18 @@ func stringEqualMatcher(key string, objVal []byte, filterVal string) bool {
|
|||
return stringifyValue(key, objVal) == filterVal
|
||||
}
|
||||
|
||||
// stringEqualMatcherBucket invokes f for the single entry of bucket b whose
// key strictly equals the filter value fValue (decoded according to header
// fKey). An undecodable filter value matches nothing. The match may be a
// plain key/value pair or a nested bucket (then f receives a nil value).
func stringEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error {
	// Ignore the second return value because we check for strict equality.
	val, _, ok := destringifyValue(fKey, fValue, false)
	if !ok {
		return nil
	}
	if data := b.Get(val); data != nil {
		return f(val, data)
	}
	// Get returns nil for nested buckets, so check for a sub-bucket too.
	if b.Bucket(val) != nil {
		return f(val, nil)
	}
	return nil
}
|
||||
|
||||
func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool {
|
||||
return stringifyValue(key, objVal) != filterVal
|
||||
}
|
||||
|
||||
// stringNotEqualMatcherBucket invokes f for every entry of bucket b whose
// key differs from the filter value fValue (decoded according to header
// fKey). If the filter value cannot be decoded (!ok), no key can equal it,
// so every entry matches.
func stringNotEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error {
	// Ignore the second return value because we check for strict inequality.
	val, _, ok := destringifyValue(fKey, fValue, false)
	return b.ForEach(func(k, v []byte) error {
		if !ok || !bytes.Equal(val, k) {
			return f(k, v)
		}
		return nil
	})
}
|
||||
|
||||
func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool {
|
||||
return strings.HasPrefix(stringifyValue(key, objVal), filterVal)
|
||||
}
|
||||
|
||||
// stringCommonPrefixMatcherBucket invokes f for every entry of bucket b
// whose key starts with the filter value fVal (decoded according to header
// fKey). checkLast handles filters that decode to an odd number of hex
// digits: the final byte of val then carries only a half-byte (high
// nibble), which is compared separately against the first extra key byte.
func stringCommonPrefixMatcherBucket(b *bbolt.Bucket, fKey string, fVal string, f func([]byte, []byte) error) error {
	val, checkLast, ok := destringifyValue(fKey, fVal, true)
	if !ok {
		return nil
	}

	// Full-byte part of the prefix; the trailing half-byte (if any) is
	// verified inside the loop.
	prefix := val
	if checkLast {
		prefix = val[:len(val)-1]
	}

	if len(val) == 0 {
		// empty common prefix, all the objects
		// satisfy that filter
		return b.ForEach(f)
	}

	c := b.Cursor()
	for k, v := c.Seek(val); bytes.HasPrefix(k, prefix); k, v = c.Next() {
		if checkLast && (len(k) == len(prefix) || k[len(prefix)]>>4 != val[len(val)-1]) {
			// If the last byte doesn't match, this means the prefix does no longer match,
			// so we need to break here.
			break
		}
		if err := f(k, v); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
// unknownMatcher is the matchSlow implementation for an unknown match
// type: it matches nothing.
func unknownMatcher(_ string, _ []byte, _ string) bool {
	return false
}
|
||||
|
||||
// unknownMatcherBucket is the matchBucket implementation for an unknown
// match type: it visits no entries and reports no error.
func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []byte) error) error {
	return nil
}
|
||||
|
||||
// bucketKeyHelper returns byte representation of val that is used as a key
|
||||
// in boltDB. Useful for getting filter values from unique and list indexes.
|
||||
func bucketKeyHelper(hdr string, val string) []byte {
|
||||
switch hdr {
|
||||
case v2object.FilterHeaderParent, v2object.FilterHeaderECParent:
|
||||
v, err := base58.Decode(val)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return v
|
||||
case v2object.FilterHeaderPayloadHash:
|
||||
v, err := hex.DecodeString(val)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return v
|
||||
case v2object.FilterHeaderSplitID:
|
||||
s := objectSDK.NewSplitID()
|
||||
|
||||
err := s.Parse(val)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.ToV2()
|
||||
default:
|
||||
return []byte(val)
|
||||
}
|
||||
}
|
||||
|
||||
// SetLogger sets logger. It is used after the shard ID was generated to use it in logs.
|
||||
func (db *DB) SetLogger(l *logger.Logger) {
|
||||
db.log = l
|
||||
|
@ -307,13 +205,6 @@ func WithLogger(l *logger.Logger) Option {
|
|||
}
|
||||
}
|
||||
|
||||
// WithBoltDBOptions returns option to specify BoltDB options.
|
||||
func WithBoltDBOptions(opts *bbolt.Options) Option {
|
||||
return func(c *cfg) {
|
||||
c.boltOptions = opts
|
||||
}
|
||||
}
|
||||
|
||||
// WithPath returns option to set system path to Metabase.
|
||||
func WithPath(path string) Option {
|
||||
return func(c *cfg) {
|
||||
|
@ -329,28 +220,6 @@ func WithPermissions(perm fs.FileMode) Option {
|
|||
}
|
||||
}
|
||||
|
||||
// WithMaxBatchSize returns option to specify maximum concurrent operations
|
||||
// to be processed in a single transactions.
|
||||
// This option is missing from `bbolt.Options` but is set right after DB is open.
|
||||
func WithMaxBatchSize(s int) Option {
|
||||
return func(c *cfg) {
|
||||
if s != 0 {
|
||||
c.boltBatchSize = s
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithMaxBatchDelay returns option to specify maximum time to wait before
|
||||
// the batch of concurrent transactions is processed.
|
||||
// This option is missing from `bbolt.Options` but is set right after DB is open.
|
||||
func WithMaxBatchDelay(d time.Duration) Option {
|
||||
return func(c *cfg) {
|
||||
if d != 0 {
|
||||
c.boltBatchDelay = d
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithEpochState return option to specify a source of current epoch height.
|
||||
func WithEpochState(s EpochState) Option {
|
||||
return func(c *cfg) {
|
||||
|
@ -364,3 +233,57 @@ func WithMetrics(m Metrics) Option {
|
|||
c.metrics = m
|
||||
}
|
||||
}
|
||||
|
||||
// WithNoSync returns option to disable synchronous writes
// of the underlying database.
func WithNoSync(v bool) Option {
	return func(c *cfg) {
		c.noSync = v
	}
}
|
||||
|
||||
// WithVerbose returns option to set the verbose flag
// of the underlying database.
func WithVerbose(v bool) Option {
	return func(c *cfg) {
		c.verbose = v
	}
}
|
||||
|
||||
// WithMemtablesCount returns option to set the count of memtables
// of the underlying database.
func WithMemtablesCount(v int) Option {
	return func(c *cfg) {
		c.memtablesCount = v
	}
}
|
||||
|
||||
// WithValueThreshold returns option to set the value threshold
// of the underlying database.
func WithValueThreshold(v int64) Option {
	return func(c *cfg) {
		c.valueThreshold = v
	}
}
|
||||
|
||||
// WithValueLogFileSize returns option to set the value log file size
// of the underlying database.
func WithValueLogFileSize(v int64) Option {
	return func(c *cfg) {
		c.valueLogFileSize = v
	}
}
|
||||
|
||||
// WithIndexCacheSize returns option to set the index cache size
// of the underlying database.
func WithIndexCacheSize(v int64) Option {
	return func(c *cfg) {
		c.indexCacheSize = v
	}
}
|
||||
|
||||
// WithNumCompactors returns option to set the number of compactors
// of the underlying database.
func WithNumCompactors(v int) Option {
	return func(c *cfg) {
		c.numCompactors = v
	}
}
|
||||
|
||||
// WithGCInterval returns option to set the interval between
// garbage collection runs of the underlying database.
func WithGCInterval(v time.Duration) Option {
	return func(c *cfg) {
		c.gcInterval = v
	}
}
|
||||
|
||||
// WithGCDiscardRatio returns option to set the discard ratio used by
// garbage collection of the underlying database.
func WithGCDiscardRatio(v float64) Option {
	return func(c *cfg) {
		c.gcDiscardRatio = v
	}
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
@ -15,63 +14,23 @@ import (
|
|||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
var errFailedToRemoveUniqueIndexes = errors.New("can't remove unique indexes")
|
||||
|
||||
// DeletePrm groups the parameters of Delete operation.
|
||||
type DeletePrm struct {
|
||||
addrs []oid.Address
|
||||
Address oid.Address
|
||||
}
|
||||
|
||||
// DeleteRes groups the resulting values of Delete operation.
|
||||
type DeleteRes struct {
|
||||
phyCount uint64
|
||||
logicCount uint64
|
||||
userCount uint64
|
||||
phySize uint64
|
||||
logicSize uint64
|
||||
removedByCnrID map[cid.ID]ObjectCounters
|
||||
}
|
||||
|
||||
// LogicCount returns the number of removed logic
|
||||
// objects.
|
||||
func (d DeleteRes) LogicCount() uint64 {
|
||||
return d.logicCount
|
||||
}
|
||||
|
||||
func (d DeleteRes) UserCount() uint64 {
|
||||
return d.userCount
|
||||
}
|
||||
|
||||
// RemovedByCnrID returns the number of removed objects by container ID.
|
||||
func (d DeleteRes) RemovedByCnrID() map[cid.ID]ObjectCounters {
|
||||
return d.removedByCnrID
|
||||
}
|
||||
|
||||
// PhyCount returns the number of removed physical objects.
|
||||
func (d DeleteRes) PhyCount() uint64 {
|
||||
return d.phyCount
|
||||
}
|
||||
|
||||
// PhySize returns the size of removed physical objects.
|
||||
func (d DeleteRes) PhySize() uint64 {
|
||||
return d.phySize
|
||||
}
|
||||
|
||||
// LogicSize returns the size of removed logical objects.
|
||||
func (d DeleteRes) LogicSize() uint64 {
|
||||
return d.logicSize
|
||||
}
|
||||
|
||||
// SetAddresses is a Delete option to set the addresses of the objects to delete.
|
||||
//
|
||||
// Option is required.
|
||||
func (p *DeletePrm) SetAddresses(addrs ...oid.Address) {
|
||||
p.addrs = addrs
|
||||
PhyCount uint64
|
||||
LogicCount uint64
|
||||
UserCount uint64
|
||||
PhySize uint64
|
||||
LogicSize uint64
|
||||
}
|
||||
|
||||
type referenceNumber struct {
|
||||
|
@ -82,8 +41,6 @@ type referenceNumber struct {
|
|||
obj *objectSDK.Object
|
||||
}
|
||||
|
||||
type referenceCounter map[string]*referenceNumber
|
||||
|
||||
// Delete removed object records from metabase indexes.
|
||||
func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
|
||||
var (
|
||||
|
@ -94,10 +51,7 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
|
|||
db.metrics.AddMethodDuration("Delete", time.Since(startedAt), deleted)
|
||||
}()
|
||||
|
||||
_, span := tracing.StartSpanFromContext(ctx, "metabase.Delete",
|
||||
trace.WithAttributes(
|
||||
attribute.Int("addr_count", len(prm.addrs)),
|
||||
))
|
||||
_, span := tracing.StartSpanFromContext(ctx, "metabase.Delete")
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
|
@ -109,370 +63,232 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
|
|||
return DeleteRes{}, ErrReadOnlyMode
|
||||
}
|
||||
|
||||
bucketID, release := db.bucketIDs.BucketID(prm.Address.Container())
|
||||
defer release()
|
||||
|
||||
var err error
|
||||
var res DeleteRes
|
||||
|
||||
err = db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
res, err = db.deleteGroup(tx, prm.addrs)
|
||||
err = db.database.Update(func(tx *badger.Txn) error {
|
||||
res, err = db.deleteByAddress(ctx, tx, prm.Address, bucketID)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
deleted = true
|
||||
for i := range prm.addrs {
|
||||
storagelog.Write(db.log,
|
||||
storagelog.AddressField(prm.addrs[i]),
|
||||
storagelog.AddressField(prm.Address),
|
||||
storagelog.OpField("metabase DELETE"))
|
||||
}
|
||||
}
|
||||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
// deleteGroup deletes object from the metabase. Handles removal of the
|
||||
// references of the split objects.
|
||||
func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address) (DeleteRes, error) {
|
||||
res := DeleteRes{
|
||||
removedByCnrID: make(map[cid.ID]ObjectCounters),
|
||||
}
|
||||
refCounter := make(referenceCounter, len(addrs))
|
||||
func (db *DB) deleteByAddress(ctx context.Context, tx *badger.Txn, addr oid.Address, bucketID uint16) (DeleteRes, error) {
|
||||
refCounter := &referenceNumber{}
|
||||
currEpoch := db.epochState.CurrentEpoch()
|
||||
|
||||
for i := range addrs {
|
||||
r, err := db.delete(tx, addrs[i], refCounter, currEpoch)
|
||||
res, err := db.delete(ctx, tx, addr, refCounter, currEpoch)
|
||||
if err != nil {
|
||||
return DeleteRes{}, err
|
||||
}
|
||||
|
||||
applyDeleteSingleResult(r, &res, addrs, i)
|
||||
}
|
||||
|
||||
if err := db.updateCountersDelete(tx, res); err != nil {
|
||||
if err := db.updateCountersDelete(tx, addr.Container(), res, bucketID); err != nil {
|
||||
return DeleteRes{}, err
|
||||
}
|
||||
|
||||
for _, refNum := range refCounter {
|
||||
if refNum.cur == refNum.all {
|
||||
err := db.deleteObject(tx, refNum.obj, true)
|
||||
if refCounter.cur == refCounter.all {
|
||||
err := db.deleteObject(ctx, tx, refCounter.obj, true)
|
||||
if err != nil {
|
||||
return DeleteRes{}, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (db *DB) updateCountersDelete(tx *bbolt.Tx, res DeleteRes) error {
|
||||
if res.phyCount > 0 {
|
||||
err := db.updateShardObjectCounter(tx, phy, res.phyCount, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not decrease phy object counter: %w", err)
|
||||
func (db *DB) updateCountersDelete(tx *badger.Txn, cnrID cid.ID, res DeleteRes, bucketID uint16) error {
|
||||
delta := map[cid.ID]objectCounterValue{
|
||||
cnrID: {
|
||||
Logic: -1 * int64(res.LogicCount),
|
||||
Phy: -1 * int64(res.PhyCount),
|
||||
User: -1 * int64(res.UserCount),
|
||||
},
|
||||
}
|
||||
bucketIDs := map[cid.ID]uint16{
|
||||
cnrID: bucketID,
|
||||
}
|
||||
|
||||
if res.logicCount > 0 {
|
||||
err := db.updateShardObjectCounter(tx, logical, res.logicCount, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not decrease logical object counter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if res.userCount > 0 {
|
||||
err := db.updateShardObjectCounter(tx, user, res.userCount, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not decrease user object counter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := db.updateContainerCounter(tx, res.removedByCnrID, false); err != nil {
|
||||
if err := updateContainerCounter(tx, delta, bucketIDs); err != nil {
|
||||
return fmt.Errorf("could not decrease container object counter: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func applyDeleteSingleResult(r deleteSingleResult, res *DeleteRes, addrs []oid.Address, i int) {
|
||||
if r.Phy {
|
||||
if v, ok := res.removedByCnrID[addrs[i].Container()]; ok {
|
||||
v.Phy++
|
||||
res.removedByCnrID[addrs[i].Container()] = v
|
||||
} else {
|
||||
res.removedByCnrID[addrs[i].Container()] = ObjectCounters{
|
||||
Phy: 1,
|
||||
}
|
||||
}
|
||||
|
||||
res.phyCount++
|
||||
res.phySize += r.Size
|
||||
}
|
||||
|
||||
if r.Logic {
|
||||
if v, ok := res.removedByCnrID[addrs[i].Container()]; ok {
|
||||
v.Logic++
|
||||
res.removedByCnrID[addrs[i].Container()] = v
|
||||
} else {
|
||||
res.removedByCnrID[addrs[i].Container()] = ObjectCounters{
|
||||
Logic: 1,
|
||||
}
|
||||
}
|
||||
|
||||
res.logicCount++
|
||||
res.logicSize += r.Size
|
||||
}
|
||||
|
||||
if r.User {
|
||||
if v, ok := res.removedByCnrID[addrs[i].Container()]; ok {
|
||||
v.User++
|
||||
res.removedByCnrID[addrs[i].Container()] = v
|
||||
} else {
|
||||
res.removedByCnrID[addrs[i].Container()] = ObjectCounters{
|
||||
User: 1,
|
||||
}
|
||||
}
|
||||
|
||||
res.userCount++
|
||||
}
|
||||
}
|
||||
|
||||
type deleteSingleResult struct {
|
||||
Phy bool
|
||||
Logic bool
|
||||
User bool
|
||||
Size uint64
|
||||
}
|
||||
|
||||
// delete removes object indexes from the metabase. Counts the references
|
||||
// of the object that is being removed.
|
||||
// The first return value indicates if an object has been removed. (removing a
|
||||
// non-exist object is error-free). The second return value indicates if an
|
||||
// object was available before the removal (for calculating the logical object
|
||||
// counter). The third return value The fourth return value is removed object payload size.
|
||||
func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (deleteSingleResult, error) {
|
||||
key := make([]byte, addressKeySize)
|
||||
addrKey := addressKey(addr, key)
|
||||
garbageBKT := tx.Bucket(garbageBucketName)
|
||||
graveyardBKT := tx.Bucket(graveyardBucketName)
|
||||
|
||||
removeAvailableObject := inGraveyardWithKey(addrKey, graveyardBKT, garbageBKT) == 0
|
||||
func (db *DB) delete(ctx context.Context, tx *badger.Txn, addr oid.Address, refCounter *referenceNumber, currEpoch uint64) (DeleteRes, error) {
|
||||
status, err := inGraveyardWithKey(tx, addr)
|
||||
if err != nil {
|
||||
return DeleteRes{}, err
|
||||
}
|
||||
removeAvailableObject := status == 0
|
||||
|
||||
// unmarshal object, work only with physically stored (raw == true) objects
|
||||
obj, err := db.get(tx, addr, key, false, true, currEpoch)
|
||||
obj, err := get(ctx, tx, addr, false, true, currEpoch)
|
||||
if err != nil {
|
||||
if client.IsErrObjectNotFound(err) {
|
||||
addrKey = addressKey(addr, key)
|
||||
if garbageBKT != nil {
|
||||
err := garbageBKT.Delete(addrKey)
|
||||
err := deleteFromGarbage(tx, addr)
|
||||
if err != nil {
|
||||
return deleteSingleResult{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
|
||||
return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
|
||||
}
|
||||
}
|
||||
return deleteSingleResult{}, nil
|
||||
return DeleteRes{}, nil
|
||||
}
|
||||
var siErr *objectSDK.SplitInfoError
|
||||
var ecErr *objectSDK.ECInfoError
|
||||
if errors.As(err, &siErr) || errors.As(err, &ecErr) {
|
||||
// if object is virtual (parent) then do nothing, it will be deleted with last child
|
||||
// if object is erasure-coded it will be deleted with the last chunk presented on the shard
|
||||
return deleteSingleResult{}, nil
|
||||
return DeleteRes{}, nil
|
||||
}
|
||||
|
||||
return deleteSingleResult{}, err
|
||||
return DeleteRes{}, err
|
||||
}
|
||||
|
||||
addrKey = addressKey(addr, key)
|
||||
// remove record from the garbage bucket
|
||||
if garbageBKT != nil {
|
||||
err := garbageBKT.Delete(addrKey)
|
||||
err = deleteFromGarbage(tx, addr)
|
||||
if err != nil {
|
||||
return deleteSingleResult{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
|
||||
}
|
||||
return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
|
||||
}
|
||||
|
||||
// if object is an only link to a parent, then remove parent
|
||||
if parent := obj.Parent(); parent != nil {
|
||||
parAddr := object.AddressOf(parent)
|
||||
sParAddr := addressKey(parAddr, key)
|
||||
k := string(sParAddr)
|
||||
|
||||
nRef, ok := refCounter[k]
|
||||
if !ok {
|
||||
nRef = &referenceNumber{
|
||||
all: parentLength(tx, parAddr),
|
||||
addr: parAddr,
|
||||
obj: parent,
|
||||
parentLen, err := parentLength(ctx, tx, parAddr)
|
||||
if err != nil {
|
||||
return DeleteRes{}, fmt.Errorf("failed to get parent count for object %s: %w", parAddr, err)
|
||||
}
|
||||
|
||||
refCounter[k] = nRef
|
||||
}
|
||||
|
||||
nRef.cur++
|
||||
refCounter.addr = parAddr
|
||||
refCounter.all = parentLen
|
||||
refCounter.obj = parent
|
||||
refCounter.cur = 1
|
||||
}
|
||||
|
||||
isUserObject := IsUserObject(obj)
|
||||
|
||||
// remove object
|
||||
err = db.deleteObject(tx, obj, false)
|
||||
err = db.deleteObject(ctx, tx, obj, false)
|
||||
if err != nil {
|
||||
return deleteSingleResult{}, fmt.Errorf("could not remove object: %w", err)
|
||||
return DeleteRes{}, fmt.Errorf("could not remove object: %w", err)
|
||||
}
|
||||
|
||||
return deleteSingleResult{
|
||||
Phy: true,
|
||||
Logic: removeAvailableObject,
|
||||
User: isUserObject && removeAvailableObject,
|
||||
Size: obj.PayloadSize(),
|
||||
}, nil
|
||||
var result DeleteRes
|
||||
result.PhyCount = 1
|
||||
result.PhySize = obj.PayloadSize()
|
||||
if removeAvailableObject {
|
||||
result.LogicCount = 1
|
||||
result.LogicSize = obj.PayloadSize()
|
||||
}
|
||||
if removeAvailableObject && isUserObject {
|
||||
result.UserCount = 1
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (db *DB) deleteObject(
|
||||
tx *bbolt.Tx,
|
||||
ctx context.Context,
|
||||
tx *badger.Txn,
|
||||
obj *objectSDK.Object,
|
||||
isParent bool,
|
||||
) error {
|
||||
err := delUniqueIndexes(tx, obj, isParent)
|
||||
err := delUniqueIndexes(ctx, tx, obj, isParent)
|
||||
if err != nil {
|
||||
return errFailedToRemoveUniqueIndexes
|
||||
}
|
||||
|
||||
err = updateListIndexes(tx, obj, delListIndexItem)
|
||||
err = updateListIndexes(tx, obj, deleteByKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't remove list indexes: %w", err)
|
||||
}
|
||||
|
||||
err = updateFKBTIndexes(tx, obj, delFKBTIndexItem)
|
||||
err = updateFKBTIndexes(tx, obj, deleteByKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't remove fake bucket tree indexes: %w", err)
|
||||
}
|
||||
|
||||
if isParent {
|
||||
// remove record from the garbage bucket, because regular object deletion does nothing for virtual object
|
||||
garbageBKT := tx.Bucket(garbageBucketName)
|
||||
if garbageBKT != nil {
|
||||
key := make([]byte, addressKeySize)
|
||||
addrKey := addressKey(object.AddressOf(obj), key)
|
||||
err := garbageBKT.Delete(addrKey)
|
||||
err := deleteFromGarbage(tx, object.AddressOf(obj))
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not remove from garbage bucket: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func deleteFromGarbage(tx *badger.Txn, addr oid.Address) error {
|
||||
return tx.Delete(garbageKey(addr.Container(), addr.Object()))
|
||||
}
|
||||
|
||||
// parentLength returns amount of available children from parentid index.
|
||||
func parentLength(tx *bbolt.Tx, addr oid.Address) int {
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
|
||||
bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:]))
|
||||
if bkt == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:])))
|
||||
func parentLength(ctx context.Context, tx *badger.Txn, addr oid.Address) (int, error) {
|
||||
var result int
|
||||
prefix := parentKeyLongPrefix(addr.Container(), addr.Object())
|
||||
const batchSize = 1000
|
||||
for {
|
||||
ids, err := selectByPrefixBatch(ctx, tx, prefix, batchSize)
|
||||
if err != nil {
|
||||
return 0
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return len(lst)
|
||||
}
|
||||
|
||||
func delUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) {
|
||||
bkt := tx.Bucket(item.name)
|
||||
if bkt != nil {
|
||||
_ = bkt.Delete(item.key) // ignore error, best effort there
|
||||
}
|
||||
}
|
||||
|
||||
func delFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
||||
bkt := tx.Bucket(item.name)
|
||||
if bkt == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
fkbtRoot := bkt.Bucket(item.key)
|
||||
if fkbtRoot == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
_ = fkbtRoot.Delete(item.val) // ignore error, best effort there
|
||||
return nil
|
||||
}
|
||||
|
||||
func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
||||
bkt := tx.Bucket(item.name)
|
||||
if bkt == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
lst, err := decodeList(bkt.Get(item.key))
|
||||
if err != nil || len(lst) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// remove element from the list
|
||||
for i := range lst {
|
||||
if bytes.Equal(item.val, lst[i]) {
|
||||
copy(lst[i:], lst[i+1:])
|
||||
lst = lst[:len(lst)-1]
|
||||
result += len(ids)
|
||||
if len(ids) < batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// if list empty, remove the key from <list> bucket
|
||||
if len(lst) == 0 {
|
||||
_ = bkt.Delete(item.key) // ignore error, best effort there
|
||||
|
||||
return nil
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// if list is not empty, then update it
|
||||
encodedLst, err := encodeList(lst)
|
||||
if err != nil {
|
||||
return nil // ignore error, best effort there
|
||||
func delParent(ctx context.Context, tx *badger.Txn, addr oid.Address) error {
|
||||
prefix := parentKeyLongPrefix(addr.Container(), addr.Object())
|
||||
return deleteByPrefix(ctx, tx, prefix)
|
||||
}
|
||||
|
||||
_ = bkt.Put(item.key, encodedLst) // ignore error, best effort there
|
||||
return nil
|
||||
}
|
||||
|
||||
func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error {
|
||||
func delUniqueIndexes(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, isParent bool) error {
|
||||
addr := object.AddressOf(obj)
|
||||
|
||||
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
|
||||
cnr := addr.Container()
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
|
||||
// add value to primary unique bucket
|
||||
if !isParent {
|
||||
var key []byte
|
||||
switch obj.Type() {
|
||||
case objectSDK.TypeRegular:
|
||||
bucketName = primaryBucketName(cnr, bucketName)
|
||||
key = primaryKey(addr.Container(), addr.Object())
|
||||
case objectSDK.TypeTombstone:
|
||||
bucketName = tombstoneBucketName(cnr, bucketName)
|
||||
key = tombstoneKey(addr.Container(), addr.Object())
|
||||
case objectSDK.TypeLock:
|
||||
bucketName = bucketNameLockers(cnr, bucketName)
|
||||
key = lockersKey(addr.Container(), addr.Object())
|
||||
default:
|
||||
return ErrUnknownObjectType
|
||||
}
|
||||
|
||||
delUniqueIndexItem(tx, namedBucketItem{
|
||||
name: bucketName,
|
||||
key: objKey,
|
||||
})
|
||||
if err := tx.Delete(key); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
delUniqueIndexItem(tx, namedBucketItem{
|
||||
name: parentBucketName(cnr, bucketName),
|
||||
key: objKey,
|
||||
})
|
||||
if err := delParent(ctx, tx, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index
|
||||
name: smallBucketName(cnr, bucketName),
|
||||
key: objKey,
|
||||
})
|
||||
delUniqueIndexItem(tx, namedBucketItem{ // remove from root index
|
||||
name: rootBucketName(cnr, bucketName),
|
||||
key: objKey,
|
||||
})
|
||||
|
||||
return nil
|
||||
if err := tx.Delete(smallKey(addr.Container(), addr.Object())); err != nil {
|
||||
return err
|
||||
}
|
||||
if ecHead := obj.ECHeader(); ecHead != nil {
|
||||
if err := tx.Delete(ecInfoKey(addr.Container(), ecHead.Parent(), addr.Object())); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return tx.Delete(rootKey(addr.Container(), addr.Object()))
|
||||
}
|
||||
|
|
|
@ -92,8 +92,8 @@ func TestDeleteAllChildren(t *testing.T) {
|
|||
require.True(t, errors.As(err, &siErr))
|
||||
|
||||
// remove all children in single call
|
||||
err = metaDelete(db, object.AddressOf(child1), object.AddressOf(child2))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, metaDelete(db, object.AddressOf(child1)))
|
||||
require.NoError(t, metaDelete(db, object.AddressOf(child2)))
|
||||
|
||||
// parent should not be found now
|
||||
ex, err := metaExists(db, object.AddressOf(parent))
|
||||
|
@ -155,9 +155,11 @@ func TestDelete(t *testing.T) {
|
|||
require.NoError(t, db.IterateOverGarbage(context.Background(), iprm))
|
||||
require.Equal(t, 10, len(addrs))
|
||||
var deletePrm meta.DeletePrm
|
||||
deletePrm.SetAddresses(addrs...)
|
||||
for _, addr := range addrs {
|
||||
deletePrm.Address = addr
|
||||
_, err := db.Delete(context.Background(), deletePrm)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
addrs = nil
|
||||
iprm.SetHandler(func(o meta.GarbageObject) error {
|
||||
|
@ -190,7 +192,7 @@ func TestDeleteDropsGCMarkIfObjectNotFound(t *testing.T) {
|
|||
require.Equal(t, 1, garbageCount)
|
||||
|
||||
var delPrm meta.DeletePrm
|
||||
delPrm.SetAddresses(addr)
|
||||
delPrm.Address = addr
|
||||
_, err = db.Delete(context.Background(), delPrm)
|
||||
require.NoError(t, err)
|
||||
|
||||
|
@ -199,9 +201,9 @@ func TestDeleteDropsGCMarkIfObjectNotFound(t *testing.T) {
|
|||
require.Equal(t, 0, garbageCount)
|
||||
}
|
||||
|
||||
func metaDelete(db *meta.DB, addrs ...oid.Address) error {
|
||||
func metaDelete(db *meta.DB, addr oid.Address) error {
|
||||
var deletePrm meta.DeletePrm
|
||||
deletePrm.SetAddresses(addrs...)
|
||||
deletePrm.Address = addr
|
||||
|
||||
_, err := db.Delete(context.Background(), deletePrm)
|
||||
return err
|
||||
|
|
|
@ -1,19 +1,18 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -81,8 +80,8 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err
|
|||
|
||||
currEpoch := db.epochState.CurrentEpoch()
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
res.exists, res.locked, err = db.exists(tx, prm.addr, prm.paddr, currEpoch)
|
||||
err = db.database.View(func(tx *badger.Txn) error {
|
||||
res.exists, res.locked, err = exists(ctx, tx, prm.addr, prm.paddr, currEpoch)
|
||||
|
||||
return err
|
||||
})
|
||||
|
@ -90,13 +89,20 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err
|
|||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) {
|
||||
func exists(ctx context.Context, tx *badger.Txn, addr oid.Address, parent oid.Address, currEpoch uint64) (bool, bool, error) {
|
||||
status, err := objectStatus(ctx, tx, addr, currEpoch)
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
var locked bool
|
||||
if !parent.Equals(oid.Address{}) {
|
||||
locked = objectLocked(tx, parent.Container(), parent.Object())
|
||||
locked, err = objectLocked(ctx, tx, parent.Container(), parent.Object())
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
}
|
||||
// check graveyard and object expiration first
|
||||
switch objectStatus(tx, addr, currEpoch) {
|
||||
switch status {
|
||||
case 1:
|
||||
return false, locked, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
case 2:
|
||||
|
@ -105,32 +111,41 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo
|
|||
return false, locked, ErrObjectIsExpired
|
||||
}
|
||||
|
||||
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
|
||||
|
||||
cnr := addr.Container()
|
||||
key := make([]byte, bucketKeySize)
|
||||
|
||||
// if graveyard is empty, then check if object exists in primary bucket
|
||||
if inBucket(tx, primaryBucketName(cnr, key), objKey) {
|
||||
v, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
if v != nil {
|
||||
return true, locked, nil
|
||||
}
|
||||
|
||||
// if primary bucket is empty, then check if object exists in parent bucket
|
||||
if inBucket(tx, parentBucketName(cnr, key), objKey) {
|
||||
splitInfo, err := getSplitInfo(tx, cnr, objKey)
|
||||
children, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1) // try to found any child
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
if len(children) > 0 {
|
||||
splitInfo, err := getSplitInfo(tx, addr)
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
return false, locked, logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
|
||||
}
|
||||
|
||||
// if parent bucket is empty, then check if object exists with ec prefix
|
||||
children, err = selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), 1) // try to found any child
|
||||
if err != nil {
|
||||
return false, locked, err
|
||||
}
|
||||
|
||||
return false, locked, logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
|
||||
}
|
||||
// if parent bucket is empty, then check if object exists in ec bucket
|
||||
if data := getFromBucket(tx, ecInfoBucketName(cnr, key), objKey); len(data) != 0 {
|
||||
return false, locked, getECInfoError(tx, cnr, data)
|
||||
if len(children) > 0 {
|
||||
return false, locked, getECInfoError(ctx, tx, addr)
|
||||
}
|
||||
|
||||
// if parent bucket is empty, then check if object exists in typed buckets
|
||||
return firstIrregularObjectType(tx, cnr, objKey) != objectSDK.TypeRegular, locked, nil
|
||||
t, err := firstIrregularObjectType(tx, addr.Container(), addr.Object())
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
|
||||
return t != objectSDK.TypeRegular, locked, nil
|
||||
}
|
||||
|
||||
// objectStatus returns:
|
||||
|
@ -138,86 +153,68 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, parent oid.Address, currEpo
|
|||
// - 1 if object with GC mark;
|
||||
// - 2 if object is covered with tombstone;
|
||||
// - 3 if object is expired.
|
||||
func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
|
||||
func objectStatus(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (uint8, error) {
|
||||
// locked object could not be removed/marked with GC/expired
|
||||
if objectLocked(tx, addr.Container(), addr.Object()) {
|
||||
return 0
|
||||
locked, err := objectLocked(ctx, tx, addr.Container(), addr.Object())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if locked {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// we check only if the object is expired in the current
|
||||
// epoch since it is considered the only corner case: the
|
||||
// GC is expected to collect all the objects that have
|
||||
// expired previously for less than the one epoch duration
|
||||
st, err := inGraveyardWithKey(tx, addr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if st > 0 {
|
||||
return st, nil
|
||||
}
|
||||
|
||||
expired := isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpoch, addr, currEpoch)
|
||||
if !expired {
|
||||
expired = isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpochNeoFS, addr, currEpoch)
|
||||
expired, err := isExpired(ctx, tx, addr, currEpoch)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if expired {
|
||||
return 3
|
||||
return 3, nil
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
graveyardBkt := tx.Bucket(graveyardBucketName)
|
||||
garbageBkt := tx.Bucket(garbageBucketName)
|
||||
addrKey := addressKey(addr, make([]byte, addressKeySize))
|
||||
return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt)
|
||||
func inGraveyardWithKey(tx *badger.Txn, addr oid.Address) (uint8, error) {
|
||||
v, err := valueOrNil(tx, graveyardKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if v != nil {
|
||||
return 2, nil
|
||||
}
|
||||
v, err = valueOrNil(tx, garbageKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if v != nil {
|
||||
return 1, nil
|
||||
}
|
||||
|
||||
func inGraveyardWithKey(addrKey []byte, graveyard, garbageBCK *bbolt.Bucket) uint8 {
|
||||
if graveyard == nil {
|
||||
// incorrect metabase state, does not make
|
||||
// sense to check garbage bucket
|
||||
return 0
|
||||
}
|
||||
|
||||
val := graveyard.Get(addrKey)
|
||||
if val == nil {
|
||||
if garbageBCK == nil {
|
||||
// incorrect node state
|
||||
return 0
|
||||
}
|
||||
|
||||
val = garbageBCK.Get(addrKey)
|
||||
if val != nil {
|
||||
// object has been marked with GC
|
||||
return 1
|
||||
}
|
||||
|
||||
// neither in the graveyard
|
||||
// nor was marked with GC mark
|
||||
return 0
|
||||
}
|
||||
|
||||
// object in the graveyard
|
||||
return 2
|
||||
}
|
||||
|
||||
// inBucket checks if key <key> is present in bucket <name>.
|
||||
func inBucket(tx *bbolt.Tx, name, key []byte) bool {
|
||||
bkt := tx.Bucket(name)
|
||||
if bkt == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// using `get` as `exists`: https://github.com/boltdb/bolt/issues/321
|
||||
val := bkt.Get(key)
|
||||
|
||||
return len(val) != 0
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// getSplitInfo returns SplitInfo structure from root index. Returns error
|
||||
// if there is no `key` record in root index.
|
||||
func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) {
|
||||
bucketName := rootBucketName(cnr, make([]byte, bucketKeySize))
|
||||
rawSplitInfo := getFromBucket(tx, bucketName, key)
|
||||
if len(rawSplitInfo) == 0 {
|
||||
func getSplitInfo(tx *badger.Txn, addr oid.Address) (*objectSDK.SplitInfo, error) {
|
||||
rawSplitInfo, err := valueOrNil(tx, rootKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rawSplitInfo) == 0 || bytes.Equal(zeroValue, rawSplitInfo) {
|
||||
return nil, ErrLackSplitInfo
|
||||
}
|
||||
|
||||
splitInfo := objectSDK.NewSplitInfo()
|
||||
|
||||
err := splitInfo.Unmarshal(rawSplitInfo)
|
||||
err = splitInfo.Unmarshal(rawSplitInfo)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("can't unmarshal split info from root index: %w", err)
|
||||
}
|
||||
|
|
|
@ -3,16 +3,13 @@ package meta
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -44,44 +41,10 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A
|
|||
}
|
||||
|
||||
result := make([]oid.Address, 0, len(addresses))
|
||||
containerIDToObjectIDs := make(map[cid.ID][]oid.ID)
|
||||
for _, addr := range addresses {
|
||||
containerIDToObjectIDs[addr.Container()] = append(containerIDToObjectIDs[addr.Container()], addr.Object())
|
||||
}
|
||||
|
||||
err := db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
for containerID, objectIDs := range containerIDToObjectIDs {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ErrInterruptIterator
|
||||
default:
|
||||
}
|
||||
|
||||
expiredNeoFS, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpochNeoFS, epoch, containerID, objectIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
expiredSys, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpoch, epoch, containerID, objectIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, o := range expiredNeoFS {
|
||||
var a oid.Address
|
||||
a.SetContainer(containerID)
|
||||
a.SetObject(o)
|
||||
result = append(result, a)
|
||||
}
|
||||
|
||||
for _, o := range expiredSys {
|
||||
var a oid.Address
|
||||
a.SetContainer(containerID)
|
||||
a.SetObject(o)
|
||||
result = append(result, a)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
var e error
|
||||
result, e = selectExpiredObjects(ctx, tx, epoch, addresses)
|
||||
return e
|
||||
})
|
||||
if err != nil {
|
||||
return nil, metaerr.Wrap(err)
|
||||
|
@ -90,76 +53,179 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A
|
|||
return result, nil
|
||||
}
|
||||
|
||||
func isExpiredWithAttribute(tx *bbolt.Tx, attr string, addr oid.Address, currEpoch uint64) bool {
|
||||
// bucket with objects that have expiration attr
|
||||
attrKey := make([]byte, bucketKeySize+len(attr))
|
||||
expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), attr, attrKey))
|
||||
if expirationBucket != nil {
|
||||
// bucket that contains objects that expire in the current epoch
|
||||
prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10)))
|
||||
if prevEpochBkt != nil {
|
||||
rawOID := objectKey(addr.Object(), make([]byte, objectKeySize))
|
||||
if prevEpochBkt.Get(rawOID) != nil {
|
||||
return true
|
||||
}
|
||||
func isExpired(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (bool, error) {
|
||||
prefix := []byte{expiredPrefix}
|
||||
opts := badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: []byte{expiredPrefix},
|
||||
}
|
||||
it := tx.NewIterator(opts)
|
||||
defer it.Close()
|
||||
|
||||
// iteration does in ascending order by expiration epoch.
|
||||
// gc does expired objects collect every epoch, so here should be not so much items.
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func selectExpiredObjectIDs(tx *bbolt.Tx, attr string, epoch uint64, containerID cid.ID, objectIDs []oid.ID) ([]oid.ID, error) {
|
||||
result := make([]oid.ID, 0)
|
||||
notResolved := make(map[oid.ID]struct{})
|
||||
for _, oid := range objectIDs {
|
||||
notResolved[oid] = struct{}{}
|
||||
}
|
||||
|
||||
expiredBuffer := make([]oid.ID, 0)
|
||||
objectKeyBuffer := make([]byte, objectKeySize)
|
||||
|
||||
expirationBucketKey := make([]byte, bucketKeySize+len(attr))
|
||||
expirationBucket := tx.Bucket(attributeBucketName(containerID, attr, expirationBucketKey))
|
||||
if expirationBucket == nil {
|
||||
return result, nil // all not expired
|
||||
}
|
||||
|
||||
err := expirationBucket.ForEach(func(epochExpBucketKey, _ []byte) error {
|
||||
bucketExpiresAfter, err := strconv.ParseUint(string(epochExpBucketKey), 10, 64)
|
||||
expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse expiration epoch: %w", err)
|
||||
} else if bucketExpiresAfter >= epoch {
|
||||
return nil
|
||||
return false, err
|
||||
}
|
||||
|
||||
epochExpirationBucket := expirationBucket.Bucket(epochExpBucketKey)
|
||||
if epochExpirationBucket == nil {
|
||||
return nil
|
||||
if expEpoch >= currEpoch {
|
||||
return false, nil // keys are ordered by epoch, so next items will be discarded anyway.
|
||||
}
|
||||
|
||||
expiredBuffer = expiredBuffer[:0]
|
||||
for oid := range notResolved {
|
||||
key := objectKey(oid, objectKeyBuffer)
|
||||
if epochExpirationBucket.Get(key) != nil {
|
||||
expiredBuffer = append(expiredBuffer, oid)
|
||||
curAddr, err := addressFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if curAddr == addr {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
for _, oid := range expiredBuffer {
|
||||
delete(notResolved, oid)
|
||||
result = append(result, oid)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if len(notResolved) == 0 {
|
||||
return errBreakBucketForEach
|
||||
func selectExpiredObjects(ctx context.Context, tx *badger.Txn, epoch uint64, objects []oid.Address) ([]oid.Address, error) {
|
||||
result := make([]oid.Address, 0)
|
||||
objMap := make(map[oid.Address]struct{})
|
||||
for _, obj := range objects {
|
||||
objMap[obj] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
prefix := []byte{expiredPrefix}
|
||||
opts := badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: []byte{expiredPrefix},
|
||||
}
|
||||
it := tx.NewIterator(opts)
|
||||
defer it.Close()
|
||||
|
||||
if err != nil && !errors.Is(err, errBreakBucketForEach) {
|
||||
// iteration does in ascending order by expiration epoch.
|
||||
// gc does expired objects collect every epoch, so here should be not so much items.
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if expEpoch >= epoch {
|
||||
return result, nil // keys are ordered by epoch, so next items will be discarded anyway.
|
||||
}
|
||||
|
||||
addr, err := addressFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, ok := objMap[addr]; ok {
|
||||
result = append(result, addr)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// IterateExpired iterates over all objects in DB which are out of date
|
||||
// relative to epoch. Locked objects are not included (do not confuse
|
||||
// with objects of type LOCK).
|
||||
//
|
||||
// If h returns ErrInterruptIterator, nil returns immediately.
|
||||
// Returns other errors of h directly.
|
||||
func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
)
|
||||
defer func() {
|
||||
db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
|
||||
}()
|
||||
_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
|
||||
trace.WithAttributes(
|
||||
attribute.String("epoch", strconv.FormatUint(epoch, 10)),
|
||||
))
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
|
||||
return iterateExpired(ctx, tx, epoch, h)
|
||||
}))
|
||||
success = err == nil
|
||||
return err
|
||||
}
|
||||
|
||||
func iterateExpired(ctx context.Context, tx *badger.Txn, epoch uint64, h ExpiredObjectHandler) error {
|
||||
prefix := []byte{expiredPrefix}
|
||||
opts := badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: []byte{expiredPrefix},
|
||||
}
|
||||
it := tx.NewIterator(opts)
|
||||
defer it.Close()
|
||||
|
||||
// iteration does in ascending order by expiration epoch.
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if expEpoch >= epoch {
|
||||
return nil // keys are ordered by epoch, so next items will be discarded anyway.
|
||||
}
|
||||
|
||||
addr, err := addressFromExpiredKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ignore locked objects.
|
||||
//
|
||||
// To slightly optimize performance we can check only REGULAR objects
|
||||
// (only they can be locked), but it's more reliable.
|
||||
isLocked, err := objectLocked(ctx, tx, addr.Container(), addr.Object())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if isLocked {
|
||||
continue
|
||||
}
|
||||
|
||||
objType, err := firstIrregularObjectType(tx, addr.Container(), addr.Object())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := h(&ExpiredObject{
|
||||
typ: objType,
|
||||
addr: addr,
|
||||
}); err != nil {
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,15 +2,54 @@ package meta_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
|
||||
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
|
||||
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestExpiredIterationOrder(t *testing.T) {
|
||||
db := newDB(t)
|
||||
defer func() { require.NoError(t, db.Close()) }()
|
||||
|
||||
containerID := cidtest.ID()
|
||||
|
||||
for i := 1; i <= 1000; i++ {
|
||||
obj := testutil.GenerateObject()
|
||||
obj.SetContainerID(containerID)
|
||||
setExpiration(obj, uint64(i))
|
||||
err := putBig(db, obj)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
var previous uint64
|
||||
require.NoError(t, db.IterateExpired(context.Background(), 2000, func(eo *meta.ExpiredObject) error {
|
||||
var prm meta.GetPrm
|
||||
prm.SetAddress(eo.Address())
|
||||
obj, err := db.Get(context.Background(), prm)
|
||||
require.NoError(t, err)
|
||||
|
||||
var found bool
|
||||
for _, attr := range obj.Header().Attributes() {
|
||||
if attr.Key() == objectV2.SysAttributeExpEpoch {
|
||||
found = true
|
||||
epoch, err := strconv.ParseUint(attr.Value(), 10, 64)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, previous+1, epoch)
|
||||
previous = epoch
|
||||
}
|
||||
}
|
||||
require.True(t, found)
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
func TestDB_SelectExpired(t *testing.T) {
|
||||
db := newDB(t)
|
||||
defer func() { require.NoError(t, db.Close()) }()
|
||||
|
|
|
@ -9,10 +9,9 @@ import (
|
|||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -77,9 +76,8 @@ func (db *DB) Get(ctx context.Context, prm GetPrm) (res GetRes, err error) {
|
|||
|
||||
currEpoch := db.epochState.CurrentEpoch()
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
key := make([]byte, addressKeySize)
|
||||
res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch)
|
||||
err = db.database.View(func(tx *badger.Txn) error {
|
||||
res.hdr, err = get(ctx, tx, prm.addr, true, prm.raw, currEpoch)
|
||||
|
||||
return err
|
||||
})
|
||||
|
@ -87,9 +85,13 @@ func (db *DB) Get(ctx context.Context, prm GetPrm) (res GetRes, err error) {
|
|||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
|
||||
func get(ctx context.Context, tx *badger.Txn, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
|
||||
if checkStatus {
|
||||
switch objectStatus(tx, addr, currEpoch) {
|
||||
st, err := objectStatus(ctx, tx, addr, currEpoch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch st {
|
||||
case 1:
|
||||
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
case 2:
|
||||
|
@ -98,78 +100,76 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw b
|
|||
return nil, ErrObjectIsExpired
|
||||
}
|
||||
}
|
||||
|
||||
key = objectKey(addr.Object(), key)
|
||||
cnr := addr.Container()
|
||||
obj := objectSDK.New()
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
|
||||
// check in primary index
|
||||
data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key)
|
||||
if len(data) != 0 {
|
||||
data, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if data != nil {
|
||||
return obj, obj.Unmarshal(data)
|
||||
}
|
||||
|
||||
data = getFromBucket(tx, ecInfoBucketName(cnr, bucketName), key)
|
||||
if len(data) != 0 {
|
||||
return nil, getECInfoError(tx, cnr, data)
|
||||
children, err := selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), 1) // try to found any child
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(children) > 0 {
|
||||
return nil, getECInfoError(ctx, tx, addr)
|
||||
}
|
||||
|
||||
// if not found then check in tombstone index
|
||||
data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key)
|
||||
if len(data) != 0 {
|
||||
data, err = valueOrNil(tx, tombstoneKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if data != nil {
|
||||
return obj, obj.Unmarshal(data)
|
||||
}
|
||||
|
||||
// if not found then check in locker index
|
||||
data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key)
|
||||
if len(data) != 0 {
|
||||
data, err = valueOrNil(tx, lockersKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if data != nil {
|
||||
return obj, obj.Unmarshal(data)
|
||||
}
|
||||
|
||||
// if not found then check if object is a virtual
|
||||
return getVirtualObject(tx, cnr, key, raw)
|
||||
return getVirtualObject(ctx, tx, addr, raw)
|
||||
}
|
||||
|
||||
func getFromBucket(tx *bbolt.Tx, name, key []byte) []byte {
|
||||
bkt := tx.Bucket(name)
|
||||
if bkt == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return bkt.Get(key)
|
||||
}
|
||||
|
||||
func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSDK.Object, error) {
|
||||
func getVirtualObject(ctx context.Context, tx *badger.Txn, addr oid.Address, raw bool) (*objectSDK.Object, error) {
|
||||
if raw {
|
||||
return nil, getSplitInfoError(tx, cnr, key)
|
||||
return nil, getSplitInfoError(tx, addr)
|
||||
}
|
||||
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
parentBucket := tx.Bucket(parentBucketName(cnr, bucketName))
|
||||
if parentBucket == nil {
|
||||
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
}
|
||||
|
||||
relativeLst, err := decodeList(parentBucket.Get(key))
|
||||
binObjIDs, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(relativeLst) == 0 { // this should never happen though
|
||||
if len(binObjIDs) == 0 { // this should never happen though
|
||||
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
}
|
||||
|
||||
// pick last item, for now there is not difference which address to pick
|
||||
// but later list might be sorted so first or last value can be more
|
||||
// prioritized to choose
|
||||
virtualOID := relativeLst[len(relativeLst)-1]
|
||||
data := getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID)
|
||||
|
||||
child := objectSDK.New()
|
||||
|
||||
err = child.Unmarshal(data)
|
||||
phyObjAddr, err := addressOfTargetFromParentKey(binObjIDs[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := valueOrNil(tx, primaryKey(phyObjAddr.Container(), phyObjAddr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if data == nil { // this should never happen though #2
|
||||
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
}
|
||||
child := objectSDK.New()
|
||||
if err := child.Unmarshal(data); err != nil {
|
||||
return nil, fmt.Errorf("can't unmarshal child with parent: %w", err)
|
||||
}
|
||||
|
||||
|
@ -182,8 +182,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
|
|||
return par, nil
|
||||
}
|
||||
|
||||
func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error {
|
||||
splitInfo, err := getSplitInfo(tx, cnr, key)
|
||||
func getSplitInfoError(tx *badger.Txn, addr oid.Address) error {
|
||||
splitInfo, err := getSplitInfo(tx, addr)
|
||||
if err == nil {
|
||||
return logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
|
||||
}
|
||||
|
@ -191,18 +191,42 @@ func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error {
|
|||
return logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||
}
|
||||
|
||||
func getECInfoError(tx *bbolt.Tx, cnr cid.ID, data []byte) error {
|
||||
keys, err := decodeList(data)
|
||||
func getECInfoError(ctx context.Context, tx *badger.Txn, addr oid.Address) error {
|
||||
var chunkAddresses []oid.Address
|
||||
for {
|
||||
keys, err := selectByPrefixBatch(ctx, tx, ecInfoLongKeyPrefix(addr.Container(), addr.Object()), batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ecInfo := objectSDK.NewECInfo()
|
||||
for _, key := range keys {
|
||||
// check in primary index
|
||||
ojbData := getFromBucket(tx, primaryBucketName(cnr, make([]byte, bucketKeySize)), key)
|
||||
if len(ojbData) != 0 {
|
||||
chunkAddress, err := addressOfChunkFromECInfoKey(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
chunkAddresses = append(chunkAddresses, chunkAddress)
|
||||
}
|
||||
if len(keys) < batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
ecInfo := objectSDK.NewECInfo()
|
||||
for _, chunkAddress := range chunkAddresses {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
objData, err := valueOrNil(tx, primaryKey(chunkAddress.Container(), chunkAddress.Object()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(objData) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
obj := objectSDK.New()
|
||||
if err := obj.Unmarshal(ojbData); err != nil {
|
||||
if err := obj.Unmarshal(objData); err != nil {
|
||||
return err
|
||||
}
|
||||
chunk := objectSDK.ECChunk{}
|
||||
|
@ -212,6 +236,5 @@ func getECInfoError(tx *bbolt.Tx, cnr cid.ID, data []byte) error {
|
|||
chunk.Total = obj.ECHeader().Total()
|
||||
ecInfo.AddChunk(chunk)
|
||||
}
|
||||
}
|
||||
return logicerr.Wrap(objectSDK.NewECInfoError(ecInfo))
|
||||
}
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
"os"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
|
||||
|
@ -215,11 +214,8 @@ func BenchmarkGet(b *testing.B) {
|
|||
}
|
||||
|
||||
func benchmarkGet(b *testing.B, numOfObj int) {
|
||||
prepareDb := func(batchSize int) (*meta.DB, []oid.Address) {
|
||||
db := newDB(b,
|
||||
meta.WithMaxBatchSize(batchSize),
|
||||
meta.WithMaxBatchDelay(10*time.Millisecond),
|
||||
)
|
||||
prepareDb := func(_ int) (*meta.DB, []oid.Address) {
|
||||
db := newDB(b)
|
||||
defer func() { require.NoError(b, db.Close()) }()
|
||||
addrs := make([]oid.Address, 0, numOfObj)
|
||||
|
||||
|
|
|
@ -9,8 +9,9 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
// GarbageObject represents descriptor of the
|
||||
|
@ -80,8 +81,8 @@ func (db *DB) IterateOverGarbage(ctx context.Context, p GarbageIterationPrm) err
|
|||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
return db.iterateDeletedObj(tx, gcHandler{p.h}, p.offset)
|
||||
err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
|
||||
return db.iterateDeletedObj(ctx, tx, gcHandler{p.h}, p.offset)
|
||||
}))
|
||||
success = err == nil
|
||||
return err
|
||||
|
@ -160,8 +161,8 @@ func (db *DB) IterateOverGraveyard(ctx context.Context, p GraveyardIterationPrm)
|
|||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
return metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
return db.iterateDeletedObj(tx, graveyardHandler{p.h}, p.offset)
|
||||
return metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
|
||||
return db.iterateDeletedObj(ctx, tx, graveyardHandler{p.h}, p.offset)
|
||||
}))
|
||||
}
|
||||
|
||||
|
@ -195,40 +196,53 @@ func (g graveyardHandler) handleKV(k, v []byte) error {
|
|||
return g.h(o)
|
||||
}
|
||||
|
||||
func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address) error {
|
||||
var bkt *bbolt.Bucket
|
||||
func (db *DB) iterateDeletedObj(ctx context.Context, tx *badger.Txn, h kvHandler, offset *oid.Address) error {
|
||||
var prefix []byte
|
||||
switch t := h.(type) {
|
||||
case graveyardHandler:
|
||||
bkt = tx.Bucket(graveyardBucketName)
|
||||
prefix = []byte{graveyardPrefix}
|
||||
case gcHandler:
|
||||
bkt = tx.Bucket(garbageBucketName)
|
||||
prefix = []byte{garbagePrefix}
|
||||
default:
|
||||
panic(fmt.Sprintf("metabase: unknown iteration object hadler: %T", t))
|
||||
}
|
||||
|
||||
c := bkt.Cursor()
|
||||
var k, v []byte
|
||||
|
||||
if offset == nil {
|
||||
k, v = c.First()
|
||||
} else {
|
||||
rawAddr := addressKey(*offset, make([]byte, addressKeySize))
|
||||
|
||||
k, v = c.Seek(rawAddr)
|
||||
if bytes.Equal(k, rawAddr) {
|
||||
// offset was found, move
|
||||
// cursor to the next element
|
||||
k, v = c.Next()
|
||||
}
|
||||
var seekKey []byte
|
||||
if offset != nil {
|
||||
cidBytes := make([]byte, cidSize)
|
||||
offset.Container().Encode(cidBytes)
|
||||
oidBytes := make([]byte, objectKeySize)
|
||||
offset.Object().Encode(oidBytes)
|
||||
seekKey = append(prefix, cidBytes...)
|
||||
seekKey = append(seekKey, oidBytes...)
|
||||
}
|
||||
|
||||
for ; k != nil; k, v = c.Next() {
|
||||
err := h.handleKV(k, v)
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
PrefetchValues: true,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
for it.Seek(seekKey); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if bytes.Equal(it.Item().Key(), seekKey) {
|
||||
continue
|
||||
}
|
||||
|
||||
key := it.Item().KeyCopy(nil)
|
||||
value, err := it.Item().ValueCopy(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = h.handleKV(key, value); err != nil {
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -237,7 +251,7 @@ func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address)
|
|||
}
|
||||
|
||||
func garbageFromKV(k []byte) (res GarbageObject, err error) {
|
||||
err = decodeAddressFromKey(&res.addr, k)
|
||||
res.addr, err = addressFromGarbageKey(k)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("could not parse address: %w", err)
|
||||
}
|
||||
|
@ -246,13 +260,42 @@ func garbageFromKV(k []byte) (res GarbageObject, err error) {
|
|||
}
|
||||
|
||||
func graveFromKV(k, v []byte) (res TombstonedObject, err error) {
|
||||
if err = decodeAddressFromKey(&res.addr, k); err != nil {
|
||||
res.addr, err = addressFromGraveyardKey(k)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("decode tombstone target from key: %w", err)
|
||||
} else if err = decodeAddressFromKey(&res.tomb, v); err != nil {
|
||||
return
|
||||
}
|
||||
res.tomb, err = decodeAddressFromGrave(v)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("decode tombstone address from value: %w", err)
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
func encodeAddressToGrave(addr oid.Address) []byte {
|
||||
value := make([]byte, cidSize+objectKeySize)
|
||||
addr.Container().Encode(value)
|
||||
addr.Object().Encode(value[cidSize:])
|
||||
return value
|
||||
}
|
||||
|
||||
func decodeAddressFromGrave(v []byte) (oid.Address, error) {
|
||||
if len(v) != cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidValueLenght
|
||||
}
|
||||
var cont cid.ID
|
||||
if err := cont.Decode(v[:cidSize]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(v[cidSize:]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
|
||||
}
|
||||
var result oid.Address
|
||||
result.SetContainer(cont)
|
||||
result.SetObject(obj)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// DropGraves deletes tombstoned objects from the
|
||||
|
@ -280,16 +323,9 @@ func (db *DB) DropGraves(ctx context.Context, tss []TombstonedObject) error {
|
|||
return ErrReadOnlyMode
|
||||
}
|
||||
|
||||
buf := make([]byte, addressKeySize)
|
||||
|
||||
return db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
bkt := tx.Bucket(graveyardBucketName)
|
||||
if bkt == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return db.database.Update(func(tx *badger.Txn) error {
|
||||
for _, ts := range tss {
|
||||
err := bkt.Delete(addressKey(ts.Address(), buf))
|
||||
err := tx.Delete(graveyardKey(ts.Address().Container(), ts.Address().Object()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"math"
|
||||
mrand "math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/nspcc-dev/neo-go/pkg/io"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func Test_getVarUint(t *testing.T) {
|
||||
data := make([]byte, 10)
|
||||
for _, val := range []uint64{0, 0xfc, 0xfd, 0xfffe, 0xffff, 0xfffffffe, 0xffffffff, math.MaxUint64} {
|
||||
expSize := io.PutVarUint(data, val)
|
||||
actual, actSize, err := getVarUint(data)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, val, actual)
|
||||
require.Equal(t, expSize, actSize, "value: %x", val)
|
||||
|
||||
_, _, err = getVarUint(data[:expSize-1])
|
||||
require.Error(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_decodeList(t *testing.T) {
|
||||
t.Run("empty", func(t *testing.T) {
|
||||
lst, err := decodeList(nil)
|
||||
require.NoError(t, err)
|
||||
require.True(t, len(lst) == 0)
|
||||
})
|
||||
t.Run("empty, 0 len", func(t *testing.T) {
|
||||
lst, err := decodeList([]byte{0})
|
||||
require.NoError(t, err)
|
||||
require.True(t, len(lst) == 0)
|
||||
})
|
||||
t.Run("bad len", func(t *testing.T) {
|
||||
_, err := decodeList([]byte{0xfe})
|
||||
require.Error(t, err)
|
||||
})
|
||||
t.Run("random", func(t *testing.T) {
|
||||
r := mrand.New(mrand.NewSource(time.Now().Unix()))
|
||||
expected := make([][]byte, 20)
|
||||
for i := range expected {
|
||||
expected[i] = make([]byte, r.Uint32()%10)
|
||||
rand.Read(expected[i])
|
||||
}
|
||||
|
||||
data, err := encodeList(expected)
|
||||
require.NoError(t, err)
|
||||
|
||||
actual, err := decodeList(data)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, expected, actual)
|
||||
|
||||
t.Run("unexpected EOF", func(t *testing.T) {
|
||||
for i := 1; i < len(data)-1; i++ {
|
||||
_, err := decodeList(data[:i])
|
||||
require.Error(t, err)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
|
@ -5,6 +5,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
|
||||
|
@ -15,7 +16,7 @@ import (
|
|||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
// InhumePrm encapsulates parameters for Inhume operation.
|
||||
|
@ -180,9 +181,11 @@ func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
|
|||
res := InhumeRes{
|
||||
inhumedByCnrID: make(map[cid.ID]ObjectCounters),
|
||||
}
|
||||
bucketIDs, release := db.acquireBucketIDs(prm)
|
||||
defer release()
|
||||
currEpoch := db.epochState.CurrentEpoch()
|
||||
err := db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
return db.inhumeTx(tx, currEpoch, prm, &res)
|
||||
err := db.database.Update(func(tx *badger.Txn) error {
|
||||
return db.inhumeTx(ctx, tx, currEpoch, prm, bucketIDs, &res)
|
||||
})
|
||||
success = err == nil
|
||||
if success {
|
||||
|
@ -195,48 +198,56 @@ func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
|
|||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes) error {
|
||||
garbageBKT := tx.Bucket(garbageBucketName)
|
||||
graveyardBKT := tx.Bucket(graveyardBucketName)
|
||||
func (db *DB) acquireBucketIDs(prm InhumePrm) (map[cid.ID]uint16, func()) {
|
||||
unique := make(map[cid.ID]struct{})
|
||||
for _, addr := range prm.target {
|
||||
unique[addr.Container()] = struct{}{}
|
||||
}
|
||||
containers := make([]cid.ID, 0, len(unique))
|
||||
for contID := range unique {
|
||||
containers = append(containers, contID)
|
||||
}
|
||||
slices.SortFunc(containers, func(lhs, rhs cid.ID) int {
|
||||
return bytes.Compare(lhs[:], rhs[:])
|
||||
})
|
||||
result := make(map[cid.ID]uint16, len(unique))
|
||||
releases := make([]func(), len(unique))
|
||||
|
||||
bkt, value, err := db.getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT, &prm)
|
||||
for i, contID := range containers {
|
||||
result[contID], releases[i] = db.bucketIDs.BucketID(contID)
|
||||
}
|
||||
return result, func() {
|
||||
for i := range releases {
|
||||
releases[len(releases)-i-1]()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (db *DB) inhumeTx(ctx context.Context, tx *badger.Txn, epoch uint64, prm InhumePrm, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
|
||||
keyer, value, err := getInhumeTargetBucketAndValue(tx, prm)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
buf := make([]byte, addressKeySize)
|
||||
for i := range prm.target {
|
||||
id := prm.target[i].Object()
|
||||
cnr := prm.target[i].Container()
|
||||
|
||||
// prevent locked objects to be inhumed
|
||||
if !prm.forceRemoval && objectLocked(tx, cnr, id) {
|
||||
return new(apistatus.ObjectLocked)
|
||||
}
|
||||
|
||||
var lockWasChecked bool
|
||||
|
||||
// prevent lock objects to be inhumed
|
||||
// if `Inhume` was called not with the
|
||||
// `WithForceGCMark` option
|
||||
var ecErr *objectSDK.ECInfoError
|
||||
if !prm.forceRemoval {
|
||||
if isLockObject(tx, cnr, id) {
|
||||
return ErrLockObjectRemoval
|
||||
if err := checkNotLockerOrLocked(ctx, tx, cnr, id); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
lockWasChecked = true
|
||||
}
|
||||
|
||||
obj, err := db.get(tx, prm.target[i], buf, false, true, epoch)
|
||||
targetKey := addressKey(prm.target[i], buf)
|
||||
var ecErr *objectSDK.ECInfoError
|
||||
obj, err := get(ctx, tx, prm.target[i], false, true, epoch)
|
||||
if err == nil {
|
||||
err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, obj, res)
|
||||
err = db.updateDeleteInfo(tx, prm.target[i], obj, bucketIDs, res)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if errors.As(err, &ecErr) {
|
||||
err = db.inhumeECInfo(tx, epoch, prm.tomb, res, garbageBKT, graveyardBKT, ecErr.ECInfo(), cnr, bkt, value, targetKey)
|
||||
err = db.inhumeECInfo(ctx, tx, epoch, keyer, value, res, ecErr.ECInfo(), cnr, bucketIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -244,18 +255,18 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes
|
|||
|
||||
if prm.tomb != nil {
|
||||
var isTomb bool
|
||||
isTomb, err = db.markAsGC(graveyardBKT, garbageBKT, targetKey)
|
||||
isTomb, err = markAsGC(tx, prm.target[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if isTomb {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// consider checking if target is already in graveyard?
|
||||
err = bkt.Put(targetKey, value)
|
||||
key := keyer(prm.target[i])
|
||||
err = tx.Set(key, value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -268,22 +279,24 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes
|
|||
// the LOCK type
|
||||
continue
|
||||
}
|
||||
|
||||
if isLockObject(tx, cnr, id) {
|
||||
isLock, err := isLockObject(tx, cnr, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if isLock {
|
||||
res.deletedLockObj = append(res.deletedLockObj, prm.target[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return db.applyInhumeResToCounters(tx, res)
|
||||
return db.applyInhumeResToCounters(tx, bucketIDs, res)
|
||||
}
|
||||
|
||||
func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *InhumeRes,
|
||||
garbageBKT *bbolt.Bucket, graveyardBKT *bbolt.Bucket,
|
||||
ecInfo *objectSDK.ECInfo, cnr cid.ID, targetBucket *bbolt.Bucket, value []byte, targetKey []byte,
|
||||
func (db *DB) inhumeECInfo(ctx context.Context, tx *badger.Txn, epoch uint64,
|
||||
keyer func(addr oid.Address) []byte, value []byte,
|
||||
res *InhumeRes, ecInfo *objectSDK.ECInfo, cnr cid.ID, bucketIDs map[cid.ID]uint16,
|
||||
) error {
|
||||
for _, chunk := range ecInfo.Chunks {
|
||||
chunkBuf := make([]byte, addressKeySize)
|
||||
var chunkAddr oid.Address
|
||||
chunkAddr.SetContainer(cnr)
|
||||
var chunkID oid.ID
|
||||
|
@ -292,22 +305,16 @@ func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *I
|
|||
return err
|
||||
}
|
||||
chunkAddr.SetObject(chunkID)
|
||||
chunkObj, err := db.get(tx, chunkAddr, chunkBuf, false, true, epoch)
|
||||
chunkObj, err := get(ctx, tx, chunkAddr, false, true, epoch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, chunkObj, res)
|
||||
err = db.updateDeleteInfo(tx, chunkAddr, chunkObj, bucketIDs, res)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
chunkKey := addressKey(chunkAddr, chunkBuf)
|
||||
if tomb != nil {
|
||||
_, err = db.markAsGC(graveyardBKT, garbageBKT, chunkKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
err = targetBucket.Put(chunkKey, value)
|
||||
key := keyer(chunkAddr)
|
||||
err = tx.Set(key, value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -315,15 +322,38 @@ func (db *DB) inhumeECInfo(tx *bbolt.Tx, epoch uint64, tomb *oid.Address, res *I
|
|||
return nil
|
||||
}
|
||||
|
||||
func (db *DB) applyInhumeResToCounters(tx *bbolt.Tx, res *InhumeRes) error {
|
||||
if err := db.updateShardObjectCounter(tx, logical, res.LogicInhumed(), false); err != nil {
|
||||
func checkNotLockerOrLocked(ctx context.Context, tx *badger.Txn, cnr cid.ID, id oid.ID) error {
|
||||
// prevent locked objects to be inhumed
|
||||
locked, err := objectLocked(ctx, tx, cnr, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := db.updateShardObjectCounter(tx, user, res.UserInhumed(), false); err != nil {
|
||||
if locked {
|
||||
return new(apistatus.ObjectLocked)
|
||||
}
|
||||
// prevent lock objects to be inhumed
|
||||
// if `Inhume` was called not with the
|
||||
// `WithForceGCMark` option
|
||||
isLock, err := isLockObject(tx, cnr, id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if isLock {
|
||||
return ErrLockObjectRemoval
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return db.updateContainerCounter(tx, res.inhumedByCnrID, false)
|
||||
func (db *DB) applyInhumeResToCounters(tx *badger.Txn, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
|
||||
counters := make(map[cid.ID]objectCounterValue, len(res.inhumedByCnrID))
|
||||
for contID, inhumed := range res.inhumedByCnrID {
|
||||
counters[contID] = objectCounterValue{
|
||||
Logic: -1 * int64(inhumed.Logic),
|
||||
Phy: -1 * int64(inhumed.Phy),
|
||||
User: -1 * int64(inhumed.User),
|
||||
}
|
||||
}
|
||||
return updateContainerCounter(tx, counters, bucketIDs)
|
||||
}
|
||||
|
||||
// getInhumeTargetBucketAndValue return target bucket to store inhume result and value that will be put in the bucket.
|
||||
|
@ -336,31 +366,33 @@ func (db *DB) applyInhumeResToCounters(tx *bbolt.Tx, res *InhumeRes) error {
|
|||
// 1. tombstone address if Inhume was called with
|
||||
// a Tombstone
|
||||
// 2. zeroValue if Inhume was called with a GC mark
|
||||
func (db *DB) getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT *bbolt.Bucket, prm *InhumePrm) (targetBucket *bbolt.Bucket, value []byte, err error) {
|
||||
func getInhumeTargetBucketAndValue(tx *badger.Txn, prm InhumePrm) (key func(addr oid.Address) []byte, value []byte, err error) {
|
||||
if prm.tomb != nil {
|
||||
targetBucket = graveyardBKT
|
||||
tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize))
|
||||
|
||||
// it is forbidden to have a tomb-on-tomb in FrostFS,
|
||||
// so graveyard keys must not be addresses of tombstones
|
||||
data := targetBucket.Get(tombKey)
|
||||
if data != nil {
|
||||
err := targetBucket.Delete(tombKey)
|
||||
tombKey := graveyardKey(prm.tomb.Container(), prm.tomb.Object())
|
||||
v, err := valueOrNil(tx, tombKey)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if v != nil {
|
||||
err := tx.Delete(tombKey)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("could not remove grave with tombstone key: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
value = tombKey
|
||||
} else {
|
||||
targetBucket = garbageBKT
|
||||
value = zeroValue
|
||||
return func(addr oid.Address) []byte {
|
||||
return graveyardKey(addr.Container(), addr.Object())
|
||||
}, encodeAddressToGrave(*prm.tomb), nil
|
||||
}
|
||||
return targetBucket, value, nil
|
||||
return func(addr oid.Address) []byte {
|
||||
return garbageKey(addr.Container(), addr.Object())
|
||||
}, zeroValue, nil
|
||||
}
|
||||
|
||||
func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool, error) {
|
||||
targetIsTomb, err := isTomb(graveyardBKT, key)
|
||||
func markAsGC(tx *badger.Txn, addr oid.Address) (bool, error) {
|
||||
targetIsTomb, err := isTomb(tx, addr)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
@ -372,19 +404,27 @@ func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool
|
|||
|
||||
// if tombstone appears object must be
|
||||
// additionally marked with GC
|
||||
return false, garbageBKT.Put(key, zeroValue)
|
||||
key := garbageKey(addr.Container(), addr.Object())
|
||||
return false, tx.Set(key, zeroValue)
|
||||
}
|
||||
|
||||
func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Bucket, targetKey []byte, cnr cid.ID, obj *objectSDK.Object, res *InhumeRes) error {
|
||||
containerID, _ := obj.ContainerID()
|
||||
if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 {
|
||||
res.storeDeletionInfo(containerID, obj.PayloadSize(), IsUserObject(obj))
|
||||
func (db *DB) updateDeleteInfo(tx *badger.Txn, addr oid.Address, obj *objectSDK.Object, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
|
||||
st, err := inGraveyardWithKey(tx, addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if st == 0 {
|
||||
res.storeDeletionInfo(addr.Container(), obj.PayloadSize(), IsUserObject(obj))
|
||||
}
|
||||
|
||||
// if object is stored, and it is regular object then update bucket
|
||||
// with container size estimations
|
||||
if obj.Type() == objectSDK.TypeRegular {
|
||||
err := changeContainerSize(tx, cnr, obj.PayloadSize(), false)
|
||||
bucketID, found := bucketIDs[addr.Container()]
|
||||
if !found {
|
||||
panic("bucketID not found")
|
||||
}
|
||||
err := changeContainerSize(tx, addr.Container(), -1*int64(obj.PayloadSize()), bucketID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -392,25 +432,39 @@ func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Buc
|
|||
return nil
|
||||
}
|
||||
|
||||
func isTomb(graveyardBucket *bbolt.Bucket, key []byte) (bool, error) {
|
||||
func isTomb(tx *badger.Txn, addr oid.Address) (bool, error) {
|
||||
targetIsTomb := false
|
||||
expectedValue := make([]byte, cidSize+objectKeySize)
|
||||
addr.Container().Encode(expectedValue)
|
||||
addr.Object().Encode(expectedValue[cidSize:])
|
||||
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: []byte{graveyardPrefix},
|
||||
PrefetchValues: true,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
// iterate over graveyard and check if target address
|
||||
// is the address of tombstone in graveyard.
|
||||
err := graveyardBucket.ForEach(func(_, v []byte) error {
|
||||
// check if graveyard has record with key corresponding
|
||||
// to tombstone address (at least one)
|
||||
targetIsTomb = bytes.Equal(v, key)
|
||||
for it.Seek(nil); it.ValidForPrefix([]byte{graveyardPrefix}); it.Next() {
|
||||
err := it.Item().Value(func(val []byte) error {
|
||||
targetIsTomb = bytes.Equal(expectedValue, val)
|
||||
|
||||
if targetIsTomb {
|
||||
// break bucket iterator
|
||||
return errBreakBucketForEach
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil && !errors.Is(err, errBreakBucketForEach) {
|
||||
if err != nil {
|
||||
if errors.Is(err, errBreakBucketForEach) {
|
||||
return targetIsTomb, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
return targetIsTomb, nil
|
||||
}
|
||||
|
|
|
@ -3,21 +3,15 @@ package meta
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// ExpiredObject is a descriptor of expired object from DB.
|
||||
|
@ -44,99 +38,7 @@ type ExpiredObjectHandler func(*ExpiredObject) error
|
|||
// as a "break" keyword.
|
||||
var ErrInterruptIterator = logicerr.New("iterator is interrupted")
|
||||
|
||||
// IterateExpired iterates over all objects in DB which are out of date
|
||||
// relative to epoch. Locked objects are not included (do not confuse
|
||||
// with objects of type LOCK).
|
||||
//
|
||||
// If h returns ErrInterruptIterator, nil returns immediately.
|
||||
// Returns other errors of h directly.
|
||||
func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
)
|
||||
defer func() {
|
||||
db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
|
||||
}()
|
||||
_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
|
||||
trace.WithAttributes(
|
||||
attribute.String("epoch", strconv.FormatUint(epoch, 10)),
|
||||
))
|
||||
defer span.End()
|
||||
|
||||
db.modeMtx.RLock()
|
||||
defer db.modeMtx.RUnlock()
|
||||
|
||||
if db.mode.NoMetabase() {
|
||||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
return db.iterateExpired(tx, epoch, h)
|
||||
}))
|
||||
success = err == nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler) error {
|
||||
err := tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
|
||||
cidBytes := cidFromAttributeBucket(name, objectV2.SysAttributeExpEpoch)
|
||||
if cidBytes == nil {
|
||||
cidBytes = cidFromAttributeBucket(name, objectV2.SysAttributeExpEpochNeoFS)
|
||||
if cidBytes == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
var cnrID cid.ID
|
||||
err := cnrID.Decode(cidBytes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
|
||||
}
|
||||
|
||||
return b.ForEachBucket(func(expKey []byte) error {
|
||||
bktExpired := b.Bucket(expKey)
|
||||
expiresAfter, err := strconv.ParseUint(string(expKey), 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse expiration epoch: %w", err)
|
||||
} else if expiresAfter >= epoch {
|
||||
return nil
|
||||
}
|
||||
|
||||
return bktExpired.ForEach(func(idKey, _ []byte) error {
|
||||
var id oid.ID
|
||||
|
||||
err = id.Decode(idKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse ID of expired object: %w", err)
|
||||
}
|
||||
|
||||
// Ignore locked objects.
|
||||
//
|
||||
// To slightly optimize performance we can check only REGULAR objects
|
||||
// (only they can be locked), but it's more reliable.
|
||||
if objectLocked(tx, cnrID, id) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var addr oid.Address
|
||||
addr.SetContainer(cnrID)
|
||||
addr.SetObject(id)
|
||||
|
||||
return h(&ExpiredObject{
|
||||
typ: firstIrregularObjectType(tx, cnrID, idKey),
|
||||
addr: addr,
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
var errInvalidAttributeKey = errors.New("invalid userr attribute key")
|
||||
|
||||
// IterateCoveredByTombstones iterates over all objects in DB which are covered
|
||||
// by tombstone with string address from tss. Locked objects are not included
|
||||
|
@ -164,69 +66,99 @@ func (db *DB) IterateCoveredByTombstones(ctx context.Context, tss map[string]oid
|
|||
return ErrDegradedMode
|
||||
}
|
||||
|
||||
return db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
return db.iterateCoveredByTombstones(tx, tss, h)
|
||||
return db.database.View(func(tx *badger.Txn) error {
|
||||
return db.iterateCoveredByTombstones(ctx, tx, tss, h)
|
||||
})
|
||||
}
|
||||
|
||||
func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Address, h func(oid.Address) error) error {
|
||||
bktGraveyard := tx.Bucket(graveyardBucketName)
|
||||
|
||||
err := bktGraveyard.ForEach(func(k, v []byte) error {
|
||||
var addr oid.Address
|
||||
if err := decodeAddressFromKey(&addr, v); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, ok := tss[addr.EncodeToString()]; ok {
|
||||
var addr oid.Address
|
||||
|
||||
err := decodeAddressFromKey(&addr, k)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not parse address of the object under tombstone: %w", err)
|
||||
}
|
||||
|
||||
if objectLocked(tx, addr.Container(), addr.Object()) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return h(addr)
|
||||
}
|
||||
|
||||
return nil
|
||||
func (db *DB) iterateCoveredByTombstones(ctx context.Context, tx *badger.Txn, tss map[string]oid.Address, h func(oid.Address) error) error {
|
||||
prefix := []byte{graveyardPrefix}
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
PrefetchValues: true,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID, *objectSDK.Object) error) error {
|
||||
var cid cid.ID
|
||||
var oid oid.ID
|
||||
obj := objectSDK.New()
|
||||
|
||||
return tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
|
||||
b58CID, postfix := parseContainerIDWithPrefix(&cid, name)
|
||||
if len(b58CID) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch postfix {
|
||||
case primaryPrefix,
|
||||
lockersPrefix,
|
||||
tombstonePrefix:
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
var tombstoneAddress oid.Address
|
||||
if err := it.Item().Value(func(val []byte) error {
|
||||
var e error
|
||||
tombstoneAddress, e = decodeAddressFromGrave(val)
|
||||
return e
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, ok := tss[tombstoneAddress.EncodeToString()]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
var objectAddress oid.Address
|
||||
var err error
|
||||
objectAddress, err = addressFromGraveyardKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
isLocked, err := objectLocked(ctx, tx, objectAddress.Container(), objectAddress.Object())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if isLocked {
|
||||
continue
|
||||
}
|
||||
if err := h(objectAddress); err != nil {
|
||||
if errors.Is(err, ErrInterruptIterator) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return b.ForEach(func(k, v []byte) error {
|
||||
if oid.Decode(k) == nil && obj.Unmarshal(v) == nil {
|
||||
return f(cid, oid, obj)
|
||||
func iteratePhyObjects(tx *badger.Txn, f func(cid.ID, oid.ID, *objectSDK.Object) error) error {
|
||||
if err := iteratePhyObjectsWithPrefix(tx, primaryPrefix, f); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := iteratePhyObjectsWithPrefix(tx, lockersPrefix, f); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := iteratePhyObjectsWithPrefix(tx, tombstonePrefix, f); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
func iteratePhyObjectsWithPrefix(tx *badger.Txn, typePrefix byte, f func(cid.ID, oid.ID, *objectSDK.Object) error) error {
|
||||
prefix := []byte{typePrefix}
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
PrefetchValues: true,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
addr, err := addressFromKey(typePrefix, it.Item().Key())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
obj := objectSDK.New()
|
||||
if err := it.Item().Value(func(val []byte) error {
|
||||
return obj.Unmarshal(val)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := f(addr.Container(), addr.Object(), obj); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,16 +2,17 @@ package meta
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -21,10 +22,33 @@ import (
|
|||
// cursor. Use nil cursor object to start listing again.
|
||||
var ErrEndOfListing = logicerr.New("end of object listing")
|
||||
|
||||
type listPrefix struct {
|
||||
prefix []byte
|
||||
keyParser func(k []byte) (oid.Address, error)
|
||||
objectType objectSDK.Type
|
||||
}
|
||||
|
||||
var listPrefixes = []listPrefix{
|
||||
{
|
||||
prefix: []byte{primaryPrefix},
|
||||
keyParser: addressFromPrimaryKey,
|
||||
objectType: objectSDK.TypeRegular,
|
||||
},
|
||||
{
|
||||
prefix: []byte{lockersPrefix},
|
||||
keyParser: addressFromLockersKey,
|
||||
objectType: objectSDK.TypeLock,
|
||||
},
|
||||
{
|
||||
prefix: []byte{tombstonePrefix},
|
||||
keyParser: addressFromTombstoneKey,
|
||||
objectType: objectSDK.TypeTombstone,
|
||||
},
|
||||
}
|
||||
|
||||
// Cursor is a type for continuous object listing.
|
||||
type Cursor struct {
|
||||
bucketName []byte
|
||||
inBucketOffset []byte
|
||||
lastKey []byte
|
||||
}
|
||||
|
||||
// ListPrm contains parameters for ListWithCursor operation.
|
||||
|
@ -89,141 +113,87 @@ func (db *DB) ListWithCursor(ctx context.Context, prm ListPrm) (res ListRes, err
|
|||
return res, ErrDegradedMode
|
||||
}
|
||||
|
||||
result := make([]objectcore.Info, 0, prm.count)
|
||||
if prm.count == 0 {
|
||||
return ListRes{}, ErrEndOfListing
|
||||
}
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
res.addrList, res.cursor, err = db.listWithCursor(tx, result, prm.count, prm.cursor)
|
||||
err = db.database.View(func(tx *badger.Txn) error {
|
||||
res.addrList, res.cursor, err = db.listWithCursor(ctx, tx, prm.count, prm.cursor)
|
||||
return err
|
||||
})
|
||||
success = err == nil
|
||||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) listWithCursor(tx *bbolt.Tx, result []objectcore.Info, count int, cursor *Cursor) ([]objectcore.Info, *Cursor, error) {
|
||||
threshold := cursor == nil // threshold is a flag to ignore cursor
|
||||
var bucketName []byte
|
||||
var err error
|
||||
|
||||
c := tx.Cursor()
|
||||
name, _ := c.First()
|
||||
|
||||
if !threshold {
|
||||
name, _ = c.Seek(cursor.bucketName)
|
||||
func (db *DB) listWithCursor(ctx context.Context, tx *badger.Txn, count int, cursor *Cursor) ([]objectcore.Info, *Cursor, error) {
|
||||
var prefix []byte
|
||||
var lastSeen []byte
|
||||
if cursor != nil {
|
||||
prefix = []byte{cursor.lastKey[0]}
|
||||
lastSeen = cursor.lastKey
|
||||
} else {
|
||||
prefix = listPrefixes[0].prefix
|
||||
}
|
||||
|
||||
var containerID cid.ID
|
||||
var offset []byte
|
||||
graveyardBkt := tx.Bucket(graveyardBucketName)
|
||||
garbageBkt := tx.Bucket(garbageBucketName)
|
||||
|
||||
rawAddr := make([]byte, cidSize, addressKeySize)
|
||||
|
||||
loop:
|
||||
for ; name != nil; name, _ = c.Next() {
|
||||
cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name)
|
||||
if cidRaw == nil {
|
||||
continue
|
||||
idx := slices.IndexFunc(listPrefixes, func(e listPrefix) bool {
|
||||
return e.prefix[0] == prefix[0]
|
||||
})
|
||||
if idx < 0 {
|
||||
return nil, nil, fmt.Errorf("invalid prefix value %d", prefix[0])
|
||||
}
|
||||
|
||||
var objType objectSDK.Type
|
||||
|
||||
switch prefix {
|
||||
case primaryPrefix:
|
||||
objType = objectSDK.TypeRegular
|
||||
case lockersPrefix:
|
||||
objType = objectSDK.TypeLock
|
||||
case tombstonePrefix:
|
||||
objType = objectSDK.TypeTombstone
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
bkt := tx.Bucket(name)
|
||||
if bkt != nil {
|
||||
copy(rawAddr, cidRaw)
|
||||
result, offset, cursor, err = selectNFromBucket(bkt, objType, graveyardBkt, garbageBkt, rawAddr, containerID,
|
||||
result, count, cursor, threshold)
|
||||
var next Cursor
|
||||
result := make([]objectcore.Info, 0, count)
|
||||
for ; idx < len(listPrefixes); idx++ {
|
||||
indexResult, lastIndexSeen, err := listByPrefix(ctx, tx, lastSeen, idx, count-len(result))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
result = append(result, indexResult...)
|
||||
if len(lastIndexSeen) > 0 {
|
||||
next.lastKey = lastIndexSeen
|
||||
}
|
||||
bucketName = name
|
||||
if len(result) >= count {
|
||||
break loop
|
||||
if len(result) == count {
|
||||
return result, &next, nil
|
||||
}
|
||||
|
||||
// set threshold flag after first `selectNFromBucket` invocation
|
||||
// first invocation must look for cursor object
|
||||
threshold = true
|
||||
lastSeen = nil
|
||||
}
|
||||
|
||||
if offset != nil {
|
||||
// new slice is much faster but less memory efficient
|
||||
// we need to copy, because offset exists during bbolt tx
|
||||
cursor.inBucketOffset = make([]byte, len(offset))
|
||||
copy(cursor.inBucketOffset, offset)
|
||||
}
|
||||
|
||||
if len(result) == 0 {
|
||||
return nil, nil, ErrEndOfListing
|
||||
}
|
||||
|
||||
// new slice is much faster but less memory efficient
|
||||
// we need to copy, because bucketName exists during bbolt tx
|
||||
cursor.bucketName = make([]byte, len(bucketName))
|
||||
copy(cursor.bucketName, bucketName)
|
||||
|
||||
return result, cursor, nil
|
||||
return result, &next, nil
|
||||
}
|
||||
|
||||
// selectNFromBucket similar to selectAllFromBucket but uses cursor to find
|
||||
// object to start selecting from. Ignores inhumed objects.
|
||||
func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
|
||||
objType objectSDK.Type, // type of the objects stored in the main bucket
|
||||
graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets
|
||||
cidRaw []byte, // container ID prefix, optimization
|
||||
cnt cid.ID, // container ID
|
||||
to []objectcore.Info, // listing result
|
||||
limit int, // stop listing at `limit` items in result
|
||||
cursor *Cursor, // start from cursor object
|
||||
threshold bool, // ignore cursor and start immediately
|
||||
) ([]objectcore.Info, []byte, *Cursor, error) {
|
||||
if cursor == nil {
|
||||
cursor = new(Cursor)
|
||||
func listByPrefix(ctx context.Context, tx *badger.Txn, lastSeen []byte, idx int, count int) ([]objectcore.Info, []byte, error) {
|
||||
var result []objectcore.Info
|
||||
for {
|
||||
kvs, err := selectByPrefixAndSeek(ctx, tx, listPrefixes[idx].prefix, lastSeen, listPrefixes[idx].objectType == objectSDK.TypeRegular, count-len(result))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
count := len(to)
|
||||
c := bkt.Cursor()
|
||||
k, v := c.First()
|
||||
|
||||
offset := cursor.inBucketOffset
|
||||
|
||||
if !threshold {
|
||||
c.Seek(offset)
|
||||
k, v = c.Next() // we are looking for objects _after_ the cursor
|
||||
if len(kvs) == 0 {
|
||||
return result, lastSeen, nil
|
||||
}
|
||||
|
||||
for ; k != nil; k, v = c.Next() {
|
||||
if count >= limit {
|
||||
break
|
||||
for _, kv := range kvs {
|
||||
lastSeen = kv.Key
|
||||
addr, err := listPrefixes[idx].keyParser(kv.Key)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(k); err != nil {
|
||||
break
|
||||
st, err := inGraveyardWithKey(tx, addr)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
offset = k
|
||||
if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 {
|
||||
if st > 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var isLinkingObj bool
|
||||
var ecInfo *objectcore.ECInfo
|
||||
if objType == objectSDK.TypeRegular {
|
||||
if listPrefixes[idx].objectType == objectSDK.TypeRegular {
|
||||
var o objectSDK.Object
|
||||
if err := o.Unmarshal(v); err != nil {
|
||||
return nil, nil, nil, err
|
||||
if err := o.Unmarshal(kv.Value); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
isLinkingObj = isLinkObject(&o)
|
||||
ecHeader := o.ECHeader()
|
||||
|
@ -236,26 +206,16 @@ func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
|
|||
}
|
||||
}
|
||||
|
||||
var a oid.Address
|
||||
a.SetContainer(cnt)
|
||||
a.SetObject(obj)
|
||||
to = append(to, objectcore.Info{Address: a, Type: objType, IsLinkingObject: isLinkingObj, ECInfo: ecInfo})
|
||||
count++
|
||||
}
|
||||
result = append(result, objectcore.Info{
|
||||
Address: addr,
|
||||
Type: listPrefixes[idx].objectType,
|
||||
IsLinkingObject: isLinkingObj,
|
||||
ECInfo: ecInfo,
|
||||
})
|
||||
|
||||
return to, offset, cursor, nil
|
||||
if len(result) == count {
|
||||
return result, lastSeen, nil
|
||||
}
|
||||
|
||||
func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) {
|
||||
if len(name) < bucketKeySize {
|
||||
return nil, 0
|
||||
}
|
||||
|
||||
rawID := name[1:bucketKeySize]
|
||||
|
||||
if err := containerID.Decode(rawID); err != nil {
|
||||
return nil, 0
|
||||
}
|
||||
|
||||
return rawID, name[0]
|
||||
}
|
||||
|
|
|
@ -12,7 +12,6 @@ import (
|
|||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
func BenchmarkListWithCursor(b *testing.B) {
|
||||
|
@ -29,9 +28,7 @@ func BenchmarkListWithCursor(b *testing.B) {
|
|||
}
|
||||
|
||||
func listWithCursorPrepareDB(b *testing.B) *meta.DB {
|
||||
db := newDB(b, meta.WithMaxBatchSize(1), meta.WithBoltDBOptions(&bbolt.Options{
|
||||
NoSync: true,
|
||||
})) // faster single-thread generation
|
||||
db := newDB(b)
|
||||
defer func() { require.NoError(b, db.Close()) }()
|
||||
|
||||
obj := testutil.GenerateObject()
|
||||
|
@ -147,7 +144,7 @@ func TestLisObjectsWithCursor(t *testing.T) {
|
|||
}
|
||||
|
||||
_, _, err = metaListWithCursor(db, uint32(countPerReq), cursor)
|
||||
require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d", countPerReq, cursor)
|
||||
require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d, cursor:%v", countPerReq, cursor)
|
||||
require.ElementsMatch(t, expected, got, "count:%d", countPerReq)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
|
@ -13,23 +11,16 @@ import (
|
|||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
var bucketNameLocked = []byte{lockedPrefix}
|
||||
|
||||
type keyValue struct {
|
||||
Key []byte
|
||||
Value []byte
|
||||
}
|
||||
|
||||
// returns name of the bucket with objects of type LOCK for specified container.
|
||||
func bucketNameLockers(idCnr cid.ID, key []byte) []byte {
|
||||
return bucketName(idCnr, lockersPrefix, key)
|
||||
}
|
||||
|
||||
// Lock marks objects as locked with another object. All objects are from the
|
||||
// specified container.
|
||||
//
|
||||
|
@ -66,66 +57,43 @@ func (db *DB) Lock(ctx context.Context, cnr cid.ID, locker oid.ID, locked []oid.
|
|||
panic("empty locked list")
|
||||
}
|
||||
|
||||
err := db.lockInternal(locked, cnr, locker)
|
||||
err := db.database.Update(func(txn *badger.Txn) error {
|
||||
return lockInternal(txn, locked, cnr, locker)
|
||||
})
|
||||
success = err == nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (db *DB) lockInternal(locked []oid.ID, cnr cid.ID, locker oid.ID) error {
|
||||
bucketKeysLocked := make([][]byte, len(locked))
|
||||
for i := range locked {
|
||||
bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize))
|
||||
func lockInternal(tx *badger.Txn, locked []oid.ID, cnr cid.ID, locker oid.ID) error {
|
||||
t, err := firstIrregularObjectType(tx, cnr, locked...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
key := make([]byte, cidSize)
|
||||
|
||||
return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) != objectSDK.TypeRegular {
|
||||
if t != objectSDK.TypeRegular {
|
||||
return logicerr.Wrap(new(apistatus.LockNonRegularObject))
|
||||
}
|
||||
|
||||
bucketLocked := tx.Bucket(bucketNameLocked)
|
||||
|
||||
cnr.Encode(key)
|
||||
bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key)
|
||||
for _, objID := range locked {
|
||||
key := lockedKey(cnr, objID, locker)
|
||||
v, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err)
|
||||
return err
|
||||
}
|
||||
if v != nil {
|
||||
// already locked by locker
|
||||
continue
|
||||
}
|
||||
|
||||
keyLocker := objectKey(locker, key)
|
||||
var exLockers [][]byte
|
||||
var updLockers []byte
|
||||
|
||||
loop:
|
||||
for i := range bucketKeysLocked {
|
||||
exLockers, err = decodeList(bucketLockedContainer.Get(bucketKeysLocked[i]))
|
||||
if err != nil {
|
||||
return fmt.Errorf("decode list of object lockers: %w", err)
|
||||
}
|
||||
|
||||
for i := range exLockers {
|
||||
if bytes.Equal(exLockers[i], keyLocker) {
|
||||
continue loop
|
||||
if err := tx.Set(key, zeroValue); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
updLockers, err = encodeList(append(exLockers, keyLocker))
|
||||
if err != nil {
|
||||
return fmt.Errorf("encode list of object lockers: %w", err)
|
||||
}
|
||||
|
||||
err = bucketLockedContainer.Put(bucketKeysLocked[i], updLockers)
|
||||
if err != nil {
|
||||
return fmt.Errorf("update list of object lockers: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
// FreeLockedBy unlocks all objects in DB which are locked by lockers.
|
||||
// Returns slice of unlocked object ID's or an error.
|
||||
func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {
|
||||
func (db *DB) FreeLockedBy(ctx context.Context, lockers []oid.Address) ([]oid.Address, error) {
|
||||
var (
|
||||
startedAt = time.Now()
|
||||
success = false
|
||||
|
@ -143,9 +111,9 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {
|
|||
|
||||
var unlockedObjects []oid.Address
|
||||
|
||||
if err := db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
if err := db.database.Update(func(tx *badger.Txn) error {
|
||||
for i := range lockers {
|
||||
unlocked, err := freePotentialLocks(tx, lockers[i].Container(), lockers[i].Object())
|
||||
unlocked, err := freePotentialLocks(ctx, tx, lockers[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -161,42 +129,38 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {
|
|||
}
|
||||
|
||||
// checks if specified object is locked in the specified container.
|
||||
func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
|
||||
bucketLocked := tx.Bucket(bucketNameLocked)
|
||||
if bucketLocked != nil {
|
||||
key := make([]byte, cidSize)
|
||||
idCnr.Encode(key)
|
||||
bucketLockedContainer := bucketLocked.Bucket(key)
|
||||
if bucketLockedContainer != nil {
|
||||
return bucketLockedContainer.Get(objectKey(idObj, key)) != nil
|
||||
}
|
||||
}
|
||||
func objectLocked(ctx context.Context, tx *badger.Txn, idCnr cid.ID, idObj oid.ID) (bool, error) {
|
||||
prefix := lockedKeyLongPrefix(idCnr, idObj)
|
||||
|
||||
return false
|
||||
items, err := selectByPrefixBatch(ctx, tx, prefix, 1)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(items) > 0, nil
|
||||
}
|
||||
|
||||
// return `LOCK` id's if specified object is locked in the specified container.
|
||||
func getLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) {
|
||||
func getLocked(ctx context.Context, tx *badger.Txn, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) {
|
||||
prefix := lockedKeyLongPrefix(idCnr, idObj)
|
||||
|
||||
var lockers []oid.ID
|
||||
bucketLocked := tx.Bucket(bucketNameLocked)
|
||||
if bucketLocked != nil {
|
||||
key := make([]byte, cidSize)
|
||||
idCnr.Encode(key)
|
||||
bucketLockedContainer := bucketLocked.Bucket(key)
|
||||
if bucketLockedContainer != nil {
|
||||
binObjIDs, err := decodeList(bucketLockedContainer.Get(objectKey(idObj, key)))
|
||||
for {
|
||||
items, err := selectByPrefixBatch(ctx, tx, prefix, batchSize)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode list of object lockers: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
for _, binObjID := range binObjIDs {
|
||||
var id oid.ID
|
||||
if err = id.Decode(binObjID); err != nil {
|
||||
for _, it := range items {
|
||||
id, err := lockerObjectIDFromLockedKey(it)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lockers = append(lockers, id)
|
||||
}
|
||||
if len(items) < batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return lockers, nil
|
||||
}
|
||||
|
||||
|
@ -206,95 +170,64 @@ func getLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) ([]oid.ID, error) {
|
|||
// Operation is very resource-intensive, which is caused by the admissibility
|
||||
// of multiple locks. Also, if we knew what objects are locked, it would be
|
||||
// possible to speed up the execution.
|
||||
func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) ([]oid.Address, error) {
|
||||
func freePotentialLocks(ctx context.Context, tx *badger.Txn, locker oid.Address) ([]oid.Address, error) {
|
||||
var unlockedObjects []oid.Address
|
||||
bucketLocked := tx.Bucket(bucketNameLocked)
|
||||
if bucketLocked == nil {
|
||||
return unlockedObjects, nil
|
||||
}
|
||||
|
||||
key := make([]byte, cidSize)
|
||||
idCnr.Encode(key)
|
||||
|
||||
bucketLockedContainer := bucketLocked.Bucket(key)
|
||||
if bucketLockedContainer == nil {
|
||||
return unlockedObjects, nil
|
||||
}
|
||||
|
||||
keyLocker := objectKey(locker, key)
|
||||
updates := make([]keyValue, 0)
|
||||
err := bucketLockedContainer.ForEach(func(k, v []byte) error {
|
||||
keyLockers, err := decodeList(v)
|
||||
locked, err := lockedObjects(tx, locker)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decode list of lockers in locked bucket: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i := range keyLockers {
|
||||
if bytes.Equal(keyLockers[i], keyLocker) {
|
||||
if len(keyLockers) == 1 {
|
||||
updates = append(updates, keyValue{
|
||||
Key: k,
|
||||
Value: nil,
|
||||
})
|
||||
for _, lockedObject := range locked {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
var id oid.ID
|
||||
err = id.Decode(k)
|
||||
if err := tx.Delete(lockedKey(locker.Container(), lockedObject, locker.Object())); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
isLocked, err := objectLocked(ctx, tx, locker.Container(), lockedObject)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decode unlocked object id error: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !isLocked { // deleted locker was the last one
|
||||
var addr oid.Address
|
||||
addr.SetContainer(idCnr)
|
||||
addr.SetObject(id)
|
||||
|
||||
addr.SetContainer(locker.Container())
|
||||
addr.SetObject(lockedObject)
|
||||
unlockedObjects = append(unlockedObjects, addr)
|
||||
} else {
|
||||
// exclude locker
|
||||
keyLockers = append(keyLockers[:i], keyLockers[i+1:]...)
|
||||
|
||||
v, err = encodeList(keyLockers)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encode updated list of lockers: %w", err)
|
||||
}
|
||||
|
||||
updates = append(updates, keyValue{
|
||||
Key: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = applyBucketUpdates(bucketLockedContainer, updates); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return unlockedObjects, nil
|
||||
}
|
||||
|
||||
func applyBucketUpdates(bucket *bbolt.Bucket, updates []keyValue) error {
|
||||
for _, update := range updates {
|
||||
if update.Value == nil {
|
||||
err := bucket.Delete(update.Key)
|
||||
func lockedObjects(tx *badger.Txn, locker oid.Address) ([]oid.ID, error) {
|
||||
var lockedByLocker []oid.ID
|
||||
|
||||
prefix := lockedKeyShortPrefix(locker.Container())
|
||||
it := tx.NewIterator(badger.IteratorOptions{
|
||||
PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
|
||||
Prefix: prefix,
|
||||
})
|
||||
defer it.Close()
|
||||
|
||||
for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
|
||||
currentLockerObjID, err := lockerObjectIDFromLockedKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete locked object record from locked bucket: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
err := bucket.Put(update.Key, update.Value)
|
||||
if !currentLockerObjID.Equals(locker.Object()) {
|
||||
continue
|
||||
}
|
||||
currentObjectID, err := objectIDFromLockedKey(it.Item().Key())
|
||||
if err != nil {
|
||||
return fmt.Errorf("update list of lockers: %w", err)
|
||||
return nil, err
|
||||
}
|
||||
lockedByLocker = append(lockedByLocker, currentObjectID)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return lockedByLocker, nil
|
||||
}
|
||||
|
||||
// IsLockedPrm groups the parameters of IsLocked operation.
|
||||
|
@ -343,9 +276,10 @@ func (db *DB) IsLocked(ctx context.Context, prm IsLockedPrm) (res IsLockedRes, e
|
|||
if db.mode.NoMetabase() {
|
||||
return res, ErrDegradedMode
|
||||
}
|
||||
err = metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
res.locked = objectLocked(tx, prm.addr.Container(), prm.addr.Object())
|
||||
return nil
|
||||
err = metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
|
||||
var e error
|
||||
res.locked, e = objectLocked(ctx, tx, prm.addr.Container(), prm.addr.Object())
|
||||
return e
|
||||
}))
|
||||
success = err == nil
|
||||
return res, err
|
||||
|
@ -376,8 +310,8 @@ func (db *DB) GetLocked(ctx context.Context, addr oid.Address) (res []oid.ID, er
|
|||
if db.mode.NoMetabase() {
|
||||
return res, ErrDegradedMode
|
||||
}
|
||||
err = metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
res, err = getLocked(tx, addr.Container(), addr.Object())
|
||||
err = metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
|
||||
res, err = getLocked(ctx, tx, addr.Container(), addr.Object())
|
||||
return nil
|
||||
}))
|
||||
success = err == nil
|
||||
|
|
|
@ -117,7 +117,7 @@ func TestDB_Lock(t *testing.T) {
|
|||
require.Len(t, res.DeletedLockObjects(), 1)
|
||||
require.Equal(t, objectcore.AddressOf(lockObj), res.DeletedLockObjects()[0])
|
||||
|
||||
_, err = db.FreeLockedBy([]oid.Address{lockAddr})
|
||||
_, err = db.FreeLockedBy(context.Background(), []oid.Address{lockAddr})
|
||||
require.NoError(t, err)
|
||||
|
||||
inhumePrm.SetAddresses(objAddr)
|
||||
|
@ -148,7 +148,7 @@ func TestDB_Lock(t *testing.T) {
|
|||
|
||||
// unlock just objects that were locked by
|
||||
// just removed locker
|
||||
_, err = db.FreeLockedBy([]oid.Address{res.DeletedLockObjects()[0]})
|
||||
_, err = db.FreeLockedBy(context.Background(), []oid.Address{res.DeletedLockObjects()[0]})
|
||||
require.NoError(t, err)
|
||||
|
||||
// removing objects after unlock
|
||||
|
|
32
pkg/local_object_storage/metabase/logger.go
Normal file
32
pkg/local_object_storage/metabase/logger.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
var _ badger.Logger = (*badgerLogger)(nil)
|
||||
|
||||
type badgerLogger struct {
|
||||
l *logger.Logger
|
||||
}
|
||||
|
||||
// Debugf implements badger.Logger.
|
||||
func (d *badgerLogger) Debugf(msg string, args ...interface{}) {
|
||||
d.l.Sugar().Debugf(msg, args...)
|
||||
}
|
||||
|
||||
// Errorf implements badger.Logger.
|
||||
func (d *badgerLogger) Errorf(msg string, args ...interface{}) {
|
||||
d.l.Sugar().Errorf(msg, args...)
|
||||
}
|
||||
|
||||
// Infof implements badger.Logger.
|
||||
func (d *badgerLogger) Infof(msg string, args ...interface{}) {
|
||||
d.l.Sugar().Infof(msg, args...)
|
||||
}
|
||||
|
||||
// Warningf implements badger.Logger.
|
||||
func (d *badgerLogger) Warningf(msg string, args ...interface{}) {
|
||||
d.l.Sugar().Warnf(msg, args...)
|
||||
}
|
|
@ -1,8 +1,10 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
)
|
||||
|
||||
|
@ -17,17 +19,17 @@ func (db *DB) SetMode(m mode.Mode) error {
|
|||
}
|
||||
|
||||
if !db.mode.NoMetabase() {
|
||||
if err := db.Close(); err != nil {
|
||||
if err := db.close(); err != nil {
|
||||
return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
|
||||
}
|
||||
}
|
||||
|
||||
if m.NoMetabase() {
|
||||
db.boltDB = nil
|
||||
db.database = nil
|
||||
} else {
|
||||
err := db.openDB(m)
|
||||
if err == nil && !m.ReadOnly() {
|
||||
err = db.Init()
|
||||
err = metaerr.Wrap(db.init(context.TODO(), false))
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
|
||||
|
|
|
@ -24,14 +24,14 @@ func Test_Mode(t *testing.T) {
|
|||
}...)
|
||||
|
||||
require.NoError(t, bdb.Open(context.Background(), mode.DegradedReadOnly))
|
||||
require.Nil(t, bdb.boltDB)
|
||||
require.Nil(t, bdb.database)
|
||||
require.NoError(t, bdb.Init())
|
||||
require.Nil(t, bdb.boltDB)
|
||||
require.Nil(t, bdb.database)
|
||||
require.NoError(t, bdb.Close())
|
||||
|
||||
require.NoError(t, bdb.Open(context.Background(), mode.Degraded))
|
||||
require.Nil(t, bdb.boltDB)
|
||||
require.Nil(t, bdb.database)
|
||||
require.NoError(t, bdb.Init())
|
||||
require.Nil(t, bdb.boltDB)
|
||||
require.Nil(t, bdb.database)
|
||||
require.NoError(t, bdb.Close())
|
||||
}
|
||||
|
|
62
pkg/local_object_storage/metabase/parse.go
Normal file
62
pkg/local_object_storage/metabase/parse.go
Normal file
|
@ -0,0 +1,62 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
// valueOrNil returns value or nil, if key not found.
|
||||
// value must be used only inside transaction.
|
||||
func valueOrNil(tx *badger.Txn, key []byte) ([]byte, error) {
|
||||
i, err := tx.Get(key)
|
||||
if err != nil {
|
||||
if errors.Is(err, badger.ErrKeyNotFound) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
var value []byte
|
||||
if err := i.Value(func(val []byte) error {
|
||||
value = val
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return value, nil
|
||||
}
|
||||
|
||||
func parseInt64Value(v []byte) (int64, bool) {
|
||||
if len(v) == 0 {
|
||||
return 0, true
|
||||
}
|
||||
if len(v) != 8 {
|
||||
return 0, false
|
||||
}
|
||||
return int64(binary.LittleEndian.Uint64(v)), true
|
||||
}
|
||||
|
||||
func marshalInt64(v int64) []byte {
|
||||
buf := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(buf, uint64(v))
|
||||
return buf
|
||||
}
|
||||
|
||||
func parseContainerIDWithIgnore(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
|
||||
if len(name) < bucketKeySize {
|
||||
return false
|
||||
}
|
||||
if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
|
||||
return false
|
||||
}
|
||||
return dst.Decode(name[1:bucketKeySize]) == nil
|
||||
}
|
||||
|
||||
func parseContainerID(dst *cid.ID, name []byte) bool {
|
||||
if len(name) < bucketKeySize {
|
||||
return false
|
||||
}
|
||||
return dst.Decode(name[1:bucketKeySize]) == nil
|
||||
}
|
|
@ -1,13 +1,15 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
gio "io"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||
objectCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||
storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
|
@ -16,18 +18,11 @@ import (
|
|||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"github.com/nspcc-dev/neo-go/pkg/io"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
type (
|
||||
namedBucketItem struct {
|
||||
name, key, val []byte
|
||||
}
|
||||
)
|
||||
|
||||
// PutPrm groups the parameters of Put operation.
|
||||
type PutPrm struct {
|
||||
obj *objectSDK.Object
|
||||
|
@ -54,6 +49,8 @@ var (
|
|||
ErrUnknownObjectType = errors.New("unknown object type")
|
||||
ErrIncorrectSplitInfoUpdate = errors.New("updating split info on object without it")
|
||||
ErrIncorrectRootObject = errors.New("invalid root object")
|
||||
|
||||
errInvalidUserAttributeKeyFormat = errors.New("invalid user attribute key format")
|
||||
)
|
||||
|
||||
// Put saves object header in metabase. Object payload expected to be cut.
|
||||
|
@ -85,12 +82,28 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) {
|
|||
}
|
||||
|
||||
currEpoch := db.epochState.CurrentEpoch()
|
||||
cnr, ok := prm.obj.ContainerID()
|
||||
if !ok {
|
||||
return PutRes{}, errors.New("missing container in object")
|
||||
}
|
||||
|
||||
err = db.boltDB.Batch(func(tx *bbolt.Tx) error {
|
||||
bucketID, release := db.bucketIDs.BucketID(cnr)
|
||||
defer release()
|
||||
|
||||
const retryCount = 10
|
||||
for i := 0; i < retryCount; i++ {
|
||||
err = db.database.Update(func(tx *badger.Txn) error {
|
||||
var e error
|
||||
res, e = db.put(tx, prm.obj, prm.id, nil, currEpoch)
|
||||
res, e = db.put(ctx, tx, prm.obj, prm.id, nil, currEpoch, bucketID)
|
||||
return e
|
||||
})
|
||||
if errors.Is(err, badger.ErrConflict) {
|
||||
db.log.Warn(logs.ErrMetabaseConflict)
|
||||
time.Sleep(retryTimeout)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if err == nil {
|
||||
success = true
|
||||
storagelog.Write(db.log,
|
||||
|
@ -101,11 +114,14 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) {
|
|||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) put(tx *bbolt.Tx,
|
||||
func (db *DB) put(
|
||||
ctx context.Context,
|
||||
tx *badger.Txn,
|
||||
obj *objectSDK.Object,
|
||||
id []byte,
|
||||
si *objectSDK.SplitInfo,
|
||||
currEpoch uint64,
|
||||
bucketID uint16,
|
||||
) (PutRes, error) {
|
||||
cnr, ok := obj.ContainerID()
|
||||
if !ok {
|
||||
|
@ -114,7 +130,7 @@ func (db *DB) put(tx *bbolt.Tx,
|
|||
|
||||
isParent := si != nil
|
||||
|
||||
exists, _, err := db.exists(tx, objectCore.AddressOf(obj), oid.Address{}, currEpoch)
|
||||
exists, _, err := exists(ctx, tx, objectCore.AddressOf(obj), oid.Address{}, currEpoch)
|
||||
|
||||
var splitInfoError *objectSDK.SplitInfoError
|
||||
if errors.As(err, &splitInfoError) {
|
||||
|
@ -127,37 +143,38 @@ func (db *DB) put(tx *bbolt.Tx,
|
|||
return PutRes{}, db.updateObj(tx, obj, id, si, isParent)
|
||||
}
|
||||
|
||||
return PutRes{Inserted: true}, db.insertObject(tx, obj, id, si, isParent, cnr, currEpoch)
|
||||
return PutRes{Inserted: true}, db.insertObject(ctx, tx, obj, id, si, isParent, cnr, currEpoch, bucketID)
|
||||
}
|
||||
|
||||
func (db *DB) updateObj(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error {
|
||||
func (db *DB) updateObj(tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error {
|
||||
addr := objectCore.AddressOf(obj)
|
||||
// most right child and split header overlap parent so we have to
|
||||
// check if object exists to not overwrite it twice
|
||||
|
||||
// When storage engine moves objects between different sub-storages,
|
||||
// it calls metabase.Put method with new storage ID, thus triggering this code.
|
||||
if !isParent && id != nil {
|
||||
return setStorageID(tx, objectCore.AddressOf(obj), id, true)
|
||||
return setStorageID(tx, addr, id, true)
|
||||
}
|
||||
|
||||
// when storage already has last object in split hierarchy and there is
|
||||
// a linking object to put (or vice versa), we should update split info
|
||||
// with object ids of these objects
|
||||
if isParent {
|
||||
return updateSplitInfo(tx, objectCore.AddressOf(obj), si)
|
||||
return updateSplitInfo(tx, addr.Container(), addr.Object(), si)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64) error {
|
||||
func (db *DB) insertObject(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64, bucketID uint16) error {
|
||||
if par := obj.Parent(); par != nil && !isParent { // limit depth by two
|
||||
parentSI, err := splitInfoFromObject(obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = db.put(tx, par, id, parentSI, currEpoch)
|
||||
_, err = db.put(ctx, tx, par, id, parentSI, currEpoch, bucketID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -173,21 +190,21 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o
|
|||
return fmt.Errorf("can't put list indexes: %w", err)
|
||||
}
|
||||
|
||||
err = updateFKBTIndexes(tx, obj, putFKBTIndexItem)
|
||||
err = updateFKBTIndexes(tx, obj, putListIndexItem)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't put fake bucket tree indexes: %w", err)
|
||||
}
|
||||
|
||||
// update container volume size estimation
|
||||
if obj.Type() == objectSDK.TypeRegular && !isParent {
|
||||
err = changeContainerSize(tx, cnr, obj.PayloadSize(), true)
|
||||
err = changeContainerSize(tx, cnr, int64(obj.PayloadSize()), bucketID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if !isParent {
|
||||
if err = db.incCounters(tx, cnr, IsUserObject(obj)); err != nil {
|
||||
if err = incCounters(tx, cnr, IsUserObject(obj), bucketID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -196,26 +213,24 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o
|
|||
}
|
||||
|
||||
func putUniqueIndexes(
|
||||
tx *bbolt.Tx,
|
||||
tx *badger.Txn,
|
||||
obj *objectSDK.Object,
|
||||
si *objectSDK.SplitInfo,
|
||||
id []byte,
|
||||
) error {
|
||||
isParent := si != nil
|
||||
addr := objectCore.AddressOf(obj)
|
||||
cnr := addr.Container()
|
||||
objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
|
||||
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
// add value to primary unique bucket
|
||||
if !isParent {
|
||||
var key []byte
|
||||
switch obj.Type() {
|
||||
case objectSDK.TypeRegular:
|
||||
bucketName = primaryBucketName(cnr, bucketName)
|
||||
key = primaryKey(addr.Container(), addr.Object())
|
||||
case objectSDK.TypeTombstone:
|
||||
bucketName = tombstoneBucketName(cnr, bucketName)
|
||||
key = tombstoneKey(addr.Container(), addr.Object())
|
||||
case objectSDK.TypeLock:
|
||||
bucketName = bucketNameLockers(cnr, bucketName)
|
||||
key = lockersKey(addr.Container(), addr.Object())
|
||||
default:
|
||||
return ErrUnknownObjectType
|
||||
}
|
||||
|
@ -225,11 +240,7 @@ func putUniqueIndexes(
|
|||
return fmt.Errorf("can't marshal object header: %w", err)
|
||||
}
|
||||
|
||||
err = putUniqueIndexItem(tx, namedBucketItem{
|
||||
name: bucketName,
|
||||
key: objKey,
|
||||
val: rawObject,
|
||||
})
|
||||
err = tx.Set(key, rawObject)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -244,6 +255,7 @@ func putUniqueIndexes(
|
|||
|
||||
// index root object
|
||||
if obj.Type() == objectSDK.TypeRegular && !obj.HasParent() {
|
||||
objID := addr.Object()
|
||||
if ecHead := obj.ECHeader(); ecHead != nil {
|
||||
parentID := ecHead.Parent()
|
||||
if ecHead.ParentSplitID() != nil {
|
||||
|
@ -258,53 +270,23 @@ func putUniqueIndexes(
|
|||
|
||||
parentID = *parentSplitParentID
|
||||
}
|
||||
objKey = objectKey(parentID, objKey)
|
||||
objID = parentID
|
||||
}
|
||||
return updateSplitInfoIndex(tx, objKey, cnr, bucketName, si)
|
||||
return updateSplitInfo(tx, addr.Container(), objID, si)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateSplitInfoIndex(tx *bbolt.Tx, objKey []byte, cnr cid.ID, bucketName []byte, si *objectSDK.SplitInfo) error {
|
||||
return updateUniqueIndexItem(tx, namedBucketItem{
|
||||
name: rootBucketName(cnr, bucketName),
|
||||
key: objKey,
|
||||
}, func(old, _ []byte) ([]byte, error) {
|
||||
switch {
|
||||
case si == nil && old == nil:
|
||||
return []byte{}, nil
|
||||
case si == nil:
|
||||
return old, nil
|
||||
case old == nil:
|
||||
return si.Marshal()
|
||||
default:
|
||||
oldSI := objectSDK.NewSplitInfo()
|
||||
if err := oldSI.Unmarshal(old); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
si = util.MergeSplitInfo(si, oldSI)
|
||||
return si.Marshal()
|
||||
}
|
||||
})
|
||||
}
|
||||
type updateIndexItemFunc = func(tx *badger.Txn, key []byte) error
|
||||
|
||||
type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error
|
||||
|
||||
func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
|
||||
func updateListIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error {
|
||||
idObj, _ := obj.ID()
|
||||
cnr, _ := obj.ContainerID()
|
||||
objKey := objectKey(idObj, make([]byte, objectKeySize))
|
||||
bucketName := make([]byte, bucketKeySize)
|
||||
|
||||
cs, _ := obj.PayloadChecksum()
|
||||
|
||||
// index payload hashes
|
||||
err := f(tx, namedBucketItem{
|
||||
name: payloadHashBucketName(cnr, bucketName),
|
||||
key: cs.Value(),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, payloadHashKey(cnr, idObj, cs.Value()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -313,11 +295,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
|
|||
|
||||
// index parent ids
|
||||
if ok {
|
||||
err := f(tx, namedBucketItem{
|
||||
name: parentBucketName(cnr, bucketName),
|
||||
key: objectKey(idParent, make([]byte, objectKeySize)),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, parentKey(cnr, idParent, idObj))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -325,33 +303,35 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
|
|||
|
||||
// index split ids
|
||||
if obj.SplitID() != nil {
|
||||
err := f(tx, namedBucketItem{
|
||||
name: splitBucketName(cnr, bucketName),
|
||||
key: obj.SplitID().ToV2(),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, splitKey(cnr, idObj, obj.SplitID().ToV2()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, attr := range obj.Attributes() {
|
||||
if attr.Key() != objectV2.SysAttributeExpEpochNeoFS && attr.Key() != objectV2.SysAttributeExpEpoch {
|
||||
continue
|
||||
}
|
||||
expEpoch, err := strconv.ParseUint(attr.Value(), 10, 64)
|
||||
if err != nil {
|
||||
return errInvalidUserAttributeKeyFormat
|
||||
}
|
||||
err = f(tx, expiredKey(cnr, idObj, expEpoch))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if ech := obj.ECHeader(); ech != nil {
|
||||
err := f(tx, namedBucketItem{
|
||||
name: ecInfoBucketName(cnr, bucketName),
|
||||
key: objectKey(ech.Parent(), make([]byte, objectKeySize)),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, ecInfoKey(cnr, ech.Parent(), idObj))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if ech.ParentSplitID() != nil {
|
||||
objKey := objectKey(ech.Parent(), make([]byte, objectKeySize))
|
||||
err := f(tx, namedBucketItem{
|
||||
name: splitBucketName(cnr, bucketName),
|
||||
key: ech.ParentSplitID().ToV2(),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, splitKey(cnr, ech.Parent(), ech.ParentSplitID().ToV2()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -361,17 +341,10 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
|
|||
return nil
|
||||
}
|
||||
|
||||
func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
|
||||
func updateFKBTIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error {
|
||||
id, _ := obj.ID()
|
||||
cnr, _ := obj.ContainerID()
|
||||
objKey := objectKey(id, make([]byte, objectKeySize))
|
||||
|
||||
key := make([]byte, bucketKeySize)
|
||||
err := f(tx, namedBucketItem{
|
||||
name: ownerBucketName(cnr, key),
|
||||
key: []byte(obj.OwnerID().EncodeToString()),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, ownerKey(cnr, id, []byte(obj.OwnerID().EncodeToString())))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -379,19 +352,14 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
|
|||
var attrs []objectSDK.Attribute
|
||||
if obj.ECHeader() != nil {
|
||||
attrs = obj.ECHeader().ParentAttributes()
|
||||
objKey = objectKey(obj.ECHeader().Parent(), make([]byte, objectKeySize))
|
||||
id = obj.ECHeader().Parent()
|
||||
} else {
|
||||
attrs = obj.Attributes()
|
||||
}
|
||||
|
||||
// user specified attributes
|
||||
for i := range attrs {
|
||||
key = attributeBucketName(cnr, attrs[i].Key(), key)
|
||||
err := f(tx, namedBucketItem{
|
||||
name: key,
|
||||
key: []byte(attrs[i].Value()),
|
||||
val: objKey,
|
||||
})
|
||||
err := f(tx, attributeKey(cnr, id, attrs[i].Key(), attrs[i].Value()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -400,161 +368,42 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
|
|||
return nil
|
||||
}
|
||||
|
||||
type bucketContainer interface {
|
||||
Bucket([]byte) *bbolt.Bucket
|
||||
CreateBucket([]byte) (*bbolt.Bucket, error)
|
||||
CreateBucketIfNotExists([]byte) (*bbolt.Bucket, error)
|
||||
}
|
||||
|
||||
func createBucketLikelyExists[T bucketContainer](tx T, name []byte) (*bbolt.Bucket, error) {
|
||||
if bkt := tx.Bucket(name); bkt != nil {
|
||||
return bkt, nil
|
||||
}
|
||||
return tx.CreateBucket(name)
|
||||
}
|
||||
|
||||
func updateUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem, update func(oldData, newData []byte) ([]byte, error)) error {
|
||||
bkt, err := createBucketLikelyExists(tx, item.name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create index %v: %w", item.name, err)
|
||||
}
|
||||
|
||||
data, err := update(bkt.Get(item.key), item.val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return bkt.Put(item.key, data)
|
||||
}
|
||||
|
||||
func putUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
||||
return updateUniqueIndexItem(tx, item, func(_, val []byte) ([]byte, error) { return val, nil })
|
||||
}
|
||||
|
||||
func putFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
||||
bkt, err := createBucketLikelyExists(tx, item.name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create index %v: %w", item.name, err)
|
||||
}
|
||||
|
||||
fkbtRoot, err := createBucketLikelyExists(bkt, item.key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create fake bucket tree index %v: %w", item.key, err)
|
||||
}
|
||||
|
||||
return fkbtRoot.Put(item.val, zeroValue)
|
||||
}
|
||||
|
||||
func putListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
|
||||
bkt, err := createBucketLikelyExists(tx, item.name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create index %v: %w", item.name, err)
|
||||
}
|
||||
|
||||
lst, err := decodeList(bkt.Get(item.key))
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't decode leaf list %v: %w", item.key, err)
|
||||
}
|
||||
|
||||
lst = append(lst, item.val)
|
||||
|
||||
encodedLst, err := encodeList(lst)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't encode leaf list %v: %w", item.key, err)
|
||||
}
|
||||
|
||||
return bkt.Put(item.key, encodedLst)
|
||||
}
|
||||
|
||||
// encodeList decodes list of bytes into a single blog for list bucket indexes.
|
||||
func encodeList(lst [][]byte) ([]byte, error) {
|
||||
w := io.NewBufBinWriter()
|
||||
w.WriteVarUint(uint64(len(lst)))
|
||||
for i := range lst {
|
||||
w.WriteVarBytes(lst[i])
|
||||
}
|
||||
if w.Err != nil {
|
||||
return nil, w.Err
|
||||
}
|
||||
return w.Bytes(), nil
|
||||
}
|
||||
|
||||
// decodeList decodes blob into the list of bytes from list bucket index.
|
||||
func decodeList(data []byte) (lst [][]byte, err error) {
|
||||
if len(data) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var offset uint64
|
||||
size, n, err := getVarUint(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
offset += uint64(n)
|
||||
lst = make([][]byte, size, size+1)
|
||||
for i := range lst {
|
||||
sz, n, err := getVarUint(data[offset:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset += uint64(n)
|
||||
|
||||
next := offset + sz
|
||||
if uint64(len(data)) < next {
|
||||
return nil, gio.ErrUnexpectedEOF
|
||||
}
|
||||
lst[i] = data[offset:next]
|
||||
offset = next
|
||||
}
|
||||
return lst, nil
|
||||
}
|
||||
|
||||
func getVarUint(data []byte) (uint64, int, error) {
|
||||
if len(data) == 0 {
|
||||
return 0, 0, gio.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
switch b := data[0]; b {
|
||||
case 0xfd:
|
||||
if len(data) < 3 {
|
||||
return 0, 1, gio.ErrUnexpectedEOF
|
||||
}
|
||||
return uint64(binary.LittleEndian.Uint16(data[1:])), 3, nil
|
||||
case 0xfe:
|
||||
if len(data) < 5 {
|
||||
return 0, 1, gio.ErrUnexpectedEOF
|
||||
}
|
||||
return uint64(binary.LittleEndian.Uint32(data[1:])), 5, nil
|
||||
case 0xff:
|
||||
if len(data) < 9 {
|
||||
return 0, 1, gio.ErrUnexpectedEOF
|
||||
}
|
||||
return binary.LittleEndian.Uint64(data[1:]), 9, nil
|
||||
default:
|
||||
return uint64(b), 1, nil
|
||||
}
|
||||
}
|
||||
|
||||
// setStorageID for existing objects if they were moved from one
|
||||
// storage location to another.
|
||||
func setStorageID(tx *bbolt.Tx, addr oid.Address, id []byte, override bool) error {
|
||||
key := make([]byte, bucketKeySize)
|
||||
bkt, err := createBucketLikelyExists(tx, smallBucketName(addr.Container(), key))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
key = objectKey(addr.Object(), key)
|
||||
if override || bkt.Get(key) == nil {
|
||||
return bkt.Put(key, id)
|
||||
}
|
||||
return nil
|
||||
func putListIndexItem(tx *badger.Txn, key []byte) error {
|
||||
return tx.Set(key, zeroValue)
|
||||
}
|
||||
|
||||
// updateSpliInfo for existing objects if storage filled with extra information
|
||||
// about last object in split hierarchy or linking object.
|
||||
func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error {
|
||||
objKey := objectKey(addr.Object(), make([]byte, bucketKeySize))
|
||||
return updateSplitInfoIndex(tx, objKey, addr.Container(), make([]byte, bucketKeySize), from)
|
||||
func updateSplitInfo(tx *badger.Txn, cnr cid.ID, obj oid.ID, si *objectSDK.SplitInfo) error {
|
||||
key := rootKey(cnr, obj)
|
||||
existed, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch {
|
||||
case si == nil && existed == nil:
|
||||
return tx.Set(key, zeroValue)
|
||||
case si == nil:
|
||||
return nil
|
||||
case existed == nil || bytes.Equal(existed, zeroValue):
|
||||
siBytes, err := si.Marshal()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return tx.Set(key, siBytes)
|
||||
default:
|
||||
existedSI := objectSDK.NewSplitInfo()
|
||||
if err := existedSI.Unmarshal(existed); err != nil {
|
||||
return err
|
||||
}
|
||||
si = util.MergeSplitInfo(si, existedSI)
|
||||
siBytes, err := si.Marshal()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return tx.Set(key, siBytes)
|
||||
}
|
||||
}
|
||||
|
||||
// splitInfoFromObject returns split info based on last or linkin object.
|
||||
|
|
|
@ -2,11 +2,9 @@ package meta_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
|
||||
|
@ -43,9 +41,7 @@ func prepareObjects(n int) []*objectSDK.Object {
|
|||
|
||||
func BenchmarkPut(b *testing.B) {
|
||||
b.Run("parallel", func(b *testing.B) {
|
||||
db := newDB(b,
|
||||
meta.WithMaxBatchDelay(time.Millisecond*10),
|
||||
meta.WithMaxBatchSize(runtime.NumCPU()))
|
||||
db := newDB(b)
|
||||
defer func() { require.NoError(b, db.Close()) }()
|
||||
// Ensure the benchmark is bound by CPU and not waiting batch-delay time.
|
||||
b.SetParallelism(1)
|
||||
|
@ -65,9 +61,7 @@ func BenchmarkPut(b *testing.B) {
|
|||
})
|
||||
})
|
||||
b.Run("sequential", func(b *testing.B) {
|
||||
db := newDB(b,
|
||||
meta.WithMaxBatchDelay(time.Millisecond*10),
|
||||
meta.WithMaxBatchSize(1))
|
||||
db := newDB(b)
|
||||
defer func() { require.NoError(b, db.Close()) }()
|
||||
var index atomic.Int64
|
||||
index.Store(-1)
|
||||
|
|
|
@ -8,8 +8,8 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type epochState struct{ e uint64 }
|
||||
|
@ -42,16 +42,15 @@ func TestResetDropsContainerBuckets(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.NoError(t, db.Reset())
|
||||
require.NoError(t, db.Reset(context.Background()))
|
||||
|
||||
var bucketCount int
|
||||
require.NoError(t, db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
return tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
|
||||
_, exists := mStaticBuckets[string(name)]
|
||||
require.True(t, exists, "unexpected bucket:"+string(name))
|
||||
bucketCount++
|
||||
return nil
|
||||
})
|
||||
}))
|
||||
require.Equal(t, len(mStaticBuckets), bucketCount)
|
||||
require.NoError(t, db.database.Update(func(tx *badger.Txn) error {
|
||||
it := tx.NewIterator(badger.DefaultIteratorOptions)
|
||||
defer it.Close()
|
||||
|
||||
for it.Seek(nil); it.Valid(); it.Next() {
|
||||
require.Equal(t, byte(shardInfoPrefix), it.Item().Key()[0], "unexpected prefix: %d", it.Item().Key()[0])
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -8,13 +8,10 @@ import (
|
|||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
metamode "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
var (
|
||||
shardInfoBucket = []byte{shardInfoPrefix}
|
||||
shardIDKey = []byte("id")
|
||||
)
|
||||
var shardIDKey = []byte("id")
|
||||
|
||||
// GetShardID sets metabase operation mode
|
||||
// and reads shard id from db.
|
||||
|
@ -36,7 +33,7 @@ func (db *DB) GetShardID(mode metamode.Mode) ([]byte, error) {
|
|||
|
||||
id, err := db.readShardID()
|
||||
|
||||
if cErr := db.close(); cErr != nil {
|
||||
if cErr := metaerr.Wrap(db.database.Close()); cErr != nil {
|
||||
err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr))
|
||||
}
|
||||
|
||||
|
@ -47,11 +44,12 @@ func (db *DB) GetShardID(mode metamode.Mode) ([]byte, error) {
|
|||
// If id is missing, returns nil, nil.
|
||||
func (db *DB) readShardID() ([]byte, error) {
|
||||
var id []byte
|
||||
err := db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b != nil {
|
||||
id = bytes.Clone(b.Get(shardIDKey))
|
||||
err := db.database.View(func(tx *badger.Txn) error {
|
||||
v, err := valueOrNil(tx, shardInfoKey(shardIDKey))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
id = bytes.Clone(v)
|
||||
return nil
|
||||
})
|
||||
return id, metaerr.Wrap(err)
|
||||
|
@ -77,7 +75,7 @@ func (db *DB) SetShardID(id []byte, mode metamode.Mode) error {
|
|||
db.metrics.SetMode(metamode.ConvertToComponentModeDegraded(mode))
|
||||
}
|
||||
|
||||
if cErr := db.close(); cErr != nil {
|
||||
if cErr := metaerr.Wrap(db.database.Close()); cErr != nil {
|
||||
err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr))
|
||||
}
|
||||
|
||||
|
@ -86,11 +84,7 @@ func (db *DB) SetShardID(id []byte, mode metamode.Mode) error {
|
|||
|
||||
// writeShardID writes shard id to db.
|
||||
func (db *DB) writeShardID(id []byte) error {
|
||||
return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return b.Put(shardIDKey, id)
|
||||
return metaerr.Wrap(db.database.Update(func(tx *badger.Txn) error {
|
||||
return tx.Set(shardInfoKey(shardIDKey), id)
|
||||
}))
|
||||
}
|
||||
|
|
|
@ -3,12 +3,13 @@ package meta
|
|||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
@ -57,7 +58,7 @@ func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes
|
|||
return res, ErrDegradedMode
|
||||
}
|
||||
|
||||
err = db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
err = db.database.View(func(tx *badger.Txn) error {
|
||||
res.id, err = db.storageID(tx, prm.addr)
|
||||
|
||||
return err
|
||||
|
@ -66,14 +67,11 @@ func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes
|
|||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) {
|
||||
key := make([]byte, bucketKeySize)
|
||||
smallBucket := tx.Bucket(smallBucketName(addr.Container(), key))
|
||||
if smallBucket == nil {
|
||||
return nil, nil
|
||||
func (db *DB) storageID(tx *badger.Txn, addr oid.Address) ([]byte, error) {
|
||||
storageID, err := valueOrNil(tx, smallKey(addr.Container(), addr.Object()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
storageID := smallBucket.Get(objectKey(addr.Object(), key))
|
||||
if storageID == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
@ -126,9 +124,33 @@ func (db *DB) UpdateStorageID(ctx context.Context, prm UpdateStorageIDPrm) (res
|
|||
return res, ErrReadOnlyMode
|
||||
}
|
||||
|
||||
err = db.boltDB.Batch(func(tx *bbolt.Tx) error {
|
||||
for i := 0; i < retryCount; i++ {
|
||||
err = db.database.Update(func(tx *badger.Txn) error {
|
||||
return setStorageID(tx, prm.addr, prm.id, true)
|
||||
})
|
||||
if errors.Is(err, badger.ErrConflict) {
|
||||
time.Sleep(retryTimeout)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
success = err == nil
|
||||
return res, metaerr.Wrap(err)
|
||||
}
|
||||
|
||||
// setStorageID for existing objects if they were moved from one
|
||||
// storage location to another.
|
||||
func setStorageID(tx *badger.Txn, addr oid.Address, id []byte, override bool) error {
|
||||
key := smallKey(addr.Container(), addr.Object())
|
||||
if override {
|
||||
return tx.Set(key, id)
|
||||
}
|
||||
v, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if v == nil {
|
||||
return tx.Set(key, id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -3,31 +3,16 @@ package meta
|
|||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
var (
|
||||
// graveyardBucketName stores rows with the objects that have been
|
||||
// covered with Tombstone objects. That objects should not be returned
|
||||
// from the node and should not be accepted by the node from other
|
||||
// nodes.
|
||||
graveyardBucketName = []byte{graveyardPrefix}
|
||||
// garbageBucketName stores rows with the objects that should be physically
|
||||
// deleted by the node (Garbage Collector routine).
|
||||
garbageBucketName = []byte{garbagePrefix}
|
||||
toMoveItBucketName = []byte{toMoveItPrefix}
|
||||
containerVolumeBucketName = []byte{containerVolumePrefix}
|
||||
containerCounterBucketName = []byte{containerCountersPrefix}
|
||||
|
||||
zeroValue = []byte{0xFF}
|
||||
|
||||
errInvalidLength = errors.New("invalid length")
|
||||
)
|
||||
var zeroValue = []byte{0xFF}
|
||||
|
||||
// Prefix bytes for database keys. All ids and addresses are encoded in binary
|
||||
// unless specified otherwise.
|
||||
|
@ -42,13 +27,13 @@ const (
|
|||
// Key: object address
|
||||
// Value: dummy value
|
||||
garbagePrefix
|
||||
// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
|
||||
// _ Previous usage was for for bucket containing IDs of objects that are candidates for moving
|
||||
// to another shard.
|
||||
toMoveItPrefix
|
||||
// containerVolumePrefix is used for storing container size estimations.
|
||||
_
|
||||
// containerSizePrefix is used for storing container size estimations.
|
||||
// Key: container ID
|
||||
// Value: container size in bytes as little-endian uint64
|
||||
containerVolumePrefix
|
||||
containerSizePrefix
|
||||
// lockedPrefix is used for storing locked objects information.
|
||||
// Key: container ID
|
||||
// Value: bucket mapping objects locked to the list of corresponding LOCK objects.
|
||||
|
@ -124,6 +109,9 @@ const (
|
|||
// Key: container ID + type
|
||||
// Value: Object id
|
||||
ecInfoPrefix
|
||||
|
||||
// expiredPrefix used to store expiration info.
|
||||
expiredPrefix
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -133,139 +121,589 @@ const (
|
|||
addressKeySize = cidSize + objectKeySize
|
||||
)
|
||||
|
||||
func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
|
||||
key[0] = prefix
|
||||
cnr.Encode(key[1:])
|
||||
return key[:bucketKeySize]
|
||||
func keyPrefix(cnr cid.ID, prefix byte) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = prefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
// primaryBucketName returns <CID>.
|
||||
func primaryBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, primaryPrefix, key)
|
||||
func keyObject(prefix byte, cnr cid.ID, objID oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize)
|
||||
result[0] = prefix
|
||||
cnr.Encode(result[1:])
|
||||
objID.Encode(result[1+cidSize:])
|
||||
return result
|
||||
}
|
||||
|
||||
// tombstoneBucketName returns <CID>_TS.
|
||||
func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, tombstonePrefix, key)
|
||||
func addressFromKey(prefix byte, key []byte) (oid.Address, error) {
|
||||
if len(key) != 1+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != prefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cont cid.ID
|
||||
if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(key[1+cidSize:]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
|
||||
}
|
||||
var result oid.Address
|
||||
result.SetContainer(cont)
|
||||
result.SetObject(obj)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// smallBucketName returns <CID>_small.
|
||||
func smallBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, smallPrefix, key)
|
||||
// primaryKeyPrefix returns primaryPrefix_<CID>.
|
||||
func primaryKeyPrefix(cnr cid.ID) []byte {
|
||||
return keyPrefix(cnr, primaryPrefix)
|
||||
}
|
||||
|
||||
// attributeBucketName returns <CID>_attr_<attributeKey>.
|
||||
func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
|
||||
key[0] = userAttributePrefix
|
||||
cnr.Encode(key[1:])
|
||||
return append(key[:bucketKeySize], attributeKey...)
|
||||
func primaryKey(cnr cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(primaryPrefix, cnr, objID)
|
||||
}
|
||||
|
||||
// returns <CID> from attributeBucketName result, nil otherwise.
|
||||
func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
|
||||
if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
|
||||
return nil
|
||||
func addressFromPrimaryKey(v []byte) (oid.Address, error) {
|
||||
return addressFromKey(primaryPrefix, v)
|
||||
}
|
||||
|
||||
return val[1:bucketKeySize]
|
||||
// tombstoneKeyPrefix returns tombstonePrefix_<CID>.
|
||||
func tombstoneKeyPrefix(cnr cid.ID) []byte {
|
||||
return keyPrefix(cnr, tombstonePrefix)
|
||||
}
|
||||
|
||||
// payloadHashBucketName returns <CID>_payloadhash.
|
||||
func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, payloadHashPrefix, key)
|
||||
func tombstoneKey(cnr cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(tombstonePrefix, cnr, objID)
|
||||
}
|
||||
|
||||
// rootBucketName returns <CID>_root.
|
||||
func rootBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, rootPrefix, key)
|
||||
func addressFromTombstoneKey(v []byte) (oid.Address, error) {
|
||||
return addressFromKey(tombstonePrefix, v)
|
||||
}
|
||||
|
||||
// ownerBucketName returns <CID>_ownerid.
|
||||
func ownerBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, ownerPrefix, key)
|
||||
func garbageKey(cnr cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(garbagePrefix, cnr, objID)
|
||||
}
|
||||
|
||||
// parentBucketName returns <CID>_parent.
|
||||
func parentBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, parentPrefix, key)
|
||||
func addressFromGarbageKey(v []byte) (oid.Address, error) {
|
||||
return addressFromKey(garbagePrefix, v)
|
||||
}
|
||||
|
||||
// splitBucketName returns <CID>_splitid.
|
||||
func splitBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, splitPrefix, key)
|
||||
func graveyardKey(cnr cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(graveyardPrefix, cnr, objID)
|
||||
}
|
||||
|
||||
// ecInfoBucketName returns <CID>_ecinfo.
|
||||
func ecInfoBucketName(cnr cid.ID, key []byte) []byte {
|
||||
return bucketName(cnr, ecInfoPrefix, key)
|
||||
func addressFromGraveyardKey(v []byte) (oid.Address, error) {
|
||||
return addressFromKey(graveyardPrefix, v)
|
||||
}
|
||||
|
||||
// addressKey returns key for K-V tables when key is a whole address.
|
||||
func addressKey(addr oid.Address, key []byte) []byte {
|
||||
addr.Container().Encode(key)
|
||||
addr.Object().Encode(key[cidSize:])
|
||||
return key[:addressKeySize]
|
||||
func smallKey(cnr cid.ID, obj oid.ID) []byte {
|
||||
return keyObject(smallPrefix, cnr, obj)
|
||||
}
|
||||
|
||||
// parses object address formed by addressKey.
|
||||
func decodeAddressFromKey(dst *oid.Address, k []byte) error {
|
||||
if len(k) != addressKeySize {
|
||||
return errInvalidLength
|
||||
// attributeKeyPrefix returns userAttributePrefix_<attributeKey>_<CID>_<attributeValue>.
|
||||
func attributeKeyPrefix(cnr cid.ID, attributeKey, attributeValue string) []byte {
|
||||
result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue))
|
||||
result[0] = userAttributePrefix
|
||||
copy(result[1:], []byte(attributeKey))
|
||||
cnr.Encode(result[1+len(attributeKey):])
|
||||
copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
|
||||
return result
|
||||
}
|
||||
|
||||
// userAttributePrefix+attributeKey+<CID>+attributeValue+<OID>.
|
||||
func attributeKey(cnr cid.ID, objID oid.ID, attributeKey, attributeValue string) []byte {
|
||||
result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue)+objectKeySize)
|
||||
result[0] = userAttributePrefix
|
||||
copy(result[1:], []byte(attributeKey))
|
||||
cnr.Encode(result[1+len(attributeKey):])
|
||||
copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
|
||||
objID.Encode(result[1+cidSize+len(attributeKey)+len(attributeValue):])
|
||||
return result
|
||||
}
|
||||
|
||||
// returns attributeValue from attributeKey result, nil otherwise.
|
||||
func attributeValueFromAttributeKey(key []byte, attributeKey string) ([]byte, error) {
|
||||
if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
|
||||
return nil, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != userAttributePrefix {
|
||||
return nil, errInvalidKeyPrefix
|
||||
}
|
||||
if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
|
||||
return nil, errInvalidAttributeKey
|
||||
}
|
||||
|
||||
return key[1+len(attributeKey)+cidSize : len(key)-objectKeySize], nil
|
||||
}
|
||||
|
||||
func addressFromAttributeKey(key []byte, attributeKey string) (oid.Address, error) {
|
||||
if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != userAttributePrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
|
||||
return oid.Address{}, errInvalidAttributeKey
|
||||
}
|
||||
var cnrID cid.ID
|
||||
if err := cnrID.Decode(key[1+len(attributeKey) : 1+len(attributeKey)+cidSize]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
var objID oid.ID
|
||||
if err := objID.Decode(key[len(key)-objectKeySize:]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
|
||||
}
|
||||
var result oid.Address
|
||||
result.SetContainer(cnrID)
|
||||
result.SetObject(objID)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// payloadHashKeyLongPrefix returns payloadHashPrefix_<CID>_hash.
|
||||
func payloadHashKeyLongPrefix(cnr cid.ID, hash []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(hash))
|
||||
result[0] = payloadHashPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], hash)
|
||||
return result
|
||||
}
|
||||
|
||||
// payloadHashKeyShortPrefix returns payloadHashPrefix_<CID>.
|
||||
func payloadHashKeyShortPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = payloadHashPrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
// payloadHashKey returns payloadHashPrefix_<CID>_hash_<OID>.
|
||||
func payloadHashKey(cnr cid.ID, obj oid.ID, hash []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(hash)+objectKeySize)
|
||||
result[0] = payloadHashPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], hash)
|
||||
obj.Encode(result[1+cidSize+len(hash):])
|
||||
return result
|
||||
}
|
||||
|
||||
func addressFromPayloadHashKey(k []byte) (oid.Address, error) {
|
||||
if len(k) < 1+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != payloadHashPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(k[:cidSize]); err != nil {
|
||||
return err
|
||||
if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(k[cidSize:]); err != nil {
|
||||
return err
|
||||
if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
dst.SetObject(obj)
|
||||
dst.SetContainer(cnr)
|
||||
return nil
|
||||
var result oid.Address
|
||||
result.SetObject(obj)
|
||||
result.SetContainer(cnr)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// objectKey returns key for K-V tables when key is an object id.
|
||||
func objectKey(obj oid.ID, key []byte) []byte {
|
||||
obj.Encode(key)
|
||||
return key[:objectKeySize]
|
||||
func payloadHashFromPayloadHashKey(k []byte) ([]byte, error) {
|
||||
if len(k) < 1+cidSize+objectKeySize {
|
||||
return nil, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != payloadHashPrefix {
|
||||
return nil, errInvalidKeyPrefix
|
||||
}
|
||||
|
||||
return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
|
||||
}
|
||||
|
||||
// rootBucketName returns rootPrefix_<CID>.
|
||||
func rootKeyPrefix(cnr cid.ID) []byte {
|
||||
return keyPrefix(cnr, rootPrefix)
|
||||
}
|
||||
|
||||
func rootKey(cnr cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(rootPrefix, cnr, objID)
|
||||
}
|
||||
|
||||
func addressFromRootKey(key []byte) (oid.Address, error) {
|
||||
return addressFromKey(rootPrefix, key)
|
||||
}
|
||||
|
||||
// ownerKey returns ownerPrefix_<CID>_owner_<OID>.
|
||||
func ownerKey(cnr cid.ID, obj oid.ID, owner []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(owner)+objectKeySize)
|
||||
result[0] = ownerPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], owner)
|
||||
obj.Encode(result[1+cidSize+len(owner):])
|
||||
return result
|
||||
}
|
||||
|
||||
// ownerKeyShortPrefix returns ownerPrefix_<CID>.
|
||||
func ownerKeyShortPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = ownerPrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
// ownerKeyLongPrefix returns ownerPrefix_<CID>_owner.
|
||||
func ownerKeyLongPrefix(cnr cid.ID, owner []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(owner))
|
||||
result[0] = ownerPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], owner)
|
||||
return result
|
||||
}
|
||||
|
||||
func addressFromOwnerKey(k []byte) (oid.Address, error) {
|
||||
if len(k) < 1+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != ownerPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var result oid.Address
|
||||
result.SetObject(obj)
|
||||
result.SetContainer(cnr)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func ownerFromOwnerKey(k []byte) ([]byte, error) {
|
||||
if len(k) < 1+cidSize+objectKeySize {
|
||||
return nil, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != ownerPrefix {
|
||||
return nil, errInvalidKeyPrefix
|
||||
}
|
||||
return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
|
||||
}
|
||||
|
||||
// ecInfoLongKeyPrefix returns ecInfoPrefix_<CID>_<parent_OID>.
|
||||
func ecInfoLongKeyPrefix(cnr cid.ID, parent oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize)
|
||||
result[0] = ecInfoPrefix
|
||||
cnr.Encode(result[1:])
|
||||
parent.Encode(result[1+cidSize:])
|
||||
return result
|
||||
}
|
||||
|
||||
// ecInfoShortKeyPrefix returns ecInfoPrefix_<CID>.
|
||||
func ecInfoShortKeyPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = ecInfoPrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
func ecInfoKey(cnr cid.ID, parent, chunk oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
|
||||
result[0] = ecInfoPrefix
|
||||
cnr.Encode(result[1:])
|
||||
parent.Encode(result[1+cidSize:])
|
||||
chunk.Encode(result[1+cidSize+objectKeySize:])
|
||||
return result
|
||||
}
|
||||
|
||||
func addressOfParentFromECInfoKey(key []byte) (oid.Address, error) {
|
||||
return addressFromKey(ecInfoPrefix, key[:1+cidSize+objectKeySize])
|
||||
}
|
||||
|
||||
func addressOfChunkFromECInfoKey(key []byte) (oid.Address, error) {
|
||||
if len(key) != 1+cidSize+objectKeySize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != ecInfoPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cont cid.ID
|
||||
if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
|
||||
}
|
||||
var result oid.Address
|
||||
result.SetContainer(cont)
|
||||
result.SetObject(obj)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parentKeyShortPrefix returns parentPrefix_<CID>.
|
||||
func parentKeyShortPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = parentPrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
func addressOfParentFromParentKey(key []byte) (oid.Address, error) {
|
||||
return addressFromKey(parentPrefix, key[:1+cidSize+objectKeySize])
|
||||
}
|
||||
|
||||
func addressOfTargetFromParentKey(key []byte) (oid.Address, error) {
|
||||
if len(key) != 1+cidSize+objectKeySize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != parentPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cont cid.ID
|
||||
if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
|
||||
}
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil {
|
||||
return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
|
||||
}
|
||||
var result oid.Address
|
||||
result.SetContainer(cont)
|
||||
result.SetObject(obj)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parentKeyLongPrefix returns parentPrefix_<CID>_<parent_OID>.
|
||||
func parentKeyLongPrefix(cnr cid.ID, parentObj oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize)
|
||||
result[0] = parentPrefix
|
||||
cnr.Encode(result[1:])
|
||||
parentObj.Encode(result[bucketKeySize:])
|
||||
return result
|
||||
}
|
||||
|
||||
func parentKey(cnr cid.ID, parentObj, obj oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
|
||||
result[0] = parentPrefix
|
||||
cnr.Encode(result[1:])
|
||||
parentObj.Encode(result[1+cidSize:])
|
||||
obj.Encode(result[1+cidSize+objectKeySize:])
|
||||
return result
|
||||
}
|
||||
|
||||
// splitKeyLongPrefix returns splitPrefix_<CID>_splitID.
|
||||
func splitKeyLongPrefix(cnr cid.ID, splitID []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(splitID))
|
||||
result[0] = splitPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], splitID)
|
||||
return result
|
||||
}
|
||||
|
||||
// splitKeyShortPrefix returns splitPrefix_<CID>.
|
||||
func splitKeyShortPrefix(cnr cid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize)
|
||||
result[0] = splitPrefix
|
||||
cnr.Encode(result[1:])
|
||||
return result
|
||||
}
|
||||
|
||||
// splitKey returns splitPrefix_<CID>_splitID_<OID>.
|
||||
func splitKey(cnr cid.ID, obj oid.ID, splitID []byte) []byte {
|
||||
result := make([]byte, 1+cidSize+len(splitID)+objectKeySize)
|
||||
result[0] = splitPrefix
|
||||
cnr.Encode(result[1:])
|
||||
copy(result[1+cidSize:], splitID)
|
||||
obj.Encode(result[1+cidSize+len(splitID):])
|
||||
return result
|
||||
}
|
||||
|
||||
func addressFromSplitKey(key []byte) (oid.Address, error) {
|
||||
if len(key) < 1+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != splitPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(key[1 : 1+cidSize]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var result oid.Address
|
||||
result.SetObject(obj)
|
||||
result.SetContainer(cnr)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func splitIDFromSplitKey(key []byte) ([]byte, error) {
|
||||
if len(key) < 1+cidSize+objectKeySize {
|
||||
return nil, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != splitPrefix {
|
||||
return nil, errInvalidKeyPrefix
|
||||
}
|
||||
|
||||
return bytes.Clone(key[1+cidSize : len(key)-objectKeySize]), nil
|
||||
}
|
||||
|
||||
// returns prefix of the keys with objects of type LOCK for specified container.
|
||||
func lockersKeyPrefix(idCnr cid.ID) []byte {
|
||||
return keyPrefix(idCnr, lockersPrefix)
|
||||
}
|
||||
|
||||
func lockersKey(cnrID cid.ID, objID oid.ID) []byte {
|
||||
return keyObject(lockersPrefix, cnrID, objID)
|
||||
}
|
||||
|
||||
func addressFromLockersKey(v []byte) (oid.Address, error) {
|
||||
return addressFromKey(lockersPrefix, v)
|
||||
}
|
||||
|
||||
// returns lockedPrefix_<CID>_<OID>.
|
||||
func lockedKeyLongPrefix(cnrID cid.ID, objID oid.ID) []byte {
|
||||
prefix := make([]byte, 1+cidSize+objectKeySize)
|
||||
prefix[0] = lockedPrefix
|
||||
cnrID.Encode(prefix[1:])
|
||||
objID.Encode(prefix[1+cidSize:])
|
||||
return prefix
|
||||
}
|
||||
|
||||
// returns lockedPrefix_<CID>.
|
||||
func lockedKeyShortPrefix(cnrID cid.ID) []byte {
|
||||
prefix := make([]byte, 1+cidSize)
|
||||
prefix[0] = lockedPrefix
|
||||
cnrID.Encode(prefix[1:])
|
||||
return prefix
|
||||
}
|
||||
|
||||
// returns lockedPrefix_<CID>_<OID>_<LOCKER_OID>.
|
||||
func lockedKey(cnrID cid.ID, objID, lockerObjID oid.ID) []byte {
|
||||
result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
|
||||
result[0] = lockedPrefix
|
||||
cnrID.Encode(result[1:])
|
||||
objID.Encode(result[1+cidSize:])
|
||||
lockerObjID.Encode(result[1+cidSize+objectKeySize:])
|
||||
return result
|
||||
}
|
||||
|
||||
func lockerObjectIDFromLockedKey(k []byte) (oid.ID, error) {
|
||||
if len(k) != 1+cidSize+objectKeySize+objectKeySize {
|
||||
return oid.ID{}, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != lockedPrefix {
|
||||
return oid.ID{}, errInvalidKeyPrefix
|
||||
}
|
||||
var result oid.ID
|
||||
if err := result.Decode(k[1+cidSize+objectKeySize:]); err != nil {
|
||||
return oid.ID{}, fmt.Errorf("failed to decode lockers object ID: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func objectIDFromLockedKey(k []byte) (oid.ID, error) {
|
||||
if len(k) != 1+cidSize+objectKeySize+objectKeySize {
|
||||
return oid.ID{}, errInvalidKeyLenght
|
||||
}
|
||||
if k[0] != lockedPrefix {
|
||||
return oid.ID{}, errInvalidKeyPrefix
|
||||
}
|
||||
var result oid.ID
|
||||
if err := result.Decode(k[1+cidSize : 1+cidSize+objectKeySize]); err != nil {
|
||||
return oid.ID{}, fmt.Errorf("failed to decode locked object ID: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular.
|
||||
//
|
||||
// firstIrregularObjectType(tx, cnr, obj) usage allows getting object type.
|
||||
func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type {
|
||||
if len(objs) == 0 {
|
||||
panic("empty object list in firstIrregularObjectType")
|
||||
func firstIrregularObjectType(tx *badger.Txn, idCnr cid.ID, objs ...oid.ID) (objectSDK.Type, error) {
|
||||
for _, objID := range objs {
|
||||
key := tombstoneKey(idCnr, objID)
|
||||
v, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return objectSDK.TypeRegular, err
|
||||
}
|
||||
|
||||
var keys [2][1 + cidSize]byte
|
||||
|
||||
irregularTypeBuckets := [...]struct {
|
||||
typ objectSDK.Type
|
||||
name []byte
|
||||
}{
|
||||
{objectSDK.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
|
||||
{objectSDK.TypeLock, bucketNameLockers(idCnr, keys[1][:])},
|
||||
if v != nil {
|
||||
return objectSDK.TypeTombstone, nil
|
||||
}
|
||||
|
||||
for i := range objs {
|
||||
for j := range irregularTypeBuckets {
|
||||
if inBucket(tx, irregularTypeBuckets[j].name, objs[i]) {
|
||||
return irregularTypeBuckets[j].typ
|
||||
key = lockersKey(idCnr, objID)
|
||||
v, err = valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return objectSDK.TypeRegular, err
|
||||
}
|
||||
if v != nil {
|
||||
return objectSDK.TypeLock, nil
|
||||
}
|
||||
}
|
||||
|
||||
return objectSDK.TypeRegular
|
||||
return objectSDK.TypeRegular, nil
|
||||
}
|
||||
|
||||
// return true if provided object is of LOCK type.
|
||||
func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
|
||||
return inBucket(tx,
|
||||
bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
|
||||
objectKey(obj, make([]byte, objectKeySize)))
|
||||
func isLockObject(tx *badger.Txn, idCnr cid.ID, obj oid.ID) (bool, error) {
|
||||
key := lockersKey(idCnr, obj)
|
||||
v, err := valueOrNil(tx, key)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return v != nil, nil
|
||||
}
|
||||
|
||||
func expiredKey(cnr cid.ID, obj oid.ID, epoch uint64) []byte {
|
||||
result := make([]byte, 1+8+cidSize+objectKeySize)
|
||||
result[0] = expiredPrefix
|
||||
// BigEndian is important for iteration order
|
||||
binary.BigEndian.PutUint64(result[1:1+8], epoch)
|
||||
cnr.Encode(result[1+8 : 1+8+cidSize])
|
||||
obj.Encode(result[1+8+cidSize:])
|
||||
return result
|
||||
}
|
||||
|
||||
func expirationEpochFromExpiredKey(key []byte) (uint64, error) {
|
||||
if len(key) != 1+8+cidSize+objectKeySize {
|
||||
return 0, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != expiredPrefix {
|
||||
return 0, errInvalidKeyPrefix
|
||||
}
|
||||
// BigEndian is important for iteration order
|
||||
return binary.BigEndian.Uint64(key[1 : 1+8]), nil
|
||||
}
|
||||
|
||||
func addressFromExpiredKey(key []byte) (oid.Address, error) {
|
||||
if len(key) != 1+8+cidSize+objectKeySize {
|
||||
return oid.Address{}, errInvalidKeyLenght
|
||||
}
|
||||
if key[0] != expiredPrefix {
|
||||
return oid.Address{}, errInvalidKeyPrefix
|
||||
}
|
||||
var cnr cid.ID
|
||||
if err := cnr.Decode(key[1+8 : 1+8+cidSize]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var obj oid.ID
|
||||
if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil {
|
||||
return oid.Address{}, err
|
||||
}
|
||||
|
||||
var result oid.Address
|
||||
result.SetObject(obj)
|
||||
result.SetContainer(cnr)
|
||||
return result, nil
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
"fmt"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||
"go.etcd.io/bbolt"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
)
|
||||
|
||||
// version contains current metabase version.
|
||||
|
@ -18,12 +18,14 @@ var versionKey = []byte("version")
|
|||
// the current code version.
|
||||
var ErrOutdatedVersion = logicerr.New("invalid version, resynchronization is required")
|
||||
|
||||
func checkVersion(tx *bbolt.Tx, initialized bool) error {
|
||||
func (db *DB) checkVersion() error {
|
||||
return db.database.Update(func(txn *badger.Txn) error {
|
||||
var knownVersion bool
|
||||
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b != nil {
|
||||
data := b.Get(versionKey)
|
||||
data, err := valueOrNil(txn, shardInfoKey(versionKey))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(data) == 8 {
|
||||
knownVersion = true
|
||||
|
||||
|
@ -32,11 +34,10 @@ func checkVersion(tx *bbolt.Tx, initialized bool) error {
|
|||
return fmt.Errorf("%w: expected=%d, stored=%d", ErrOutdatedVersion, version, stored)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !initialized {
|
||||
if !db.initialized {
|
||||
// new database, write version
|
||||
return updateVersion(tx, version)
|
||||
return updateVersion(txn, version)
|
||||
} else if !knownVersion {
|
||||
// db is initialized but no version
|
||||
// has been found; that could happen
|
||||
|
@ -47,15 +48,18 @@ func checkVersion(tx *bbolt.Tx, initialized bool) error {
|
|||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func updateVersion(tx *bbolt.Tx, version uint64) error {
|
||||
func shardInfoKey(key []byte) []byte {
|
||||
result := make([]byte, len(key))
|
||||
result[0] = shardInfoPrefix
|
||||
copy(result[1:], key)
|
||||
return result
|
||||
}
|
||||
|
||||
func updateVersion(tx *badger.Txn, version uint64) error {
|
||||
data := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(data, version)
|
||||
|
||||
b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create auxiliary bucket: %w", err)
|
||||
}
|
||||
return b.Put(versionKey, data)
|
||||
return tx.Set(shardInfoKey(versionKey), data)
|
||||
}
|
||||
|
|
|
@ -9,8 +9,8 @@ import (
|
|||
"testing"
|
||||
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||
"github.com/dgraph-io/badger/v4"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type epochStateImpl struct{}
|
||||
|
@ -27,19 +27,20 @@ func TestVersion(t *testing.T) {
|
|||
WithPermissions(0o600), WithEpochState(epochStateImpl{}))
|
||||
}
|
||||
check := func(t *testing.T, db *DB) {
|
||||
require.NoError(t, db.boltDB.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(shardInfoBucket)
|
||||
if b == nil {
|
||||
return errors.New("shard info bucket not found")
|
||||
require.NoError(t, db.database.View(func(tx *badger.Txn) error {
|
||||
item, err := tx.Get(shardInfoKey(versionKey))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
data := b.Get(versionKey)
|
||||
if len(data) != 8 {
|
||||
return item.Value(func(val []byte) error {
|
||||
if len(val) != 8 {
|
||||
return errors.New("invalid version data")
|
||||
}
|
||||
if stored := binary.LittleEndian.Uint64(data); stored != version {
|
||||
if stored := binary.LittleEndian.Uint64(val); stored != version {
|
||||
return fmt.Errorf("invalid version: %d != %d", stored, version)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}))
|
||||
}
|
||||
t.Run("simple", func(t *testing.T) {
|
||||
|
@ -68,7 +69,7 @@ func TestVersion(t *testing.T) {
|
|||
t.Run("invalid version", func(t *testing.T) {
|
||||
db := newDB(t)
|
||||
require.NoError(t, db.Open(context.Background(), mode.ReadWrite))
|
||||
require.NoError(t, db.boltDB.Update(func(tx *bbolt.Tx) error {
|
||||
require.NoError(t, db.database.Update(func(tx *badger.Txn) error {
|
||||
return updateVersion(tx, version+1)
|
||||
}))
|
||||
require.NoError(t, db.Close())
|
||||
|
@ -79,7 +80,7 @@ func TestVersion(t *testing.T) {
|
|||
|
||||
t.Run("reset", func(t *testing.T) {
|
||||
require.NoError(t, db.Open(context.Background(), mode.ReadWrite))
|
||||
require.NoError(t, db.Reset())
|
||||
require.NoError(t, db.Reset(context.Background()))
|
||||
check(t, db)
|
||||
require.NoError(t, db.Close())
|
||||
})
|
||||
|
|
|
@ -26,7 +26,7 @@ func (r ContainerSizeRes) Size() uint64 {
|
|||
return r.size
|
||||
}
|
||||
|
||||
func (s *Shard) ContainerSize(prm ContainerSizePrm) (ContainerSizeRes, error) {
|
||||
func (s *Shard) ContainerSize(ctx context.Context, prm ContainerSizePrm) (ContainerSizeRes, error) {
|
||||
s.m.RLock()
|
||||
defer s.m.RUnlock()
|
||||
|
||||
|
@ -34,7 +34,7 @@ func (s *Shard) ContainerSize(prm ContainerSizePrm) (ContainerSizeRes, error) {
|
|||
return ContainerSizeRes{}, ErrDegradedMode
|
||||
}
|
||||
|
||||
size, err := s.metaBase.ContainerSize(prm.cnr)
|
||||
size, err := s.metaBase.ContainerSize(ctx, prm.cnr)
|
||||
if err != nil {
|
||||
return ContainerSizeRes{}, fmt.Errorf("could not get container size: %w", err)
|
||||
}
|
||||
|
|
|
@ -194,17 +194,19 @@ func (s *Shard) refillMetabase(ctx context.Context) error {
|
|||
}
|
||||
}()
|
||||
|
||||
err := s.metaBase.Reset()
|
||||
err := s.metaBase.Reset(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not reset metabase: %w", err)
|
||||
}
|
||||
|
||||
s.log.Warn("counting objects...")
|
||||
withCount := true
|
||||
totalObjects, err := s.blobStor.ObjectsCount(ctx)
|
||||
if err != nil {
|
||||
s.log.Warn(logs.EngineRefillFailedToGetObjectsCount, zap.Error(err))
|
||||
withCount = false
|
||||
}
|
||||
s.log.Warn("counting objects completed")
|
||||
|
||||
eg, egCtx := errgroup.WithContext(ctx)
|
||||
if s.cfg.refillMetabaseWorkersCount > 0 {
|
||||
|
@ -213,6 +215,7 @@ func (s *Shard) refillMetabase(ctx context.Context) error {
|
|||
|
||||
var completedCount uint64
|
||||
var metricGuard sync.Mutex
|
||||
s.log.Warn("iterating objects")
|
||||
itErr := blobstor.IterateBinaryObjects(egCtx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error {
|
||||
eg.Go(func() error {
|
||||
var success bool
|
||||
|
@ -248,7 +251,7 @@ func (s *Shard) refillMetabase(ctx context.Context) error {
|
|||
return fmt.Errorf("could not put objects to the meta: %w", err)
|
||||
}
|
||||
|
||||
err = s.metaBase.SyncCounters()
|
||||
err = s.metaBase.SyncCounters(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not sync object counters: %w", err)
|
||||
}
|
||||
|
@ -410,7 +413,7 @@ func (s *Shard) Reload(ctx context.Context, opts ...Option) error {
|
|||
// config after the node was updated.
|
||||
err = s.refillMetabase(ctx)
|
||||
} else {
|
||||
err = s.metaBase.Init()
|
||||
err = s.metaBase.Init2(ctx)
|
||||
}
|
||||
if err != nil {
|
||||
s.log.Error(logs.ShardCantInitializeMetabaseMoveToADegradedreadonlyMode, zap.Error(err))
|
||||
|
|
|
@ -3,7 +3,6 @@ package shard
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -28,7 +27,6 @@ import (
|
|||
oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
|
||||
objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type objAddr struct {
|
||||
|
@ -37,6 +35,7 @@ type objAddr struct {
|
|||
}
|
||||
|
||||
func TestShardOpen(t *testing.T) {
|
||||
t.Skip("badger does not support open file")
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
|
@ -49,6 +48,7 @@ func TestShardOpen(t *testing.T) {
|
|||
))
|
||||
|
||||
var allowedMode atomic.Int64
|
||||
/*
|
||||
openFileMetabase := func(p string, f int, perm fs.FileMode) (*os.File, error) {
|
||||
const modeMask = os.O_RDONLY | os.O_RDWR | os.O_WRONLY
|
||||
if int64(f&modeMask) == allowedMode.Load() {
|
||||
|
@ -56,6 +56,7 @@ func TestShardOpen(t *testing.T) {
|
|||
}
|
||||
return nil, fs.ErrPermission
|
||||
}
|
||||
*/
|
||||
|
||||
wcOpts := []writecache.Option{
|
||||
writecache.WithPath(filepath.Join(dir, "wc")),
|
||||
|
@ -72,7 +73,7 @@ func TestShardOpen(t *testing.T) {
|
|||
WithMetaBaseOptions(
|
||||
meta.WithPath(metaPath),
|
||||
meta.WithEpochState(epochState{}),
|
||||
meta.WithBoltDBOptions(&bbolt.Options{OpenFile: openFileMetabase}),
|
||||
// meta.WithBoltDBOptions(&bbolt.Options{OpenFile: openFileMetabase}),
|
||||
),
|
||||
WithPiloramaOptions(
|
||||
pilorama.WithPath(filepath.Join(dir, "pilorama"))),
|
||||
|
@ -352,7 +353,7 @@ func TestRefillMetabase(t *testing.T) {
|
|||
checkTombMembers(true)
|
||||
checkLocked(t, cnrLocked, locked)
|
||||
|
||||
c, err := sh.metaBase.ObjectCounters()
|
||||
c, err := sh.metaBase.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
phyBefore := c.Phy
|
||||
|
@ -388,7 +389,7 @@ func TestRefillMetabase(t *testing.T) {
|
|||
err = sh.refillMetabase(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
c, err = sh.metaBase.ObjectCounters()
|
||||
c, err = sh.metaBase.ObjectCounters(context.Background())
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, phyBefore, c.Phy)
|
||||
|
|
|
@ -23,7 +23,7 @@ func (s *Shard) LogicalObjectsCount(ctx context.Context) (uint64, error) {
|
|||
return 0, ErrDegradedMode
|
||||
}
|
||||
|
||||
cc, err := s.metaBase.ObjectCounters()
|
||||
cc, err := s.metaBase.ObjectCounters(ctx)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
tracingPkg "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/tracing"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
|
||||
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
|
||||
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
|
||||
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
|
@ -134,19 +135,25 @@ func (s *Shard) deleteFromBlobstor(ctx context.Context, addr oid.Address) error
|
|||
}
|
||||
|
||||
func (s *Shard) deleteFromMetabase(ctx context.Context, addr oid.Address) error {
|
||||
var delPrm meta.DeletePrm
|
||||
delPrm.SetAddresses(addr)
|
||||
|
||||
res, err := s.metaBase.Delete(ctx, delPrm)
|
||||
res, err := s.metaBase.Delete(ctx, meta.DeletePrm{
|
||||
Address: addr,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.decObjectCounterBy(physical, res.PhyCount())
|
||||
s.decObjectCounterBy(logical, res.LogicCount())
|
||||
s.decObjectCounterBy(user, res.UserCount())
|
||||
s.decContainerObjectCounter(res.RemovedByCnrID())
|
||||
s.addToContainerSize(addr.Container().EncodeToString(), -int64(res.LogicSize()))
|
||||
s.addToPayloadSize(-int64(res.PhySize()))
|
||||
s.decObjectCounterBy(physical, res.PhyCount)
|
||||
s.decObjectCounterBy(logical, res.LogicCount)
|
||||
s.decObjectCounterBy(user, res.UserCount)
|
||||
containerCounter := map[cid.ID]meta.ObjectCounters{
|
||||
addr.Container(): {
|
||||
Logic: res.LogicCount,
|
||||
Phy: res.PhyCount,
|
||||
User: res.UserCount,
|
||||
},
|
||||
}
|
||||
s.decContainerObjectCounter(containerCounter)
|
||||
s.addToContainerSize(addr.Container().EncodeToString(), -int64(res.LogicSize))
|
||||
s.addToPayloadSize(-int64(res.PhySize))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -678,7 +678,7 @@ func (s *Shard) HandleExpiredLocks(ctx context.Context, epoch uint64, lockers []
|
|||
if s.GetMode().NoMetabase() {
|
||||
return
|
||||
}
|
||||
unlocked, err := s.metaBase.FreeLockedBy(lockers)
|
||||
unlocked, err := s.metaBase.FreeLockedBy(ctx, lockers)
|
||||
if err != nil {
|
||||
s.log.Warn(logs.ShardFailureToUnlockObjects,
|
||||
zap.String("error", err.Error()),
|
||||
|
@ -730,12 +730,12 @@ func (s *Shard) inhumeUnlockedIfExpired(ctx context.Context, epoch uint64, unloc
|
|||
}
|
||||
|
||||
// HandleDeletedLocks unlocks all objects which were locked by lockers.
|
||||
func (s *Shard) HandleDeletedLocks(lockers []oid.Address) {
|
||||
func (s *Shard) HandleDeletedLocks(ctx context.Context, lockers []oid.Address) {
|
||||
if s.GetMode().NoMetabase() {
|
||||
return
|
||||
}
|
||||
|
||||
_, err := s.metaBase.FreeLockedBy(lockers)
|
||||
_, err := s.metaBase.FreeLockedBy(ctx, lockers)
|
||||
if err != nil {
|
||||
s.log.Warn(logs.ShardFailureToUnlockObjects,
|
||||
zap.String("error", err.Error()),
|
||||
|
|
|
@ -61,8 +61,8 @@ func Test_ObjectNotFoundIfNotDeletedFromMetabase(t *testing.T) {
|
|||
meta.WithEpochState(epochState{}),
|
||||
),
|
||||
WithPiloramaOptions(pilorama.WithPath(filepath.Join(rootPath, "pilorama"))),
|
||||
WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(addresses)
|
||||
WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(ctx, addresses)
|
||||
}),
|
||||
WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) {
|
||||
sh.HandleExpiredLocks(ctx, epoch, a)
|
||||
|
|
|
@ -35,7 +35,7 @@ func (s *Shard) UpdateID() (err error) {
|
|||
var idFromMetabase []byte
|
||||
modeDegraded := s.GetMode().NoMetabase()
|
||||
if !modeDegraded {
|
||||
if idFromMetabase, err = s.metaBase.GetShardID(mode.ReadOnly); err != nil {
|
||||
if idFromMetabase, err = s.metaBase.GetShardID(mode.ReadWrite); err != nil {
|
||||
err = fmt.Errorf("failed to read shard id from metabase: %w", err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,8 +53,8 @@ func TestShard_Lock(t *testing.T) {
|
|||
meta.WithPath(filepath.Join(rootPath, "meta")),
|
||||
meta.WithEpochState(epochState{}),
|
||||
),
|
||||
WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(addresses)
|
||||
WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(ctx, addresses)
|
||||
}),
|
||||
}
|
||||
|
||||
|
|
|
@ -439,7 +439,7 @@ func (s *Shard) updateMetrics(ctx context.Context) {
|
|||
return
|
||||
}
|
||||
|
||||
cc, err := s.metaBase.ObjectCounters()
|
||||
cc, err := s.metaBase.ObjectCounters(ctx)
|
||||
if err != nil {
|
||||
s.log.Warn(logs.ShardMetaObjectCounterRead,
|
||||
zap.Error(err),
|
||||
|
@ -452,23 +452,16 @@ func (s *Shard) updateMetrics(ctx context.Context) {
|
|||
s.setObjectCounterBy(logical, cc.Logic)
|
||||
s.setObjectCounterBy(user, cc.User)
|
||||
|
||||
cnrList, err := s.metaBase.Containers(ctx)
|
||||
cnrList, err := s.metaBase.ContainerSizes(ctx)
|
||||
if err != nil {
|
||||
s.log.Warn(logs.ShardMetaCantReadContainerList, zap.Error(err))
|
||||
s.log.Warn(logs.ShardMetaCantReadContainerSize, zap.Error(err))
|
||||
return
|
||||
}
|
||||
|
||||
var totalPayload uint64
|
||||
|
||||
for i := range cnrList {
|
||||
size, err := s.metaBase.ContainerSize(cnrList[i])
|
||||
if err != nil {
|
||||
s.log.Warn(logs.ShardMetaCantReadContainerSize,
|
||||
zap.String("cid", cnrList[i].EncodeToString()),
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
s.addToContainerSize(cnrList[i].EncodeToString(), int64(size))
|
||||
for cnrID, size := range cnrList {
|
||||
s.addToContainerSize(cnrID.EncodeToString(), int64(size))
|
||||
totalPayload += size
|
||||
}
|
||||
|
||||
|
|
|
@ -89,8 +89,8 @@ func newCustomShard(t testing.TB, enableWriteCache bool, o shardOptions) *Shard
|
|||
WithPiloramaOptions(pilorama.WithPath(filepath.Join(o.rootPath, "pilorama"))),
|
||||
WithWriteCache(enableWriteCache),
|
||||
WithWriteCacheOptions(o.wcOpts),
|
||||
WithDeletedLockCallback(func(_ context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(addresses)
|
||||
WithDeletedLockCallback(func(ctx context.Context, addresses []oid.Address) {
|
||||
sh.HandleDeletedLocks(ctx, addresses)
|
||||
}),
|
||||
WithExpiredLocksCallback(func(ctx context.Context, epoch uint64, a []oid.Address) {
|
||||
sh.HandleExpiredLocks(ctx, epoch, a)
|
||||
|
|
Loading…
Reference in a new issue