WIP: Badger metabase support/v0.37 #1106
71 changed files with 4091 additions and 2682 deletions
@@ -1,12 +1,9 @@
 package meta
 
 import (
-	"time"
-
 	common "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-lens/internal"
 	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
 	"github.com/spf13/cobra"
-	"go.etcd.io/bbolt"
 )
 
 var (
@@ -37,10 +34,6 @@ func init() {
 func openMeta(cmd *cobra.Command) *meta.DB {
 	db := meta.New(
 		meta.WithPath(vPath),
-		meta.WithBoltDBOptions(&bbolt.Options{
-			ReadOnly: true,
-			Timeout:  100 * time.Millisecond,
-		}),
 		meta.WithEpochState(epochState{}),
 	)
 	common.ExitOnErr(cmd, common.Errf("could not open metabase: %w", db.Open(cmd.Context(), true)))
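Context for this hunk: the read-only flag that previously traveled through bbolt.Options.ReadOnly is now expressed only as the boolean passed to Open. A minimal sketch of the resulting caller pattern, not the PR's code; the path literal is a placeholder and epochState is the lens tool's stub:

	db := meta.New(
		meta.WithPath("/path/to/metabase"), // placeholder path
		meta.WithEpochState(epochState{}),  // stub providing the current epoch
	)
	// The second argument selects read-only mode; with the Badger backend it
	// ultimately sets badger.Options.ReadOnly (see the control.go hunks below).
	if err := db.Open(cmd.Context(), true); err != nil {
		// handle "could not open metabase" error
	}
	defer db.Close()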
@@ -71,7 +71,6 @@ import (
 	"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
 	neogoutil "github.com/nspcc-dev/neo-go/pkg/util"
 	"github.com/panjf2000/ants/v2"
-	"go.etcd.io/bbolt"
 	"go.uber.org/zap"
 	"google.golang.org/grpc"
 )
@@ -107,17 +106,19 @@ type applicationConfiguration struct {
 }
 
 type shardCfg struct {
 	compress                  bool
 	smallSizeObjectLimit      uint64
 	uncompressableContentType []string
 	refillMetabase            bool
-	mode                      shardmode.Mode
+	refillMetabaseWorkersCount int
+	mode                       shardmode.Mode
 
 	metaCfg struct {
 		path          string
 		perm          fs.FileMode
 		maxBatchSize  int
 		maxBatchDelay time.Duration
+		noSync        bool
 	}
 
 	subStorages []subStorageCfg
@@ -223,6 +224,7 @@ func (a *applicationConfiguration) updateShardConfig(c *config.Config, oldConfig
 	var newConfig shardCfg
 
 	newConfig.refillMetabase = oldConfig.RefillMetabase()
+	newConfig.refillMetabaseWorkersCount = oldConfig.RefillMetabaseWorkersCount()
 	newConfig.mode = oldConfig.Mode()
 	newConfig.compress = oldConfig.Compress()
 	newConfig.uncompressableContentType = oldConfig.UncompressableContentTypes()
@@ -324,6 +326,7 @@ func (a *applicationConfiguration) setMetabaseConfig(newConfig *shardCfg, oldCon
 	m.perm = metabaseCfg.BoltDB().Perm()
 	m.maxBatchDelay = metabaseCfg.BoltDB().MaxBatchDelay()
 	m.maxBatchSize = metabaseCfg.BoltDB().MaxBatchSize()
+	m.noSync = metabaseCfg.BoltDB().NoSync()
 }
 
 func (a *applicationConfiguration) setGCConfig(newConfig *shardCfg, oldConfig *shardconfig.Config) {
@@ -857,13 +860,9 @@ func (c *cfg) getShardOpts(shCfg shardCfg) shardOptsWithID {
 	mbOptions := []meta.Option{
 		meta.WithPath(shCfg.metaCfg.path),
 		meta.WithPermissions(shCfg.metaCfg.perm),
-		meta.WithMaxBatchSize(shCfg.metaCfg.maxBatchSize),
-		meta.WithMaxBatchDelay(shCfg.metaCfg.maxBatchDelay),
-		meta.WithBoltDBOptions(&bbolt.Options{
-			Timeout: 100 * time.Millisecond,
-		}),
 		meta.WithLogger(c.log),
 		meta.WithEpochState(c.cfgNetmap.state),
+		meta.WithNoSync(shCfg.metaCfg.noSync),
 	}
 	if c.metricsCollector != nil {
 		mbOptions = append(mbOptions, meta.WithMetrics(lsmetrics.NewMetabaseMetrics(shCfg.metaCfg.path, c.metricsCollector.MetabaseMetrics())))
@@ -874,6 +873,7 @@ func (c *cfg) getShardOpts(shCfg shardCfg) shardOptsWithID {
 	sh.shOpts = []shard.Option{
 		shard.WithLogger(c.log),
 		shard.WithRefillMetabase(shCfg.refillMetabase),
+		shard.WithRefillMetabaseWorkersCount(shCfg.refillMetabaseWorkersCount),
 		shard.WithMode(shCfg.mode),
 		shard.WithBlobStorOptions(blobstoreOpts...),
 		shard.WithMetaBaseOptions(mbOptions...),
@@ -114,6 +114,7 @@ func TestEngineSection(t *testing.T) {
 
 			require.Equal(t, false, sc.RefillMetabase())
 			require.Equal(t, mode.ReadOnly, sc.Mode())
+			require.Equal(t, 100, sc.RefillMetabaseWorkersCount())
 		case 1:
 			require.Equal(t, "tmp/1/blob/pilorama.db", pl.Path())
 			require.Equal(t, fs.FileMode(0o644), pl.Perm())
@@ -164,6 +165,7 @@ func TestEngineSection(t *testing.T) {
 
 			require.Equal(t, true, sc.RefillMetabase())
 			require.Equal(t, mode.ReadWrite, sc.Mode())
+			require.Equal(t, shardconfig.RefillMetabaseWorkersCountDefault, sc.RefillMetabaseWorkersCount())
 		}
 		return nil
 	})
@@ -16,8 +16,12 @@ import (
 // which provides access to Shard configurations.
 type Config config.Config
 
-// SmallSizeLimitDefault is a default limit of small objects payload in bytes.
-const SmallSizeLimitDefault = 1 << 20
+const (
+	// SmallSizeLimitDefault is a default limit of small objects payload in bytes.
+	SmallSizeLimitDefault                   = 1 << 20
+	EstimateCompressibilityThresholdDefault = 0.1
+	RefillMetabaseWorkersCountDefault       = 500
+)
 
 // From wraps config section into Config.
 func From(c *config.Config) *Config {
@@ -109,6 +113,20 @@ func (x *Config) RefillMetabase() bool {
 	)
 }
 
+// RefillMetabaseWorkersCount returns the value of "resync_metabase_worker_count" config parameter.
+//
+// Returns RefillMetabaseWorkersCountDefault if the value is not a positive number.
+func (x *Config) RefillMetabaseWorkersCount() int {
+	v := config.IntSafe(
+		(*config.Config)(x),
+		"resync_metabase_worker_count",
+	)
+	if v > 0 {
+		return int(v)
+	}
+	return RefillMetabaseWorkersCountDefault
+}
+
 // Mode return the value of "mode" config parameter.
 //
 // Panics if read the value is not one of predefined
@@ -96,6 +96,7 @@ FROSTFS_STORAGE_REBUILD_WORKERS_COUNT=1000
 ## 0 shard
 ### Flag to refill Metabase from BlobStor
 FROSTFS_STORAGE_SHARD_0_RESYNC_METABASE=false
+FROSTFS_STORAGE_SHARD_0_RESYNC_METABASE_WORKER_COUNT=100
 ### Flag to set shard mode
 FROSTFS_STORAGE_SHARD_0_MODE=read-only
 ### Write cache config
@@ -142,6 +142,7 @@
       "0": {
         "mode": "read-only",
         "resync_metabase": false,
+        "resync_metabase_worker_count": 100,
         "writecache": {
           "enabled": false,
           "no_sync": true,
@@ -163,6 +163,7 @@ storage:
       # degraded-read-only
       # disabled (do not work with the shard, allows to not remove it from the config)
       resync_metabase: false # sync metabase with blobstor on start, expensive, leave false until complete understanding
+      resync_metabase_worker_count: 100
 
       writecache:
         enabled: false
@@ -177,17 +177,20 @@ Contains configuration for each shard. Keys must be consecutive numbers starting
 `default` subsection has the same format and specifies defaults for missing values.
 The following table describes configuration for each shard.
 
 | Parameter | Type | Default value | Description |
-|-----------|------|---------------|-------------|
+| --------- | ---- | ------------- | ----------- |
 | `compress` | `bool` | `false` | Flag to enable compression. |
 | `compression_exclude_content_types` | `[]string` | | List of content-types to disable compression for. Content-type is taken from `Content-Type` object attribute. Each element can contain a star `*` as a first (last) character, which matches any prefix (suffix). |
+| `compression_estimate_compressibility` | `bool` | `false` | If `true`, then normalized compressibility estimation is used to decide whether to compress data. |
+| `compression_estimate_compressibility_threshold` | `float` | `0.1` | Normalized compressibility estimate threshold: data is compressed if the estimate is greater than this value. |
 | `mode` | `string` | `read-write` | Shard Mode.<br/>Possible values: `read-write`, `read-only`, `degraded`, `degraded-read-only`, `disabled` |
 | `resync_metabase` | `bool` | `false` | Flag to enable metabase resync on start. |
+| `resync_metabase_worker_count` | `int` | `1000` | Count of concurrent workers to resync metabase. |
 | `writecache` | [Writecache config](#writecache-subsection) | | Write-cache configuration. |
 | `metabase` | [Metabase config](#metabase-subsection) | | Metabase configuration. |
 | `blobstor` | [Blobstor config](#blobstor-subsection) | | Blobstor configuration. |
 | `small_object_size` | `size` | `1M` | Maximum size of an object stored in blobovnicza tree. |
 | `gc` | [GC config](#gc-subsection) | | GC configuration. |
 
 ### `blobstor` subsection
go.mod (2 changed lines)
@@ -66,7 +66,7 @@ require (
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect
-	github.com/dgraph-io/badger/v4 v4.1.0
+	github.com/dgraph-io/badger/v4 v4.2.0
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/go-logr/logr v1.2.4 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
go.sum (binary file not shown)
@@ -545,4 +545,11 @@ const (
 	BlobovniczaSavingCountersToMetaSuccess = "saving counters to blobovnicza's meta completed successfully"
 	BlobovniczaSavingCountersToMetaFailed  = "saving counters to blobovnicza's meta failed"
 	ObjectRemovalFailureExistsInWritecache = "can't remove object: object must be flushed from writecache"
+	FailedToParseAddressFromKey            = "failed to parse address from key"
+	FailedToParseOwnerFromKey              = "failed to parse owner from key"
+	FailedToParsePayloadHashFromKey        = "failed to parse payload hash from key"
+	FailedToParseSplitIDFromKey            = "failed to parse splitID from key"
+	FailedToParseAttributeValueFromKey     = "failed to parse attribute value from key"
+	FailedToRunMetabaseGC                  = "failed to run badger GC on metabase"
+	MetabaseCouldNotIterateOverThePrefix   = "could not iterate over the prefix"
 )
@@ -8,6 +8,7 @@ import (
 // AddressWithType groups object address with its FrostFS
 // object type.
 type AddressWithType struct {
 	Address oid.Address
 	Type    objectSDK.Type
+	IsLinkingObject bool
 }
@@ -1,6 +1,7 @@
 package blobovnicza
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"math"
@@ -159,7 +160,7 @@ func (b *Blobovnicza) Iterate(ctx context.Context, prm IteratePrm) (IterateRes,
 		}
 
 		if !prm.withoutData {
-			elem.data = v
+			elem.data = bytes.Clone(v)
 		}
 
 		return prm.handler(elem)
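The bytes.Clone fix matters because the value slice handed to an iteration callback is only valid while the iterator sits on that entry; any data kept past the callback must be copied. A self-contained illustration of the aliasing pitfall, plain Go and not tied to blobovnicza:

	package main

	import (
		"bytes"
		"fmt"
	)

	func main() {
		buf := []byte("first")     // stands in for the iterator's reusable buffer
		aliased := buf             // what `elem.data = v` used to do
		cloned := bytes.Clone(buf) // what the patched code does

		copy(buf, "XXXXX") // the iterator moves on and reuses its buffer

		fmt.Println(string(aliased)) // "XXXXX" — the kept slice was silently corrupted
		fmt.Println(string(cloned))  // "first" — the copy is safe
	}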
@@ -10,7 +10,6 @@ import (
 	"strconv"
 	"sync/atomic"
 	"testing"
-	"time"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
@@ -25,7 +24,6 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger/test"
 	cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
 	"github.com/stretchr/testify/require"
-	"go.etcd.io/bbolt"
 )
 
 // TestInitializationFailure checks that shard is initialized and closed even if media
@@ -57,12 +55,8 @@ func TestInitializationFailure(t *testing.T) {
 		shard.WithBlobStorOptions(
 			blobstor.WithStorages(storages)),
 		shard.WithMetaBaseOptions(
-			meta.WithBoltDBOptions(&bbolt.Options{
-				Timeout:  100 * time.Millisecond,
-				OpenFile: opts.openFileMetabase,
-			}),
 			meta.WithPath(filepath.Join(t.TempDir(), "metabase")),
-			meta.WithPermissions(0700),
+			meta.WithPermissions(0o700),
 			meta.WithEpochState(epochState{})),
 		shard.WithWriteCache(true),
 		shard.WithWriteCacheOptions(wcOpts),
@@ -228,7 +222,6 @@ func TestPersistentShardID(t *testing.T) {
 	require.Equal(t, te.shards[1].id, newTe.shards[0].id)
 	require.Equal(t, te.shards[0].id, newTe.shards[1].id)
 	require.NoError(t, newTe.ng.Close(context.Background()))
-
 }
 
 func TestReload(t *testing.T) {
@@ -299,7 +292,7 @@ func engineWithShards(t *testing.T, path string, num int) (*StorageEngine, []str
 			blobstor.WithStorages(newStorages(filepath.Join(addPath, strconv.Itoa(id)), errSmallSize))),
 		shard.WithMetaBaseOptions(
 			meta.WithPath(filepath.Join(addPath, fmt.Sprintf("%d.metabase", id))),
-			meta.WithPermissions(0o700),
+			meta.WithPermissions(0o700),
 			meta.WithEpochState(epochState{}),
 		),
 	}
@@ -106,7 +106,7 @@ func (e *StorageEngine) putToShard(ctx context.Context, sh hashedShard, ind int,
 	var existPrm shard.ExistsPrm
 	existPrm.SetAddress(addr)
 
-	exists, err := sh.Exists(ctx, existPrm)
+	_, err := sh.Exists(ctx, existPrm)
 	if err != nil {
 		if shard.IsErrObjectExpired(err) {
 			// object is already found but
@@ -117,24 +117,6 @@ func (e *StorageEngine) putToShard(ctx context.Context, sh hashedShard, ind int,
 		return // this is not ErrAlreadyRemoved error so we can go to the next shard
 	}
 
-	alreadyExists = exists.Exists()
-	if alreadyExists {
-		if ind != 0 {
-			var toMoveItPrm shard.ToMoveItPrm
-			toMoveItPrm.SetAddress(addr)
-
-			_, err = sh.ToMoveIt(ctx, toMoveItPrm)
-			if err != nil {
-				e.log.Warn(logs.EngineCouldNotMarkObjectForShardRelocation,
-					zap.Stringer("shard", sh.ID()),
-					zap.String("error", err.Error()),
-				)
-			}
-		}
-
-		return
-	}
-
 	var putPrm shard.PutPrm
 	putPrm.SetObject(obj)
pkg/local_object_storage/metabase/badger.go (new file, 104 lines)
@@ -0,0 +1,104 @@
package meta

import (
	"bytes"
	"context"
	"time"

	"github.com/dgraph-io/badger/v4"
)

const (
	// replace with lock by objectID ?
	retryCount   = 10
	retryTimeout = 5 * time.Millisecond
)

func deleteByPrefix(ctx context.Context, tx *badger.Txn, prefix []byte) error {
	const batchSize = 1000
	for {
		batch, err := selectByPrefixBatch(ctx, tx, prefix, batchSize)
		if err != nil {
			return err
		}
		for _, key := range batch {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			if err := tx.Delete(key); err != nil {
				return err
			}
		}
		if len(batch) < batchSize {
			return nil
		}
	}
}

func selectByPrefixBatch(ctx context.Context, tx *badger.Txn, prefix []byte, batchSize int) ([][]byte, error) {
	it := tx.NewIterator(badger.IteratorOptions{
		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
		Prefix:       prefix,
	})
	defer it.Close()

	var result [][]byte
	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		result = append(result, it.Item().KeyCopy(nil))
		if len(result) == batchSize {
			return result, nil
		}
	}
	return result, nil
}

func selectByPrefixAndSeek(ctx context.Context, tx *badger.Txn, prefix, lastSeen []byte, withValues bool, batchSize int) ([]keyValue, error) {
	opts := badger.IteratorOptions{
		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
		Prefix:       prefix,
	}
	if withValues {
		opts.PrefetchValues = true
	}
	it := tx.NewIterator(opts)
	defer it.Close()

	var result []keyValue
	for it.Seek(lastSeen); it.ValidForPrefix(prefix); it.Next() {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}
		if bytes.Equal(lastSeen, it.Item().Key()) {
			continue
		}
		var current keyValue
		current.Key = it.Item().KeyCopy(nil)
		if withValues {
			var err error
			current.Value, err = it.Item().ValueCopy(nil)
			if err != nil {
				return nil, err
			}
		}
		result = append(result, current)
		if len(result) == batchSize {
			return result, nil
		}
	}
	return result, nil
}

func deleteByKey(tx *badger.Txn, key []byte) error {
	return tx.Delete(key)
}
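A minimal sketch of how these helpers are meant to compose, assuming an opened *badger.DB handle named db and a context.Context named ctx; the prefix byte is a placeholder:

	// Sketch: drop every key under a hypothetical prefix inside one write
	// transaction.
	err := db.Update(func(txn *badger.Txn) error {
		return deleteByPrefix(ctx, txn, []byte{0x01}) // 0x01 is a placeholder prefix byte
	})
	// Note: the helpers bound their scans to batchSize keys per pass, but all
	// deletes still land in a single transaction, so a very large prefix could
	// still hit badger.ErrTxnTooBig.
	_ = err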
pkg/local_object_storage/metabase/bucket.go (new file, 106 lines)
@@ -0,0 +1,106 @@
package meta

import (
	"math"
	"sync"

	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
)

type dbBucketDispatcher struct {
	cond                 *sync.Cond
	containerDispatchers map[cid.ID]*containerBucketDispatcher
}

func newDBBucketDispatcher() *dbBucketDispatcher {
	return &dbBucketDispatcher{
		cond:                 sync.NewCond(&sync.Mutex{}),
		containerDispatchers: make(map[cid.ID]*containerBucketDispatcher),
	}
}

func (d *dbBucketDispatcher) BucketID(id cid.ID) (uint16, func()) {
	d.cond.L.Lock()
	defer d.cond.L.Unlock()

	cd := d.getOrCreateContainerDispatcher(id)
	val, ok := cd.freeBucketID()
	for !ok {
		d.cond.Wait()
		cd = d.getOrCreateContainerDispatcher(id)
		val, ok = cd.freeBucketID()
	}
	return val, func() {
		d.release(id, val)
	}
}

func (d *dbBucketDispatcher) release(id cid.ID, bucketID uint16) {
	d.cond.L.Lock()
	defer d.cond.L.Unlock()

	cd, ok := d.containerDispatchers[id]
	if !ok {
		panic("container bucket ID dispatcher not found")
	}
	notify := cd.full()
	cd.release(bucketID)
	if cd.empty() {
		delete(d.containerDispatchers, id)
	}
	if notify {
		d.cond.Broadcast()
	}
}

func (d *dbBucketDispatcher) getOrCreateContainerDispatcher(id cid.ID) *containerBucketDispatcher {
	existed, found := d.containerDispatchers[id]
	if found {
		return existed
	}
	created := newContainerDispatcher()
	d.containerDispatchers[id] = created
	return created
}

type containerBucketDispatcher struct {
	free  []uint16
	next  uint16
	taken map[uint16]struct{}
}

func newContainerDispatcher() *containerBucketDispatcher {
	return &containerBucketDispatcher{
		taken: make(map[uint16]struct{}),
	}
}

func (d *containerBucketDispatcher) freeBucketID() (uint16, bool) {
	if len(d.free) > 0 {
		idx := len(d.free) - 1
		result := d.free[idx]
		d.free = d.free[:idx]
		d.taken[result] = struct{}{}
		return result, true
	}
	if d.next == math.MaxUint16 {
		return 0, false
	}
	v := d.next
	d.next++
	d.taken[v] = struct{}{}
	return v, true
}

func (d *containerBucketDispatcher) release(bucketID uint16) {
	delete(d.taken, bucketID)
	d.free = append(d.free, bucketID)
}

func (d *containerBucketDispatcher) empty() bool {
	return len(d.taken) == 0
}

func (d *containerBucketDispatcher) full() bool {
	return len(d.free) == 0 && len(d.taken) == math.MaxUint16
}
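For orientation, a sketch of the intended acquire/release cycle; disp and cnr are placeholders for a shared *dbBucketDispatcher and a cid.ID, and containerSizeKey is defined in the containers.go hunks further down:

	// One writer takes a per-container bucket ID for the duration of a write,
	// then returns it. BucketID blocks while the container's ID space is
	// exhausted and wakes up again on release.
	bucketID, release := disp.BucketID(cnr)
	defer release()

	// The ID makes concurrent counters conflict-free, e.g. as part of a key:
	key := containerSizeKey(cnr, bucketID)
	_ = key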
@@ -7,7 +7,7 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )
@@ -38,38 +38,33 @@ func (db *DB) GetChildren(ctx context.Context, addresses []oid.Address) (map[oid
 
 	result := make(map[oid.Address][]oid.Address, len(addresses))
 
-	buffer := make([]byte, bucketKeySize)
-	err := db.boltDB.View(func(tx *bbolt.Tx) error {
+	err := db.database.View(func(tx *badger.Txn) error {
 		for _, addr := range addresses {
 			if _, found := result[addr]; found {
 				continue
 			}
+			const batchSize = 1000
 			result[addr] = []oid.Address{}
-			bkt := tx.Bucket(parentBucketName(addr.Container(), buffer))
-			if bkt == nil {
-				continue
-			}
-
-			binObjIDs, err := decodeList(bkt.Get(objectKey(addr.Object(), buffer)))
-			if err != nil {
-				return err
-			}
-
-			for _, binObjID := range binObjIDs {
-				var id oid.ID
-				if err = id.Decode(binObjID); err != nil {
-					return err
-				}
-				var resultAddress oid.Address
-				resultAddress.SetContainer(addr.Container())
-				resultAddress.SetObject(id)
-				result[addr] = append(result[addr], resultAddress)
-			}
+			for {
+				keys, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), batchSize)
+				if err != nil {
+					return err
+				}
+
+				for _, key := range keys {
+					resultAddress, err := addressOfTargetFromParentKey(key)
+					if err != nil {
+						return err
+					}
+					result[addr] = append(result[addr], resultAddress)
+				}
+				if len(keys) < batchSize {
+					break
+				}
+			}
 		}
 		return nil
 	})
 
 	if err != nil {
 		return nil, metaerr.Wrap(err)
 	}
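The structural shift in this hunk: bbolt kept a parent's children as one encoded value list under the parent's key, while the Badger layout gives every child its own key under a per-parent prefix, so enumeration becomes a paginated prefix scan. A sketch using the PR's own helpers (parentKeyLongPrefix and addressOfTargetFromParentKey are defined elsewhere in this changeset):

	prefix := parentKeyLongPrefix(addr.Container(), addr.Object())
	keys, err := selectByPrefixBatch(ctx, tx, prefix, 1000)
	if err != nil {
		return err
	}
	for _, key := range keys {
		child, err := addressOfTargetFromParentKey(key) // decodes one child address
		if err != nil {
			return err
		}
		_ = child // collect or process the child address
	}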
@@ -3,12 +3,20 @@ package meta
 import (
 	"context"
 	"encoding/binary"
+	"fmt"
 	"time"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
+)
+
+const (
+	containerSizeKeySize    = 1 + cidSize + 2
+	containerSizePrefixSize = 1 + cidSize
 )
 
 func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
@@ -30,8 +38,8 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
 		return nil, ErrDegradedMode
 	}
 
-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		list, err = db.containers(tx)
+	err = db.database.View(func(tx *badger.Txn) error {
+		list, err = containers(tx)
 		return err
 	})
@@ -39,24 +47,28 @@ func (db *DB) Containers(ctx context.Context) (list []cid.ID, err error) {
 	return list, metaerr.Wrap(err)
 }
 
-func (db *DB) containers(tx *bbolt.Tx) ([]cid.ID, error) {
+func containers(tx *badger.Txn) ([]cid.ID, error) {
 	result := make([]cid.ID, 0)
 	unique := make(map[string]struct{})
 	var cnr cid.ID
 
-	err := tx.ForEach(func(name []byte, _ *bbolt.Bucket) error {
-		if parseContainerID(&cnr, name, unique) {
-			result = append(result, cnr)
-			unique[string(name[1:bucketKeySize])] = struct{}{}
-		}
-
-		return nil
-	})
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+	})
+	defer it.Close()
 
-	return result, err
+	for it.Seek(nil); it.Valid(); it.Next() {
+		name := it.Item().Key()
+		if parseContainerIDWithIgnore(&cnr, name, unique) {
+			result = append(result, cnr)
+			unique[string(name[1:containerSizePrefixSize])] = struct{}{}
+		}
+	}
+
+	return result, nil
 }
 
-func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) {
+func (db *DB) ContainerSize(ctx context.Context, id cid.ID) (size uint64, err error) {
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()
@@ -64,58 +76,174 @@ func (db *DB) ContainerSize(id cid.ID) (size uint64, err error) {
 		return 0, ErrDegradedMode
 	}
 
-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		size, err = db.containerSize(tx, id)
-
-		return err
-	})
-
-	if err != nil {
-		return size, metaerr.Wrap(err)
-	}
-
-func (db *DB) containerSize(tx *bbolt.Tx, id cid.ID) (uint64, error) {
-	containerVolume := tx.Bucket(containerVolumeBucketName)
-	key := make([]byte, cidSize)
-	id.Encode(key)
-
-	return parseContainerSize(containerVolume.Get(key)), nil
-}
-
-func parseContainerID(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
-	if len(name) != bucketKeySize {
-		return false
-	}
-	if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
-		return false
-	}
-	return dst.Decode(name[1:bucketKeySize]) == nil
-}
-
-func parseContainerSize(v []byte) uint64 {
-	if len(v) == 0 {
-		return 0
-	}
-	return binary.LittleEndian.Uint64(v)
-}
-
-func changeContainerSize(tx *bbolt.Tx, id cid.ID, delta uint64, increase bool) error {
-	containerVolume := tx.Bucket(containerVolumeBucketName)
-	key := make([]byte, cidSize)
-	id.Encode(key)
-
-	size := parseContainerSize(containerVolume.Get(key))
-
-	if increase {
-		size += delta
-	} else if size > delta {
-		size -= delta
-	} else {
-		size = 0
-	}
-
-	buf := make([]byte, 8) // consider using sync.Pool to decrease allocations
-	binary.LittleEndian.PutUint64(buf, size)
-
-	return containerVolume.Put(key, buf)
-}
+	result, err := db.containerSizesInternal(ctx, &id)
+	if err != nil {
+		return 0, metaerr.Wrap(err)
+	}
+	return result[id], nil
+}
+
+func (db *DB) ContainerSizes(ctx context.Context) (map[cid.ID]uint64, error) {
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return nil, ErrDegradedMode
+	}
+
+	return db.containerSizesInternal(ctx, nil)
+}
+
+// ZeroSizeContainers returns containers with size = 0.
+func (db *DB) ZeroSizeContainers(ctx context.Context) ([]cid.ID, error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("ZeroSizeContainers", time.Since(startedAt), success)
+	}()
+
+	ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroSizeContainers")
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	sizes, err := db.containerSizesInternal(ctx, nil)
+	if err != nil {
+		return nil, err
+	}
+	var result []cid.ID
+	for id, size := range sizes {
+		if size == 0 {
+			result = append(result, id)
+		}
+	}
+	return result, nil
+}
+
+func (db *DB) DeleteContainerSize(ctx context.Context, id cid.ID) error {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("DeleteContainerSize", time.Since(startedAt), success)
+	}()
+
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerSize",
+		trace.WithAttributes(
+			attribute.Stringer("container_id", id),
+		))
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return ErrDegradedMode
+	}
+
+	if db.mode.ReadOnly() {
+		return ErrReadOnlyMode
+	}
+
+	return metaerr.Wrap(db.database.Update(
+		func(txn *badger.Txn) error {
+			return deleteByPrefix(ctx, txn, containerSizeKeyPrefix(id))
+		}))
+}
+
+func (db *DB) containerSizesInternal(ctx context.Context, id *cid.ID) (map[cid.ID]uint64, error) {
+	prefix := []byte{containerSizePrefix}
+	if id != nil {
+		prefix = containerSizeKeyPrefix(*id)
+	}
+	result := make(map[cid.ID]int64)
+	err := db.database.View(func(tx *badger.Txn) error {
+		it := tx.NewIterator(badger.IteratorOptions{
+			PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+			Prefix:         prefix,
+			PrefetchValues: true,
+		})
+		defer it.Close()
+
+		for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			default:
+			}
+
+			key := it.Item().Key()
+			var cnr cid.ID
+			if err := cnr.Decode(key[1:containerSizePrefixSize]); err != nil {
+				return fmt.Errorf("invalid container size key: %w", err)
+			}
+
+			if err := it.Item().Value(func(val []byte) error {
+				value, ok := parseInt64Value(val)
+				if !ok {
+					return fmt.Errorf("invalid container size value for container %s", cnr)
+				}
+				result[cnr] += value
+				return nil
+			}); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, metaerr.Wrap(err)
+	}
+
+	return normilizeContainerSizes(result)
+}
+
+func normilizeContainerSizes(sizes map[cid.ID]int64) (map[cid.ID]uint64, error) {
+	result := make(map[cid.ID]uint64, len(sizes))
+	for k, v := range sizes {
+		if v < 0 {
+			return nil, fmt.Errorf("invalid cumulative size for container %s", k)
+		}
+		result[k] = uint64(v)
+	}
+	return result, nil
+}
+
+func changeContainerSize(tx *badger.Txn, id cid.ID, delta int64, bucketID uint16) error {
+	key := containerSizeKey(id, bucketID)
+
+	v, err := valueOrNil(tx, key)
+	if err != nil {
+		return err
+	}
+
+	size, ok := parseInt64Value(v)
+	if !ok {
+		return fmt.Errorf("invalid container size value for container %s", id)
+	}
+
+	size += delta
+	value := marshalInt64(size)
+	return tx.Set(key, value)
+}
+
+// containerSizeKeyPrefix returns containerSizePrefix_CID key prefix.
+func containerSizeKeyPrefix(cnr cid.ID) []byte {
+	result := make([]byte, containerSizePrefixSize)
+	result[0] = containerSizePrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// containerSizeKey returns containerVolumePrefix_CID_bucketID key.
+func containerSizeKey(cnr cid.ID, bucketID uint16) []byte {
+	result := make([]byte, containerSizeKeySize)
+	result[0] = containerSizePrefix
+	cnr.Encode(result[1:])
+	binary.LittleEndian.PutUint16(result[containerSizePrefixSize:], bucketID)
+	return result
+}
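A worked sketch of the key layout these helpers produce, assuming cidSize is 32 (a FrostFS container ID encodes to 32 bytes); the 2-byte bucket ID slot is what lets concurrent writers keep independent size deltas:

	// containerSizeKey(cnr, bucketID) lays out 1 + 32 + 2 = 35 bytes:
	//
	//   [0]     containerSizePrefix     (table prefix byte)
	//   [1:33]  container ID            (cnr.Encode)
	//   [33:35] bucketID, little-endian (per-writer slot from the dispatcher)
	//
	// containerSizeKeyPrefix(cnr) is the first 33 bytes, so summing all values
	// under it — as containerSizesInternal does — totals every writer's delta.
	key := containerSizeKey(cnr, bucketID)
	prefix := containerSizeKeyPrefix(cnr)
	fmt.Println(len(key), len(prefix))        // 35 33, assuming cidSize == 32
	fmt.Println(bytes.HasPrefix(key, prefix)) // true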
@@ -3,7 +3,6 @@ package meta_test
 import (
 	"context"
 	"math/rand"
-	"sort"
 	"testing"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
@@ -19,6 +18,7 @@ func TestDB_Containers(t *testing.T) {
 	t.Parallel()
 
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	const N = 10
 
@@ -73,29 +73,13 @@ func TestDB_Containers(t *testing.T) {
 		require.NoError(t, err)
 		assertContains(cnrs, cnr)
 	})
-
-	t.Run("ToMoveIt", func(t *testing.T) {
-		obj := testutil.GenerateObject()
-
-		require.NoError(t, putBig(db, obj))
-
-		cnrs, err := db.Containers(context.Background())
-		require.NoError(t, err)
-		cnr, _ := obj.ContainerID()
-		assertContains(cnrs, cnr)
-
-		require.NoError(t, metaToMoveIt(db, object.AddressOf(obj)))
-
-		cnrs, err = db.Containers(context.Background())
-		require.NoError(t, err)
-		assertContains(cnrs, cnr)
-	})
 }
 
 func TestDB_ContainersCount(t *testing.T) {
 	t.Parallel()
 
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	const R, T, SG, L = 10, 11, 12, 13 // amount of object per type
 
@@ -123,24 +107,16 @@ func TestDB_ContainersCount(t *testing.T) {
 		}
 	}
 
-	sort.Slice(expected, func(i, j int) bool {
-		return expected[i].EncodeToString() < expected[j].EncodeToString()
-	})
-
 	got, err := db.Containers(context.Background())
 	require.NoError(t, err)
-
-	sort.Slice(got, func(i, j int) bool {
-		return got[i].EncodeToString() < got[j].EncodeToString()
-	})
-
-	require.Equal(t, expected, got)
+	require.ElementsMatch(t, expected, got)
 }
 
 func TestDB_ContainerSize(t *testing.T) {
 	t.Parallel()
 
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	const (
 		C = 3
@@ -175,7 +151,7 @@ func TestDB_ContainerSize(t *testing.T) {
 	}
 
 	for cnr, volume := range cids {
-		n, err := db.ContainerSize(cnr)
+		n, err := db.ContainerSize(context.Background(), cnr)
 		require.NoError(t, err)
 		require.Equal(t, volume, int(n))
 	}
@@ -193,7 +169,7 @@ func TestDB_ContainerSize(t *testing.T) {
 
 		volume -= int(obj.PayloadSize())
 
-		n, err := db.ContainerSize(cnr)
+		n, err := db.ContainerSize(context.Background(), cnr)
 		require.NoError(t, err)
 		require.Equal(t, volume, int(n))
 	}
@ -2,16 +2,18 @@ package meta
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util"
|
||||||
"go.etcd.io/bbolt"
|
"github.com/dgraph-io/badger/v4"
|
||||||
|
"github.com/dgraph-io/badger/v4/options"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -23,57 +25,105 @@ var ErrReadOnlyMode = logicerr.New("metabase is in a read-only mode")
|
||||||
|
|
||||||
// Open boltDB instance for metabase.
|
// Open boltDB instance for metabase.
|
||||||
func (db *DB) Open(_ context.Context, readOnly bool) error {
|
func (db *DB) Open(_ context.Context, readOnly bool) error {
|
||||||
err := util.MkdirAllX(filepath.Dir(db.info.Path), db.info.Permission)
|
m := mode.ReadWrite
|
||||||
|
if readOnly {
|
||||||
|
m = mode.ReadOnly
|
||||||
|
}
|
||||||
|
db.modeMtx.Lock()
|
||||||
|
defer db.modeMtx.Unlock()
|
||||||
|
db.mode = m
|
||||||
|
db.metrics.SetMode(m)
|
||||||
|
|
||||||
|
if m.NoMetabase() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return db.openDB(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (db *DB) openDB(mode mode.Mode) error {
|
||||||
|
err := util.MkdirAllX(db.info.Path, db.info.Permission)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err)
|
return fmt.Errorf("can't create dir %s for metabase: %w", db.info.Path, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path))
|
db.log.Debug(logs.MetabaseCreatedDirectoryForMetabase, zap.String("path", db.info.Path))
|
||||||
|
|
||||||
if db.boltOptions == nil {
|
opts := badgerOptions(db.info.Path, mode.ReadOnly())
|
||||||
opts := *bbolt.DefaultOptions
|
|
||||||
db.boltOptions = &opts
|
|
||||||
}
|
|
||||||
db.boltOptions.ReadOnly = readOnly
|
|
||||||
|
|
||||||
return metaerr.Wrap(db.openBolt())
|
if db.cfg.noSync {
|
||||||
|
opts.SyncWrites = false
|
||||||
|
db.log.Warn("metabase open with no_sync=true", zap.String("path", db.info.Path))
|
||||||
|
}
|
||||||
|
|
||||||
|
return metaerr.Wrap(db.openBadger(opts))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (db *DB) openBolt() error {
|
func badgerOptions(path string, readOnly bool) badger.Options {
|
||||||
|
opts := badger.DefaultOptions(path)
|
||||||
|
|
||||||
|
opts.BlockCacheSize = 0 // compression and encryption are disabled, so block cache should be disabled
|
||||||
|
opts.IndexCacheSize = 256 << 20 // 256MB, to not to keep all indicies in memory
|
||||||
|
opts.Compression = options.None // no need to compress metabase values
|
||||||
|
opts.Logger = nil
|
||||||
|
opts.MetricsEnabled = false
|
||||||
|
opts.NumLevelZeroTablesStall = math.MaxInt // to not to stall because of Level0 slow compaction
|
||||||
|
opts.NumMemtables = 32 // default memtable size is 64MB, so max memory consumption will be 2GB before stall
|
||||||
|
opts.NumCompactors = 64
|
||||||
|
opts.SyncWrites = true
|
||||||
|
opts.ValueLogMaxEntries = math.MaxUint32 // default vLog file size is 1GB, so size is more clear than entries count
|
||||||
|
opts.ValueThreshold = 512
|
||||||
|
opts.LmaxCompaction = true
|
||||||
|
opts.ReadOnly = readOnly
|
||||||
|
|
||||||
|
return opts
|
||||||
|
}
|
||||||
|
|
||||||
|
func (db *DB) openBadger(opts badger.Options) error {
|
||||||
var err error
|
var err error
|
||||||
|
db.database, err = badger.Open(opts)
|
||||||
db.boltDB, err = bbolt.Open(db.info.Path, db.info.Permission, db.boltOptions)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("can't open boltDB database: %w", err)
|
return fmt.Errorf("can't open badger database: %w", err)
|
||||||
}
|
}
|
||||||
db.boltDB.MaxBatchDelay = db.boltBatchDelay
|
|
||||||
db.boltDB.MaxBatchSize = db.boltBatchSize
|
|
||||||
|
|
||||||
db.log.Debug(logs.MetabaseOpenedBoltDBInstanceForMetabase)
|
if db.closed != nil {
|
||||||
|
close(db.closed)
|
||||||
|
db.wg.Wait()
|
||||||
|
db.closed = nil
|
||||||
|
}
|
||||||
|
|
||||||
db.log.Debug(logs.MetabaseCheckingMetabaseVersion)
|
db.closed = make(chan struct{})
|
||||||
return db.boltDB.View(func(tx *bbolt.Tx) error {
|
db.wg.Add(1)
|
||||||
// The safest way to check if the metabase is fresh is to check if it has no buckets.
|
go db.collectGC()
|
||||||
// However, shard info can be present. So here we check that the number of buckets is
|
|
||||||
// at most 1.
|
|
||||||
// Another thing to consider is that tests do not persist shard ID, we want to support
|
|
||||||
// this case too.
|
|
||||||
var n int
|
|
||||||
err := tx.ForEach(func([]byte, *bbolt.Bucket) error {
|
|
||||||
if n++; n >= 2 { // do not iterate a lot
|
|
||||||
return errBreakBucketForEach
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
|
|
||||||
if err == errBreakBucketForEach {
|
return db.database.View(func(txn *badger.Txn) error {
|
||||||
db.initialized = true
|
data, err := valueOrNil(txn, shardInfoKey(versionKey))
|
||||||
err = nil
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
return err
|
db.initialized = len(data) > 0
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (db *DB) collectGC() {
|
||||||
|
defer db.wg.Done()
|
||||||
|
|
||||||
|
timer := time.NewTicker(10 * time.Minute)
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-db.closed:
|
||||||
|
return
|
||||||
|
case <-timer.C:
|
||||||
|
if err := db.database.RunValueLogGC(0.5); err == nil {
|
||||||
|
_ = db.database.RunValueLogGC(0.5) // see https://dgraph.io/docs/badger/get-started/#garbage-collection
|
||||||
|
} else {
|
||||||
|
db.log.Error(logs.FailedToRunMetabaseGC, zap.Error(err), zap.String("path", db.info.Path))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Init initializes metabase. It creates static (CID-independent) buckets in underlying BoltDB instance.
|
// Init initializes metabase. It creates static (CID-independent) buckets in underlying BoltDB instance.
|
||||||
//
|
//
|
||||||
// Returns ErrOutdatedVersion if a database at the provided path is outdated.
|
// Returns ErrOutdatedVersion if a database at the provided path is outdated.
|
||||||
|
@ -81,85 +131,61 @@ func (db *DB) openBolt() error {
|
||||||
// Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state,
|
// Does nothing if metabase has already been initialized and filled. To roll back the database to its initial state,
|
||||||
// use Reset.
|
// use Reset.
|
||||||
func (db *DB) Init() error {
|
func (db *DB) Init() error {
|
||||||
return metaerr.Wrap(db.init(false))
|
db.modeMtx.Lock()
|
||||||
|
defer db.modeMtx.Unlock()
|
||||||
|
|
||||||
|
return metaerr.Wrap(db.init(context.TODO(), false))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (db *DB) Init2(ctx context.Context) error {
|
||||||
|
db.modeMtx.Lock()
|
||||||
|
defer db.modeMtx.Unlock()
|
||||||
|
|
||||||
|
return metaerr.Wrap(db.init(ctx, false))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset resets metabase. Works similar to Init but cleans up all static buckets and
|
// Reset resets metabase. Works similar to Init but cleans up all static buckets and
|
||||||
// removes all dynamic (CID-dependent) ones in non-blank BoltDB instances.
|
// removes all dynamic (CID-dependent) ones in non-blank BoltDB instances.
|
||||||
func (db *DB) Reset() error {
|
func (db *DB) Reset(ctx context.Context) error {
|
||||||
db.modeMtx.RLock()
|
db.modeMtx.Lock()
|
||||||
defer db.modeMtx.RUnlock()
|
defer db.modeMtx.Unlock()
|
||||||
|
|
||||||
if db.mode.NoMetabase() {
|
if db.mode.NoMetabase() {
|
||||||
return ErrDegradedMode
|
return ErrDegradedMode
|
||||||
}
|
}
|
||||||
|
|
||||||
return metaerr.Wrap(db.init(true))
|
return metaerr.Wrap(db.init(ctx, true))
|
||||||
}
|
}
|
||||||
|
|
||||||
-func (db *DB) init(reset bool) error {
+func (db *DB) init(ctx context.Context, reset bool) error {
 	if db.mode.NoMetabase() || db.mode.ReadOnly() {
 		return nil
 	}

-	mStaticBuckets := map[string]struct{}{
-		string(containerVolumeBucketName): {},
-		string(graveyardBucketName):       {},
-		string(toMoveItBucketName):        {},
-		string(garbageBucketName):         {},
-		string(shardInfoBucket):           {},
-		string(bucketNameLocked):          {},
-	}
-
-	return db.boltDB.Update(func(tx *bbolt.Tx) error {
-		var err error
-		if !reset {
-			// Normal open, check version and update if not initialized.
-			err := checkVersion(tx, db.initialized)
-			if err != nil {
-				return err
-			}
-		}
-		for k := range mStaticBuckets {
-			name := []byte(k)
-			if reset {
-				err := tx.DeleteBucket(name)
-				if err != nil && !errors.Is(err, bbolt.ErrBucketNotFound) {
-					return fmt.Errorf("could not delete static bucket %s: %w", k, err)
-				}
-			}
-
-			_, err := tx.CreateBucketIfNotExists(name)
-			if err != nil {
-				return fmt.Errorf("could not create static bucket %s: %w", k, err)
-			}
-		}
-
-		if !reset {
-			err = syncCounter(tx, false)
-			if err != nil {
-				return fmt.Errorf("could not sync object counter: %w", err)
-			}
-
-			return nil
-		}
-
-		err = tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
-			if _, ok := mStaticBuckets[string(name)]; !ok {
-				return tx.DeleteBucket(name)
-			}
-
-			return nil
-		})
+	if reset {
+		if err := db.database.DropAll(); err != nil {
+			return err
+		}
+		return db.database.Update(func(tx *badger.Txn) error {
+			return updateVersion(tx, version)
+		})
+	}
+
+	return db.database.Update(func(tx *badger.Txn) error {
+		err := checkVersion(tx, db.initialized)
 		if err != nil {
 			return err
 		}
-		return updateVersion(tx, version)
+		err = syncCounter(ctx, tx, false)
+		if err != nil {
+			return fmt.Errorf("could not sync object counter: %w", err)
+		}
+
+		return nil
 	})
 }
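
Note on the new reset path: with bbolt the reset had to delete and recreate each static bucket, while Badger has no buckets, so the whole keyspace is dropped with DropAll and only the version record is re-stamped. A minimal sketch of that idiom; versionKey and encodeVersion below are illustrative stand-ins, not identifiers from this PR:

package sketch

import (
	"encoding/binary"

	"github.com/dgraph-io/badger/v4"
)

// versionKey is a hypothetical key layout for the schema-version record.
var versionKey = []byte{0xFF, 'v'}

func encodeVersion(v uint64) []byte {
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, v)
	return buf
}

// reset wipes the whole keyspace and re-stamps the schema version.
// DropAll must not race with open transactions.
func reset(db *badger.DB, version uint64) error {
	if err := db.DropAll(); err != nil {
		return err
	}
	return db.Update(func(txn *badger.Txn) error {
		return txn.Set(versionKey, encodeVersion(version))
	})
}
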
 // SyncCounters forces to synchronize the object counters.
-func (db *DB) SyncCounters() error {
+func (db *DB) SyncCounters(ctx context.Context) error {
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()

@@ -169,16 +195,30 @@ func (db *DB) SyncCounters() error {
 		return ErrReadOnlyMode
 	}

-	return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
-		return syncCounter(tx, true)
+	return metaerr.Wrap(db.database.Update(func(tx *badger.Txn) error {
+		return syncCounter(ctx, tx, true)
 	}))
 }
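
SyncCounters now takes a context because the rebuild iterates the whole object space and should be cancellable. A caller-side sketch, assuming an already-opened *meta.DB:

package sketch

import (
	"context"
	"time"

	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
)

// resyncCounters rebuilds the object counters, e.g. after a manual refill.
// The timeout bounds the full-database iteration.
func resyncCounters(ctx context.Context, db *meta.DB) error {
	ctx, cancel := context.WithTimeout(ctx, time.Minute)
	defer cancel()
	return db.SyncCounters(ctx)
}
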
-// Close closes boltDB instance.
+// Close closes the underlying database instance
+// and reports the metabase metric.
 func (db *DB) Close() error {
+	db.modeMtx.Lock()
+	defer db.modeMtx.Unlock()
+
+	return db.close()
+}
+
+func (db *DB) close() error {
+	if db.closed != nil {
+		close(db.closed)
+		db.wg.Wait()
+		db.closed = nil
+	}
+
 	var err error
-	if db.boltDB != nil {
-		err = metaerr.Wrap(db.boltDB.Close())
+	if db.database != nil {
+		err = metaerr.Wrap(db.database.Close())
 	}
 	if err == nil {
 		db.metrics.Close()
@@ -202,14 +242,19 @@ func (db *DB) Reload(opts ...Option) (bool, error) {
 	defer db.modeMtx.Unlock()

 	if db.mode.NoMetabase() || c.info.Path != "" && filepath.Clean(db.info.Path) != filepath.Clean(c.info.Path) {
-		if err := db.Close(); err != nil {
+		if err := db.close(); err != nil {
 			return false, err
 		}

 		db.mode = mode.Degraded
 		db.metrics.SetMode(mode.Degraded)
 		db.info.Path = c.info.Path
-		if err := db.openBolt(); err != nil {
+		opts := badgerOptions(db.info.Path, false)
+		if c.noSync {
+			opts.SyncWrites = false
+			db.log.Warn("metabase open with no_sync=true", zap.String("path", db.info.Path))
+		}
+		if err := db.openBadger(opts); err != nil {
 			return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err))
 		}
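
The no_sync branch maps the shard-level setting onto Badger's SyncWrites option: writes are then acknowledged before fsync, so a crash can lose the most recently acknowledged updates. Independent of the PR's badgerOptions helper, the underlying knob looks like this:

package sketch

import "github.com/dgraph-io/badger/v4"

// openNoSync opens Badger with synchronous writes disabled.
// The path is illustrative; SyncWrites is the point here.
func openNoSync(path string) (*badger.DB, error) {
	opts := badger.DefaultOptions(path)
	// Durability is traded for write throughput: a power failure
	// can lose recently acknowledged updates.
	opts.SyncWrites = false
	return badger.Open(opts)
}
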
@@ -15,8 +15,9 @@ import (

 func TestReset(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

-	err := db.Reset()
+	err := db.Reset(context.Background())
 	require.NoError(t, err)

 	obj := testutil.GenerateObject()

@@ -46,7 +47,7 @@ func TestReset(t *testing.T) {
 	assertExists(addr, true, nil)
 	assertExists(addrToInhume, false, client.IsErrObjectAlreadyRemoved)

-	err = db.Reset()
+	err = db.Reset(context.Background())
 	require.NoError(t, err)

 	assertExists(addr, false, nil)
@@ -1,41 +1,50 @@
 package meta

 import (
+	"context"
 	"encoding/binary"
+	"errors"
 	"fmt"
+	"time"

 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
+	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
+	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 )

-var objectPhyCounterKey = []byte("phy_counter")
-var objectLogicCounterKey = []byte("logic_counter")
-
-type objectType uint8
-
-const (
-	_ objectType = iota
-	phy
-	logical
-)
+var (
+	errInvalidKeyLenght        = errors.New("invalid key length")
+	errInvalidKeyPrefix        = errors.New("invalid key prefix")
+	errInvalidValueLenght      = errors.New("invalid value length")
+	errInvalidContainerIDValue = errors.New("invalid container ID value")
+)
+
+const (
+	containerObjectCountKeySize    = 1 + cidSize + 2
+	containerObjectCountPrefixSize = 1 + cidSize
+)

 // ObjectCounters groups object counter
 // according to metabase state.
 type ObjectCounters struct {
-	logic uint64
-	phy   uint64
+	Logic uint64
+	Phy   uint64
+	User  uint64
 }

-// Logic returns logical object counter.
-func (o ObjectCounters) Logic() uint64 {
-	return o.logic
-}
-
-// Phy returns physical object counter.
-func (o ObjectCounters) Phy() uint64 {
-	return o.phy
-}
+func (o ObjectCounters) IsZero() bool {
+	return o.Phy == 0 && o.Logic == 0 && o.User == 0
+}
+
+type objectCounterValue struct {
+	Logic int64
+	Phy   int64
+	User  int64
+}

 // ObjectCounters returns object counters that metabase has
@@ -43,7 +52,7 @@ func (o ObjectCounters) Phy() uint64 {
 //
 // Returns only the errors that do not allow reading counter
 // in Bolt database.
-func (db *DB) ObjectCounters() (cc ObjectCounters, err error) {
+func (db *DB) ObjectCounters(ctx context.Context) (ObjectCounters, error) {
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()

@@ -51,63 +60,140 @@ func (db *DB) ObjectCounters() (cc ObjectCounters, err error) {
 		return ObjectCounters{}, ErrDegradedMode
 	}

-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		b := tx.Bucket(shardInfoBucket)
-		if b != nil {
-			data := b.Get(objectPhyCounterKey)
-			if len(data) == 8 {
-				cc.phy = binary.LittleEndian.Uint64(data)
-			}
-
-			data = b.Get(objectLogicCounterKey)
-			if len(data) == 8 {
-				cc.logic = binary.LittleEndian.Uint64(data)
-			}
-		}
-
-		return nil
-	})
-
-	return cc, metaerr.Wrap(err)
+	var cc map[cid.ID]ObjectCounters
+	err := db.database.View(func(tx *badger.Txn) error {
+		var err error
+		cc, err = containerObjectCounters(ctx, tx, nil)
+		return err
+	})
+	if err != nil {
+		return ObjectCounters{}, metaerr.Wrap(err)
+	}
+	var result ObjectCounters
+	for _, v := range cc {
+		result.Logic += v.Logic
+		result.Phy += v.Phy
+		result.User += v.User
+	}
+	return result, nil
 }
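
Shard-wide totals are no longer stored as two dedicated keys; they are derived by summing the per-container records in one prefix scan. A caller-side sketch, assuming an opened *meta.DB:

package sketch

import (
	"context"
	"fmt"

	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
)

// logTotals prints shard-wide counters; for a consistent metabase
// Phy >= Logic >= User should always hold.
func logTotals(ctx context.Context, db *meta.DB) error {
	c, err := db.ObjectCounters(ctx)
	if err != nil {
		return err
	}
	fmt.Printf("phy=%d logic=%d user=%d\n", c.Phy, c.Logic, c.User)
	return nil
}
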
-// updateCounter updates the object counter. Tx MUST be writable.
-// If inc == `true`, increases the counter, decreases otherwise.
-func (db *DB) updateCounter(tx *bbolt.Tx, typ objectType, delta uint64, inc bool) error {
-	b := tx.Bucket(shardInfoBucket)
-	if b == nil {
-		return nil
-	}
-
-	var counter uint64
-	var counterKey []byte
-
-	switch typ {
-	case phy:
-		counterKey = objectPhyCounterKey
-	case logical:
-		counterKey = objectLogicCounterKey
-	default:
-		panic("unknown object type counter")
-	}
-
-	data := b.Get(counterKey)
-	if len(data) == 8 {
-		counter = binary.LittleEndian.Uint64(data)
-	}
-
-	if inc {
-		counter += delta
-	} else if counter <= delta {
-		counter = 0
-	} else {
-		counter -= delta
-	}
-
-	newCounter := make([]byte, 8)
-	binary.LittleEndian.PutUint64(newCounter, counter)
-
-	return b.Put(counterKey, newCounter)
-}
+type ContainerCounters struct {
+	Counts map[cid.ID]ObjectCounters
+}
+
+// ContainerCounters returns object counters for each container
+// that metabase has tracked since it was opened and initialized.
+//
+// Returns only the errors that do not allow reading counters
+// from the underlying database.
+//
+// It is guaranteed that the ContainerCounters fields are not nil.
+func (db *DB) ContainerCounters(ctx context.Context) (ContainerCounters, error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("ContainerCounters", time.Since(startedAt), success)
+	}()
+
+	ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ContainerCounters")
+	defer span.End()
+
+	cc := ContainerCounters{
+		Counts: make(map[cid.ID]ObjectCounters),
+	}
+	err := db.database.View(func(tx *badger.Txn) error {
+		var err error
+		cc.Counts, err = containerObjectCounters(ctx, tx, nil)
+		return err
+	})
+	if err != nil {
+		return ContainerCounters{}, metaerr.Wrap(err)
+	}
+	success = true
+	return cc, nil
+}
+
+func (db *DB) ContainerCount(ctx context.Context, id cid.ID) (ObjectCounters, error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("ContainerCount", time.Since(startedAt), success)
+	}()
+
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.ContainerCount")
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return ObjectCounters{}, ErrDegradedMode
+	}
+
+	var cc map[cid.ID]ObjectCounters
+	err := db.database.View(func(tx *badger.Txn) error {
+		var err error
+		cc, err = containerObjectCounters(ctx, tx, &id)
+		return err
+	})
+	if err != nil {
+		return ObjectCounters{}, metaerr.Wrap(err)
+	}
+	return cc[id], nil
+}
+
+func containerCounterKey(cnrID cid.ID, bucketID uint16) []byte {
+	result := make([]byte, containerObjectCountKeySize)
+	result[0] = containerCountersPrefix
+	cnrID.Encode(result[1:])
+	binary.LittleEndian.PutUint16(result[containerObjectCountPrefixSize:], bucketID)
+	return result
+}
+
+func incCounters(tx *badger.Txn, cnrID cid.ID, isUserObject bool, bucketID uint16) error {
+	delta := objectCounterValue{
+		Logic: 1,
+		Phy:   1,
+	}
+	if isUserObject {
+		delta.User = 1
+	}
+	return editContainerCounterValue(tx, cnrID, delta, bucketID)
+}
+
+func updateContainerCounter(tx *badger.Txn, delta map[cid.ID]objectCounterValue, bucketIDs map[cid.ID]uint16) error {
+	for cnrID, cnrDelta := range delta {
+		bucketID, found := bucketIDs[cnrID]
+		if !found {
+			return fmt.Errorf("bucket ID not found for container %s", cnrID)
+		}
+		if err := editContainerCounterValue(tx, cnrID, cnrDelta, bucketID); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func editContainerCounterValue(tx *badger.Txn, cnrID cid.ID, delta objectCounterValue, bucketID uint16) error {
+	key := containerCounterKey(cnrID, bucketID)
+	val, err := valueOrNil(tx, key)
+	if err != nil {
+		return err
+	}
+	setValue := delta
+	if val != nil {
+		existed, err := parseContainerCounterValue(val)
+		if err != nil {
+			return err
+		}
+		setValue = mergeObjectCounterValues(setValue, existed)
+	}
+	return tx.Set(key, marshalContainerCounterValue(setValue))
+}

 // syncCounter updates object counters according to metabase state:

@@ -116,58 +202,267 @@ func (db *DB) updateCounter(tx *bbolt.Tx, typ objectType, delta uint64, inc bool
 //
 // Does nothing if counters are not empty and force is false. If force is
 // true, updates the counters anyway.
-func syncCounter(tx *bbolt.Tx, force bool) error {
-	b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
-	if err != nil {
-		return fmt.Errorf("could not get shard info bucket: %w", err)
-	}
-
-	if !force && len(b.Get(objectPhyCounterKey)) == 8 && len(b.Get(objectLogicCounterKey)) == 8 {
-		// the counters are already inited
+func syncCounter(ctx context.Context, tx *badger.Txn, force bool) error {
+	if !force && containerObjectCounterInitialized(ctx, tx) {
 		return nil
 	}

+	// drop existing counters
+	err := deleteByPrefix(ctx, tx, []byte{containerCountersPrefix})
+	if err != nil {
+		return err
+	}
+
+	counters, err := getActualObjectCounters(tx)
+	if err != nil {
+		return err
+	}
+
+	return setObjectCounters(tx, counters)
+}
+
+func getActualObjectCounters(tx *badger.Txn) (map[cid.ID]ObjectCounters, error) {
 	var addr oid.Address
-	var phyCounter uint64
-	var logicCounter uint64
+	var isAvailable bool
+	counters := make(map[cid.ID]ObjectCounters)

-	graveyardBKT := tx.Bucket(graveyardBucketName)
-	garbageBKT := tx.Bucket(garbageBucketName)
-	key := make([]byte, addressKeySize)
-
-	err = iteratePhyObjects(tx, func(cnr cid.ID, obj oid.ID) error {
-		phyCounter++
+	err := iteratePhyObjects(tx, func(cnr cid.ID, objID oid.ID, obj *objectSDK.Object) error {
+		if v, ok := counters[cnr]; ok {
+			v.Phy++
+			counters[cnr] = v
+		} else {
+			counters[cnr] = ObjectCounters{
+				Phy: 1,
+			}
+		}

 		addr.SetContainer(cnr)
-		addr.SetObject(obj)
+		addr.SetObject(objID)
+		isAvailable = false

-		// check if an object is available: not with GCMark
-		// and not covered with a tombstone
-		if inGraveyardWithKey(addressKey(addr, key), graveyardBKT, garbageBKT) == 0 {
-			logicCounter++
+		st, err := inGraveyardWithKey(tx, addr)
+		if err != nil {
+			return err
+		}
+
+		if st == 0 {
+			if v, ok := counters[cnr]; ok {
+				v.Logic++
+				counters[cnr] = v
+			} else {
+				counters[cnr] = ObjectCounters{
+					Logic: 1,
+				}
+			}
+			isAvailable = true
+		}
+
+		if isAvailable && IsUserObject(obj) {
+			if v, ok := counters[cnr]; ok {
+				v.User++
+				counters[cnr] = v
+			} else {
+				counters[cnr] = ObjectCounters{
+					User: 1,
+				}
+			}
 		}

 		return nil
 	})
 	if err != nil {
-		return fmt.Errorf("could not iterate objects: %w", err)
+		return nil, fmt.Errorf("could not iterate objects: %w", err)
 	}
+	return counters, nil
+}

-	data := make([]byte, 8)
-	binary.LittleEndian.PutUint64(data, phyCounter)
-
-	err = b.Put(objectPhyCounterKey, data)
-	if err != nil {
-		return fmt.Errorf("could not update phy object counter: %w", err)
-	}
-
-	data = make([]byte, 8)
-	binary.LittleEndian.PutUint64(data, logicCounter)
-
-	err = b.Put(objectLogicCounterKey, data)
-	if err != nil {
-		return fmt.Errorf("could not update logic object counter: %w", err)
-	}
-
+func setObjectCounters(tx *badger.Txn, counters map[cid.ID]ObjectCounters) error {
+	for cnrID, count := range counters {
+		delta := objectCounterValue{
+			Logic: int64(count.Logic),
+			Phy:   int64(count.Phy),
+			User:  int64(count.User),
+		}
+		// This function is called only from init or refill, so no other
+		// updates can happen concurrently and bucketID = 0 can be used.
+		if err := editContainerCounterValue(tx, cnrID, delta, 0); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
+
+func IsUserObject(obj *objectSDK.Object) bool {
+	_, hasParentID := obj.ParentID()
+	return obj.Type() == objectSDK.TypeRegular &&
+		(obj.SplitID() == nil ||
+			(hasParentID && len(obj.Children()) == 0))
+}
+
+// ZeroCountContainers returns containers with objects count = 0 in metabase.
+func (db *DB) ZeroCountContainers(ctx context.Context) ([]cid.ID, error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("ZeroCountContainers", time.Since(startedAt), success)
+	}()
+
+	ctx, span := tracing.StartSpanFromContext(ctx, "metabase.ZeroCountContainers")
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return nil, ErrDegradedMode
+	}
+
+	var result []cid.ID
+
+	var cc map[cid.ID]ObjectCounters
+	err := db.database.View(func(tx *badger.Txn) error {
+		var err error
+		cc, err = containerObjectCounters(ctx, tx, nil)
+		return err
+	})
+	if err != nil {
+		return nil, metaerr.Wrap(err)
+	}
+	for cnrID, c := range cc {
+		if c.IsZero() {
+			result = append(result, cnrID)
+		}
+	}
+	success = true
+	return result, nil
+}
+
+func (db *DB) DeleteContainerCount(ctx context.Context, id cid.ID) error {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("DeleteContainerCount", time.Since(startedAt), success)
+	}()
+
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.DeleteContainerCount",
+		trace.WithAttributes(
+			attribute.Stringer("container_id", id),
+		))
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return ErrDegradedMode
+	}
+
+	if db.mode.ReadOnly() {
+		return ErrReadOnlyMode
+	}
+
+	prefix := make([]byte, containerObjectCountPrefixSize)
+	prefix[0] = containerCountersPrefix
+	id.Encode(prefix[1:])
+
+	err := db.database.Update(func(txn *badger.Txn) error {
+		return deleteByPrefix(ctx, txn, prefix)
+	})
+	if err != nil {
+		return metaerr.Wrap(err)
+	}
+	success = true
+	return nil
+}
+
+func containerObjectCounterInitialized(ctx context.Context, tx *badger.Txn) bool {
+	_, err := containerObjectCounters(ctx, tx, nil)
+	return err == nil
+}
+
+func containerObjectCounters(ctx context.Context, tx *badger.Txn, cnrID *cid.ID) (map[cid.ID]ObjectCounters, error) {
+	prefix := []byte{containerCountersPrefix}
+	if cnrID != nil {
+		buf := make([]byte, cidSize)
+		cnrID.Encode(buf)
+		prefix = append(prefix, buf...)
+	}
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         prefix,
+		PrefetchValues: true,
+	})
+	defer it.Close()
+
+	counters := make(map[cid.ID]objectCounterValue)
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+
+		var cnrID cid.ID
+		if !parseContainerID(&cnrID, it.Item().Key()) {
+			return nil, errInvalidContainerIDValue
+		}
+
+		if err := it.Item().Value(func(val []byte) error {
+			oc, err := parseContainerCounterValue(val)
+			if err != nil {
+				return err
+			}
+			counters[cnrID] = mergeObjectCounterValues(counters[cnrID], oc)
+			return nil
+		}); err != nil {
+			return nil, fmt.Errorf("invalid container object counter value: %w", err)
+		}
+	}
+	return normilizeObjectCounters(counters)
+}
+
+// parseContainerCounterValue returns the parsed phy, logic and user values.
+func parseContainerCounterValue(buf []byte) (objectCounterValue, error) {
+	if len(buf) != 24 {
+		return objectCounterValue{}, errInvalidValueLenght
+	}
+	return objectCounterValue{
+		Phy:   int64(binary.LittleEndian.Uint64(buf[:8])),
+		Logic: int64(binary.LittleEndian.Uint64(buf[8:16])),
+		User:  int64(binary.LittleEndian.Uint64(buf[16:])),
+	}, nil
+}
+
+func marshalContainerCounterValue(v objectCounterValue) []byte {
+	buf := make([]byte, 24)
+	binary.LittleEndian.PutUint64(buf[:8], uint64(v.Phy))
+	binary.LittleEndian.PutUint64(buf[8:16], uint64(v.Logic))
+	binary.LittleEndian.PutUint64(buf[16:], uint64(v.User))
+	return buf
+}
+
+func mergeObjectCounterValues(lhs, rhs objectCounterValue) objectCounterValue {
+	lhs.Logic += rhs.Logic
+	lhs.Phy += rhs.Phy
+	lhs.User += rhs.User
+	return lhs
+}
+
+func normilizeObjectCounters(values map[cid.ID]objectCounterValue) (map[cid.ID]ObjectCounters, error) {
+	result := make(map[cid.ID]ObjectCounters, len(values))
+	for k, v := range values {
+		if v.Logic < 0 || v.Phy < 0 || v.User < 0 {
+			return nil, fmt.Errorf("invalid container object counter for container ID %s", k.EncodeToString())
+		}
+		var oc ObjectCounters
+		oc.Logic = uint64(v.Logic)
+		oc.Phy = uint64(v.Phy)
+		oc.User = uint64(v.User)
+		result[k] = oc
+	}
+	return result, nil
+}
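
For reference, each per-container counter record is fixed-size: the key is one prefix byte, the container ID (32 bytes, assuming the usual FrostFS cidSize), and a little-endian uint16 bucket ID; the value packs three little-endian signed 64-bit deltas in phy, logic, user order. A self-contained round-trip sketch of the value encoding, mirroring marshalContainerCounterValue/parseContainerCounterValue above:

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

type counterValue struct{ Phy, Logic, User int64 }

func marshal(v counterValue) []byte {
	buf := make([]byte, 24)
	binary.LittleEndian.PutUint64(buf[:8], uint64(v.Phy))
	binary.LittleEndian.PutUint64(buf[8:16], uint64(v.Logic))
	binary.LittleEndian.PutUint64(buf[16:], uint64(v.User))
	return buf
}

func parse(buf []byte) (counterValue, error) {
	if len(buf) != 24 {
		return counterValue{}, errors.New("invalid value length")
	}
	return counterValue{
		Phy:   int64(binary.LittleEndian.Uint64(buf[:8])),
		Logic: int64(binary.LittleEndian.Uint64(buf[8:16])),
		User:  int64(binary.LittleEndian.Uint64(buf[16:])),
	}, nil
}

func main() {
	// Deltas may be negative; they are summed across bucket IDs and the
	// per-container total must be non-negative, as normilizeObjectCounters enforces.
	v, err := parse(marshal(counterValue{Phy: 2, Logic: -1, User: 0}))
	fmt.Println(v, err) // {2 -1 0} <nil>
}
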
@@ -7,6 +7,7 @@ import (
 	objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
 	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
+	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
 	oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"

@@ -21,62 +22,117 @@ func TestCounters(t *testing.T) {
 	t.Run("defaults", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
-		c, err := db.ObjectCounters()
+		defer func() { require.NoError(t, db.Close()) }()
+
+		c, err := db.ObjectCounters(context.Background())
 		require.NoError(t, err)
-		require.Zero(t, c.Phy())
-		require.Zero(t, c.Logic())
+		require.Zero(t, c.Phy)
+		require.Zero(t, c.Logic)
+		require.Zero(t, c.User)
+
+		cc, err := db.ContainerCounters(context.Background())
+		require.NoError(t, err)
+		require.Zero(t, len(cc.Counts))
 	})

 	t.Run("put", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		oo := make([]*objectSDK.Object, 0, objCount)
 		for i := 0; i < objCount; i++ {
 			oo = append(oo, testutil.GenerateObject())
 		}

 		var prm meta.PutPrm
+		exp := make(map[cid.ID]meta.ObjectCounters)

 		for i := 0; i < objCount; i++ {
 			prm.SetObject(oo[i])
+			cnrID, _ := oo[i].ContainerID()
+			c := meta.ObjectCounters{}
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}

 			_, err := db.Put(context.Background(), prm)
 			require.NoError(t, err)

-			c, err := db.ObjectCounters()
+			c, err = db.ObjectCounters(context.Background())
 			require.NoError(t, err)

-			require.Equal(t, uint64(i+1), c.Phy())
-			require.Equal(t, uint64(i+1), c.Logic())
+			require.Equal(t, uint64(i+1), c.Phy)
+			require.Equal(t, uint64(i+1), c.Logic)
+
+			cc, err := db.ContainerCounters(context.Background())
+			require.NoError(t, err)
+
+			require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 		}
 	})

 	t.Run("delete", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		oo := putObjs(t, db, objCount, false)

+		exp := make(map[cid.ID]meta.ObjectCounters)
+		for _, obj := range oo {
+			cnrID, _ := obj.ContainerID()
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}
+		}
+
 		var prm meta.DeletePrm
 		for i := objCount - 1; i >= 0; i-- {
-			prm.SetAddresses(objectcore.AddressOf(oo[i]))
+			prm.Address = objectcore.AddressOf(oo[i])

 			res, err := db.Delete(context.Background(), prm)
 			require.NoError(t, err)
-			require.Equal(t, uint64(1), res.AvailableObjectsRemoved())
+			require.Equal(t, uint64(1), res.LogicCount)

-			c, err := db.ObjectCounters()
+			c, err := db.ObjectCounters(context.Background())
 			require.NoError(t, err)

-			require.Equal(t, uint64(i), c.Phy())
-			require.Equal(t, uint64(i), c.Logic())
+			require.Equal(t, uint64(i), c.Phy)
+			require.Equal(t, uint64(i), c.Logic)
+			require.Equal(t, uint64(i), c.User)
+
+			cnrID, _ := oo[i].ContainerID()
+			if v, ok := exp[cnrID]; ok {
+				v.Phy--
+				v.Logic--
+				v.User--
+				exp[cnrID] = v
+			}
+
+			cc, err := db.ContainerCounters(context.Background())
+			require.NoError(t, err)
+			require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 		}
 	})

 	t.Run("inhume", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		oo := putObjs(t, db, objCount, false)

+		exp := make(map[cid.ID]meta.ObjectCounters)
+		for _, obj := range oo {
+			cnrID, _ := obj.ContainerID()
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}
+		}
+
 		inhumedObjs := make([]oid.Address, objCount/2)

 		for i, o := range oo {

@@ -87,66 +143,136 @@ func TestCounters(t *testing.T) {
 			inhumedObjs[i] = objectcore.AddressOf(o)
 		}

+		for _, addr := range inhumedObjs {
+			if v, ok := exp[addr.Container()]; ok {
+				v.Logic--
+				v.User--
+				if v.IsZero() {
+					delete(exp, addr.Container())
+				} else {
+					exp[addr.Container()] = v
+				}
+			}
+		}
+
 		var prm meta.InhumePrm
 		prm.SetTombstoneAddress(oidtest.Address())
 		prm.SetAddresses(inhumedObjs...)

 		res, err := db.Inhume(context.Background(), prm)
 		require.NoError(t, err)
-		require.Equal(t, uint64(len(inhumedObjs)), res.AvailableInhumed())
+		require.Equal(t, uint64(len(inhumedObjs)), res.LogicInhumed())
+		require.Equal(t, uint64(len(inhumedObjs)), res.UserInhumed())

-		c, err := db.ObjectCounters()
+		c, err := db.ObjectCounters(context.Background())
 		require.NoError(t, err)

-		require.Equal(t, uint64(objCount), c.Phy())
-		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.Logic())
+		require.Equal(t, uint64(objCount), c.Phy)
+		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.Logic)
+		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.User)
+
+		cc, err := db.ContainerCounters(context.Background())
+		require.NoError(t, err)
+
+		require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 	})

 	t.Run("put_split", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		parObj := testutil.GenerateObject()

+		exp := make(map[cid.ID]meta.ObjectCounters)
+
 		// put objects and check that parent info
 		// does not affect the counter
 		for i := 0; i < objCount; i++ {
 			o := testutil.GenerateObject()
 			if i < objCount/2 { // half of the objs will have the parent
 				o.SetParent(parObj)
+				o.SetSplitID(objectSDK.NewSplitID())
 			}

+			cnrID, _ := o.ContainerID()
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}
+
 			require.NoError(t, putBig(db, o))

-			c, err := db.ObjectCounters()
+			c, err := db.ObjectCounters(context.Background())
 			require.NoError(t, err)
-			require.Equal(t, uint64(i+1), c.Phy())
-			require.Equal(t, uint64(i+1), c.Logic())
+			require.Equal(t, uint64(i+1), c.Phy)
+			require.Equal(t, uint64(i+1), c.Logic)
+			require.Equal(t, uint64(i+1), c.User)
+
+			cc, err := db.ContainerCounters(context.Background())
+			require.NoError(t, err)
+			require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 		}
 	})

 	t.Run("delete_split", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		oo := putObjs(t, db, objCount, true)

+		exp := make(map[cid.ID]meta.ObjectCounters)
+		for _, obj := range oo {
+			cnrID, _ := obj.ContainerID()
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}
+		}
+
 		// delete objects that have parent info
 		// and check that it does not affect
 		// the counter
 		for i, o := range oo {
-			require.NoError(t, metaDelete(db, objectcore.AddressOf(o)))
+			addr := objectcore.AddressOf(o)
+			require.NoError(t, metaDelete(db, addr))

-			c, err := db.ObjectCounters()
+			c, err := db.ObjectCounters(context.Background())
 			require.NoError(t, err)
-			require.Equal(t, uint64(objCount-i-1), c.Phy())
-			require.Equal(t, uint64(objCount-i-1), c.Logic())
+			require.Equal(t, uint64(objCount-i-1), c.Phy)
+			require.Equal(t, uint64(objCount-i-1), c.Logic)
+			require.Equal(t, uint64(objCount-i-1), c.User)
+
+			if v, ok := exp[addr.Container()]; ok {
+				v.Logic--
+				v.Phy--
+				v.User--
+				if v.IsZero() {
+					delete(exp, addr.Container())
+				} else {
+					exp[addr.Container()] = v
+				}
+			}
 		}
 	})

 	t.Run("inhume_split", func(t *testing.T) {
 		t.Parallel()
 		db := newDB(t)
+		defer func() { require.NoError(t, db.Close()) }()
+
 		oo := putObjs(t, db, objCount, true)

+		exp := make(map[cid.ID]meta.ObjectCounters)
+		for _, obj := range oo {
+			cnrID, _ := obj.ContainerID()
+			exp[cnrID] = meta.ObjectCounters{
+				Logic: 1,
+				Phy:   1,
+				User:  1,
+			}
+		}
+
 		inhumedObjs := make([]oid.Address, objCount/2)

 		for i, o := range oo {

@@ -157,6 +283,18 @@ func TestCounters(t *testing.T) {
 			inhumedObjs[i] = objectcore.AddressOf(o)
 		}

+		for _, addr := range inhumedObjs {
+			if v, ok := exp[addr.Container()]; ok {
+				v.Logic--
+				v.User--
+				if v.IsZero() {
+					delete(exp, addr.Container())
+				} else {
+					exp[addr.Container()] = v
+				}
+			}
+		}
+
 		var prm meta.InhumePrm
 		prm.SetTombstoneAddress(oidtest.Address())
 		prm.SetAddresses(inhumedObjs...)

@@ -164,14 +302,69 @@ func TestCounters(t *testing.T) {
 		_, err := db.Inhume(context.Background(), prm)
 		require.NoError(t, err)

-		c, err := db.ObjectCounters()
+		c, err := db.ObjectCounters(context.Background())
 		require.NoError(t, err)

-		require.Equal(t, uint64(objCount), c.Phy())
-		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.Logic())
+		require.Equal(t, uint64(objCount), c.Phy)
+		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.Logic)
+		require.Equal(t, uint64(objCount-len(inhumedObjs)), c.User)
+
+		cc, err := db.ContainerCounters(context.Background())
+		require.NoError(t, err)
+
+		require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 	})
 }

+func TestDoublePut(t *testing.T) {
+	t.Parallel()
+	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
+	obj := testutil.GenerateObject()
+
+	exp := make(map[cid.ID]meta.ObjectCounters)
+	cnrID, _ := obj.ContainerID()
+	exp[cnrID] = meta.ObjectCounters{
+		Logic: 1,
+		Phy:   1,
+		User:  1,
+	}
+
+	var prm meta.PutPrm
+	prm.SetObject(obj)
+	pr, err := db.Put(context.Background(), prm)
+	require.NoError(t, err)
+	require.True(t, pr.Inserted)
+
+	c, err := db.ObjectCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, uint64(1), c.Phy)
+	require.Equal(t, uint64(1), c.Logic)
+	require.Equal(t, uint64(1), c.User)
+
+	cc, err := db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
+
+	pr, err = db.Put(context.Background(), prm)
+	require.NoError(t, err)
+	require.False(t, pr.Inserted)
+
+	c, err = db.ObjectCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, uint64(1), c.Phy)
+	require.Equal(t, uint64(1), c.Logic)
+	require.Equal(t, uint64(1), c.User)
+
+	cc, err = db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
+}
+
 func TestCounters_Expired(t *testing.T) {
 	// That test is about expired objects without
 	// GCMark yet. Such objects should be treated as

@@ -184,18 +377,34 @@ func TestCounters_Expired(t *testing.T) {

 	es := &epochState{epoch}
 	db := newDB(t, meta.WithEpochState(es))
+	defer func() { require.NoError(t, db.Close()) }()

 	oo := make([]oid.Address, objCount)
 	for i := range oo {
 		oo[i] = putWithExpiration(t, db, objectSDK.TypeRegular, epoch+1)
 	}

+	exp := make(map[cid.ID]meta.ObjectCounters)
+	for _, addr := range oo {
+		exp[addr.Container()] = meta.ObjectCounters{
+			Logic: 1,
+			Phy:   1,
+			User:  1,
+		}
+	}
+
 	// 1. objects are available and counters are correct

-	c, err := db.ObjectCounters()
+	c, err := db.ObjectCounters(context.Background())
 	require.NoError(t, err)
-	require.Equal(t, uint64(objCount), c.Phy())
-	require.Equal(t, uint64(objCount), c.Logic())
+	require.Equal(t, uint64(objCount), c.Phy)
+	require.Equal(t, uint64(objCount), c.Logic)
+	require.Equal(t, uint64(objCount), c.User)
+
+	cc, err := db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)

 	for _, o := range oo {
 		_, err := metaGet(db, o, true)

@@ -207,10 +416,16 @@ func TestCounters_Expired(t *testing.T) {

 	es.e = epoch + 2

-	c, err = db.ObjectCounters()
+	c, err = db.ObjectCounters(context.Background())
 	require.NoError(t, err)
-	require.Equal(t, uint64(objCount), c.Phy())
-	require.Equal(t, uint64(objCount), c.Logic())
+	require.Equal(t, uint64(objCount), c.Phy)
+	require.Equal(t, uint64(objCount), c.Logic)
+	require.Equal(t, uint64(objCount), c.User)
+
+	cc, err = db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)

 	for _, o := range oo {
 		_, err := metaGet(db, o, true)

@@ -227,48 +442,91 @@ func TestCounters_Expired(t *testing.T) {

 	inhumeRes, err := db.Inhume(context.Background(), inhumePrm)
 	require.NoError(t, err)
-	require.Equal(t, uint64(1), inhumeRes.AvailableInhumed())
+	require.Equal(t, uint64(1), inhumeRes.LogicInhumed())
+	require.Equal(t, uint64(1), inhumeRes.UserInhumed())

-	c, err = db.ObjectCounters()
+	c, err = db.ObjectCounters(context.Background())
 	require.NoError(t, err)

-	require.Equal(t, uint64(len(oo)), c.Phy())
-	require.Equal(t, uint64(len(oo)-1), c.Logic())
+	require.Equal(t, uint64(len(oo)), c.Phy)
+	require.Equal(t, uint64(len(oo)-1), c.Logic)
+	require.Equal(t, uint64(len(oo)-1), c.User)
+
+	if v, ok := exp[oo[0].Container()]; ok {
+		v.Logic--
+		v.User--
+		if v.IsZero() {
+			delete(exp, oo[0].Container())
+		} else {
+			exp[oo[0].Container()] = v
+		}
+	}
+
+	cc, err = db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)

 	// 4. `Delete` an object with GCMark should decrease the
 	// phy counter but does not affect the logic counter (after
 	// that step they should be equal)

 	var deletePrm meta.DeletePrm
-	deletePrm.SetAddresses(oo[0])
+	deletePrm.Address = oo[0]

 	deleteRes, err := db.Delete(context.Background(), deletePrm)
 	require.NoError(t, err)
-	require.Zero(t, deleteRes.AvailableObjectsRemoved())
+	require.Zero(t, deleteRes.LogicCount)
+	require.Zero(t, deleteRes.UserCount)

+	if v, ok := exp[oo[0].Container()]; ok {
+		v.Phy--
+		exp[oo[0].Container()] = v
+	}
+
 	oo = oo[1:]

-	c, err = db.ObjectCounters()
+	c, err = db.ObjectCounters(context.Background())
 	require.NoError(t, err)
-	require.Equal(t, uint64(len(oo)), c.Phy())
-	require.Equal(t, uint64(len(oo)), c.Logic())
+	require.Equal(t, uint64(len(oo)), c.Phy)
+	require.Equal(t, uint64(len(oo)), c.Logic)
+	require.Equal(t, uint64(len(oo)), c.User)
+
+	cc, err = db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)

 	// 5 `Delete` an expired object (like it would the control
 	// service do) should decrease both counters despite the
 	// expiration fact

-	deletePrm.SetAddresses(oo[0])
+	deletePrm.Address = oo[0]

 	deleteRes, err = db.Delete(context.Background(), deletePrm)
 	require.NoError(t, err)
-	require.Equal(t, uint64(1), deleteRes.AvailableObjectsRemoved())
+	require.Equal(t, uint64(1), deleteRes.LogicCount)
+	require.Equal(t, uint64(1), deleteRes.UserCount)

+	if v, ok := exp[oo[0].Container()]; ok {
+		v.Phy--
+		v.Logic--
+		v.User--
+		exp[oo[0].Container()] = v
+	}
+
 	oo = oo[1:]

-	c, err = db.ObjectCounters()
+	c, err = db.ObjectCounters(context.Background())
 	require.NoError(t, err)
-	require.Equal(t, uint64(len(oo)), c.Phy())
-	require.Equal(t, uint64(len(oo)), c.Logic())
+	require.Equal(t, uint64(len(oo)), c.Phy)
+	require.Equal(t, uint64(len(oo)), c.Logic)
+	require.Equal(t, uint64(len(oo)), c.User)
+
+	cc, err = db.ContainerCounters(context.Background())
+	require.NoError(t, err)
+
+	require.Equal(t, meta.ContainerCounters{Counts: exp}, cc)
 }

 func putObjs(t *testing.T, db *meta.DB, count int, withParent bool) []*objectSDK.Object {

@@ -281,6 +539,7 @@ func putObjs(t *testing.T, db *meta.DB, count int, withParent bool) []*objectSDK
 		o := testutil.GenerateObject()
 		if withParent {
 			o.SetParent(parent)
+			o.SetSplitID(objectSDK.NewSplitID())
 		}

 		oo = append(oo, o)

@@ -289,11 +548,11 @@ func putObjs(t *testing.T, db *meta.DB, count int, withParent bool) []*objectSDK
 		_, err = db.Put(context.Background(), prm)
 		require.NoError(t, err)

-		c, err := db.ObjectCounters()
+		c, err := db.ObjectCounters(context.Background())
 		require.NoError(t, err)

-		require.Equal(t, uint64(i+1), c.Phy())
-		require.Equal(t, uint64(i+1), c.Logic())
+		require.Equal(t, uint64(i+1), c.Phy)
+		require.Equal(t, uint64(i+1), c.Logic)
 	}

 	return oo
@@ -9,22 +9,16 @@ import (
 	"strconv"
 	"strings"
 	"sync"
-	"time"

 	v2object "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
-	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
+	"github.com/dgraph-io/badger/v4"
 	"github.com/mr-tron/base58"
 	"go.etcd.io/bbolt"
 	"go.uber.org/zap"
 )

-type matcher struct {
-	matchSlow   func(string, []byte, string) bool
-	matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error
-}
-
 // EpochState is an interface that provides access to the
 // current epoch number.
 type EpochState interface {

@@ -39,28 +33,26 @@ type DB struct {
 	modeMtx sync.RWMutex
 	mode    mode.Mode

-	matchers map[objectSDK.SearchMatchType]matcher
+	database *badger.DB

-	boltDB *bbolt.DB
-
 	initialized bool
+
+	bucketIDs *dbBucketDispatcher
+
+	wg     sync.WaitGroup
+	closed chan struct{}
 }

 // Option is an option of DB constructor.
 type Option func(*cfg)

 type cfg struct {
-	boltOptions *bbolt.Options // optional
-
-	boltBatchSize  int
-	boltBatchDelay time.Duration
-
 	info Info

 	log *logger.Logger

 	epochState EpochState
 	metrics    Metrics
+	noSync     bool
 }

 func defaultCfg() *cfg {

@@ -68,10 +60,8 @@ func defaultCfg() *cfg {
 		info: Info{
 			Permission: os.ModePerm, // 0777
 		},
-		boltBatchDelay: bbolt.DefaultMaxBatchDelay,
-		boltBatchSize:  bbolt.DefaultMaxBatchSize,
-		log:            &logger.Logger{Logger: zap.L()},
-		metrics:        &noopMetrics{},
+		log:     &logger.Logger{Logger: zap.L()},
+		metrics: &noopMetrics{},
 	}
 }

@@ -88,25 +78,8 @@ func New(opts ...Option) *DB {
 	}

 	return &DB{
-		cfg: c,
-		matchers: map[objectSDK.SearchMatchType]matcher{
-			objectSDK.MatchUnknown: {
-				matchSlow:   unknownMatcher,
-				matchBucket: unknownMatcherBucket,
-			},
-			objectSDK.MatchStringEqual: {
-				matchSlow:   stringEqualMatcher,
-				matchBucket: stringEqualMatcherBucket,
-			},
-			objectSDK.MatchStringNotEqual: {
-				matchSlow:   stringNotEqualMatcher,
-				matchBucket: stringNotEqualMatcherBucket,
-			},
-			objectSDK.MatchCommonPrefix: {
-				matchSlow:   stringCommonPrefixMatcher,
-				matchBucket: stringCommonPrefixMatcherBucket,
-			},
-		},
+		cfg:       c,
+		bucketIDs: newDBBucketDispatcher(),
 	}
 }

@@ -258,37 +231,6 @@ func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []
 	return nil
 }

-// bucketKeyHelper returns byte representation of val that is used as a key
-// in boltDB. Useful for getting filter values from unique and list indexes.
-func bucketKeyHelper(hdr string, val string) []byte {
-	switch hdr {
-	case v2object.FilterHeaderParent:
-		v, err := base58.Decode(val)
-		if err != nil {
-			return nil
-		}
-		return v
-	case v2object.FilterHeaderPayloadHash:
-		v, err := hex.DecodeString(val)
-		if err != nil {
-			return nil
-		}
-
-		return v
-	case v2object.FilterHeaderSplitID:
-		s := objectSDK.NewSplitID()
-
-		err := s.Parse(val)
-		if err != nil {
-			return nil
-		}
-
-		return s.ToV2()
-	default:
-		return []byte(val)
-	}
-}
-
 // SetLogger sets logger. It is used after the shard ID was generated to use it in logs.
 func (db *DB) SetLogger(l *logger.Logger) {
 	db.log = l

@@ -306,13 +248,6 @@ func WithLogger(l *logger.Logger) Option {
 	}
 }

-// WithBoltDBOptions returns option to specify BoltDB options.
-func WithBoltDBOptions(opts *bbolt.Options) Option {
-	return func(c *cfg) {
-		c.boltOptions = opts
-	}
-}
-
 // WithPath returns option to set system path to Metabase.
 func WithPath(path string) Option {
 	return func(c *cfg) {

@@ -328,28 +263,6 @@ func WithPermissions(perm fs.FileMode) Option {
 	}
 }

-// WithMaxBatchSize returns option to specify maximum concurrent operations
-// to be processed in a single transactions.
-// This option is missing from `bbolt.Options` but is set right after DB is open.
-func WithMaxBatchSize(s int) Option {
-	return func(c *cfg) {
-		if s != 0 {
-			c.boltBatchSize = s
-		}
-	}
-}
-
-// WithMaxBatchDelay returns option to specify maximum time to wait before
-// the batch of concurrent transactions is processed.
-// This option is missing from `bbolt.Options` but is set right after DB is open.
-func WithMaxBatchDelay(d time.Duration) Option {
-	return func(c *cfg) {
-		if d != 0 {
-			c.boltBatchDelay = d
-		}
-	}
-}
-
 // WithEpochState return option to specify a source of current epoch height.
 func WithEpochState(s EpochState) Option {
 	return func(c *cfg) {

@@ -363,3 +276,9 @@ func WithMetrics(m Metrics) Option {
 		c.metrics = m
 	}
 }
+
+func WithNoSync(v bool) Option {
+	return func(c *cfg) {
+		c.noSync = v
+	}
+}
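
With the bbolt batching options gone, WithNoSync is the remaining durability knob on the constructor. A construction sketch; the path is illustrative, and epochState is assumed to satisfy meta.EpochState as in the tests:

package meta_test

import (
	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
)

// newFastMeta builds a metabase with synchronous writes disabled.
func newFastMeta() *meta.DB {
	return meta.New(
		meta.WithPath("/srv/frostfs/meta"), // illustrative path
		meta.WithPermissions(0o600),
		meta.WithEpochState(epochState{}),
		meta.WithNoSync(true), // acknowledged writes may be lost on power failure
	)
}
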
@@ -2,7 +2,6 @@ package meta_test
 
 import (
 	"context"
-	"os"
 	"path/filepath"
 	"strconv"
 	"testing"
@@ -45,7 +44,7 @@ func newDB(t testing.TB, opts ...meta.Option) *meta.DB {
 	bdb := meta.New(
 		append([]meta.Option{
 			meta.WithPath(filepath.Join(t.TempDir(), "metabase")),
-			meta.WithPermissions(0600),
+			meta.WithPermissions(0o600),
 			meta.WithEpochState(epochState{}),
 		}, opts...)...,
 	)
@@ -53,11 +52,6 @@ func newDB(t testing.TB, opts ...meta.Option) *meta.DB {
 	require.NoError(t, bdb.Open(context.Background(), false))
 	require.NoError(t, bdb.Init())
 
-	t.Cleanup(func() {
-		bdb.Close()
-		os.Remove(bdb.DumpInfo().Path)
-	})
-
 	return bdb
 }
 
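Review note: newDB no longer installs a t.Cleanup that closes the DB and removes its file. The path already lives under t.TempDir, which the testing package removes automatically, and the test diffs below now close the database explicitly so the result of Close is asserted:

	defer func() { require.NoError(t, db.Close()) }()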
@@ -1,7 +1,6 @@
 package meta
 
 import (
-	"bytes"
 	"context"
 	"errors"
 	"fmt"
@@ -12,52 +11,26 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client"
+	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
-	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/trace"
+	"github.com/dgraph-io/badger/v4"
 )
 
+var errFailedToRemoveUniqueIndexes = errors.New("can't remove unique indexes")
+
 // DeletePrm groups the parameters of Delete operation.
 type DeletePrm struct {
-	addrs []oid.Address
+	Address oid.Address
 }
 
 // DeleteRes groups the resulting values of Delete operation.
 type DeleteRes struct {
-	rawRemoved       uint64
-	availableRemoved uint64
-	sizes            []uint64
-	availableSizes   []uint64
-}
-
-// AvailableObjectsRemoved returns the number of removed available
-// objects.
-func (d DeleteRes) AvailableObjectsRemoved() uint64 {
-	return d.availableRemoved
-}
-
-// RawObjectsRemoved returns the number of removed raw objects.
-func (d DeleteRes) RawObjectsRemoved() uint64 {
-	return d.rawRemoved
-}
-
-// RemovedPhysicalObjectSizes returns the sizes of removed physical objects.
-func (d DeleteRes) RemovedPhysicalObjectSizes() []uint64 {
-	return d.sizes
-}
-
-// RemovedLogicalObjectSizes returns the sizes of removed logical objects.
-func (d DeleteRes) RemovedLogicalObjectSizes() []uint64 {
-	return d.availableSizes
-}
-
-// SetAddresses is a Delete option to set the addresses of the objects to delete.
-//
-// Option is required.
-func (p *DeletePrm) SetAddresses(addrs ...oid.Address) {
-	p.addrs = addrs
+	PhyCount   uint64
+	LogicCount uint64
+	UserCount  uint64
+	PhySize    uint64
+	LogicSize  uint64
 }
 
 type referenceNumber struct {
@@ -68,8 +41,6 @@ type referenceNumber struct {
 	obj *objectSDK.Object
 }
 
-type referenceCounter map[string]*referenceNumber
-
 // Delete removed object records from metabase indexes.
 func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
 	var (
@@ -80,10 +51,7 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
 		db.metrics.AddMethodDuration("Delete", time.Since(startedAt), deleted)
 	}()
 
-	_, span := tracing.StartSpanFromContext(ctx, "metabase.Delete",
-		trace.WithAttributes(
-			attribute.Int("addr_count", len(prm.addrs)),
-		))
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.Delete")
 	defer span.End()
 
 	db.modeMtx.RLock()
@@ -95,87 +63,63 @@ func (db *DB) Delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) {
 		return DeleteRes{}, ErrReadOnlyMode
 	}
 
-	var rawRemoved uint64
-	var availableRemoved uint64
-	var err error
-	var sizes = make([]uint64, len(prm.addrs))
-	var availableSizes = make([]uint64, len(prm.addrs))
-
-	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
-		// We need to clear slice because tx can try to execute multiple times.
-		rawRemoved, availableRemoved, err = db.deleteGroup(tx, prm.addrs, sizes, availableSizes)
+	bucketID, release := db.bucketIDs.BucketID(prm.Address.Container())
+	defer release()
+
+	var err error
+	var res DeleteRes
+	err = db.database.Update(func(tx *badger.Txn) error {
+		res, err = db.deleteByAddress(ctx, tx, prm.Address, bucketID)
 		return err
 	})
 	if err == nil {
 		deleted = true
-		for i := range prm.addrs {
-			storagelog.Write(db.log,
-				storagelog.AddressField(prm.addrs[i]),
-				storagelog.OpField("metabase DELETE"))
-		}
+		storagelog.Write(db.log,
+			storagelog.AddressField(prm.Address),
+			storagelog.OpField("metabase DELETE"))
 	}
-	return DeleteRes{
-		rawRemoved:       rawRemoved,
-		availableRemoved: availableRemoved,
-		sizes:            sizes,
-		availableSizes:   availableSizes,
-	}, metaerr.Wrap(err)
+	return res, metaerr.Wrap(err)
 }
 
 // deleteGroup deletes object from the metabase. Handles removal of the
 // references of the split objects.
-// The first return value is a physical objects removed number: physical
-// objects that were stored. The second return value is a logical objects
-// removed number: objects that were available (without Tombstones, GCMarks
-// non-expired, etc.)
-func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address, sizes []uint64, availableSizes []uint64) (uint64, uint64, error) {
-	refCounter := make(referenceCounter, len(addrs))
+func (db *DB) deleteByAddress(ctx context.Context, tx *badger.Txn, addr oid.Address, bucketID uint16) (DeleteRes, error) {
+	refCounter := &referenceNumber{}
 	currEpoch := db.epochState.CurrentEpoch()
+	res, err := db.delete(ctx, tx, addr, refCounter, currEpoch)
+	if err != nil {
+		return DeleteRes{}, err
+	}
 
-	var rawDeleted uint64
-	var availableDeleted uint64
+	if err := db.updateCountersDelete(tx, addr.Container(), res, bucketID); err != nil {
+		return DeleteRes{}, err
+	}
 
-	for i := range addrs {
-		removed, available, size, err := db.delete(tx, addrs[i], refCounter, currEpoch)
+	if refCounter.cur == refCounter.all {
+		err := db.deleteObject(ctx, tx, refCounter.obj, true)
 		if err != nil {
-			return 0, 0, err // maybe log and continue?
-		}
-
-		if removed {
-			rawDeleted++
-			sizes[i] = size
-		}
-
-		if available {
-			availableDeleted++
-			availableSizes[i] = size
+			return DeleteRes{}, err
 		}
 	}
+	return res, nil
+}
 
-	if rawDeleted > 0 {
-		err := db.updateCounter(tx, phy, rawDeleted, false)
-		if err != nil {
-			return 0, 0, fmt.Errorf("could not decrease phy object counter: %w", err)
-		}
+func (db *DB) updateCountersDelete(tx *badger.Txn, cnrID cid.ID, res DeleteRes, bucketID uint16) error {
+	delta := map[cid.ID]objectCounterValue{
+		cnrID: {
+			Logic: -1 * int64(res.LogicCount),
+			Phy:   -1 * int64(res.PhyCount),
+			User:  -1 * int64(res.UserCount),
+		},
 	}
-
-	if availableDeleted > 0 {
-		err := db.updateCounter(tx, logical, availableDeleted, false)
-		if err != nil {
-			return 0, 0, fmt.Errorf("could not decrease logical object counter: %w", err)
-		}
+	bucketIDs := map[cid.ID]uint16{
+		cnrID: bucketID,
 	}
-
-	for _, refNum := range refCounter {
-		if refNum.cur == refNum.all {
-			err := db.deleteObject(tx, refNum.obj, true)
-			if err != nil {
-				return rawDeleted, availableDeleted, err // maybe log and continue?
-			}
-		}
+	if err := updateContainerCounter(tx, delta, bucketIDs); err != nil {
+		return fmt.Errorf("could not decrease container object counter: %w", err)
 	}
-
-	return rawDeleted, availableDeleted, nil
+	return nil
 }
 
 // delete removes object indexes from the metabase. Counts the references
@@ -183,207 +127,161 @@ func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []oid.Address, sizes []uint64, ava
 // The first return value indicates if an object has been removed. (removing a
 // non-exist object is error-free). The second return value indicates if an
 // object was available before the removal (for calculating the logical object
-// counter). The third return value is removed object payload size.
-func (db *DB) delete(tx *bbolt.Tx, addr oid.Address, refCounter referenceCounter, currEpoch uint64) (bool, bool, uint64, error) {
-	key := make([]byte, addressKeySize)
-	addrKey := addressKey(addr, key)
-	garbageBKT := tx.Bucket(garbageBucketName)
-	graveyardBKT := tx.Bucket(graveyardBucketName)
-
-	removeAvailableObject := inGraveyardWithKey(addrKey, graveyardBKT, garbageBKT) == 0
-
-	// remove record from the garbage bucket
-	if garbageBKT != nil {
-		err := garbageBKT.Delete(addrKey)
-		if err != nil {
-			return false, false, 0, fmt.Errorf("could not remove from garbage bucket: %w", err)
-		}
+// counter). The third return value The fourth return value is removed object payload size.
+func (db *DB) delete(ctx context.Context, tx *badger.Txn, addr oid.Address, refCounter *referenceNumber, currEpoch uint64) (DeleteRes, error) {
+	status, err := inGraveyardWithKey(tx, addr)
+	if err != nil {
+		return DeleteRes{}, err
 	}
+	removeAvailableObject := status == 0
 
 	// unmarshal object, work only with physically stored (raw == true) objects
-	obj, err := db.get(tx, addr, key, false, true, currEpoch)
+	obj, err := get(ctx, tx, addr, false, true, currEpoch)
 	if err != nil {
+		if client.IsErrObjectNotFound(err) {
+			err := deleteFromGarbage(tx, addr)
+			if err != nil {
+				return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
+			}
+			return DeleteRes{}, nil
+		}
 		var siErr *objectSDK.SplitInfoError
-
-		if client.IsErrObjectNotFound(err) || errors.As(err, &siErr) {
-			return false, false, 0, nil
+		if errors.As(err, &siErr) {
+			// if object is virtual (parent) then do nothing, it will be deleted with last child
+			return DeleteRes{}, nil
 		}
 
-		return false, false, 0, err
+		return DeleteRes{}, err
+	}
+
+	// remove record from the garbage bucket
+	err = deleteFromGarbage(tx, addr)
+	if err != nil {
+		return DeleteRes{}, fmt.Errorf("could not remove from garbage bucket: %w", err)
 	}
 
 	// if object is an only link to a parent, then remove parent
 	if parent := obj.Parent(); parent != nil {
 		parAddr := object.AddressOf(parent)
-		sParAddr := addressKey(parAddr, key)
-		k := string(sParAddr)
-
-		nRef, ok := refCounter[k]
-		if !ok {
-			nRef = &referenceNumber{
-				all:  parentLength(tx, parAddr),
-				addr: parAddr,
-				obj:  parent,
-			}
-
-			refCounter[k] = nRef
+		parentLen, err := parentLength(ctx, tx, parAddr)
+		if err != nil {
+			return DeleteRes{}, fmt.Errorf("failed to get parent count for object %s: %w", parAddr, err)
 		}
-
-		nRef.cur++
+		refCounter.addr = parAddr
+		refCounter.all = parentLen
+		refCounter.obj = parent
+		refCounter.cur = 1
 	}
 
+	isUserObject := IsUserObject(obj)
+
 	// remove object
-	err = db.deleteObject(tx, obj, false)
+	err = db.deleteObject(ctx, tx, obj, false)
 	if err != nil {
-		return false, false, 0, fmt.Errorf("could not remove object: %w", err)
+		return DeleteRes{}, fmt.Errorf("could not remove object: %w", err)
 	}
 
-	return true, removeAvailableObject, obj.PayloadSize(), nil
+	var result DeleteRes
+	result.PhyCount = 1
+	result.PhySize = obj.PayloadSize()
+	if removeAvailableObject {
+		result.LogicCount = 1
+		result.LogicSize = obj.PayloadSize()
+	}
+	if removeAvailableObject && isUserObject {
+		result.UserCount = 1
+	}
+
+	return result, nil
 }
 
 func (db *DB) deleteObject(
-	tx *bbolt.Tx,
+	ctx context.Context,
+	tx *badger.Txn,
 	obj *objectSDK.Object,
 	isParent bool,
 ) error {
-	err := delUniqueIndexes(tx, obj, isParent)
+	err := delUniqueIndexes(ctx, tx, obj, isParent)
 	if err != nil {
-		return fmt.Errorf("can't remove unique indexes")
+		return errFailedToRemoveUniqueIndexes
 	}
 
-	err = updateListIndexes(tx, obj, delListIndexItem)
+	err = updateListIndexes(tx, obj, deleteByKey)
 	if err != nil {
 		return fmt.Errorf("can't remove list indexes: %w", err)
 	}
 
-	err = updateFKBTIndexes(tx, obj, delFKBTIndexItem)
+	err = updateFKBTIndexes(tx, obj, deleteByKey)
 	if err != nil {
 		return fmt.Errorf("can't remove fake bucket tree indexes: %w", err)
 	}
 
-	return nil
-}
-
-// parentLength returns amount of available children from parentid index.
-func parentLength(tx *bbolt.Tx, addr oid.Address) int {
-	bucketName := make([]byte, bucketKeySize)
-
-	bkt := tx.Bucket(parentBucketName(addr.Container(), bucketName[:]))
-	if bkt == nil {
-		return 0
-	}
-
-	lst, err := decodeList(bkt.Get(objectKey(addr.Object(), bucketName[:])))
-	if err != nil {
-		return 0
-	}
-
-	return len(lst)
-}
-
-func delUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) {
-	bkt := tx.Bucket(item.name)
-	if bkt != nil {
-		_ = bkt.Delete(item.key) // ignore error, best effort there
-	}
-}
-
-func delFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
-	bkt := tx.Bucket(item.name)
-	if bkt == nil {
-		return nil
-	}
-
-	fkbtRoot := bkt.Bucket(item.key)
-	if fkbtRoot == nil {
-		return nil
-	}
-
-	_ = fkbtRoot.Delete(item.val) // ignore error, best effort there
-	return nil
-}
-
-func delListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
-	bkt := tx.Bucket(item.name)
-	if bkt == nil {
-		return nil
-	}
-
-	lst, err := decodeList(bkt.Get(item.key))
-	if err != nil || len(lst) == 0 {
-		return nil
-	}
-
-	// remove element from the list
-	for i := range lst {
-		if bytes.Equal(item.val, lst[i]) {
-			copy(lst[i:], lst[i+1:])
-			lst = lst[:len(lst)-1]
-			break
+	if isParent {
+		// remove record from the garbage bucket, because regular object deletion does nothing for virtual object
+		err := deleteFromGarbage(tx, object.AddressOf(obj))
+		if err != nil {
+			return fmt.Errorf("could not remove from garbage bucket: %w", err)
 		}
 	}
 
-	// if list empty, remove the key from <list> bucket
-	if len(lst) == 0 {
-		_ = bkt.Delete(item.key) // ignore error, best effort there
-
-		return nil
-	}
-
-	// if list is not empty, then update it
-	encodedLst, err := encodeList(lst)
-	if err != nil {
-		return nil // ignore error, best effort there
-	}
-
-	_ = bkt.Put(item.key, encodedLst) // ignore error, best effort there
 	return nil
 }
 
-func delUniqueIndexes(tx *bbolt.Tx, obj *objectSDK.Object, isParent bool) error {
-	addr := object.AddressOf(obj)
+func deleteFromGarbage(tx *badger.Txn, addr oid.Address) error {
+	return tx.Delete(garbageKey(addr.Container(), addr.Object()))
+}
 
-	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
-	addrKey := addressKey(addr, make([]byte, addressKeySize))
-	cnr := addr.Container()
-	bucketName := make([]byte, bucketKeySize)
+// parentLength returns amount of available children from parentid index.
+func parentLength(ctx context.Context, tx *badger.Txn, addr oid.Address) (int, error) {
+	var result int
+	prefix := parentKeyLongPrefix(addr.Container(), addr.Object())
+	const batchSize = 1000
+	for {
+		ids, err := selectByPrefixBatch(ctx, tx, prefix, batchSize)
+		if err != nil {
+			return 0, err
+		}
+		result += len(ids)
+		if len(ids) < batchSize {
+			break
+		}
+	}
+	return result, nil
+}
+
+func delParent(ctx context.Context, tx *badger.Txn, addr oid.Address) error {
+	prefix := parentKeyLongPrefix(addr.Container(), addr.Object())
+	return deleteByPrefix(ctx, tx, prefix)
+}
+
+func delUniqueIndexes(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, isParent bool) error {
+	addr := object.AddressOf(obj)
 
 	// add value to primary unique bucket
 	if !isParent {
+		var key []byte
 		switch obj.Type() {
 		case objectSDK.TypeRegular:
-			bucketName = primaryBucketName(cnr, bucketName)
+			key = primaryKey(addr.Container(), addr.Object())
 		case objectSDK.TypeTombstone:
-			bucketName = tombstoneBucketName(cnr, bucketName)
+			key = tombstoneKey(addr.Container(), addr.Object())
 		case objectSDK.TypeLock:
-			bucketName = bucketNameLockers(cnr, bucketName)
+			key = lockersKey(addr.Container(), addr.Object())
 		default:
 			return ErrUnknownObjectType
 		}
 
-		delUniqueIndexItem(tx, namedBucketItem{
-			name: bucketName,
-			key:  objKey,
-		})
+		if err := tx.Delete(key); err != nil {
+			return err
+		}
 	} else {
-		delUniqueIndexItem(tx, namedBucketItem{
-			name: parentBucketName(cnr, bucketName),
-			key:  objKey,
-		})
+		if err := delParent(ctx, tx, addr); err != nil {
+			return err
+		}
 	}
 
-	delUniqueIndexItem(tx, namedBucketItem{ // remove from storage id index
-		name: smallBucketName(cnr, bucketName),
-		key:  objKey,
-	})
-	delUniqueIndexItem(tx, namedBucketItem{ // remove from root index
-		name: rootBucketName(cnr, bucketName),
-		key:  objKey,
-	})
-	delUniqueIndexItem(tx, namedBucketItem{ // remove from ToMoveIt index
-		name: toMoveItBucketName,
-		key:  addrKey,
-	})
-
-	return nil
+	if err := tx.Delete(smallKey(addr.Container(), addr.Object())); err != nil {
+		return err
+	}
+	return tx.Delete(rootKey(addr.Container(), addr.Object()))
 }
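Review note: parentLength and delParent rely on selectByPrefixBatch and deleteByPrefix, which are defined elsewhere in this change. A sketch of the prefix-scan shape they imply, using only the stock Badger v4 iterator API (countByPrefix is a hypothetical name, not part of the patch):

	// countByPrefix counts the keys under a prefix, the way parentLength
	// counts children in the parent index.
	func countByPrefix(txn *badger.Txn, prefix []byte) int {
		opts := badger.DefaultIteratorOptions
		opts.PrefetchValues = false // keys alone are enough for counting
		opts.Prefix = prefix
		it := txn.NewIterator(opts)
		defer it.Close()

		n := 0
		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
			n++
		}
		return n
	}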
@@ -18,6 +18,7 @@ import (
 
 func TestDB_Delete(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	cnr := cidtest.ID()
 	parent := testutil.GenerateObjectWithCID(cnr)
@@ -32,15 +33,6 @@ func TestDB_Delete(t *testing.T) {
 	err := putBig(db, child)
 	require.NoError(t, err)
 
-	// fill ToMoveIt index
-	err = metaToMoveIt(db, object.AddressOf(child))
-	require.NoError(t, err)
-
-	// check if Movable list is not empty
-	l, err := metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, l, 1)
-
 	// try to remove parent, should be no-op, error-free
 	err = metaDelete(db, object.AddressOf(parent))
 	require.NoError(t, err)
@@ -60,11 +52,6 @@ func TestDB_Delete(t *testing.T) {
 	err = metaDelete(db, object.AddressOf(child))
 	require.NoError(t, err)
 
-	// check if there is no data in Movable index
-	l, err = metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, l, 0)
-
 	// check if they marked as already removed
 
 	ok, err := metaExists(db, object.AddressOf(child))
@@ -78,6 +65,7 @@ func TestDB_Delete(t *testing.T) {
 
 func TestDeleteAllChildren(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	cnr := cidtest.ID()
 
@@ -104,8 +92,8 @@ func TestDeleteAllChildren(t *testing.T) {
 	require.True(t, errors.As(err, &siErr))
 
 	// remove all children in single call
-	err = metaDelete(db, object.AddressOf(child1), object.AddressOf(child2))
-	require.NoError(t, err)
+	require.NoError(t, metaDelete(db, object.AddressOf(child1)))
+	require.NoError(t, metaDelete(db, object.AddressOf(child2)))
 
 	// parent should not be found now
 	ex, err := metaExists(db, object.AddressOf(parent))
@@ -115,6 +103,7 @@ func TestDeleteAllChildren(t *testing.T) {
 
 func TestGraveOnlyDelete(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	addr := oidtest.Address()
 
@@ -127,6 +116,7 @@ func TestGraveOnlyDelete(t *testing.T) {
 
 func TestExpiredObject(t *testing.T) {
 	db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+	defer func() { require.NoError(t, db.Close()) }()
 
 	checkExpiredObjects(t, db, func(exp, nonExp *objectSDK.Object) {
 		// removing expired object should be error-free
@@ -136,9 +126,84 @@ func TestExpiredObject(t *testing.T) {
 	})
 }
 
-func metaDelete(db *meta.DB, addrs ...oid.Address) error {
+func TestDelete(t *testing.T) {
+	db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+	defer func() { require.NoError(t, db.Close()) }()
+
+	cnr := cidtest.ID()
+	for i := 0; i < 10; i++ {
+		obj := testutil.GenerateObjectWithCID(cnr)
+
+		var prm meta.PutPrm
+		prm.SetObject(obj)
+		prm.SetStorageID([]byte("0/0"))
+		_, err := db.Put(context.Background(), prm)
+		require.NoError(t, err)
+
+		var inhumePrm meta.InhumePrm
+		inhumePrm.SetAddresses(object.AddressOf(obj))
+		_, err = db.Inhume(context.Background(), inhumePrm)
+		require.NoError(t, err)
+	}
+
+	var addrs []oid.Address
+	var iprm meta.GarbageIterationPrm
+	iprm.SetHandler(func(o meta.GarbageObject) error {
+		addrs = append(addrs, o.Address())
+		return nil
+	})
+	require.NoError(t, db.IterateOverGarbage(context.Background(), iprm))
+	require.Equal(t, 10, len(addrs))
 	var deletePrm meta.DeletePrm
-	deletePrm.SetAddresses(addrs...)
+	for _, addr := range addrs {
+		deletePrm.Address = addr
+		_, err := db.Delete(context.Background(), deletePrm)
+		require.NoError(t, err)
+	}
+
+	addrs = nil
+	iprm.SetHandler(func(o meta.GarbageObject) error {
+		addrs = append(addrs, o.Address())
+		return nil
+	})
+	require.NoError(t, db.IterateOverGarbage(context.Background(), iprm))
+	require.Equal(t, 0, len(addrs))
+}
+
+func TestDeleteDropsGCMarkIfObjectNotFound(t *testing.T) {
+	db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+	defer func() { require.NoError(t, db.Close()) }()
+
+	addr := oidtest.Address()
+
+	var prm meta.InhumePrm
+	prm.SetAddresses(addr)
+	prm.SetGCMark()
+	_, err := db.Inhume(context.Background(), prm)
+	require.NoError(t, err)
+
+	var garbageCount int
+	var itPrm meta.GarbageIterationPrm
+	itPrm.SetHandler(func(g meta.GarbageObject) error {
+		garbageCount++
+		return nil
+	})
+	require.NoError(t, db.IterateOverGarbage(context.Background(), itPrm))
+	require.Equal(t, 1, garbageCount)
+
+	var delPrm meta.DeletePrm
+	delPrm.Address = addr
+	_, err = db.Delete(context.Background(), delPrm)
+	require.NoError(t, err)
+
+	garbageCount = 0
+	require.NoError(t, db.IterateOverGarbage(context.Background(), itPrm))
+	require.Equal(t, 0, garbageCount)
+}
+
+func metaDelete(db *meta.DB, addr oid.Address) error {
+	var deletePrm meta.DeletePrm
+	deletePrm.Address = addr
 
 	_, err := db.Delete(context.Background(), deletePrm)
 	return err
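Review note: DeletePrm loses SetAddresses in favor of a single exported Address field, so batch deletion moves to the caller, exactly as TestDelete above exercises it:

	var prm meta.DeletePrm
	for _, addr := range addrs {
		prm.Address = addr // one address per Delete call now
		if _, err := db.Delete(ctx, prm); err != nil {
			return err
		}
	}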
@@ -1,19 +1,18 @@
 package meta
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"time"
 
-	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
-	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )
@@ -69,8 +68,8 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err
 
 	currEpoch := db.epochState.CurrentEpoch()
 
-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		res.exists, err = db.exists(tx, prm.addr, currEpoch)
+	err = db.database.View(func(tx *badger.Txn) error {
+		res.exists, err = exists(ctx, tx, prm.addr, currEpoch)
 
 		return err
 	})
@@ -78,9 +77,13 @@ func (db *DB) Exists(ctx context.Context, prm ExistsPrm) (res ExistsRes, err err
 	return res, metaerr.Wrap(err)
 }
 
-func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists bool, err error) {
+func exists(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (exists bool, err error) {
+	status, err := objectStatus(ctx, tx, addr, currEpoch)
+	if err != nil {
+		return false, err
+	}
 	// check graveyard and object expiration first
-	switch objectStatus(tx, addr, currEpoch) {
+	switch status {
 	case 1:
 		return false, logicerr.Wrap(new(apistatus.ObjectNotFound))
 	case 2:
@@ -89,28 +92,32 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists b
 		return false, ErrObjectIsExpired
 	}
 
-	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
-
-	cnr := addr.Container()
-	key := make([]byte, bucketKeySize)
-
-	// if graveyard is empty, then check if object exists in primary bucket
-	if inBucket(tx, primaryBucketName(cnr, key), objKey) {
+	v, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return false, err
+	}
+	if v != nil {
 		return true, nil
 	}
 
-	// if primary bucket is empty, then check if object exists in parent bucket
-	if inBucket(tx, parentBucketName(cnr, key), objKey) {
-		splitInfo, err := getSplitInfo(tx, cnr, objKey)
+	children, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1) // try to found any child
+	if err != nil {
+		return false, err
+	}
+	if len(children) > 0 {
+		splitInfo, err := getSplitInfo(tx, addr)
 		if err != nil {
 			return false, err
 		}
 
 		return false, logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
 	}
 
-	// if parent bucket is empty, then check if object exists in typed buckets
-	return firstIrregularObjectType(tx, cnr, objKey) != objectSDK.TypeRegular, nil
+	t, err := firstIrregularObjectType(tx, addr.Container(), addr.Object())
+	if err != nil {
+		return false, err
+	}
+
+	return t != objectSDK.TypeRegular, nil
 }
 
 // objectStatus returns:
@@ -118,86 +125,68 @@ func (db *DB) exists(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) (exists b
 // - 1 if object with GC mark;
 // - 2 if object is covered with tombstone;
 // - 3 if object is expired.
-func objectStatus(tx *bbolt.Tx, addr oid.Address, currEpoch uint64) uint8 {
+func objectStatus(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (uint8, error) {
 	// locked object could not be removed/marked with GC/expired
-	if objectLocked(tx, addr.Container(), addr.Object()) {
-		return 0
+	locked, err := objectLocked(ctx, tx, addr.Container(), addr.Object())
+	if err != nil {
+		return 0, err
+	}
+	if locked {
+		return 0, nil
 	}
 
-	// we check only if the object is expired in the current
-	// epoch since it is considered the only corner case: the
-	// GC is expected to collect all the objects that have
-	// expired previously for less than the one epoch duration
+	st, err := inGraveyardWithKey(tx, addr)
+	if err != nil {
+		return 0, err
+	}
+	if st > 0 {
+		return st, nil
+	}
 
-	expired := isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpoch, addr, currEpoch)
-	if !expired {
-		expired = isExpiredWithAttribute(tx, objectV2.SysAttributeExpEpochNeoFS, addr, currEpoch)
+	expired, err := isExpired(ctx, tx, addr, currEpoch)
+	if err != nil {
+		return 0, err
 	}
 
 	if expired {
-		return 3
+		return 3, nil
 	}
-
-	graveyardBkt := tx.Bucket(graveyardBucketName)
-	garbageBkt := tx.Bucket(garbageBucketName)
-	addrKey := addressKey(addr, make([]byte, addressKeySize))
-	return inGraveyardWithKey(addrKey, graveyardBkt, garbageBkt)
+	return 0, nil
 }
 
-func inGraveyardWithKey(addrKey []byte, graveyard, garbageBCK *bbolt.Bucket) uint8 {
-	if graveyard == nil {
-		// incorrect metabase state, does not make
-		// sense to check garbage bucket
-		return 0
+func inGraveyardWithKey(tx *badger.Txn, addr oid.Address) (uint8, error) {
+	v, err := valueOrNil(tx, graveyardKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return 0, err
+	}
+	if v != nil {
+		return 2, nil
+	}
+	v, err = valueOrNil(tx, garbageKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return 0, err
+	}
+	if v != nil {
+		return 1, nil
 	}
 
-	val := graveyard.Get(addrKey)
-	if val == nil {
-		if garbageBCK == nil {
-			// incorrect node state
-			return 0
-		}
-
-		val = garbageBCK.Get(addrKey)
-		if val != nil {
-			// object has been marked with GC
-			return 1
-		}
-
-		// neither in the graveyard
-		// nor was marked with GC mark
-		return 0
-	}
-
-	// object in the graveyard
-	return 2
-}
-
-// inBucket checks if key <key> is present in bucket <name>.
-func inBucket(tx *bbolt.Tx, name, key []byte) bool {
-	bkt := tx.Bucket(name)
-	if bkt == nil {
-		return false
-	}
-
-	// using `get` as `exists`: https://github.com/boltdb/bolt/issues/321
-	val := bkt.Get(key)
-
-	return len(val) != 0
+	return 0, nil
 }
 
 // getSplitInfo returns SplitInfo structure from root index. Returns error
 // if there is no `key` record in root index.
-func getSplitInfo(tx *bbolt.Tx, cnr cid.ID, key []byte) (*objectSDK.SplitInfo, error) {
-	bucketName := rootBucketName(cnr, make([]byte, bucketKeySize))
-	rawSplitInfo := getFromBucket(tx, bucketName, key)
-	if len(rawSplitInfo) == 0 {
+func getSplitInfo(tx *badger.Txn, addr oid.Address) (*objectSDK.SplitInfo, error) {
+	rawSplitInfo, err := valueOrNil(tx, rootKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if len(rawSplitInfo) == 0 || bytes.Equal(zeroValue, rawSplitInfo) {
		return nil, ErrLackSplitInfo
 	}
 
 	splitInfo := objectSDK.NewSplitInfo()
 
-	err := splitInfo.Unmarshal(rawSplitInfo)
+	err = splitInfo.Unmarshal(rawSplitInfo)
 	if err != nil {
 		return nil, fmt.Errorf("can't unmarshal split info from root index: %w", err)
 	}
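Review note: the rewritten exists/objectStatus path leans on a valueOrNil helper defined elsewhere in this change; its implied contract is "return the stored value, or nil with no error when the key is absent". A minimal sketch of that shape over the Badger v4 API, assuming this is roughly what the helper does:

	func valueOrNil(tx *badger.Txn, key []byte) ([]byte, error) {
		item, err := tx.Get(key)
		if err != nil {
			if errors.Is(err, badger.ErrKeyNotFound) {
				return nil, nil // absence is not an error for the callers here
			}
			return nil, err
		}
		// copy, since the item's value is only valid while the txn is open
		return item.ValueCopy(nil)
	}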
@@ -18,6 +18,7 @@ const currEpoch = 1000
 
 func TestDB_Exists(t *testing.T) {
 	db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+	defer func() { require.NoError(t, db.Close()) }()
 
 	t.Run("no object", func(t *testing.T) {
 		nonExist := testutil.GenerateObject()
@@ -3,16 +3,13 @@ package meta
 import (
 	"context"
 	"errors"
-	"fmt"
 	"strconv"
 	"time"
 
-	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
-	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )
@@ -44,46 +41,11 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A
 	}
 
 	result := make([]oid.Address, 0, len(addresses))
-	containerIDToObjectIDs := make(map[cid.ID][]oid.ID)
-	for _, addr := range addresses {
-		containerIDToObjectIDs[addr.Container()] = append(containerIDToObjectIDs[addr.Container()], addr.Object())
-	}
-
-	err := db.boltDB.View(func(tx *bbolt.Tx) error {
-		for containerID, objectIDs := range containerIDToObjectIDs {
-			select {
-			case <-ctx.Done():
-				return ErrInterruptIterator
-			default:
-			}
-
-			expiredNeoFS, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpochNeoFS, epoch, containerID, objectIDs)
-			if err != nil {
-				return err
-			}
-
-			expiredSys, err := selectExpiredObjectIDs(tx, objectV2.SysAttributeExpEpoch, epoch, containerID, objectIDs)
-			if err != nil {
-				return err
-			}
-
-			for _, o := range expiredNeoFS {
-				var a oid.Address
-				a.SetContainer(containerID)
-				a.SetObject(o)
-				result = append(result, a)
-			}
-
-			for _, o := range expiredSys {
-				var a oid.Address
-				a.SetContainer(containerID)
-				a.SetObject(o)
-				result = append(result, a)
-			}
-		}
-		return nil
+	err := db.database.View(func(tx *badger.Txn) error {
+		var e error
+		result, e = selectExpiredObjects(ctx, tx, epoch, addresses)
+		return e
 	})
 
 	if err != nil {
 		return nil, metaerr.Wrap(err)
 	}
@@ -91,76 +53,179 @@ func (db *DB) FilterExpired(ctx context.Context, epoch uint64, addresses []oid.A
 	return result, nil
 }
 
-func isExpiredWithAttribute(tx *bbolt.Tx, attr string, addr oid.Address, currEpoch uint64) bool {
-	// bucket with objects that have expiration attr
-	attrKey := make([]byte, bucketKeySize+len(attr))
-	expirationBucket := tx.Bucket(attributeBucketName(addr.Container(), attr, attrKey))
-	if expirationBucket != nil {
-		// bucket that contains objects that expire in the current epoch
-		prevEpochBkt := expirationBucket.Bucket([]byte(strconv.FormatUint(currEpoch-1, 10)))
-		if prevEpochBkt != nil {
-			rawOID := objectKey(addr.Object(), make([]byte, objectKeySize))
-			if prevEpochBkt.Get(rawOID) != nil {
-				return true
-			}
+func isExpired(ctx context.Context, tx *badger.Txn, addr oid.Address, currEpoch uint64) (bool, error) {
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
+	}
+	it := tx.NewIterator(opts)
+	defer it.Close()
+
+	// iteration does in ascending order by expiration epoch.
+	// gc does expired objects collect every epoch, so here should be not so much items.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return false, ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return false, err
+		}
+
+		if expEpoch >= currEpoch {
+			return false, nil // keys are ordered by epoch, so next items will be discarded anyway.
+		}
+
+		curAddr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return false, err
+		}
+		if curAddr == addr {
+			return true, nil
 		}
 	}
-
-	return false
+	return false, nil
 }
 
-func selectExpiredObjectIDs(tx *bbolt.Tx, attr string, epoch uint64, containerID cid.ID, objectIDs []oid.ID) ([]oid.ID, error) {
-	result := make([]oid.ID, 0)
-	notResolved := make(map[oid.ID]struct{})
-	for _, oid := range objectIDs {
-		notResolved[oid] = struct{}{}
+func selectExpiredObjects(ctx context.Context, tx *badger.Txn, epoch uint64, objects []oid.Address) ([]oid.Address, error) {
+	result := make([]oid.Address, 0)
+	objMap := make(map[oid.Address]struct{})
+	for _, obj := range objects {
+		objMap[obj] = struct{}{}
 	}
 
-	expiredBuffer := make([]oid.ID, 0)
-	objectKeyBuffer := make([]byte, objectKeySize)
-
-	expirationBucketKey := make([]byte, bucketKeySize+len(attr))
-	expirationBucket := tx.Bucket(attributeBucketName(containerID, attr, expirationBucketKey))
-	if expirationBucket == nil {
-		return result, nil // all not expired
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
 	}
+	it := tx.NewIterator(opts)
+	defer it.Close()
 
-	err := expirationBucket.ForEach(func(epochExpBucketKey, _ []byte) error {
-		bucketExpiresAfter, err := strconv.ParseUint(string(epochExpBucketKey), 10, 64)
+	// iteration does in ascending order by expiration epoch.
+	// gc does expired objects collect every epoch, so here should be not so much items.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
 		if err != nil {
-			return fmt.Errorf("could not parse expiration epoch: %w", err)
-		} else if bucketExpiresAfter >= epoch {
-			return nil
+			return nil, err
 		}
 
-		epochExpirationBucket := expirationBucket.Bucket(epochExpBucketKey)
-		if epochExpirationBucket == nil {
-			return nil
+		if expEpoch >= epoch {
			return result, nil // keys are ordered by epoch, so next items will be discarded anyway.
 		}
 
-		expiredBuffer = expiredBuffer[:0]
-		for oid := range notResolved {
-			key := objectKey(oid, objectKeyBuffer)
-			if epochExpirationBucket.Get(key) != nil {
-				expiredBuffer = append(expiredBuffer, oid)
-			}
+		addr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return nil, err
 		}
-
-		for _, oid := range expiredBuffer {
-			delete(notResolved, oid)
-			result = append(result, oid)
+		if _, ok := objMap[addr]; ok {
+			result = append(result, addr)
 		}
-
-		if len(notResolved) == 0 {
-			return errBreakBucketForEach
-		}
-
-		return nil
-	})
-
-	if err != nil && !errors.Is(err, errBreakBucketForEach) {
-		return nil, err
 	}
 
 	return result, nil
 }
+
+// IterateExpired iterates over all objects in DB which are out of date
+// relative to epoch. Locked objects are not included (do not confuse
+// with objects of type LOCK).
+//
+// If h returns ErrInterruptIterator, nil returns immediately.
+// Returns other errors of h directly.
+func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
+	}()
+	_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
+		trace.WithAttributes(
+			attribute.String("epoch", strconv.FormatUint(epoch, 10)),
+		))
+	defer span.End()
+
+	db.modeMtx.RLock()
+	defer db.modeMtx.RUnlock()
+
+	if db.mode.NoMetabase() {
+		return ErrDegradedMode
+	}
+
+	err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		return iterateExpired(ctx, tx, epoch, h)
+	}))
+	success = err == nil
+	return err
+}
+
+func iterateExpired(ctx context.Context, tx *badger.Txn, epoch uint64, h ExpiredObjectHandler) error {
+	prefix := []byte{expiredPrefix}
+	opts := badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       []byte{expiredPrefix},
+	}
+	it := tx.NewIterator(opts)
+	defer it.Close()
+
+	// iteration does in ascending order by expiration epoch.
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		expEpoch, err := expirationEpochFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return err
+		}
+
+		if expEpoch >= epoch {
+			return nil // keys are ordered by epoch, so next items will be discarded anyway.
+		}
+
+		addr, err := addressFromExpiredKey(it.Item().Key())
+		if err != nil {
+			return err
+		}
+
+		// Ignore locked objects.
+		//
+		// To slightly optimize performance we can check only REGULAR objects
+		// (only they can be locked), but it's more reliable.
+		isLocked, err := objectLocked(ctx, tx, addr.Container(), addr.Object())
+		if err != nil {
+			return err
+		}
+		if isLocked {
+			continue
+		}
+
+		objType, err := firstIrregularObjectType(tx, addr.Container(), addr.Object())
+		if err != nil {
+			return err
+		}
+
+		if err := h(&ExpiredObject{
+			typ:  objType,
+			addr: addr,
+		}); err != nil {
+			if errors.Is(err, ErrInterruptIterator) {
+				return nil
+			}
			return err
+		}
+	}
+	return nil
+}
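Review note: the early return on expEpoch >= epoch is only correct because Badger iterates keys in lexicographic order and the expired index puts the epoch right after the prefix, so key order equals numeric epoch order. The key helpers (expirationEpochFromExpiredKey, addressFromExpiredKey) are defined elsewhere in this change; a sketch of the layout they imply, with hypothetical names and the container/object encoding left abstract:

	// expiredKeySketch: {expiredPrefix}{epoch, 8 bytes big-endian}{container}{object}
	func expiredKeySketch(epoch uint64, cnr, obj []byte) []byte {
		k := make([]byte, 0, 1+8+len(cnr)+len(obj))
		k = append(k, expiredPrefix)
		k = binary.BigEndian.AppendUint64(k, epoch) // big-endian keeps numeric order
		k = append(k, cnr...)
		return append(k, obj...)
	}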
@@ -2,17 +2,57 @@ package meta_test
 
 import (
 	"context"
+	"strconv"
 	"testing"
 
+	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
+	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
 	cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
 	"github.com/stretchr/testify/require"
 )
 
+func TestExpiredIterationOrder(t *testing.T) {
+	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
+
+	containerID := cidtest.ID()
+
+	for i := 1; i <= 1000; i++ {
+		obj := testutil.GenerateObject()
+		obj.SetContainerID(containerID)
+		setExpiration(obj, uint64(i))
+		err := putBig(db, obj)
+		require.NoError(t, err)
+	}
+
+	var previous uint64
+	require.NoError(t, db.IterateExpired(context.Background(), 2000, func(eo *meta.ExpiredObject) error {
+		var prm meta.GetPrm
+		prm.SetAddress(eo.Address())
+		obj, err := db.Get(context.Background(), prm)
+		require.NoError(t, err)
+
+		var found bool
+		for _, attr := range obj.Header().Attributes() {
+			if attr.Key() == objectV2.SysAttributeExpEpoch {
+				found = true
+				epoch, err := strconv.ParseUint(attr.Value(), 10, 64)
+				require.NoError(t, err)
+				require.Equal(t, previous+1, epoch)
+				previous = epoch
+			}
+		}
+		require.True(t, found)
+		return nil
+	}))
+}
+
 func TestDB_SelectExpired(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	containerID1 := cidtest.ID()
 
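Review note: TestExpiredIterationOrder pins the new guarantee that expired objects are visited in ascending expiration-epoch order. The setExpiration helper lives elsewhere in the test suite; a sketch of what it plausibly does, assuming it sets the system expiration attribute the expired index is built from:

	func setExpirationSketch(obj *objectSDK.Object, epoch uint64) {
		var attr objectSDK.Attribute
		attr.SetKey(objectV2.SysAttributeExpEpoch)
		attr.SetValue(strconv.FormatUint(epoch, 10))
		obj.SetAttributes(append(obj.Attributes(), attr)...)
	}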
@@ -9,10 +9,9 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	apistatus "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/client/status"
-	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )
@@ -77,9 +76,8 @@ func (db *DB) Get(ctx context.Context, prm GetPrm) (res GetRes, err error) {
 
 	currEpoch := db.epochState.CurrentEpoch()
 
-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		key := make([]byte, addressKeySize)
-		res.hdr, err = db.get(tx, prm.addr, key, true, prm.raw, currEpoch)
+	err = db.database.View(func(tx *badger.Txn) error {
+		res.hdr, err = get(ctx, tx, prm.addr, true, prm.raw, currEpoch)
 
 		return err
 	})
@@ -87,9 +85,13 @@ func (db *DB) Get(ctx context.Context, prm GetPrm) (res GetRes, err error) {
 	return res, metaerr.Wrap(err)
 }
 
-func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
+func get(ctx context.Context, tx *badger.Txn, addr oid.Address, checkStatus, raw bool, currEpoch uint64) (*objectSDK.Object, error) {
 	if checkStatus {
-		switch objectStatus(tx, addr, currEpoch) {
+		st, err := objectStatus(ctx, tx, addr, currEpoch)
+		if err != nil {
+			return nil, err
+		}
+		switch st {
 		case 1:
 			return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
 		case 2:
@@ -98,73 +100,68 @@ func (db *DB) get(tx *bbolt.Tx, addr oid.Address, key []byte, checkStatus, raw b
 			return nil, ErrObjectIsExpired
 		}
 	}
 
-	key = objectKey(addr.Object(), key)
-	cnr := addr.Container()
 	obj := objectSDK.New()
-	bucketName := make([]byte, bucketKeySize)
 
 	// check in primary index
-	data := getFromBucket(tx, primaryBucketName(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err := valueOrNil(tx, primaryKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}
 
 	// if not found then check in tombstone index
-	data = getFromBucket(tx, tombstoneBucketName(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err = valueOrNil(tx, tombstoneKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}
 
 	// if not found then check in locker index
-	data = getFromBucket(tx, bucketNameLockers(cnr, bucketName), key)
-	if len(data) != 0 {
+	data, err = valueOrNil(tx, lockersKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
+	}
+	if data != nil {
 		return obj, obj.Unmarshal(data)
 	}
 
 	// if not found then check if object is a virtual
-	return getVirtualObject(tx, cnr, key, raw)
+	return getVirtualObject(ctx, tx, addr, raw)
 }
 
-func getFromBucket(tx *bbolt.Tx, name, key []byte) []byte {
-	bkt := tx.Bucket(name)
-	if bkt == nil {
-		return nil
-	}
-
-	return bkt.Get(key)
-}
-
-func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSDK.Object, error) {
+func getVirtualObject(ctx context.Context, tx *badger.Txn, addr oid.Address, raw bool) (*objectSDK.Object, error) {
 	if raw {
-		return nil, getSplitInfoError(tx, cnr, key)
+		return nil, getSplitInfoError(tx, addr)
 	}
 
-	bucketName := make([]byte, bucketKeySize)
-	parentBucket := tx.Bucket(parentBucketName(cnr, bucketName))
-	if parentBucket == nil {
-		return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
-	}
-
-	relativeLst, err := decodeList(parentBucket.Get(key))
+	binObjIDs, err := selectByPrefixBatch(ctx, tx, parentKeyLongPrefix(addr.Container(), addr.Object()), 1)
 	if err != nil {
 		return nil, err
 	}
 
-	if len(relativeLst) == 0 { // this should never happen though
|
if len(binObjIDs) == 0 { // this should never happen though
|
||||||
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||||
}
|
}
|
||||||
|
|
||||||
// pick last item, for now there is not difference which address to pick
|
phyObjAddr, err := addressOfTargetFromParentKey(binObjIDs[0])
|
||||||
// but later list might be sorted so first or last value can be more
|
|
||||||
// prioritized to choose
|
|
||||||
virtualOID := relativeLst[len(relativeLst)-1]
|
|
||||||
data := getFromBucket(tx, primaryBucketName(cnr, bucketName), virtualOID)
|
|
||||||
|
|
||||||
child := objectSDK.New()
|
|
||||||
|
|
||||||
err = child.Unmarshal(data)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := valueOrNil(tx, primaryKey(phyObjAddr.Container(), phyObjAddr.Object()))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if data == nil { // this should never happen though #2
|
||||||
|
return nil, logicerr.Wrap(new(apistatus.ObjectNotFound))
|
||||||
|
}
|
||||||
|
child := objectSDK.New()
|
||||||
|
if err := child.Unmarshal(data); err != nil {
|
||||||
return nil, fmt.Errorf("can't unmarshal child with parent: %w", err)
|
return nil, fmt.Errorf("can't unmarshal child with parent: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,8 +174,8 @@ func getVirtualObject(tx *bbolt.Tx, cnr cid.ID, key []byte, raw bool) (*objectSD
|
||||||
return par, nil
|
return par, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSplitInfoError(tx *bbolt.Tx, cnr cid.ID, key []byte) error {
|
func getSplitInfoError(tx *badger.Txn, addr oid.Address) error {
|
||||||
splitInfo, err := getSplitInfo(tx, cnr, key)
|
splitInfo, err := getSplitInfo(tx, addr)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
|
return logicerr.Wrap(objectSDK.NewSplitInfoError(splitInfo))
|
||||||
}
|
}
|
||||||
|
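The bbolt (bucket, key) pairs above become single flat Badger keys: helpers like primaryKey, tombstoneKey and lockersKey (defined elsewhere in this PR) stand in for the old per-container buckets. A minimal sketch of how such a key could be composed — the prefix value and ID sizes here are assumptions, not the PR's constants:

package main

import "fmt"

const (
	primaryPrefix byte = 0x01 // assumed value
	cidSize            = 32   // assumed container ID width
	objectKeySize      = 32   // assumed object ID width
)

// makeKey builds prefix || containerID || objectID, so all objects of one
// type and container share a common byte prefix and sort together.
func makeKey(prefix byte, cnr, obj [32]byte) []byte {
	key := make([]byte, 0, 1+cidSize+objectKeySize)
	key = append(key, prefix)
	key = append(key, cnr[:]...)
	key = append(key, obj[:]...)
	return key
}

func main() {
	var cnr, obj [32]byte
	cnr[0], obj[0] = 0xAA, 0xBB
	fmt.Printf("% x...\n", makeKey(primaryPrefix, cnr, obj)[:8])
}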
@@ -7,7 +7,6 @@ import (
 	"os"
 	"runtime"
 	"testing"
-	"time"

 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
@@ -22,6 +21,7 @@ import (

 func TestDB_Get(t *testing.T) {
 	db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+	defer func() { require.NoError(t, db.Close()) }()

 	raw := testutil.GenerateObject()

@@ -175,11 +175,9 @@ func BenchmarkGet(b *testing.B) {
 }

 func benchmarkGet(b *testing.B, numOfObj int) {
-	prepareDb := func(batchSize int) (*meta.DB, []oid.Address) {
-		db := newDB(b,
-			meta.WithMaxBatchSize(batchSize),
-			meta.WithMaxBatchDelay(10*time.Millisecond),
-		)
+	prepareDb := func(_ int) (*meta.DB, []oid.Address) {
+		db := newDB(b)
+		defer func() { require.NoError(b, db.Close()) }()
 		addrs := make([]oid.Address, 0, numOfObj)

 		for i := 0; i < numOfObj; i++ {
@@ -9,8 +9,9 @@ import (

 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
+	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 )

 // GarbageObject represents descriptor of the
@@ -80,8 +81,8 @@ func (db *DB) IterateOverGarbage(ctx context.Context, p GarbageIterationPrm) err
 		return ErrDegradedMode
 	}

-	err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		return db.iterateDeletedObj(tx, gcHandler{p.h}, p.offset)
+	err := metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		return db.iterateDeletedObj(ctx, tx, gcHandler{p.h}, p.offset)
 	}))
 	success = err == nil
 	return err
@@ -160,8 +161,8 @@ func (db *DB) IterateOverGraveyard(ctx context.Context, p GraveyardIterationPrm)
 		return ErrDegradedMode
 	}

-	return metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		return db.iterateDeletedObj(tx, graveyardHandler{p.h}, p.offset)
+	return metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		return db.iterateDeletedObj(ctx, tx, graveyardHandler{p.h}, p.offset)
 	}))
 }

@@ -195,40 +196,53 @@ func (g graveyardHandler) handleKV(k, v []byte) error {
 	return g.h(o)
 }

-func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address) error {
-	var bkt *bbolt.Bucket
+func (db *DB) iterateDeletedObj(ctx context.Context, tx *badger.Txn, h kvHandler, offset *oid.Address) error {
+	var prefix []byte
 	switch t := h.(type) {
 	case graveyardHandler:
-		bkt = tx.Bucket(graveyardBucketName)
+		prefix = []byte{graveyardPrefix}
 	case gcHandler:
-		bkt = tx.Bucket(garbageBucketName)
+		prefix = []byte{garbagePrefix}
 	default:
 		panic(fmt.Sprintf("metabase: unknown iteration object hadler: %T", t))
 	}
-
-	c := bkt.Cursor()
-	var k, v []byte
-
-	if offset == nil {
-		k, v = c.First()
-	} else {
-		rawAddr := addressKey(*offset, make([]byte, addressKeySize))
-
-		k, v = c.Seek(rawAddr)
-		if bytes.Equal(k, rawAddr) {
-			// offset was found, move
-			// cursor to the next element
-			k, v = c.Next()
-		}
-	}
+	var seekKey []byte
+	if offset != nil {
+		cidBytes := make([]byte, cidSize)
+		offset.Container().Encode(cidBytes)
+		oidBytes := make([]byte, objectKeySize)
+		offset.Object().Encode(oidBytes)
+		seekKey = append(prefix, cidBytes...)
+		seekKey = append(seekKey, oidBytes...)
+	}

-	for ; k != nil; k, v = c.Next() {
-		err := h.handleKV(k, v)
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         prefix,
+		PrefetchValues: true,
+	})
+	defer it.Close()
+
+	for it.Seek(seekKey); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		if bytes.Equal(it.Item().Key(), seekKey) {
+			continue
+		}
+
+		key := it.Item().KeyCopy(nil)
+		value, err := it.Item().ValueCopy(nil)
 		if err != nil {
+			return err
+		}
+		if err = h.handleKV(key, value); err != nil {
 			if errors.Is(err, ErrInterruptIterator) {
 				return nil
 			}

 			return err
 		}
 	}
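The rewritten iterateDeletedObj shows the recurring Badger pattern for resuming a scan after an offset: Seek to the encoded offset key and skip it when it matches exactly, since Seek is inclusive. A self-contained sketch of the same pattern against an in-memory DB (toy one-byte keys):

package main

import (
	"bytes"
	"fmt"

	"github.com/dgraph-io/badger/v4"
)

func main() {
	db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil))
	if err != nil {
		panic(err)
	}
	defer db.Close()

	prefix := []byte{0x01}
	// Populate keys 0x01'a' .. 0x01'd' under the common prefix.
	_ = db.Update(func(txn *badger.Txn) error {
		for _, c := range []byte{'a', 'b', 'c', 'd'} {
			if err := txn.Set([]byte{0x01, c}, nil); err != nil {
				return err
			}
		}
		return nil
	})

	offset := []byte{0x01, 'b'} // resume strictly after this key
	_ = db.View(func(txn *badger.Txn) error {
		it := txn.NewIterator(badger.IteratorOptions{Prefix: prefix})
		defer it.Close()
		for it.Seek(offset); it.ValidForPrefix(prefix); it.Next() {
			if bytes.Equal(it.Item().Key(), offset) {
				continue // Seek lands on the offset itself; skip it
			}
			fmt.Printf("%v\n", it.Item().Key()) // prints keys after 'b'
		}
		return nil
	})
}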
@@ -237,7 +251,7 @@ func (db *DB) iterateDeletedObj(tx *bbolt.Tx, h kvHandler, offset *oid.Address)
 }

 func garbageFromKV(k []byte) (res GarbageObject, err error) {
-	err = decodeAddressFromKey(&res.addr, k)
+	res.addr, err = addressFromGarbageKey(k)
 	if err != nil {
 		err = fmt.Errorf("could not parse address: %w", err)
 	}
@@ -246,15 +260,44 @@ func garbageFromKV(k []byte) (res GarbageObject, err error) {
 }

 func graveFromKV(k, v []byte) (res TombstonedObject, err error) {
-	if err = decodeAddressFromKey(&res.addr, k); err != nil {
+	res.addr, err = addressFromGraveyardKey(k)
+	if err != nil {
 		err = fmt.Errorf("decode tombstone target from key: %w", err)
-	} else if err = decodeAddressFromKey(&res.tomb, v); err != nil {
-		err = fmt.Errorf("decode tombstone address from value: %w", err)
+		return
+	}
+	res.tomb, err = decodeAddressFromGrave(v)
+	if err != nil {
+		err = fmt.Errorf("decode tombstone address from value: %w", err)
+		return
 	}

 	return
 }

+func encodeAddressToGrave(addr oid.Address) []byte {
+	value := make([]byte, cidSize+objectKeySize)
+	addr.Container().Encode(value)
+	addr.Object().Encode(value[cidSize:])
+	return value
+}
+
+func decodeAddressFromGrave(v []byte) (oid.Address, error) {
+	if len(v) != cidSize+objectKeySize {
+		return oid.Address{}, errInvalidValueLenght
+	}
+	var cont cid.ID
+	if err := cont.Decode(v[:cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(v[cidSize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
 // DropGraves deletes tombstoned objects from the
 // graveyard bucket.
 //
@@ -280,16 +323,9 @@ func (db *DB) DropGraves(ctx context.Context, tss []TombstonedObject) error {
 		return ErrReadOnlyMode
 	}

-	buf := make([]byte, addressKeySize)
-
-	return db.boltDB.Update(func(tx *bbolt.Tx) error {
-		bkt := tx.Bucket(graveyardBucketName)
-		if bkt == nil {
-			return nil
-		}
-
+	return db.database.Update(func(tx *badger.Txn) error {
 		for _, ts := range tss {
-			err := bkt.Delete(addressKey(ts.Address(), buf))
+			err := tx.Delete(graveyardKey(ts.Address().Container(), ts.Address().Object()))
 			if err != nil {
 				return err
 			}
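encodeAddressToGrave and decodeAddressFromGrave pack the tombstone address into a fixed-width container||object value. A round-trip sketch of that layout, assuming 32-byte IDs and using plain copies in place of the SDK's Encode/Decode (the error name mirrors the PR's identifier, typo included):

package main

import (
	"bytes"
	"errors"
	"fmt"
)

const (
	cidSize       = 32 // assumed
	objectKeySize = 32 // assumed
)

var errInvalidValueLenght = errors.New("invalid value length")

// encode concatenates the two fixed-width IDs into one value.
func encode(cnr, obj [32]byte) []byte {
	v := make([]byte, cidSize+objectKeySize)
	copy(v, cnr[:])
	copy(v[cidSize:], obj[:])
	return v
}

// decode rejects any value whose length is not exactly cidSize+objectKeySize.
func decode(v []byte) (cnr, obj [32]byte, err error) {
	if len(v) != cidSize+objectKeySize {
		return cnr, obj, errInvalidValueLenght
	}
	copy(cnr[:], v[:cidSize])
	copy(obj[:], v[cidSize:])
	return cnr, obj, nil
}

func main() {
	var c, o [32]byte
	c[0], o[0] = 1, 2
	v := encode(c, o)
	c2, o2, err := decode(v)
	fmt.Println(err == nil && bytes.Equal(c[:], c2[:]) && bytes.Equal(o[:], o2[:])) // true
}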
@@ -14,6 +14,7 @@ import (

 func TestDB_IterateDeletedObjects_EmptyDB(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	var counter int
 	var iterGravePRM meta.GraveyardIterationPrm
@@ -40,6 +41,7 @@ func TestDB_IterateDeletedObjects_EmptyDB(t *testing.T) {

 func TestDB_Iterate_OffsetNotFound(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	obj1 := testutil.GenerateObject()
 	obj2 := testutil.GenerateObject()
@@ -110,6 +112,7 @@ func TestDB_Iterate_OffsetNotFound(t *testing.T) {

 func TestDB_IterateDeletedObjects(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	// generate and put 4 objects
 	obj1 := testutil.GenerateObject()
@@ -196,6 +199,7 @@ func TestDB_IterateDeletedObjects(t *testing.T) {

 func TestDB_IterateOverGraveyard_Offset(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	// generate and put 4 objects
 	obj1 := testutil.GenerateObject()
@@ -294,6 +298,7 @@ func TestDB_IterateOverGraveyard_Offset(t *testing.T) {

 func TestDB_IterateOverGarbage_Offset(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	// generate and put 4 objects
 	obj1 := testutil.GenerateObject()
@@ -385,6 +390,7 @@ func TestDB_IterateOverGarbage_Offset(t *testing.T) {

 func TestDB_DropGraves(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	// generate and put 2 objects
 	obj1 := testutil.GenerateObject()
@@ -1,65 +0,0 @@
-package meta
-
-import (
-	"crypto/rand"
-	"math"
-	mrand "math/rand"
-	"testing"
-	"time"
-
-	"github.com/nspcc-dev/neo-go/pkg/io"
-	"github.com/stretchr/testify/require"
-)
-
-func Test_getVarUint(t *testing.T) {
-	data := make([]byte, 10)
-	for _, val := range []uint64{0, 0xfc, 0xfd, 0xfffe, 0xffff, 0xfffffffe, 0xffffffff, math.MaxUint64} {
-		expSize := io.PutVarUint(data, val)
-		actual, actSize, err := getVarUint(data)
-		require.NoError(t, err)
-		require.Equal(t, val, actual)
-		require.Equal(t, expSize, actSize, "value: %x", val)
-
-		_, _, err = getVarUint(data[:expSize-1])
-		require.Error(t, err)
-	}
-}
-
-func Test_decodeList(t *testing.T) {
-	t.Run("empty", func(t *testing.T) {
-		lst, err := decodeList(nil)
-		require.NoError(t, err)
-		require.True(t, len(lst) == 0)
-	})
-	t.Run("empty, 0 len", func(t *testing.T) {
-		lst, err := decodeList([]byte{0})
-		require.NoError(t, err)
-		require.True(t, len(lst) == 0)
-	})
-	t.Run("bad len", func(t *testing.T) {
-		_, err := decodeList([]byte{0xfe})
-		require.Error(t, err)
-	})
-	t.Run("random", func(t *testing.T) {
-		r := mrand.New(mrand.NewSource(time.Now().Unix()))
-		expected := make([][]byte, 20)
-		for i := range expected {
-			expected[i] = make([]byte, r.Uint32()%10)
-			rand.Read(expected[i])
-		}
-
-		data, err := encodeList(expected)
-		require.NoError(t, err)
-
-		actual, err := decodeList(data)
-		require.NoError(t, err)
-		require.Equal(t, expected, actual)
-
-		t.Run("unexpected EOF", func(t *testing.T) {
-			for i := 1; i < len(data)-1; i++ {
-				_, err := decodeList(data[:i])
-				require.Error(t, err)
-			}
-		})
-	})
-}
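The deleted file tested the bbolt metabase's varint list codec (encodeList/decodeList), which packed all children of a parent into one bucket value. With flat keys, each parent→child relation can live under its own key and be recovered by a prefix scan, so the codec becomes unnecessary. A sketch of that replacement, with an invented key shape:

package main

import (
	"fmt"

	"github.com/dgraph-io/badger/v4"
)

// One key per (parent, child) pair instead of one value holding an encoded
// list; the child set is recovered with a prefix scan over the parent key.
func main() {
	db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil))
	if err != nil {
		panic(err)
	}
	defer db.Close()

	parent := []byte("P:parent1:") // invented key shape for illustration
	_ = db.Update(func(txn *badger.Txn) error {
		for _, child := range []string{"childA", "childB"} {
			key := append(append([]byte{}, parent...), child...)
			if err := txn.Set(key, nil); err != nil {
				return err
			}
		}
		return nil
	})

	_ = db.View(func(txn *badger.Txn) error {
		it := txn.NewIterator(badger.IteratorOptions{Prefix: parent})
		defer it.Close()
		for it.Seek(parent); it.ValidForPrefix(parent); it.Next() {
			fmt.Printf("%s\n", it.Item().Key())
		}
		return nil
	})
}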
@@ -5,8 +5,10 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"slices"
 	"time"

+	storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
@@ -14,7 +16,7 @@ import (
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 )

 // InhumePrm encapsulates parameters for Inhume operation.
@@ -30,21 +32,34 @@ type InhumePrm struct {

 // DeletionInfo contains details on deleted object.
 type DeletionInfo struct {
 	Size uint64
 	CID  cid.ID
+	IsUser bool
 }

 // InhumeRes encapsulates results of Inhume operation.
 type InhumeRes struct {
 	deletedLockObj []oid.Address
-	availableImhumed uint64
-	deletionDetails []DeletionInfo
+	logicInhumed    uint64
+	userInhumed     uint64
+	inhumedByCnrID  map[cid.ID]ObjectCounters
+	deletionDetails []DeletionInfo
 }

-// AvailableInhumed return number of available object
+// LogicInhumed return number of logic object
 // that have been inhumed.
-func (i InhumeRes) AvailableInhumed() uint64 {
-	return i.availableImhumed
+func (i InhumeRes) LogicInhumed() uint64 {
+	return i.logicInhumed
+}
+
+func (i InhumeRes) UserInhumed() uint64 {
+	return i.userInhumed
+}
+
+// InhumedByCnrID return number of object
+// that have been inhumed by container ID.
+func (i InhumeRes) InhumedByCnrID() map[cid.ID]ObjectCounters {
+	return i.inhumedByCnrID
 }

 // DeletedLockObjects returns deleted object of LOCK
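InhumeRes now aggregates logic and user counts per container in addition to the totals. A hedged sketch of how a caller might consume InhumedByCnrID; the types are reduced shapes of the diff's, and the map is keyed by string instead of cid.ID to stay self-contained:

package main

import "fmt"

// Reduced shapes mirrored from the diff; Phy is included because
// applyInhumeResToCounters negates it alongside Logic and User.
type ObjectCounters struct {
	Logic, Phy, User uint64
}

type InhumeRes struct {
	inhumedByCnrID map[string]ObjectCounters // keyed by cid.ID in the PR
}

func (i InhumeRes) InhumedByCnrID() map[string]ObjectCounters { return i.inhumedByCnrID }

func main() {
	res := InhumeRes{inhumedByCnrID: map[string]ObjectCounters{
		"cnr-1": {Logic: 3, User: 2},
	}}
	// A caller (shard metrics, for instance) can decrement per-container
	// gauges from the aggregated result in one pass.
	for cnr, c := range res.InhumedByCnrID() {
		fmt.Printf("container %s: -%d logic, -%d user\n", cnr, c.Logic, c.User)
	}
}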
@@ -68,11 +83,32 @@ func (i InhumeRes) GetDeletionInfoByIndex(target int) DeletionInfo {

 // StoreDeletionInfo stores size of deleted object and associated container ID
 // in corresponding arrays.
-func (i *InhumeRes) storeDeletionInfo(containerID cid.ID, deletedSize uint64) {
+func (i *InhumeRes) storeDeletionInfo(containerID cid.ID, deletedSize uint64, isUser bool) {
 	i.deletionDetails = append(i.deletionDetails, DeletionInfo{
 		Size: deletedSize,
 		CID:  containerID,
+		IsUser: isUser,
 	})
+	i.logicInhumed++
+	if isUser {
+		i.userInhumed++
+	}
+
+	if v, ok := i.inhumedByCnrID[containerID]; ok {
+		v.Logic++
+		if isUser {
+			v.User++
+		}
+		i.inhumedByCnrID[containerID] = v
+	} else {
+		v = ObjectCounters{
+			Logic: 1,
+		}
+		if isUser {
+			v.User = 1
+		}
+		i.inhumedByCnrID[containerID] = v
+	}
 }

 // SetAddresses sets a list of object addresses that should be inhumed.
@@ -122,7 +158,7 @@ var ErrLockObjectRemoval = logicerr.New("lock object removal")
 //
 // NOTE: Marks any object with GC mark (despite any prohibitions on operations
 // with that object) if WithForceGCMark option has been provided.
-func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (res InhumeRes, err error) {
+func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {
 	var (
 		startedAt = time.Now()
 		success   = false
@@ -142,50 +178,70 @@ func (db *DB) Inhume(ctx context.Context, prm InhumePrm) (res InhumeRes, err err
 		return InhumeRes{}, ErrReadOnlyMode
 	}

+	res := InhumeRes{
+		inhumedByCnrID: make(map[cid.ID]ObjectCounters),
+	}
+	bucketIDs, release := db.acquireBucketIDs(prm)
+	defer release()
 	currEpoch := db.epochState.CurrentEpoch()
-	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
-		return db.inhumeTx(tx, currEpoch, prm, &res)
+	err := db.database.Update(func(tx *badger.Txn) error {
+		return db.inhumeTx(ctx, tx, currEpoch, prm, bucketIDs, &res)
 	})
 	success = err == nil
+	if success {
+		for _, addr := range prm.target {
+			storagelog.Write(db.log,
+				storagelog.AddressField(addr),
+				storagelog.OpField("metabase INHUME"))
+		}
+	}
 	return res, metaerr.Wrap(err)
 }

-func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes) error {
-	garbageBKT := tx.Bucket(garbageBucketName)
-	graveyardBKT := tx.Bucket(graveyardBucketName)
+func (db *DB) acquireBucketIDs(prm InhumePrm) (map[cid.ID]uint16, func()) {
+	unique := make(map[cid.ID]struct{})
+	for _, addr := range prm.target {
+		unique[addr.Container()] = struct{}{}
+	}
+	containers := make([]cid.ID, 0, len(unique))
+	for contID := range unique {
+		containers = append(containers, contID)
+	}
+	slices.SortFunc(containers, func(lhs, rhs cid.ID) int {
+		return bytes.Compare(lhs[:], rhs[:])
+	})
+	result := make(map[cid.ID]uint16, len(unique))
+	releases := make([]func(), len(unique))

-	bkt, value, err := db.getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT, &prm)
+	for i, contID := range containers {
+		result[contID], releases[i] = db.bucketIDs.BucketID(contID)
+	}
+	return result, func() {
+		for i := range releases {
+			releases[len(releases)-i-1]()
+		}
+	}
+}
+
+func (db *DB) inhumeTx(ctx context.Context, tx *badger.Txn, epoch uint64, prm InhumePrm, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
+	keyer, value, err := getInhumeTargetBucketAndValue(tx, prm)
 	if err != nil {
 		return err
 	}

-	buf := make([]byte, addressKeySize)
 	for i := range prm.target {
 		id := prm.target[i].Object()
 		cnr := prm.target[i].Container()

-		// prevent locked objects to be inhumed
-		if !prm.forceRemoval && objectLocked(tx, cnr, id) {
-			return new(apistatus.ObjectLocked)
-		}
-
 		var lockWasChecked bool

-		// prevent lock objects to be inhumed
-		// if `Inhume` was called not with the
-		// `WithForceGCMark` option
 		if !prm.forceRemoval {
-			if isLockObject(tx, cnr, id) {
-				return ErrLockObjectRemoval
+			if err := checkNotLockerOrLocked(ctx, tx, cnr, id); err != nil {
+				return err
 			}

 			lockWasChecked = true
 		}

-		obj, err := db.get(tx, prm.target[i], buf, false, true, epoch)
-		targetKey := addressKey(prm.target[i], buf)
+		obj, err := get(ctx, tx, prm.target[i], false, true, epoch)
 		if err == nil {
-			err = db.updateDeleteInfo(tx, garbageBKT, graveyardBKT, targetKey, cnr, obj, res)
+			err = db.updateDeleteInfo(tx, prm.target[i], obj, bucketIDs, res)
 			if err != nil {
 				return err
 			}
 		}
@@ -193,18 +249,18 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes

 		if prm.tomb != nil {
 			var isTomb bool
-			isTomb, err = db.markAsGC(graveyardBKT, garbageBKT, targetKey)
+			isTomb, err = markAsGC(tx, prm.target[i])
 			if err != nil {
 				return err
 			}

 			if isTomb {
 				continue
 			}
 		}

 		// consider checking if target is already in graveyard?
-		err = bkt.Put(targetKey, value)
+		key := keyer(prm.target[i])
+		err = tx.Set(key, value)
 		if err != nil {
 			return err
 		}
@@ -217,14 +273,51 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes
 			// the LOCK type
 			continue
 		}

-		if isLockObject(tx, cnr, id) {
+		isLock, err := isLockObject(tx, cnr, id)
+		if err != nil {
+			return err
+		}
+		if isLock {
 			res.deletedLockObj = append(res.deletedLockObj, prm.target[i])
 		}
 	}

-	return db.updateCounter(tx, logical, res.availableImhumed, false)
+	return db.applyInhumeResToCounters(tx, bucketIDs, res)
+}
+
+func checkNotLockerOrLocked(ctx context.Context, tx *badger.Txn, cnr cid.ID, id oid.ID) error {
+	// prevent locked objects to be inhumed
+	locked, err := objectLocked(ctx, tx, cnr, id)
+	if err != nil {
+		return err
+	}
+	if locked {
+		return new(apistatus.ObjectLocked)
+	}
+	// prevent lock objects to be inhumed
+	// if `Inhume` was called not with the
+	// `WithForceGCMark` option
+	isLock, err := isLockObject(tx, cnr, id)
+	if err != nil {
+		return err
+	}
+	if isLock {
+		return ErrLockObjectRemoval
+	}
+	return nil
+}
+
+func (db *DB) applyInhumeResToCounters(tx *badger.Txn, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
+	counters := make(map[cid.ID]objectCounterValue, len(res.inhumedByCnrID))
+	for contID, inhumed := range res.inhumedByCnrID {
+		counters[contID] = objectCounterValue{
+			Logic: -1 * int64(inhumed.Logic),
+			Phy:   -1 * int64(inhumed.Phy),
+			User:  -1 * int64(inhumed.User),
+		}
+	}
+	return updateContainerCounter(tx, counters, bucketIDs)
 }

 // getInhumeTargetBucketAndValue return target bucket to store inhume result and value that will be put in the bucket.
@@ -237,31 +330,33 @@ func (db *DB) inhumeTx(tx *bbolt.Tx, epoch uint64, prm InhumePrm, res *InhumeRes
 // 1. tombstone address if Inhume was called with
 // a Tombstone
 // 2. zeroValue if Inhume was called with a GC mark
-func (db *DB) getInhumeTargetBucketAndValue(garbageBKT, graveyardBKT *bbolt.Bucket, prm *InhumePrm) (targetBucket *bbolt.Bucket, value []byte, err error) {
+func getInhumeTargetBucketAndValue(tx *badger.Txn, prm InhumePrm) (key func(addr oid.Address) []byte, value []byte, err error) {
 	if prm.tomb != nil {
-		targetBucket = graveyardBKT
-		tombKey := addressKey(*prm.tomb, make([]byte, addressKeySize))
-
 		// it is forbidden to have a tomb-on-tomb in FrostFS,
 		// so graveyard keys must not be addresses of tombstones
-		data := targetBucket.Get(tombKey)
-		if data != nil {
-			err := targetBucket.Delete(tombKey)
+		tombKey := graveyardKey(prm.tomb.Container(), prm.tomb.Object())
+		v, err := valueOrNil(tx, tombKey)
+		if err != nil {
+			return nil, nil, err
+		}
+		if v != nil {
+			err := tx.Delete(tombKey)
 			if err != nil {
 				return nil, nil, fmt.Errorf("could not remove grave with tombstone key: %w", err)
 			}
 		}

-		value = tombKey
-	} else {
-		targetBucket = garbageBKT
-		value = zeroValue
+		return func(addr oid.Address) []byte {
+			return graveyardKey(addr.Container(), addr.Object())
+		}, encodeAddressToGrave(*prm.tomb), nil
 	}
-	return targetBucket, value, nil
+	return func(addr oid.Address) []byte {
+		return garbageKey(addr.Container(), addr.Object())
+	}, zeroValue, nil
 }

-func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool, error) {
-	targetIsTomb, err := isTomb(graveyardBKT, key)
+func markAsGC(tx *badger.Txn, addr oid.Address) (bool, error) {
+	targetIsTomb, err := isTomb(tx, addr)
 	if err != nil {
 		return false, err
 	}
@@ -273,20 +368,27 @@ func (db *DB) markAsGC(graveyardBKT, garbageBKT *bbolt.Bucket, key []byte) (bool

 	// if tombstone appears object must be
 	// additionally marked with GC
-	return false, garbageBKT.Put(key, zeroValue)
+	key := garbageKey(addr.Container(), addr.Object())
+	return false, tx.Set(key, zeroValue)
 }

-func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Bucket, targetKey []byte, cnr cid.ID, obj *objectSDK.Object, res *InhumeRes) error {
-	containerID, _ := obj.ContainerID()
-	if inGraveyardWithKey(targetKey, graveyardBKT, garbageBKT) == 0 {
-		res.availableImhumed++
-		res.storeDeletionInfo(containerID, obj.PayloadSize())
+func (db *DB) updateDeleteInfo(tx *badger.Txn, addr oid.Address, obj *objectSDK.Object, bucketIDs map[cid.ID]uint16, res *InhumeRes) error {
+	st, err := inGraveyardWithKey(tx, addr)
+	if err != nil {
+		return err
+	}
+	if st == 0 {
+		res.storeDeletionInfo(addr.Container(), obj.PayloadSize(), IsUserObject(obj))
 	}

 	// if object is stored, and it is regular object then update bucket
 	// with container size estimations
 	if obj.Type() == objectSDK.TypeRegular {
-		err := changeContainerSize(tx, cnr, obj.PayloadSize(), false)
+		bucketID, found := bucketIDs[addr.Container()]
+		if !found {
+			panic("bucketID not found")
+		}
+		err := changeContainerSize(tx, addr.Container(), -1*int64(obj.PayloadSize()), bucketID)
 		if err != nil {
 			return err
 		}
@@ -294,25 +396,39 @@ func (db *DB) updateDeleteInfo(tx *bbolt.Tx, garbageBKT, graveyardBKT *bbolt.Buc
 	return nil
 }

-func isTomb(graveyardBucket *bbolt.Bucket, key []byte) (bool, error) {
+func isTomb(tx *badger.Txn, addr oid.Address) (bool, error) {
 	targetIsTomb := false
+	expectedValue := make([]byte, cidSize+objectKeySize)
+	addr.Container().Encode(expectedValue)
+	addr.Object().Encode(expectedValue[cidSize:])
+
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         []byte{graveyardPrefix},
+		PrefetchValues: true,
+	})
+	defer it.Close()

 	// iterate over graveyard and check if target address
 	// is the address of tombstone in graveyard.
-	err := graveyardBucket.ForEach(func(k, v []byte) error {
-		// check if graveyard has record with key corresponding
-		// to tombstone address (at least one)
-		targetIsTomb = bytes.Equal(v, key)
-
-		if targetIsTomb {
-			// break bucket iterator
-			return errBreakBucketForEach
-		}
-
-		return nil
-	})
-	if err != nil && !errors.Is(err, errBreakBucketForEach) {
-		return false, err
+	// check if graveyard has record with key corresponding
+	// to tombstone address (at least one)
+	for it.Seek(nil); it.ValidForPrefix([]byte{graveyardPrefix}); it.Next() {
+		err := it.Item().Value(func(val []byte) error {
+			targetIsTomb = bytes.Equal(expectedValue, val)
+			if targetIsTomb {
+				// break bucket iterator
+				return errBreakBucketForEach
+			}
+			return nil
+		})
+		if err != nil {
+			if errors.Is(err, errBreakBucketForEach) {
+				return targetIsTomb, nil
+			}
+			return false, err
+		}
 	}
 	return targetIsTomb, nil
 }
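isTomb keeps the old errBreakBucketForEach sentinel to stop scanning early: the Value callback returns the sentinel, and the loop translates it into a successful result rather than an error. A compact sketch of that sentinel-error pattern:

package main

import (
	"errors"
	"fmt"
)

var errStop = errors.New("stop iteration") // sentinel, like errBreakBucketForEach

func scan(items []int, want int) (found bool, err error) {
	walk := func(v int) error {
		if v == want {
			found = true
			return errStop // abort the walk without signalling a real failure
		}
		return nil
	}
	for _, v := range items {
		if err := walk(v); err != nil {
			if errors.Is(err, errStop) {
				return found, nil // sentinel means "done", not "failed"
			}
			return false, err
		}
	}
	return found, nil
}

func main() {
	ok, _ := scan([]int{1, 2, 3}, 2)
	fmt.Println(ok) // true
}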
@@ -16,6 +16,7 @@ import (

 func TestDB_Inhume(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	raw := testutil.GenerateObject()
 	testutil.AddAttribute(raw, "foo", "bar")
@@ -37,6 +38,7 @@ func TestDB_Inhume(t *testing.T) {

 func TestInhumeTombOnTomb(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	var (
 		err error
@@ -99,6 +101,7 @@ func TestInhumeTombOnTomb(t *testing.T) {

 func TestInhumeLocked(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	locked := oidtest.Address()
@@ -3,21 +3,15 @@ package meta
 import (
 	"context"
 	"errors"
-	"fmt"
-	"strconv"
 	"time"

-	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
-	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
+	"github.com/dgraph-io/badger/v4"
+
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
-	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/trace"
 )

 // ExpiredObject is a descriptor of expired object from DB.
@@ -44,99 +38,7 @@ type ExpiredObjectHandler func(*ExpiredObject) error
 // as a "break" keyword.
 var ErrInterruptIterator = logicerr.New("iterator is interrupted")

-// IterateExpired iterates over all objects in DB which are out of date
-// relative to epoch. Locked objects are not included (do not confuse
-// with objects of type LOCK).
-//
-// If h returns ErrInterruptIterator, nil returns immediately.
-// Returns other errors of h directly.
-func (db *DB) IterateExpired(ctx context.Context, epoch uint64, h ExpiredObjectHandler) error {
-	var (
-		startedAt = time.Now()
-		success   = false
-	)
-	defer func() {
-		db.metrics.AddMethodDuration("IterateExpired", time.Since(startedAt), success)
-	}()
-	_, span := tracing.StartSpanFromContext(ctx, "metabase.IterateExpired",
-		trace.WithAttributes(
-			attribute.String("epoch", strconv.FormatUint(epoch, 10)),
-		))
-	defer span.End()
-
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return ErrDegradedMode
-	}
-
-	err := metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		return db.iterateExpired(tx, epoch, h)
-	}))
-	success = err == nil
-	return err
-}
-
-func (db *DB) iterateExpired(tx *bbolt.Tx, epoch uint64, h ExpiredObjectHandler) error {
-	err := tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
-		cidBytes := cidFromAttributeBucket(name, objectV2.SysAttributeExpEpoch)
-		if cidBytes == nil {
-			cidBytes = cidFromAttributeBucket(name, objectV2.SysAttributeExpEpochNeoFS)
-			if cidBytes == nil {
-				return nil
-			}
-		}
-
-		var cnrID cid.ID
-		err := cnrID.Decode(cidBytes)
-		if err != nil {
-			return fmt.Errorf("could not parse container ID of expired bucket: %w", err)
-		}
-
-		return b.ForEachBucket(func(expKey []byte) error {
-			bktExpired := b.Bucket(expKey)
-			expiresAfter, err := strconv.ParseUint(string(expKey), 10, 64)
-			if err != nil {
-				return fmt.Errorf("could not parse expiration epoch: %w", err)
-			} else if expiresAfter >= epoch {
-				return nil
-			}
-
-			return bktExpired.ForEach(func(idKey, _ []byte) error {
-				var id oid.ID
-
-				err = id.Decode(idKey)
-				if err != nil {
-					return fmt.Errorf("could not parse ID of expired object: %w", err)
-				}
-
-				// Ignore locked objects.
-				//
-				// To slightly optimize performance we can check only REGULAR objects
-				// (only they can be locked), but it's more reliable.
-				if objectLocked(tx, cnrID, id) {
-					return nil
-				}
-
-				var addr oid.Address
-				addr.SetContainer(cnrID)
-				addr.SetObject(id)
-
-				return h(&ExpiredObject{
-					typ:  firstIrregularObjectType(tx, cnrID, idKey),
-					addr: addr,
-				})
-			})
-		})
-	})
-
-	if errors.Is(err, ErrInterruptIterator) {
-		err = nil
-	}
-
-	return err
-}
+var errInvalidAttributeKey = errors.New("invalid userr attribute key")

 // IterateCoveredByTombstones iterates over all objects in DB which are covered
 // by tombstone with string address from tss. Locked objects are not included
@@ -164,68 +66,99 @@ func (db *DB) IterateCoveredByTombstones(ctx context.Context, tss map[string]oid
 		return ErrDegradedMode
 	}

-	return db.boltDB.View(func(tx *bbolt.Tx) error {
-		return db.iterateCoveredByTombstones(tx, tss, h)
+	return db.database.View(func(tx *badger.Txn) error {
+		return db.iterateCoveredByTombstones(ctx, tx, tss, h)
 	})
 }

-func (db *DB) iterateCoveredByTombstones(tx *bbolt.Tx, tss map[string]oid.Address, h func(oid.Address) error) error {
-	bktGraveyard := tx.Bucket(graveyardBucketName)
+func (db *DB) iterateCoveredByTombstones(ctx context.Context, tx *badger.Txn, tss map[string]oid.Address, h func(oid.Address) error) error {
+	prefix := []byte{graveyardPrefix}
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         prefix,
+		PrefetchValues: true,
+	})
+	defer it.Close()

-	err := bktGraveyard.ForEach(func(k, v []byte) error {
-		var addr oid.Address
-		if err := decodeAddressFromKey(&addr, v); err != nil {
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		var tombstoneAddress oid.Address
+		if err := it.Item().Value(func(val []byte) error {
+			var e error
+			tombstoneAddress, e = decodeAddressFromGrave(val)
+			return e
+		}); err != nil {
 			return err
 		}
-		if _, ok := tss[addr.EncodeToString()]; ok {
-			var addr oid.Address
+		if _, ok := tss[tombstoneAddress.EncodeToString()]; !ok {
+			continue
+		}

-			err := decodeAddressFromKey(&addr, k)
-			if err != nil {
-				return fmt.Errorf("could not parse address of the object under tombstone: %w", err)
-			}
+		var objectAddress oid.Address
+		var err error
+		objectAddress, err = addressFromGraveyardKey(it.Item().Key())
+		if err != nil {
+			return err
+		}

-			if objectLocked(tx, addr.Container(), addr.Object()) {
+		isLocked, err := objectLocked(ctx, tx, objectAddress.Container(), objectAddress.Object())
+		if err != nil {
+			return err
+		}
+		if isLocked {
+			continue
+		}
+		if err := h(objectAddress); err != nil {
+			if errors.Is(err, ErrInterruptIterator) {
 				return nil
 			}
-
-			return h(addr)
+			return err
 		}
-
-		return nil
-	})
-
-	if errors.Is(err, ErrInterruptIterator) {
-		err = nil
 	}
-
-	return err
+	return nil
 }

-func iteratePhyObjects(tx *bbolt.Tx, f func(cid.ID, oid.ID) error) error {
-	var cid cid.ID
-	var oid oid.ID
+func iteratePhyObjects(tx *badger.Txn, f func(cid.ID, oid.ID, *objectSDK.Object) error) error {
+	if err := iteratePhyObjectsWithPrefix(tx, primaryPrefix, f); err != nil {
+		return err
+	}
+	if err := iteratePhyObjectsWithPrefix(tx, lockersPrefix, f); err != nil {
+		return err
+	}
+	if err := iteratePhyObjectsWithPrefix(tx, tombstonePrefix, f); err != nil {
+		return err
+	}
+	return nil
+}

-	return tx.ForEach(func(name []byte, b *bbolt.Bucket) error {
-		b58CID, postfix := parseContainerIDWithPrefix(&cid, name)
-		if len(b58CID) == 0 {
-			return nil
-		}
-
-		switch postfix {
-		case primaryPrefix,
-			lockersPrefix,
-			tombstonePrefix:
-		default:
-			return nil
-		}
-
-		return b.ForEach(func(k, v []byte) error {
-			if oid.Decode(k) == nil {
-				return f(cid, oid)
-			}
-
-			return nil
-		})
-	})
+func iteratePhyObjectsWithPrefix(tx *badger.Txn, typePrefix byte, f func(cid.ID, oid.ID, *objectSDK.Object) error) error {
+	prefix := []byte{typePrefix}
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:         prefix,
+		PrefetchValues: true,
+	})
+	defer it.Close()
+
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		addr, err := addressFromKey(typePrefix, it.Item().Key())
+		if err != nil {
+			return err
+		}
+		obj := objectSDK.New()
+		if err := it.Item().Value(func(val []byte) error {
+			return obj.Unmarshal(val)
+		}); err != nil {
+			return err
+		}
+		if err := f(addr.Container(), addr.Object(), obj); err != nil {
+			return err
+		}
+	}
+	return nil
 }
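Note the two value-access styles in the rewritten iterators: iterateDeletedObj copies with KeyCopy/ValueCopy because the bytes are handed to an arbitrary handler, while isTomb and iteratePhyObjectsWithPrefix work inside item.Value, whose slice is only valid during the callback. A sketch of the distinction:

package main

import (
	"fmt"

	"github.com/dgraph-io/badger/v4"
)

func main() {
	db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil))
	if err != nil {
		panic(err)
	}
	defer db.Close()

	_ = db.Update(func(txn *badger.Txn) error { return txn.Set([]byte("k"), []byte("v")) })

	_ = db.View(func(txn *badger.Txn) error {
		item, err := txn.Get([]byte("k"))
		if err != nil {
			return err
		}
		// Safe to keep beyond the item: ValueCopy allocates a fresh slice.
		kept, err := item.ValueCopy(nil)
		if err != nil {
			return err
		}
		// Only valid inside the callback: decode or copy before returning.
		_ = item.Value(func(val []byte) error {
			fmt.Printf("in callback: %s\n", val)
			return nil
		})
		fmt.Printf("copied: %s\n", kept)
		return nil
	})
}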
@@ -17,6 +17,7 @@ import (

 func TestDB_IterateExpired(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	const epoch = 13

@@ -68,6 +69,7 @@ func putWithExpiration(t *testing.T, db *meta.DB, typ objectSDK.Type, expiresAt

 func TestDB_IterateCoveredByTombstones(t *testing.T) {
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	ts := oidtest.Address()
 	protected1 := oidtest.Address()
@@ -2,16 +2,17 @@ package meta

 import (
 	"context"
+	"fmt"
+	"slices"
 	"time"

 	objectcore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
-	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )
@@ -21,10 +22,33 @@ import (
 // cursor. Use nil cursor object to start listing again.
 var ErrEndOfListing = logicerr.New("end of object listing")

+type listPrefix struct {
+	prefix     []byte
+	keyParser  func(k []byte) (oid.Address, error)
+	objectType objectSDK.Type
+}
+
+var listPrefixes = []listPrefix{
+	{
+		prefix:     []byte{primaryPrefix},
+		keyParser:  addressFromPrimaryKey,
+		objectType: objectSDK.TypeRegular,
+	},
+	{
+		prefix:     []byte{lockersPrefix},
+		keyParser:  addressFromLockersKey,
+		objectType: objectSDK.TypeLock,
+	},
+	{
+		prefix:     []byte{tombstonePrefix},
+		keyParser:  addressFromTombstoneKey,
+		objectType: objectSDK.TypeTombstone,
+	},
+}
+
 // Cursor is a type for continuous object listing.
 type Cursor struct {
-	bucketName     []byte
-	inBucketOffset []byte
+	lastKey []byte
 }

 // ListPrm contains parameters for ListWithCursor operation.
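The new Cursor keeps only the last raw key returned; because every key starts with its index's prefix byte, the first byte of lastKey tells listWithCursor which entry of listPrefixes to resume in before falling through to the later ones. A toy sketch of that resume logic:

package main

import "fmt"

// Toy version of the cursor decision: three ordered "indexes" identified by
// their first key byte; resume from the prefix encoded in lastKey[0].
var prefixes = []byte{0x10, 0x20, 0x30}

func resumeIndex(lastKey []byte) int {
	if lastKey == nil {
		return 0 // nil cursor: start from the first index
	}
	for i, p := range prefixes {
		if lastKey[0] == p {
			return i
		}
	}
	return -1 // invalid cursor, mirrors the fmt.Errorf path in listWithCursor
}

func main() {
	fmt.Println(resumeIndex(nil))                // 0
	fmt.Println(resumeIndex([]byte{0x20, 0xAA})) // 1: resume in the second index
	fmt.Println(resumeIndex([]byte{0x99}))       // -1: unknown prefix
}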
@@ -89,151 +113,99 @@ func (db *DB) ListWithCursor(ctx context.Context, prm ListPrm) (res ListRes, err
 		return res, ErrDegradedMode
 	}

-	result := make([]objectcore.AddressWithType, 0, prm.count)
+	if prm.count == 0 {
+		return ListRes{}, ErrEndOfListing
+	}

-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
-		res.addrList, res.cursor, err = db.listWithCursor(tx, result, prm.count, prm.cursor)
+	err = db.database.View(func(tx *badger.Txn) error {
+		res.addrList, res.cursor, err = db.listWithCursor(ctx, tx, prm.count, prm.cursor)
 		return err
 	})
 	success = err == nil
 	return res, metaerr.Wrap(err)
 }

-func (db *DB) listWithCursor(tx *bbolt.Tx, result []objectcore.AddressWithType, count int, cursor *Cursor) ([]objectcore.AddressWithType, *Cursor, error) {
-	threshold := cursor == nil // threshold is a flag to ignore cursor
-	var bucketName []byte
-
-	c := tx.Cursor()
-	name, _ := c.First()
-
-	if !threshold {
-		name, _ = c.Seek(cursor.bucketName)
-	}
-
-	var containerID cid.ID
-	var offset []byte
-	graveyardBkt := tx.Bucket(graveyardBucketName)
-	garbageBkt := tx.Bucket(garbageBucketName)
-
-	var rawAddr = make([]byte, cidSize, addressKeySize)
-
-loop:
-	for ; name != nil; name, _ = c.Next() {
-		cidRaw, prefix := parseContainerIDWithPrefix(&containerID, name)
-		if cidRaw == nil {
-			continue
-		}
-
-		var objType objectSDK.Type
-
-		switch prefix {
-		case primaryPrefix:
-			objType = objectSDK.TypeRegular
-		case lockersPrefix:
-			objType = objectSDK.TypeLock
-		case tombstonePrefix:
-			objType = objectSDK.TypeTombstone
-		default:
-			continue
-		}
-
-		bkt := tx.Bucket(name)
-		if bkt != nil {
-			copy(rawAddr, cidRaw)
-			result, offset, cursor = selectNFromBucket(bkt, objType, graveyardBkt, garbageBkt, rawAddr, containerID,
-				result, count, cursor, threshold)
-		}
-		bucketName = name
-		if len(result) >= count {
-			break loop
-		}
-
-		// set threshold flag after first `selectNFromBucket` invocation
-		// first invocation must look for cursor object
-		threshold = true
-	}
-
-	if offset != nil {
-		// new slice is much faster but less memory efficient
-		// we need to copy, because offset exists during bbolt tx
-		cursor.inBucketOffset = make([]byte, len(offset))
-		copy(cursor.inBucketOffset, offset)
-	}
+func (db *DB) listWithCursor(ctx context.Context, tx *badger.Txn, count int, cursor *Cursor) ([]objectcore.AddressWithType, *Cursor, error) {
+	var prefix []byte
+	var lastSeen []byte
+	if cursor != nil {
+		prefix = []byte{cursor.lastKey[0]}
+		lastSeen = cursor.lastKey
+	} else {
+		prefix = listPrefixes[0].prefix
+	}
+
+	idx := slices.IndexFunc(listPrefixes, func(e listPrefix) bool {
+		return e.prefix[0] == prefix[0]
+	})
+	if idx < 0 {
+		return nil, nil, fmt.Errorf("invalid prefix value %d", prefix[0])
+	}
+
+	var next Cursor
+	result := make([]objectcore.AddressWithType, 0, count)
+	for ; idx < len(listPrefixes); idx++ {
+		indexResult, lastIndexSeen, err := listByPrefix(ctx, tx, lastSeen, idx, count-len(result))
+		if err != nil {
+			return nil, nil, err
+		}
+		result = append(result, indexResult...)
+		if len(lastIndexSeen) > 0 {
+			next.lastKey = lastIndexSeen
+		}
+		if len(result) == count {
+			return result, &next, nil
+		}
+		lastSeen = nil
+	}

 	if len(result) == 0 {
 		return nil, nil, ErrEndOfListing
 	}
-
-	// new slice is much faster but less memory efficient
-	// we need to copy, because bucketName exists during bbolt tx
-	cursor.bucketName = make([]byte, len(bucketName))
-	copy(cursor.bucketName, bucketName)
-
-	return result, cursor, nil
+	return result, &next, nil
 }

-// selectNFromBucket similar to selectAllFromBucket but uses cursor to find
-// object to start selecting from. Ignores inhumed objects.
-func selectNFromBucket(bkt *bbolt.Bucket, // main bucket
-	objType objectSDK.Type, // type of the objects stored in the main bucket
-	graveyardBkt, garbageBkt *bbolt.Bucket, // cached graveyard buckets
-	cidRaw []byte, // container ID prefix, optimization
-	cnt cid.ID, // container ID
-	to []objectcore.AddressWithType, // listing result
-	limit int, // stop listing at `limit` items in result
-	cursor *Cursor, // start from cursor object
-	threshold bool, // ignore cursor and start immediately
-) ([]objectcore.AddressWithType, []byte, *Cursor) {
-	if cursor == nil {
-		cursor = new(Cursor)
-	}
-
-	count := len(to)
-	c := bkt.Cursor()
-	k, _ := c.First()
+func listByPrefix(ctx context.Context, tx *badger.Txn, lastSeen []byte, idx int, count int) ([]objectcore.AddressWithType, []byte, error) {
+	var result []objectcore.AddressWithType
+	for {
+		kvs, err := selectByPrefixAndSeek(ctx, tx, listPrefixes[idx].prefix, lastSeen, listPrefixes[idx].objectType == objectSDK.TypeRegular, count-len(result))
+		if err != nil {
+			return nil, nil, err
|
|
||||||
|
|
||||||
offset := cursor.inBucketOffset
|
|
||||||
|
|
||||||
if !threshold {
|
|
||||||
c.Seek(offset)
|
|
||||||
k, _ = c.Next() // we are looking for objects _after_ the cursor
|
|
||||||
}
|
|
||||||
|
|
||||||
for ; k != nil; k, _ = c.Next() {
|
|
||||||
if count >= limit {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
if len(kvs) == 0 {
|
||||||
var obj oid.ID
|
return result, lastSeen, nil
|
||||||
if err := obj.Decode(k); err != nil {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
for _, kv := range kvs {
|
||||||
|
lastSeen = kv.Key
|
||||||
|
addr, err := listPrefixes[idx].keyParser(kv.Key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
st, err := inGraveyardWithKey(tx, addr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
if st > 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
offset = k
|
var isLinkingObj bool
|
||||||
if inGraveyardWithKey(append(cidRaw, k...), graveyardBkt, garbageBkt) > 0 {
|
if listPrefixes[idx].objectType == objectSDK.TypeRegular {
|
||||||
continue
|
var o objectSDK.Object
|
||||||
|
if err := o.Unmarshal(kv.Value); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
isLinkingObj = isLinkObject(&o)
|
||||||
|
}
|
||||||
|
|
||||||
|
result = append(result, objectcore.AddressWithType{
|
||||||
|
Address: addr,
|
||||||
|
Type: listPrefixes[idx].objectType,
|
||||||
|
IsLinkingObject: isLinkingObj,
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(result) == count {
|
||||||
|
return result, lastSeen, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var a oid.Address
|
|
||||||
a.SetContainer(cnt)
|
|
||||||
a.SetObject(obj)
|
|
||||||
to = append(to, objectcore.AddressWithType{Address: a, Type: objType})
|
|
||||||
count++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return to, offset, cursor
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseContainerIDWithPrefix(containerID *cid.ID, name []byte) ([]byte, byte) {
|
|
||||||
if len(name) < bucketKeySize {
|
|
||||||
return nil, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
rawID := name[1:bucketKeySize]
|
|
||||||
|
|
||||||
if err := containerID.Decode(rawID); err != nil {
|
|
||||||
return nil, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return rawID, name[0]
|
|
||||||
}
|
}
|
||||||
|
|
|
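The listing rewrite leans on a seek-based helper, `selectByPrefixAndSeek`, that is not shown in this hunk. A minimal sketch of how such a helper can be built on badger's iterator API, assuming keys are byte-ordered, the cursor key itself must be excluded, and values are fetched only for regular objects (whose headers are needed to detect link objects); the real implementation elsewhere in this PR may differ:

// Sketch only; name and semantics of selectByPrefixAndSeek are assumptions.
func selectByPrefixAndSeekSketch(ctx context.Context, tx *badger.Txn, prefix, lastSeen []byte, withValues bool, limit int) ([]keyValue, error) {
	it := tx.NewIterator(badger.IteratorOptions{
		Prefix:         prefix,
		PrefetchValues: withValues,
		PrefetchSize:   badger.DefaultIteratorOptions.PrefetchSize,
	})
	defer it.Close()

	var result []keyValue
	// Seek lands on lastSeen itself if it still exists, so the first
	// matching key must be skipped to resume strictly after the cursor.
	for it.Seek(lastSeen); it.ValidForPrefix(prefix) && len(result) < limit; it.Next() {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}
		key := it.Item().KeyCopy(nil) // copy: keys escape the iterator
		if bytes.Equal(key, lastSeen) {
			continue
		}
		kv := keyValue{Key: key}
		if withValues {
			v, err := it.Item().ValueCopy(nil)
			if err != nil {
				return nil, err
			}
			kv.Value = v
		}
		result = append(result, kv)
	}
	return result, nil
}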
@@ -3,7 +3,6 @@ package meta_test
 import (
 	"context"
 	"errors"
-	"sort"
 	"testing"

 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
@@ -13,7 +12,6 @@ import (
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
 	"github.com/stretchr/testify/require"
-	"go.etcd.io/bbolt"
 )

 func BenchmarkListWithCursor(b *testing.B) {
@@ -30,9 +28,8 @@ func BenchmarkListWithCursor(b *testing.B) {
 }

 func listWithCursorPrepareDB(b *testing.B) *meta.DB {
-	db := newDB(b, meta.WithMaxBatchSize(1), meta.WithBoltDBOptions(&bbolt.Options{
-		NoSync: true,
-	})) // faster single-thread generation
+	db := newDB(b)
+	defer func() { require.NoError(b, db.Close()) }()

 	obj := testutil.GenerateObject()
 	for i := 0; i < 100_000; i++ { // should be a multiple of all batch sizes
@@ -70,6 +67,7 @@ func TestLisObjectsWithCursor(t *testing.T) {
 	t.Parallel()

 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	const (
 		containers = 5
@@ -126,8 +124,6 @@ func TestLisObjectsWithCursor(t *testing.T) {
 		expected = append(expected, object.AddressWithType{Address: object.AddressOf(child), Type: objectSDK.TypeRegular})
 	}

-	expected = sortAddresses(expected)
-
 	t.Run("success with various count", func(t *testing.T) {
 		for countPerReq := 1; countPerReq <= total; countPerReq++ {
 			got := make([]object.AddressWithType, 0, total)
@@ -148,10 +144,8 @@ func TestLisObjectsWithCursor(t *testing.T) {
 			}

 			_, _, err = metaListWithCursor(db, uint32(countPerReq), cursor)
-			require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d", countPerReq, cursor)
-
-			got = sortAddresses(got)
-			require.Equal(t, expected, got, "count:%d", countPerReq)
+			require.ErrorIs(t, err, meta.ErrEndOfListing, "count:%d, cursor:%v", countPerReq, cursor)
+			require.ElementsMatch(t, expected, got, "count:%d", countPerReq)
 		}
 	})

@@ -165,6 +159,7 @@ func TestAddObjectDuringListingWithCursor(t *testing.T) {
 	t.Parallel()

 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	const total = 5

@@ -211,14 +206,6 @@ func TestAddObjectDuringListingWithCursor(t *testing.T) {
 	for _, v := range expected {
 		require.Equal(t, 1, v)
 	}
-}
-
-func sortAddresses(addrWithType []object.AddressWithType) []object.AddressWithType {
-	sort.Slice(addrWithType, func(i, j int) bool {
-		return addrWithType[i].Address.EncodeToString() < addrWithType[j].Address.EncodeToString()
-	})
-	return addrWithType
 }

 func metaListWithCursor(db *meta.DB, count uint32, cursor *meta.Cursor) ([]object.AddressWithType, *meta.Cursor, error) {
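These tests exercise the intended consumption pattern: call `ListWithCursor` repeatedly, feeding the returned cursor back in until `ErrEndOfListing`. A minimal sketch of that loop, with the `SetCount`/`SetCursor` setters and `AddressList`/`Cursor` getters assumed from the `metaListWithCursor` helper above rather than shown in this diff:

// Sketch of the pagination loop; API names are assumptions from the tests.
func listAll(ctx context.Context, db *meta.DB, pageSize uint32) ([]object.AddressWithType, error) {
	var (
		all    []object.AddressWithType
		cursor *meta.Cursor
	)
	for {
		var prm meta.ListPrm
		prm.SetCount(pageSize)
		prm.SetCursor(cursor)

		res, err := db.ListWithCursor(ctx, prm)
		if errors.Is(err, meta.ErrEndOfListing) {
			return all, nil // all prefixes exhausted
		}
		if err != nil {
			return nil, err
		}
		all = append(all, res.AddressList()...)
		cursor = res.Cursor() // opaque: wraps the last raw key seen
	}
}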
@@ -1,9 +1,7 @@
 package meta

 import (
-	"bytes"
 	"context"
-	"fmt"
 	"time"

 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
@@ -13,23 +11,16 @@ import (
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )

-var bucketNameLocked = []byte{lockedPrefix}
-
 type keyValue struct {
 	Key   []byte
 	Value []byte
 }

-// returns name of the bucket with objects of type LOCK for specified container.
-func bucketNameLockers(idCnr cid.ID, key []byte) []byte {
-	return bucketName(idCnr, lockersPrefix, key)
-}
-
 // Lock marks objects as locked with another object. All objects are from the
 // specified container.
 //
@@ -66,66 +57,43 @@ func (db *DB) Lock(ctx context.Context, cnr cid.ID, locker oid.ID, locked []oid.
 		panic("empty locked list")
 	}

-	err := db.lockInternal(locked, cnr, locker)
+	err := db.database.Update(func(txn *badger.Txn) error {
+		return lockInternal(txn, locked, cnr, locker)
+	})
 	success = err == nil
 	return err
 }

-func (db *DB) lockInternal(locked []oid.ID, cnr cid.ID, locker oid.ID) error {
-	bucketKeysLocked := make([][]byte, len(locked))
-	for i := range locked {
-		bucketKeysLocked[i] = objectKey(locked[i], make([]byte, objectKeySize))
-	}
-	key := make([]byte, cidSize)
-
-	return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
-		if firstIrregularObjectType(tx, cnr, bucketKeysLocked...) != objectSDK.TypeRegular {
-			return logicerr.Wrap(new(apistatus.LockNonRegularObject))
-		}
-
-		bucketLocked := tx.Bucket(bucketNameLocked)
-
-		cnr.Encode(key)
-		bucketLockedContainer, err := bucketLocked.CreateBucketIfNotExists(key)
-		if err != nil {
-			return fmt.Errorf("create container bucket for locked objects %v: %w", cnr, err)
-		}
-
-		keyLocker := objectKey(locker, key)
-		var exLockers [][]byte
-		var updLockers []byte
-
-	loop:
-		for i := range bucketKeysLocked {
-			exLockers, err = decodeList(bucketLockedContainer.Get(bucketKeysLocked[i]))
-			if err != nil {
-				return fmt.Errorf("decode list of object lockers: %w", err)
-			}
-
-			for i := range exLockers {
-				if bytes.Equal(exLockers[i], keyLocker) {
-					continue loop
-				}
-			}
-
-			updLockers, err = encodeList(append(exLockers, keyLocker))
-			if err != nil {
-				return fmt.Errorf("encode list of object lockers: %w", err)
-			}
-
-			err = bucketLockedContainer.Put(bucketKeysLocked[i], updLockers)
-			if err != nil {
-				return fmt.Errorf("update list of object lockers: %w", err)
-			}
-		}
-
-		return nil
-	}))
+func lockInternal(tx *badger.Txn, locked []oid.ID, cnr cid.ID, locker oid.ID) error {
+	t, err := firstIrregularObjectType(tx, cnr, locked...)
+	if err != nil {
+		return err
+	}
+	if t != objectSDK.TypeRegular {
+		return logicerr.Wrap(new(apistatus.LockNonRegularObject))
+	}
+
+	for _, objID := range locked {
+		key := lockedKey(cnr, objID, locker)
+		v, err := valueOrNil(tx, key)
+		if err != nil {
+			return err
+		}
+		if v != nil {
+			// already locked by locker
+			continue
+		}
+		if err := tx.Set(key, zeroValue); err != nil {
+			return err
+		}
+	}
+	return nil
 }

 // FreeLockedBy unlocks all objects in DB which are locked by lockers.
 // Returns slice of unlocked object ID's or an error.
-func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {
+func (db *DB) FreeLockedBy(ctx context.Context, lockers []oid.Address) ([]oid.Address, error) {
 	var (
 		startedAt = time.Now()
 		success   = false
@@ -143,9 +111,9 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {

 	var unlockedObjects []oid.Address

-	if err := db.boltDB.Update(func(tx *bbolt.Tx) error {
+	if err := db.database.Update(func(tx *badger.Txn) error {
 		for i := range lockers {
-			unlocked, err := freePotentialLocks(tx, lockers[i].Container(), lockers[i].Object())
+			unlocked, err := freePotentialLocks(ctx, tx, lockers[i])
 			if err != nil {
 				return err
 			}
@@ -161,18 +129,14 @@ func (db *DB) FreeLockedBy(lockers []oid.Address) ([]oid.Address, error) {
 }

 // checks if specified object is locked in the specified container.
-func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
-	bucketLocked := tx.Bucket(bucketNameLocked)
-	if bucketLocked != nil {
-		key := make([]byte, cidSize)
-		idCnr.Encode(key)
-		bucketLockedContainer := bucketLocked.Bucket(key)
-		if bucketLockedContainer != nil {
-			return bucketLockedContainer.Get(objectKey(idObj, key)) != nil
-		}
-	}
-
-	return false
+func objectLocked(ctx context.Context, tx *badger.Txn, idCnr cid.ID, idObj oid.ID) (bool, error) {
+	prefix := lockedKeyLongPrefix(idCnr, idObj)
+	items, err := selectByPrefixBatch(ctx, tx, prefix, 1)
+	if err != nil {
+		return false, err
+	}
+	return len(items) > 0, nil
 }

 // releases all records about the objects locked by the locker.
@@ -181,96 +145,64 @@ func objectLocked(tx *bbolt.Tx, idCnr cid.ID, idObj oid.ID) bool {
 // Operation is very resource-intensive, which is caused by the admissibility
 // of multiple locks. Also, if we knew what objects are locked, it would be
 // possible to speed up the execution.
-func freePotentialLocks(tx *bbolt.Tx, idCnr cid.ID, locker oid.ID) ([]oid.Address, error) {
+func freePotentialLocks(ctx context.Context, tx *badger.Txn, locker oid.Address) ([]oid.Address, error) {
 	var unlockedObjects []oid.Address
-	bucketLocked := tx.Bucket(bucketNameLocked)
-	if bucketLocked == nil {
-		return unlockedObjects, nil
-	}
-
-	key := make([]byte, cidSize)
-	idCnr.Encode(key)
-
-	bucketLockedContainer := bucketLocked.Bucket(key)
-	if bucketLockedContainer == nil {
-		return unlockedObjects, nil
-	}
-
-	keyLocker := objectKey(locker, key)
-	updates := make([]keyValue, 0)
-	err := bucketLockedContainer.ForEach(func(k, v []byte) error {
-		keyLockers, err := decodeList(v)
-		if err != nil {
-			return fmt.Errorf("decode list of lockers in locked bucket: %w", err)
-		}
-
-		for i := range keyLockers {
-			if bytes.Equal(keyLockers[i], keyLocker) {
-				if len(keyLockers) == 1 {
-					updates = append(updates, keyValue{
-						Key:   k,
-						Value: nil,
-					})
-
-					var id oid.ID
-					err = id.Decode(k)
-					if err != nil {
-						return fmt.Errorf("decode unlocked object id error: %w", err)
-					}
-
-					var addr oid.Address
-					addr.SetContainer(idCnr)
-					addr.SetObject(id)
-
-					unlockedObjects = append(unlockedObjects, addr)
-				} else {
-					// exclude locker
-					keyLockers = append(keyLockers[:i], keyLockers[i+1:]...)
-
-					v, err = encodeList(keyLockers)
-					if err != nil {
-						return fmt.Errorf("encode updated list of lockers: %w", err)
-					}
-
-					updates = append(updates, keyValue{
-						Key:   k,
-						Value: v,
-					})
-				}
-
-				return nil
-			}
-		}
-
-		return nil
-	})
+	locked, err := lockedObjects(tx, locker)
 	if err != nil {
 		return nil, err
 	}

-	if err = applyBucketUpdates(bucketLockedContainer, updates); err != nil {
-		return nil, err
-	}
+	for _, lockedObject := range locked {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+
+		if err := tx.Delete(lockedKey(locker.Container(), lockedObject, locker.Object())); err != nil {
+			return nil, err
+		}
+		isLocked, err := objectLocked(ctx, tx, locker.Container(), lockedObject)
+		if err != nil {
+			return nil, err
+		}
+		if !isLocked { // deleted locker was the last one
+			var addr oid.Address
+			addr.SetContainer(locker.Container())
+			addr.SetObject(lockedObject)
+			unlockedObjects = append(unlockedObjects, addr)
+		}
+	}

 	return unlockedObjects, nil
 }

-func applyBucketUpdates(bucket *bbolt.Bucket, updates []keyValue) error {
-	for _, update := range updates {
-		if update.Value == nil {
-			err := bucket.Delete(update.Key)
-			if err != nil {
-				return fmt.Errorf("delete locked object record from locked bucket: %w", err)
-			}
-		} else {
-			err := bucket.Put(update.Key, update.Value)
-			if err != nil {
-				return fmt.Errorf("update list of lockers: %w", err)
-			}
-		}
-	}
-	return nil
+func lockedObjects(tx *badger.Txn, locker oid.Address) ([]oid.ID, error) {
+	var lockedByLocker []oid.ID
+	prefix := lockedKeyShortPrefix(locker.Container())
+	it := tx.NewIterator(badger.IteratorOptions{
+		PrefetchSize: badger.DefaultIteratorOptions.PrefetchSize,
+		Prefix:       prefix,
+	})
+	defer it.Close()
+
+	for it.Seek(nil); it.ValidForPrefix(prefix); it.Next() {
+		currentLockerObjID, err := lockerObjectIDFromLockedKey(it.Item().Key())
+		if err != nil {
+			return nil, err
+		}
+		if !currentLockerObjID.Equals(locker.Object()) {
+			continue
+		}
+		currentObjectID, err := objectIDFromLockedKey(it.Item().Key())
+		if err != nil {
+			return nil, err
+		}
+		lockedByLocker = append(lockedByLocker, currentObjectID)
+	}
+	return lockedByLocker, nil
 }

 // IsLockedPrm groups the parameters of IsLocked operation.
@@ -319,9 +251,10 @@ func (db *DB) IsLocked(ctx context.Context, prm IsLockedPrm) (res IsLockedRes, e
 	if db.mode.NoMetabase() {
 		return res, ErrDegradedMode
 	}
-	err = metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-		res.locked = objectLocked(tx, prm.addr.Container(), prm.addr.Object())
-		return nil
+	err = metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+		var e error
+		res.locked, e = objectLocked(ctx, tx, prm.addr.Container(), prm.addr.Object())
+		return e
 	}))
 	success = err == nil
 	return res, err
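The lock rewrite replaces the nested bolt layout (locked bucket → per-container bucket → object key → encoded list of lockers) with one flat record per (container, locked object, locker) triple: locking becomes a blind `Set`, unlocking a `Delete`, and "is locked" a limit-1 prefix scan. The key encoders (`lockedKey`, `lockedKeyShortPrefix`, `lockedKeyLongPrefix`) are defined elsewhere in the PR; a hypothetical layout consistent with how they are used here, assuming 32-byte container and object IDs:

// Illustrative only; the real encoders may differ in detail.
// Layout: lockedPrefix | container ID | locked object ID | locker object ID.
func lockedKeySketch(cnr cid.ID, locked, locker oid.ID) []byte {
	key := make([]byte, 1+cidSize+2*objectKeySize)
	key[0] = lockedPrefix
	cnr.Encode(key[1 : 1+cidSize])
	locked.Encode(key[1+cidSize : 1+cidSize+objectKeySize])
	locker.Encode(key[1+cidSize+objectKeySize:])
	return key
}

With this shape, `lockedKeyLongPrefix(cnr, locked)` would be the first `1+cidSize+objectKeySize` bytes (what `objectLocked` scans with limit 1), and `lockedKeyShortPrefix(cnr)` the first `1+cidSize` bytes (what `lockedObjects` scans, filtering on the locker suffix).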
@@ -21,6 +21,7 @@ func TestDB_Lock(t *testing.T) {

 	cnr := cidtest.ID()
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	t.Run("empty locked list", func(t *testing.T) {
 		require.Panics(t, func() { _ = db.Lock(context.Background(), cnr, oid.ID{}, nil) })
@@ -116,7 +117,7 @@ func TestDB_Lock(t *testing.T) {
 		require.Len(t, res.DeletedLockObjects(), 1)
 		require.Equal(t, objectcore.AddressOf(lockObj), res.DeletedLockObjects()[0])

-		_, err = db.FreeLockedBy([]oid.Address{lockAddr})
+		_, err = db.FreeLockedBy(context.Background(), []oid.Address{lockAddr})
 		require.NoError(t, err)

 		inhumePrm.SetAddresses(objAddr)
@@ -147,7 +148,7 @@ func TestDB_Lock(t *testing.T) {

 		// unlock just objects that were locked by
 		// just removed locker
-		_, err = db.FreeLockedBy([]oid.Address{res.DeletedLockObjects()[0]})
+		_, err = db.FreeLockedBy(context.Background(), []oid.Address{res.DeletedLockObjects()[0]})
 		require.NoError(t, err)

 		// removing objects after unlock
@@ -182,6 +183,7 @@ func TestDB_Lock_Expired(t *testing.T) {
 	es := &epochState{e: 123}

 	db := newDB(t, meta.WithEpochState(es))
+	defer func() { require.NoError(t, db.Close()) }()

 	// put an object
 	addr := putWithExpiration(t, db, objectSDK.TypeRegular, 124)
@@ -203,6 +205,7 @@ func TestDB_IsLocked(t *testing.T) {
 	t.Parallel()

 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()

 	// existing and locked objs
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"

+	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
 )

@@ -18,26 +19,21 @@ func (db *DB) SetMode(m mode.Mode) error {
 	}

 	if !db.mode.NoMetabase() {
-		if err := db.Close(); err != nil {
+		if err := db.close(); err != nil {
 			return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
 		}
 	}

-	var err error
-	switch {
-	case m.NoMetabase():
-		db.boltDB = nil
-	case m.ReadOnly():
-		err = db.Open(context.TODO(), true)
-	default:
-		err = db.Open(context.TODO(), false)
-	}
-	if err == nil && !m.NoMetabase() && !m.ReadOnly() {
-		err = db.Init()
-	}
-
-	if err != nil {
-		return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
+	if m.NoMetabase() {
+		db.database = nil
+	} else {
+		err := db.openDB(m)
+		if err == nil && !m.ReadOnly() {
+			err = metaerr.Wrap(db.init(context.TODO(), false))
+		}
+		if err != nil {
+			return fmt.Errorf("can't set metabase mode (old=%s, new=%s): %w", db.mode, m, err)
+		}
 	}

 	db.mode = m
pkg/local_object_storage/metabase/mode_test.go (new file, 36 lines)

@@ -0,0 +1,36 @@
+package meta
+
+import (
+	"context"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+type epochStateTest struct{}
+
+func (s epochStateTest) CurrentEpoch() uint64 {
+	return 0
+}
+
+func Test_Mode(t *testing.T) {
+	t.Parallel()
+	bdb := New([]Option{
+		WithPath(filepath.Join(t.TempDir(), "metabase")),
+		WithPermissions(0o600),
+		WithEpochState(epochStateTest{}),
+	}...)
+
+	require.NoError(t, bdb.Open(context.Background(), false))
+	require.Nil(t, bdb.database)
+	require.NoError(t, bdb.Init())
+	require.Nil(t, bdb.database)
+	require.NoError(t, bdb.Close())
+
+	require.NoError(t, bdb.Open(context.Background(), false))
+	require.Nil(t, bdb.database)
+	require.NoError(t, bdb.Init())
+	require.Nil(t, bdb.database)
+	require.NoError(t, bdb.Close())
+}
@@ -1,144 +0,0 @@
-package meta
-
-import (
-	"context"
-	"fmt"
-
-	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
-	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
-	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
-	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/trace"
-)
-
-// ToMoveItPrm groups the parameters of ToMoveIt operation.
-type ToMoveItPrm struct {
-	addr oid.Address
-}
-
-// ToMoveItRes groups the resulting values of ToMoveIt operation.
-type ToMoveItRes struct{}
-
-// SetAddress sets address of the object to move into another shard.
-func (p *ToMoveItPrm) SetAddress(addr oid.Address) {
-	p.addr = addr
-}
-
-// DoNotMovePrm groups the parameters of DoNotMove operation.
-type DoNotMovePrm struct {
-	addr oid.Address
-}
-
-// DoNotMoveRes groups the resulting values of DoNotMove operation.
-type DoNotMoveRes struct{}
-
-// SetAddress sets address of the object to prevent moving into another shard.
-func (p *DoNotMovePrm) SetAddress(addr oid.Address) {
-	p.addr = addr
-}
-
-// MovablePrm groups the parameters of Movable operation.
-type MovablePrm struct{}
-
-// MovableRes groups the resulting values of Movable operation.
-type MovableRes struct {
-	addrList []oid.Address
-}
-
-// AddressList returns resulting addresses of Movable operation.
-func (p MovableRes) AddressList() []oid.Address {
-	return p.addrList
-}
-
-// ToMoveIt marks objects to move it into another shard. This useful for
-// faster HRW fetching.
-func (db *DB) ToMoveIt(ctx context.Context, prm ToMoveItPrm) (res ToMoveItRes, err error) {
-	_, span := tracing.StartSpanFromContext(ctx, "metabase.ToMoveIt",
-		trace.WithAttributes(
-			attribute.String("address", prm.addr.EncodeToString()),
-		))
-	defer span.End()
-
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return res, ErrDegradedMode
-	} else if db.mode.ReadOnly() {
-		return res, ErrReadOnlyMode
-	}
-
-	key := make([]byte, addressKeySize)
-	key = addressKey(prm.addr, key)
-
-	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
-		toMoveIt := tx.Bucket(toMoveItBucketName)
-		return toMoveIt.Put(key, zeroValue)
-	})
-
-	return res, metaerr.Wrap(err)
-}
-
-// DoNotMove removes `MoveIt` mark from the object.
-func (db *DB) DoNotMove(prm DoNotMovePrm) (res DoNotMoveRes, err error) {
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return res, ErrDegradedMode
-	} else if db.mode.ReadOnly() {
-		return res, ErrReadOnlyMode
-	}
-
-	key := make([]byte, addressKeySize)
-	key = addressKey(prm.addr, key)
-
-	err = db.boltDB.Update(func(tx *bbolt.Tx) error {
-		toMoveIt := tx.Bucket(toMoveItBucketName)
-		return toMoveIt.Delete(key)
-	})
-
-	return res, metaerr.Wrap(err)
-}
-
-// Movable returns list of marked objects to move into other shard.
-func (db *DB) Movable(_ MovablePrm) (MovableRes, error) {
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return MovableRes{}, ErrDegradedMode
-	}
-
-	var strAddrs []string
-
-	err := db.boltDB.View(func(tx *bbolt.Tx) error {
-		toMoveIt := tx.Bucket(toMoveItBucketName)
-		return toMoveIt.ForEach(func(k, v []byte) error {
-			strAddrs = append(strAddrs, string(k))
-
-			return nil
-		})
-	})
-	if err != nil {
-		return MovableRes{}, metaerr.Wrap(err)
-	}
-
-	// we can parse strings to structures in-place, but probably it seems
-	// more efficient to keep bolt db TX code smaller because it might be
-	// bottleneck.
-	addrs := make([]oid.Address, len(strAddrs))
-
-	for i := range strAddrs {
-		err = decodeAddressFromKey(&addrs[i], []byte(strAddrs[i]))
-		if err != nil {
-			return MovableRes{}, metaerr.Wrap(fmt.Errorf("can't parse object address %v: %w",
-				strAddrs[i], err))
-		}
-	}
-
-	return MovableRes{
-		addrList: addrs,
-	}, nil
-}
@@ -1,84 +0,0 @@
-package meta_test
-
-import (
-	"context"
-	"testing"
-
-	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
-	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
-	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
-	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"github.com/stretchr/testify/require"
-)
-
-func TestDB_Movable(t *testing.T) {
-	db := newDB(t)
-
-	raw1 := testutil.GenerateObject()
-	raw2 := testutil.GenerateObject()
-
-	// put two objects in metabase
-	err := putBig(db, raw1)
-	require.NoError(t, err)
-
-	err = putBig(db, raw2)
-	require.NoError(t, err)
-
-	// check if toMoveIt index empty
-	toMoveList, err := metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, toMoveList, 0)
-
-	// mark to move object2
-	err = metaToMoveIt(db, object.AddressOf(raw2))
-	require.NoError(t, err)
-
-	// check if toMoveIt index contains address of object 2
-	toMoveList, err = metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, toMoveList, 1)
-	require.Contains(t, toMoveList, object.AddressOf(raw2))
-
-	// remove from toMoveIt index non existing address
-	err = metaDoNotMove(db, object.AddressOf(raw1))
-	require.NoError(t, err)
-
-	// check if toMoveIt index hasn't changed
-	toMoveList, err = metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, toMoveList, 1)
-
-	// remove from toMoveIt index existing address
-	err = metaDoNotMove(db, object.AddressOf(raw2))
-	require.NoError(t, err)
-
-	// check if toMoveIt index is empty now
-	toMoveList, err = metaMovable(db)
-	require.NoError(t, err)
-	require.Len(t, toMoveList, 0)
-}
-
-func metaToMoveIt(db *meta.DB, addr oid.Address) error {
-	var toMovePrm meta.ToMoveItPrm
-	toMovePrm.SetAddress(addr)
-
-	_, err := db.ToMoveIt(context.Background(), toMovePrm)
-	return err
-}
-
-func metaMovable(db *meta.DB) ([]oid.Address, error) {
-	r, err := db.Movable(meta.MovablePrm{})
-	if err != nil {
-		return nil, err
-	}
-
-	return r.AddressList(), nil
-}
-
-func metaDoNotMove(db *meta.DB, addr oid.Address) error {
-	var doNotMovePrm meta.DoNotMovePrm
-	doNotMovePrm.SetAddress(addr)
-
-	_, err := db.DoNotMove(doNotMovePrm)
-	return err
-}
pkg/local_object_storage/metabase/parse.go (new file, 62 lines)

@@ -0,0 +1,62 @@
+package meta
+
+import (
+	"encoding/binary"
+	"errors"
+
+	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
+	"github.com/dgraph-io/badger/v4"
+)
+
+// valueOrNil returns value or nil, if key not found.
+// value must be used only inside transaction.
+func valueOrNil(tx *badger.Txn, key []byte) ([]byte, error) {
+	i, err := tx.Get(key)
+	if err != nil {
+		if errors.Is(err, badger.ErrKeyNotFound) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	var value []byte
+	if err := i.Value(func(val []byte) error {
+		value = val
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+	return value, nil
+}
+
+func parseInt64Value(v []byte) (int64, bool) {
+	if len(v) == 0 {
+		return 0, true
+	}
+	if len(v) != 8 {
+		return 0, false
+	}
+	return int64(binary.LittleEndian.Uint64(v)), true
+}
+
+func marshalInt64(v int64) []byte {
+	buf := make([]byte, 8)
+	binary.LittleEndian.PutUint64(buf, uint64(v))
+	return buf
+}
+
+func parseContainerIDWithIgnore(dst *cid.ID, name []byte, ignore map[string]struct{}) bool {
+	if len(name) < bucketKeySize {
+		return false
+	}
+	if _, ok := ignore[string(name[1:bucketKeySize])]; ok {
+		return false
+	}
+	return dst.Decode(name[1:bucketKeySize]) == nil
+}
+
+func parseContainerID(dst *cid.ID, name []byte) bool {
+	if len(name) < bucketKeySize {
+		return false
+	}
+	return dst.Decode(name[1:bucketKeySize]) == nil
+}
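Note the caveat on `valueOrNil`: the returned slice aliases badger's internal buffer and is only valid until the transaction ends. A small usage sketch showing the copy a caller must make when the value escapes the `View` closure (`readRecord` is a hypothetical caller, not part of this PR):

// Sketch: copy the value out before it escapes the transaction.
func readRecord(db *badger.DB, key []byte) ([]byte, error) {
	var out []byte
	err := db.View(func(tx *badger.Txn) error {
		v, err := valueOrNil(tx, key)
		if err != nil || v == nil {
			return err
		}
		out = append([]byte(nil), v...) // v is only valid inside tx
		return nil
	})
	return out, err
}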
@@ -2,12 +2,12 @@ package meta

 import (
 	"context"
-	"encoding/binary"
 	"errors"
 	"fmt"
-	gio "io"
+	"strconv"
 	"time"

+	objectV2 "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/object"
 	objectCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
 	storagelog "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/log"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
@@ -16,18 +16,11 @@ import (
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"github.com/nspcc-dev/neo-go/pkg/io"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )

-type (
-	namedBucketItem struct {
-		name, key, val []byte
-	}
-)
-
 // PutPrm groups the parameters of Put operation.
 type PutPrm struct {
 	obj *objectSDK.Object
@@ -36,7 +29,9 @@ type PutPrm struct {
 }

 // PutRes groups the resulting values of Put operation.
-type PutRes struct{}
+type PutRes struct {
+	Inserted bool
+}

 // SetObject is a Put option to set object to save.
 func (p *PutPrm) SetObject(obj *objectSDK.Object) {
@@ -52,6 +47,8 @@ var (
 	ErrUnknownObjectType        = errors.New("unknown object type")
 	ErrIncorrectSplitInfoUpdate = errors.New("updating split info on object without it")
 	ErrIncorrectRootObject      = errors.New("invalid root object")
+
+	errInvalidUserAttributeKeyFormat = errors.New("invalid user attribute key format")
 )

 // Put saves object header in metabase. Object payload expected to be cut.
@@ -83,10 +80,27 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) {
 	}

 	currEpoch := db.epochState.CurrentEpoch()
+	cnr, ok := prm.obj.ContainerID()
+	if !ok {
+		return PutRes{}, errors.New("missing container in object")
+	}

-	err = db.boltDB.Batch(func(tx *bbolt.Tx) error {
-		return db.put(tx, prm.obj, prm.id, nil, currEpoch)
-	})
+	bucketID, release := db.bucketIDs.BucketID(cnr)
+	defer release()
+
+	const retryCount = 10
+	for i := 0; i < retryCount; i++ {
+		err = db.database.Update(func(tx *badger.Txn) error {
+			var e error
+			res, e = db.put(ctx, tx, prm.obj, prm.id, nil, currEpoch, bucketID)
+			return e
+		})
+		if errors.Is(err, badger.ErrConflict) {
+			time.Sleep(retryTimeout)
+			continue
+		}
+		break
+	}
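Unlike bbolt's single-writer `Batch`, badger transactions are optimistic: two concurrent `Update` calls touching the same keys make one of them fail with `badger.ErrConflict` rather than block, so `Put` retries. The same pattern, extracted into a generic helper for illustration (`retryTimeout` is defined elsewhere in this PR; here it is assumed to be a small constant backoff):

// Generic form of the retry loop above; a sketch, not part of the PR.
func updateWithRetry(db *badger.DB, retryTimeout time.Duration, fn func(*badger.Txn) error) error {
	const retryCount = 10
	var err error
	for i := 0; i < retryCount; i++ {
		err = db.Update(fn)
		if !errors.Is(err, badger.ErrConflict) {
			return err
		}
		time.Sleep(retryTimeout) // conflicting writers back off and re-run fn
	}
	return err
}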
 	if err == nil {
 		success = true
 		storagelog.Write(db.log,
@@ -97,42 +111,46 @@ func (db *DB) Put(ctx context.Context, prm PutPrm) (res PutRes, err error) {
 	return res, metaerr.Wrap(err)
 }

-func (db *DB) put(tx *bbolt.Tx,
+func (db *DB) put(
+	ctx context.Context,
+	tx *badger.Txn,
 	obj *objectSDK.Object,
 	id []byte,
 	si *objectSDK.SplitInfo,
-	currEpoch uint64) error {
+	currEpoch uint64,
+	bucketID uint16,
+) (PutRes, error) {
 	cnr, ok := obj.ContainerID()
 	if !ok {
-		return errors.New("missing container in object")
+		return PutRes{}, errors.New("missing container in object")
 	}

 	isParent := si != nil

-	exists, err := db.exists(tx, objectCore.AddressOf(obj), currEpoch)
+	exists, err := exists(ctx, tx, objectCore.AddressOf(obj), currEpoch)

 	var splitInfoError *objectSDK.SplitInfoError
 	if errors.As(err, &splitInfoError) {
 		exists = true // object exists, however it is virtual
 	} else if err != nil {
-		return err // return any error besides SplitInfoError
+		return PutRes{}, err // return any error besides SplitInfoError
 	}

 	if exists {
-		return db.updateObj(tx, obj, id, si, isParent)
+		return PutRes{}, db.updateObj(tx, obj, id, si, isParent)
 	}

-	return db.insertObject(tx, obj, id, si, isParent, cnr, currEpoch)
+	return PutRes{Inserted: true}, db.insertObject(ctx, tx, obj, id, si, isParent, cnr, currEpoch, bucketID)
 }

-func (db *DB) updateObj(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error {
+func (db *DB) updateObj(tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool) error {
 	// most right child and split header overlap parent so we have to
 	// check if object exists to not overwrite it twice

 	// When storage engine moves objects between different sub-storages,
 	// it calls metabase.Put method with new storage ID, thus triggering this code.
 	if !isParent && id != nil {
-		return updateStorageID(tx, objectCore.AddressOf(obj), id)
+		return setStorageID(tx, objectCore.AddressOf(obj), id, true)
 	}

 	// when storage already has last object in split hierarchy and there is
@@ -145,14 +163,14 @@ func (db *DB) updateObj(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *obje
 	return nil
 }

-func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64) error {
+func (db *DB) insertObject(ctx context.Context, tx *badger.Txn, obj *objectSDK.Object, id []byte, si *objectSDK.SplitInfo, isParent bool, cnr cid.ID, currEpoch uint64, bucketID uint16) error {
 	if par := obj.Parent(); par != nil && !isParent { // limit depth by two
 		parentSI, err := splitInfoFromObject(obj)
 		if err != nil {
 			return err
 		}

-		err = db.put(tx, par, id, parentSI, currEpoch)
+		_, err = db.put(ctx, tx, par, id, parentSI, currEpoch, bucketID)
 		if err != nil {
 			return err
 		}
@@ -168,30 +186,22 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o
 		return fmt.Errorf("can't put list indexes: %w", err)
 	}

-	err = updateFKBTIndexes(tx, obj, putFKBTIndexItem)
+	err = updateFKBTIndexes(tx, obj, putListIndexItem)
 	if err != nil {
 		return fmt.Errorf("can't put fake bucket tree indexes: %w", err)
 	}

 	// update container volume size estimation
 	if obj.Type() == objectSDK.TypeRegular && !isParent {
-		err = changeContainerSize(tx, cnr, obj.PayloadSize(), true)
+		err = changeContainerSize(tx, cnr, int64(obj.PayloadSize()), bucketID)
 		if err != nil {
 			return err
 		}
 	}

 	if !isParent {
-		err = db.updateCounter(tx, phy, 1, true)
-		if err != nil {
-			return fmt.Errorf("could not increase phy object counter: %w", err)
-		}
-
-		// it is expected that putting an unavailable object is
-		// impossible and should be handled on the higher levels
-		err = db.updateCounter(tx, logical, 1, true)
-		if err != nil {
-			return fmt.Errorf("could not increase logical object counter: %w", err)
-		}
+		if err = incCounters(tx, cnr, IsUserObject(obj), bucketID); err != nil {
+			return err
+		}
 	}

@@ -199,26 +209,24 @@ func (db *DB) insertObject(tx *bbolt.Tx, obj *objectSDK.Object, id []byte, si *o
 }

 func putUniqueIndexes(
-	tx *bbolt.Tx,
+	tx *badger.Txn,
 	obj *objectSDK.Object,
 	si *objectSDK.SplitInfo,
 	id []byte,
 ) error {
 	isParent := si != nil
 	addr := objectCore.AddressOf(obj)
-	cnr := addr.Container()
-	objKey := objectKey(addr.Object(), make([]byte, objectKeySize))
-
-	bucketName := make([]byte, bucketKeySize)
 	// add value to primary unique bucket
 	if !isParent {
+		var key []byte
 		switch obj.Type() {
 		case objectSDK.TypeRegular:
-			bucketName = primaryBucketName(cnr, bucketName)
+			key = primaryKey(addr.Container(), addr.Object())
 		case objectSDK.TypeTombstone:
-			bucketName = tombstoneBucketName(cnr, bucketName)
+			key = tombstoneKey(addr.Container(), addr.Object())
 		case objectSDK.TypeLock:
-			bucketName = bucketNameLockers(cnr, bucketName)
+			key = lockersKey(addr.Container(), addr.Object())
 		default:
 			return ErrUnknownObjectType
 		}
@@ -228,23 +236,14 @@ func putUniqueIndexes(
 			return fmt.Errorf("can't marshal object header: %w", err)
 		}

-		err = putUniqueIndexItem(tx, namedBucketItem{
-			name: bucketName,
-			key:  objKey,
-			val:  rawObject,
-		})
+		err = tx.Set(key, rawObject)
 		if err != nil {
 			return err
 		}

 		// index storageID if it is present
 		if id != nil {
-			err = putUniqueIndexItem(tx, namedBucketItem{
-				name: smallBucketName(cnr, bucketName),
-				key:  objKey,
-				val:  id,
-			})
-			if err != nil {
+			if err = setStorageID(tx, objectCore.AddressOf(obj), id, false); err != nil {
 				return err
 			}
 		}
@@ -262,13 +261,11 @@ func putUniqueIndexes(
 		if err != nil {
 			return fmt.Errorf("can't marshal split info: %w", err)
 		}
+	} else {
+		splitInfo = zeroValue
 	}

-	err = putUniqueIndexItem(tx, namedBucketItem{
-		name: rootBucketName(cnr, bucketName),
-		key:  objKey,
-		val:  splitInfo,
-	})
+	err = tx.Set(rootKey(addr.Container(), addr.Object()), splitInfo)
 	if err != nil {
 		return err
 	}
@@ -277,22 +274,15 @@ func putUniqueIndexes(
 	return nil
 }

-type updateIndexItemFunc = func(tx *bbolt.Tx, item namedBucketItem) error
+type updateIndexItemFunc = func(tx *badger.Txn, key []byte) error

-func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
+func updateListIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error {
 	idObj, _ := obj.ID()
 	cnr, _ := obj.ContainerID()
-	objKey := objectKey(idObj, make([]byte, objectKeySize))
-	bucketName := make([]byte, bucketKeySize)
-
 	cs, _ := obj.PayloadChecksum()

 	// index payload hashes
-	err := f(tx, namedBucketItem{
-		name: payloadHashBucketName(cnr, bucketName),
-		key:  cs.Value(),
-		val:  objKey,
-	})
+	err := f(tx, payloadHashKey(cnr, idObj, cs.Value()))
 	if err != nil {
 		return err
 	}
@@ -301,11 +291,7 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun

 	// index parent ids
 	if ok {
-		err := f(tx, namedBucketItem{
-			name: parentBucketName(cnr, bucketName),
-			key:  objectKey(idParent, make([]byte, objectKeySize)),
-			val:  objKey,
-		})
+		err := f(tx, parentKey(cnr, idParent, idObj))
 		if err != nil {
 			return err
 		}
@@ -313,44 +299,43 @@ func updateListIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun

 	// index split ids
 	if obj.SplitID() != nil {
-		err := f(tx, namedBucketItem{
-			name: splitBucketName(cnr, bucketName),
-			key:  obj.SplitID().ToV2(),
-			val:  objKey,
-		})
+		err := f(tx, splitKey(cnr, idObj, obj.SplitID().ToV2()))
 		if err != nil {
 			return err
 		}
 	}

+	for _, attr := range obj.Attributes() {
+		if attr.Key() != objectV2.SysAttributeExpEpochNeoFS && attr.Key() != objectV2.SysAttributeExpEpoch {
+			continue
+		}
+		expEpoch, err := strconv.ParseUint(attr.Value(), 10, 64)
+		if err != nil {
+			return errInvalidUserAttributeKeyFormat
+		}
+		err = f(tx, expiredKey(cnr, idObj, expEpoch))
+		if err != nil {
+			return err
+		}
+		break
+	}
+
 	return nil
 }

-func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFunc) error {
+func updateFKBTIndexes(tx *badger.Txn, obj *objectSDK.Object, f updateIndexItemFunc) error {
 	id, _ := obj.ID()
 	cnr, _ := obj.ContainerID()
-	objKey := objectKey(id, make([]byte, objectKeySize))
-
 	attrs := obj.Attributes()
-
-	key := make([]byte, bucketKeySize)
-	err := f(tx, namedBucketItem{
-		name: ownerBucketName(cnr, key),
-		key:  []byte(obj.OwnerID().EncodeToString()),
-		val:  objKey,
-	})
+	err := f(tx, ownerKey(cnr, id, []byte(obj.OwnerID().EncodeToString())))
 	if err != nil {
 		return err
 	}

 	// user specified attributes
 	for i := range attrs {
-		key = attributeBucketName(cnr, attrs[i].Key(), key)
-		err := f(tx, namedBucketItem{
-			name: key,
-			key:  []byte(attrs[i].Value()),
-			val:  objKey,
-		})
+		err := f(tx, attributeKey(cnr, id, attrs[i].Key(), attrs[i].Value()))
 		if err != nil {
 			return err
 		}
@@ -359,153 +344,24 @@ func updateFKBTIndexes(tx *bbolt.Tx, obj *objectSDK.Object, f updateIndexItemFun
 	return nil
 }

-func putUniqueIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
-	bkt, err := tx.CreateBucketIfNotExists(item.name)
-	if err != nil {
-		return fmt.Errorf("can't create index %v: %w", item.name, err)
-	}
-
-	return bkt.Put(item.key, item.val)
-}
+func putListIndexItem(tx *badger.Txn, key []byte) error {
+	return tx.Set(key, zeroValue)
+}

-func putFKBTIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
-	bkt, err := tx.CreateBucketIfNotExists(item.name)
-	if err != nil {
-		return fmt.Errorf("can't create index %v: %w", item.name, err)
-	}
-
-	fkbtRoot, err := bkt.CreateBucketIfNotExists(item.key)
-	if err != nil {
-		return fmt.Errorf("can't create fake bucket tree index %v: %w", item.key, err)
-	}
-
-	return fkbtRoot.Put(item.val, zeroValue)
-}
-
-func putListIndexItem(tx *bbolt.Tx, item namedBucketItem) error {
-	bkt, err := tx.CreateBucketIfNotExists(item.name)
-	if err != nil {
-		return fmt.Errorf("can't create index %v: %w", item.name, err)
-	}
-
-	lst, err := decodeList(bkt.Get(item.key))
-	if err != nil {
-		return fmt.Errorf("can't decode leaf list %v: %w", item.key, err)
-	}
-
-	lst = append(lst, item.val)
-
-	encodedLst, err := encodeList(lst)
-	if err != nil {
-		return fmt.Errorf("can't encode leaf list %v: %w", item.key, err)
-	}
-
-	return bkt.Put(item.key, encodedLst)
-}
-
-// encodeList encodes a list of byte slices into a single blob for list bucket indexes.
-func encodeList(lst [][]byte) ([]byte, error) {
-	w := io.NewBufBinWriter()
-	w.WriteVarUint(uint64(len(lst)))
-	for i := range lst {
-		w.WriteVarBytes(lst[i])
-	}
-	if w.Err != nil {
-		return nil, w.Err
-	}
-	return w.Bytes(), nil
-}
-
-// decodeList decodes a blob into the list of byte slices from a list bucket index.
-func decodeList(data []byte) (lst [][]byte, err error) {
-	if len(data) == 0 {
-		return nil, nil
-	}
-
-	var offset uint64
-	size, n, err := getVarUint(data)
-	if err != nil {
-		return nil, err
-	}
-
-	offset += uint64(n)
-	lst = make([][]byte, size, size+1)
-	for i := range lst {
-		sz, n, err := getVarUint(data[offset:])
-		if err != nil {
-			return nil, err
-		}
-		offset += uint64(n)
-
-		next := offset + sz
-		if uint64(len(data)) < next {
-			return nil, gio.ErrUnexpectedEOF
-		}
-		lst[i] = data[offset:next]
-		offset = next
-	}
-	return lst, nil
-}
-
-func getVarUint(data []byte) (uint64, int, error) {
-	if len(data) == 0 {
-		return 0, 0, gio.ErrUnexpectedEOF
-	}
-
-	switch b := data[0]; b {
|
|
||||||
case 0xfd:
|
|
||||||
if len(data) < 3 {
|
|
||||||
return 0, 1, gio.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
return uint64(binary.LittleEndian.Uint16(data[1:])), 3, nil
|
|
||||||
case 0xfe:
|
|
||||||
if len(data) < 5 {
|
|
||||||
return 0, 1, gio.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
return uint64(binary.LittleEndian.Uint32(data[1:])), 5, nil
|
|
||||||
case 0xff:
|
|
||||||
if len(data) < 9 {
|
|
||||||
return 0, 1, gio.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
return binary.LittleEndian.Uint64(data[1:]), 9, nil
|
|
||||||
default:
|
|
||||||
return uint64(b), 1, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// updateStorageID for existing objects if they were moved from one
|
|
||||||
// storage location to another.
|
|
||||||
func updateStorageID(tx *bbolt.Tx, addr oid.Address, id []byte) error {
|
|
||||||
key := make([]byte, bucketKeySize)
|
|
||||||
bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.Container(), key))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return bkt.Put(objectKey(addr.Object(), key), id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateSpliInfo for existing objects if storage filled with extra information
|
// updateSpliInfo for existing objects if storage filled with extra information
|
||||||
// about last object in split hierarchy or linking object.
|
// about last object in split hierarchy or linking object.
|
||||||
func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo) error {
|
func updateSplitInfo(tx *badger.Txn, addr oid.Address, from *objectSDK.SplitInfo) error {
|
||||||
key := make([]byte, bucketKeySize)
|
key := rootKey(addr.Container(), addr.Object())
|
||||||
bkt := tx.Bucket(rootBucketName(addr.Container(), key))
|
rawSplitInfo, err := valueOrNil(tx, key)
|
||||||
if bkt == nil {
|
if err != nil {
|
||||||
// if object doesn't exists and we want to update split info on it
|
return err
|
||||||
// then ignore, this should never happen
|
|
||||||
return ErrIncorrectSplitInfoUpdate
|
|
||||||
}
|
}
|
||||||
|
|
||||||
objectKey := objectKey(addr.Object(), key)
|
|
||||||
|
|
||||||
rawSplitInfo := bkt.Get(objectKey)
|
|
||||||
if len(rawSplitInfo) == 0 {
|
if len(rawSplitInfo) == 0 {
|
||||||
return ErrIncorrectSplitInfoUpdate
|
return ErrIncorrectSplitInfoUpdate
|
||||||
}
|
}
|
||||||
|
|
||||||
to := objectSDK.NewSplitInfo()
|
to := objectSDK.NewSplitInfo()
|
||||||
|
err = to.Unmarshal(rawSplitInfo)
|
||||||
err := to.Unmarshal(rawSplitInfo)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("can't unmarshal split info from root index: %w", err)
|
return fmt.Errorf("can't unmarshal split info from root index: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -517,7 +373,7 @@ func updateSplitInfo(tx *bbolt.Tx, addr oid.Address, from *objectSDK.SplitInfo)
|
||||||
return fmt.Errorf("can't marhsal merged split info: %w", err)
|
return fmt.Errorf("can't marhsal merged split info: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return bkt.Put(objectKey, rawSplitInfo)
|
return tx.Set(key, rawSplitInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
// splitInfoFromObject returns split info based on last or linkin object.
|
// splitInfoFromObject returns split info based on last or linkin object.
|
||||||
|
|
|
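The change above collapses bbolt's nested buckets (bucket name + key + value in a namedBucketItem) into single flat Badger keys whose leading bytes encode the index type, so every index lookup becomes a prefix scan. A minimal sketch of the idea, assuming an illustrative prefix byte, field order and separators rather than the PR's exact key layout:

```go
package sketch

import "bytes"

const attrIndexPrefix byte = 0x0A // hypothetical index-type prefix, not the PR's constant

// attributeKeySketch packs what bbolt kept as
// bucket(attribute) -> bucket(value) -> key(objectID) into one flat key:
// prefix || containerID || attrKey || 0x00 || attrValue || 0x00 || objectID.
// The stored value is empty (zeroValue), so scanning
// prefix || containerID || attrKey || 0x00 || attrValue enumerates matching objects.
func attributeKeySketch(cnr, objID []byte, attrKey, attrValue string) []byte {
	var b bytes.Buffer
	b.WriteByte(attrIndexPrefix)
	b.Write(cnr)
	b.WriteString(attrKey)
	b.WriteByte(0)
	b.WriteString(attrValue)
	b.WriteByte(0)
	b.Write(objID)
	return b.Bytes()
}
```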
@@ -2,11 +2,9 @@ package meta_test

 import (
     "context"
-    "runtime"
     "strconv"
     "sync/atomic"
     "testing"
-    "time"

     "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
     "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"

@@ -18,7 +16,7 @@ import (
     "github.com/stretchr/testify/require"
 )

-func prepareObjects(t testing.TB, n int) []*objectSDK.Object {
+func prepareObjects(n int) []*objectSDK.Object {
     cnr := cidtest.ID()
     parentID := objecttest.ID()
     objs := make([]*objectSDK.Object, n)

@@ -43,16 +41,15 @@ func prepareObjects(t testing.TB, n int) []*objectSDK.Object {

 func BenchmarkPut(b *testing.B) {
     b.Run("parallel", func(b *testing.B) {
-        db := newDB(b,
-            meta.WithMaxBatchDelay(time.Millisecond*10),
-            meta.WithMaxBatchSize(runtime.NumCPU()))
+        db := newDB(b)
+        defer func() { require.NoError(b, db.Close()) }()
         // Ensure the benchmark is bound by CPU and not waiting batch-delay time.
         b.SetParallelism(1)

         var index atomic.Int64
         index.Store(-1)

-        objs := prepareObjects(b, b.N)
+        objs := prepareObjects(b.N)
         b.ResetTimer()
         b.ReportAllocs()
         b.RunParallel(func(pb *testing.PB) {

@@ -64,12 +61,11 @@ func BenchmarkPut(b *testing.B) {
         })
     })
     b.Run("sequential", func(b *testing.B) {
-        db := newDB(b,
-            meta.WithMaxBatchDelay(time.Millisecond*10),
-            meta.WithMaxBatchSize(1))
+        db := newDB(b)
+        defer func() { require.NoError(b, db.Close()) }()
         var index atomic.Int64
         index.Store(-1)
-        objs := prepareObjects(b, b.N)
+        objs := prepareObjects(b.N)
         b.ResetTimer()
         b.ReportAllocs()
         for i := 0; i < b.N; i++ {

@@ -82,6 +78,7 @@ func BenchmarkPut(b *testing.B) {

 func TestDB_PutBlobovniczaUpdate(t *testing.T) {
     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     raw1 := testutil.GenerateObject()
     storageID := []byte{1, 2, 3, 4}
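The benchmarks drop WithMaxBatchDelay/WithMaxBatchSize because those are bbolt batch-commit knobs; with Badger a put is an ordinary transaction and commit grouping happens inside the engine. A hedged sketch of the write path the benchmark now exercises, with "dir" and the key/value as placeholders:

```go
package sketch

import "github.com/dgraph-io/badger/v4"

func putOnce(dir string) error {
	db, err := badger.Open(badger.DefaultOptions(dir))
	if err != nil {
		return err
	}
	defer db.Close()

	// One Set per Update transaction; Badger batches commits internally,
	// so there is no caller-visible MaxBatchSize/MaxBatchDelay to tune.
	return db.Update(func(txn *badger.Txn) error {
		return txn.Set([]byte("key"), []byte("value"))
	})
}
```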
pkg/local_object_storage/metabase/reset_test.go (new file, 55 lines)
@@ -0,0 +1,55 @@
+package meta
+
+import (
+    "context"
+    "fmt"
+    "path/filepath"
+    "testing"
+
+    "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/testutil"
+    "github.com/dgraph-io/badger/v4"
+    "github.com/stretchr/testify/require"
+)
+
+type epochState struct{ e uint64 }
+
+func (s epochState) CurrentEpoch() uint64 {
+    return s.e
+}
+
+func TestResetDropsContainerBuckets(t *testing.T) {
+    t.Parallel()
+
+    db := New(
+        []Option{
+            WithPath(filepath.Join(t.TempDir(), "metabase")),
+            WithPermissions(0o600),
+            WithEpochState(epochState{}),
+        }...,
+    )
+
+    require.NoError(t, db.Open(context.Background(), false))
+    require.NoError(t, db.Init())
+
+    defer func() { require.NoError(t, db.Close()) }()
+
+    for idx := 0; idx < 100; idx++ {
+        var putPrm PutPrm
+        putPrm.SetObject(testutil.GenerateObject())
+        putPrm.SetStorageID([]byte(fmt.Sprintf("0/%d", idx)))
+        _, err := db.Put(context.Background(), putPrm)
+        require.NoError(t, err)
+    }
+
+    require.NoError(t, db.Reset(context.Background()))
+
+    require.NoError(t, db.database.Update(func(tx *badger.Txn) error {
+        it := tx.NewIterator(badger.DefaultIteratorOptions)
+        defer it.Close()
+
+        for it.Seek(nil); it.Valid(); it.Next() {
+            require.Equal(t, byte(shardInfoPrefix), it.Item().Key()[0], "unexpected prefix: %d", it.Item().Key()[0])
+        }
+        return nil
+    }))
+}
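The test's Seek(nil)/Valid loop is the generic full-scan form of a Badger iteration. The same assertion written as a key-only scan, where wantPrefix stands in for shardInfoPrefix and is the only assumption:

```go
package sketch

import (
	"fmt"

	"github.com/dgraph-io/badger/v4"
)

func onlyPrefixLeft(db *badger.DB, wantPrefix byte) error {
	return db.View(func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.PrefetchValues = false // keys are enough for this check
		it := txn.NewIterator(opts)
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			if k := it.Item().Key(); k[0] != wantPrefix {
				return fmt.Errorf("unexpected prefix: %d", k[0])
			}
		}
		return nil
	})
}
```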
@@ -1,6 +1,7 @@
 package meta

 import (
+    "bytes"
     "context"
     "encoding/binary"
     "errors"

@@ -15,12 +16,16 @@ import (
     cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
     objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
     oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
+    "github.com/dgraph-io/badger/v4"
+    "github.com/mr-tron/base58"
     "go.etcd.io/bbolt"
     "go.opentelemetry.io/otel/attribute"
     "go.opentelemetry.io/otel/trace"
     "go.uber.org/zap"
 )

+const batchSize = 1000
+
 type (
     // filterGroup is a structure that have search filters grouped by access
     // method. We have fast filters that looks for indexes and do not unmarshal

@@ -91,14 +96,14 @@ func (db *DB) Select(ctx context.Context, prm SelectPrm) (res SelectRes, err err

     currEpoch := db.epochState.CurrentEpoch()

-    return res, metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
-        res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch)
+    return res, metaerr.Wrap(db.database.View(func(tx *badger.Txn) error {
+        res.addrList, err = db.selectObjects(ctx, tx, prm.cnr, prm.filters, currEpoch)
         success = err == nil
         return err
     }))
 }

-func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) {
+func (db *DB) selectObjects(ctx context.Context, tx *badger.Txn, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) {
     group, err := groupFilters(fs)
     if err != nil {
         return nil, err

@@ -119,10 +124,10 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
     if len(group.fastFilters) == 0 {
         expLen = 1

-        db.selectAll(tx, cnr, mAddr)
+        db.selectAll(ctx, tx, cnr, mAddr)
     } else {
         for i := range group.fastFilters {
-            db.selectFastFilter(tx, cnr, group.fastFilters[i], mAddr, i)
+            db.selectFastFilter(ctx, tx, cnr, group.fastFilters[i], mAddr, i, currEpoch)
         }
     }

@@ -133,21 +138,20 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
             continue // ignore objects with unmatched fast filters
         }

-        var id oid.ID
-        err = id.Decode([]byte(a))
-        if err != nil {
+        var addr oid.Address
+        if err := addr.DecodeString(a); err != nil {
             return nil, err
         }

-        var addr oid.Address
-        addr.SetContainer(cnr)
-        addr.SetObject(id)
-        if objectStatus(tx, addr, currEpoch) > 0 {
+        st, err := objectStatus(ctx, tx, addr, currEpoch)
+        if err != nil {
+            return nil, err
+        }
+        if st > 0 {
             continue // ignore removed objects
         }

-        if !db.matchSlowFilters(tx, addr, group.slowFilters, currEpoch) {
+        if !db.matchSlowFilters(ctx, tx, addr, group.slowFilters, currEpoch) {
             continue // ignore objects with unmatched slow filters
         }
@@ -158,98 +162,135 @@ func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters
 }

 // selectAll adds to resulting cache all available objects in metabase.
-func (db *DB) selectAll(tx *bbolt.Tx, cnr cid.ID, to map[string]int) {
-    bucketName := make([]byte, bucketKeySize)
-    selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, 0)
-    selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, 0)
-    selectAllFromBucket(tx, parentBucketName(cnr, bucketName), to, 0)
-    selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, 0)
+func (db *DB) selectAll(ctx context.Context, tx *badger.Txn, cnr cid.ID, to map[string]int) {
+    db.selectAllWithPrefix(ctx, tx, primaryKeyPrefix(cnr), addressFromPrimaryKey, to, 0)
+    db.selectAllWithPrefix(ctx, tx, tombstoneKeyPrefix(cnr), addressFromTombstoneKey, to, 0)
+    db.selectAllWithPrefix(ctx, tx, parentKeyShortPrefix(cnr), addressOfParentFromParentKey, to, 0)
+    db.selectAllWithPrefix(ctx, tx, lockersKeyPrefix(cnr), addressFromLockersKey, to, 0)
 }

 // selectAllFromBucket goes through all keys in bucket and adds them in a
 // resulting cache. Keys should be stringed object ids.
-func selectAllFromBucket(tx *bbolt.Tx, name []byte, to map[string]int, fNum int) {
-    bkt := tx.Bucket(name)
-    if bkt == nil {
-        return
-    }
-
-    _ = bkt.ForEach(func(k, v []byte) error {
-        markAddressInCache(to, fNum, string(k))
-
-        return nil
-    })
+func (db *DB) selectAllWithPrefix(ctx context.Context, tx *badger.Txn, prefix []byte, keyParser func(ket []byte) (oid.Address, error), to map[string]int, fNum int) {
+    db.selectWithPrefix(ctx, tx, prefix, keyParser, func(oid.Address) bool { return true }, to, fNum)
+}
+
+func (db *DB) selectWithPrefix(ctx context.Context, tx *badger.Txn, prefix []byte, keyParser func([]byte) (oid.Address, error), condition func(oid.Address) bool, to map[string]int, fNum int) {
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            addr, err := keyParser(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParseAddressFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+            if condition(addr) {
+                markAddressInCache(to, fNum, addr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
 }

 // selectFastFilter makes fast optimized checks for well known buckets or
 // looking through user attribute buckets otherwise.
 func (db *DB) selectFastFilter(
-    tx *bbolt.Tx,
+    ctx context.Context,
+    tx *badger.Txn,
     cnr cid.ID, // container we search on
     f objectSDK.SearchFilter, // fast filter
     to map[string]int, // resulting cache
     fNum int, // index of filter
+    currEpoch uint64,
 ) {
-    currEpoch := db.epochState.CurrentEpoch()
-    bucketName := make([]byte, bucketKeySize)
     switch f.Header() {
     case v2object.FilterHeaderObjectID:
-        db.selectObjectID(tx, f, cnr, to, fNum, currEpoch)
+        db.selectObjectID(ctx, tx, f, cnr, to, fNum, currEpoch)
     case v2object.FilterHeaderOwnerID:
-        bucketName := ownerBucketName(cnr, bucketName)
-        db.selectFromFKBT(tx, bucketName, f, to, fNum)
+        db.selectOwnerID(ctx, tx, f, cnr, to, fNum)
     case v2object.FilterHeaderPayloadHash:
-        bucketName := payloadHashBucketName(cnr, bucketName)
-        db.selectFromList(tx, bucketName, f, to, fNum)
+        db.selectPayloadHash(ctx, tx, f, cnr, to, fNum)
     case v2object.FilterHeaderObjectType:
-        for _, bucketName := range bucketNamesForType(cnr, f.Operation(), f.Value()) {
-            selectAllFromBucket(tx, bucketName, to, fNum)
+        for _, prefix := range prefixesForType(cnr, f.Operation(), f.Value()) {
+            db.selectAllWithPrefix(ctx, tx, prefix.prefix, prefix.keyParser, to, fNum)
         }
     case v2object.FilterHeaderParent:
-        bucketName := parentBucketName(cnr, bucketName)
-        db.selectFromList(tx, bucketName, f, to, fNum)
+        db.selectParent(ctx, tx, f, cnr, to, fNum)
     case v2object.FilterHeaderSplitID:
-        bucketName := splitBucketName(cnr, bucketName)
-        db.selectFromList(tx, bucketName, f, to, fNum)
+        db.selectSplitID(ctx, tx, f, cnr, to, fNum)
     case v2object.FilterPropertyRoot:
-        selectAllFromBucket(tx, rootBucketName(cnr, bucketName), to, fNum)
+        db.selectAllWithPrefix(ctx, tx, rootKeyPrefix(cnr), addressFromRootKey, to, fNum)
     case v2object.FilterPropertyPhy:
-        selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum)
-        selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum)
-        selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum)
+        db.selectAllWithPrefix(ctx, tx, primaryKeyPrefix(cnr), addressFromPrimaryKey, to, fNum)
+        db.selectAllWithPrefix(ctx, tx, tombstoneKeyPrefix(cnr), addressFromTombstoneKey, to, fNum)
+        db.selectAllWithPrefix(ctx, tx, lockersKeyPrefix(cnr), addressFromLockersKey, to, fNum)
    default: // user attribute
-        bucketName := attributeBucketName(cnr, f.Header(), bucketName)
-
         if f.Operation() == objectSDK.MatchNotPresent {
-            selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum)
+            db.selectWithoutAttribute(ctx, tx, cnr, f.Header(), to, fNum)
         } else {
-            db.selectFromFKBT(tx, bucketName, f, to, fNum)
+            db.selectByAttribute(ctx, tx, cnr, f, to, fNum)
         }
     }
 }

-var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{
-    v2object.TypeRegular.String():   {primaryBucketName, parentBucketName},
-    v2object.TypeTombstone.String(): {tombstoneBucketName},
-    v2object.TypeLock.String():      {bucketNameLockers},
-}
-
-func allBucketNames(cnr cid.ID) (names [][]byte) {
-    for _, fns := range mBucketNaming {
-        for _, fn := range fns {
-            names = append(names, fn(cnr, make([]byte, bucketKeySize)))
-        }
-    }
-
-    return
-}
-
-func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (names [][]byte) {
+type prefixer struct {
+    prefixer  func(cid.ID) []byte
+    keyParser func(key []byte) (oid.Address, error)
+}
+
+type prefixWithKeyParser struct {
+    prefix    []byte
+    keyParser func(key []byte) (oid.Address, error)
+}
+
+var typeToPrefix = map[string][]prefixer{
+    v2object.TypeRegular.String(): {
+        prefixer{
+            prefixer:  primaryKeyPrefix,
+            keyParser: addressFromPrimaryKey,
+        },
+        prefixer{
+            prefixer:  parentKeyShortPrefix,
+            keyParser: addressOfParentFromParentKey,
+        },
+    },
+    v2object.TypeTombstone.String(): {
+        prefixer{
+            prefixer:  tombstoneKeyPrefix,
+            keyParser: addressFromTombstoneKey,
+        },
+    },
+    v2object.TypeLock.String(): {
+        prefixer{
+            prefixer:  lockersKeyPrefix,
+            keyParser: addressFromLockersKey,
+        },
+    },
+}
+
+func prefixesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (prefixes []prefixWithKeyParser) {
     appendNames := func(key string) {
-        fns, ok := mBucketNaming[key]
+        prefixers, ok := typeToPrefix[key]
         if ok {
-            for _, fn := range fns {
-                names = append(names, fn(cnr, make([]byte, bucketKeySize)))
+            for _, prefixer := range prefixers {
+                prefixes = append(prefixes, prefixWithKeyParser{
+                    prefix:    prefixer.prefixer(cnr),
+                    keyParser: prefixer.keyParser,
+                })
             }
         }
     }
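selectWithPrefix above pulls keys in batchSize chunks and remembers the last key it saw, so each round resumes where the previous one stopped. selectByPrefixAndSeek itself is not shown in this hunk; a hedged sketch of what it is assumed to do (the real signature, key-value type and context handling may differ):

```go
package sketch

import (
	"bytes"

	"github.com/dgraph-io/badger/v4"
)

type keyValue struct{ Key, Value []byte }

// selectByPrefixAndSeekSketch resumes a prefix scan after lastSeen and
// returns at most limit entries, optionally fetching values.
func selectByPrefixAndSeekSketch(txn *badger.Txn, prefix, lastSeen []byte, withValues bool, limit int) ([]keyValue, error) {
	opts := badger.DefaultIteratorOptions
	opts.Prefix = prefix
	opts.PrefetchValues = withValues
	it := txn.NewIterator(opts)
	defer it.Close()

	start := prefix
	if len(lastSeen) > 0 {
		start = lastSeen
	}
	var out []keyValue
	for it.Seek(start); it.ValidForPrefix(prefix) && len(out) < limit; it.Next() {
		if bytes.Equal(it.Item().Key(), lastSeen) {
			continue // already returned in the previous batch
		}
		kv := keyValue{Key: it.Item().KeyCopy(nil)}
		if withValues {
			v, err := it.Item().ValueCopy(nil)
			if err != nil {
				return nil, err
			}
			kv.Value = v
		}
		out = append(out, kv)
	}
	return out, nil
}
```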
@@ -257,7 +298,7 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
     switch mType {
     default:
     case objectSDK.MatchStringNotEqual:
-        for key := range mBucketNaming {
+        for key := range typeToPrefix {
             if key != typeVal {
                 appendNames(key)
             }

@@ -265,7 +306,7 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
     case objectSDK.MatchStringEqual:
         appendNames(typeVal)
     case objectSDK.MatchCommonPrefix:
-        for key := range mBucketNaming {
+        for key := range typeToPrefix {
             if strings.HasPrefix(key, typeVal) {
                 appendNames(key)
             }
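A worked example of prefixesForType, assuming the object-type String() values used as map keys above are "REGULAR", "TOMBSTONE" and "LOCK":

```go
ps := prefixesForType(cnr, objectSDK.MatchStringEqual, "TOMBSTONE")
// -> one pair: tombstoneKeyPrefix(cnr) with addressFromTombstoneKey

ps = prefixesForType(cnr, objectSDK.MatchStringNotEqual, "TOMBSTONE")
// -> three pairs: both regular prefixes (primary and parent) plus the lock prefix

ps = prefixesForType(cnr, objectSDK.MatchCommonPrefix, "T")
// -> one pair again: only "TOMBSTONE" has the string prefix "T"
_ = ps
```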
@@ -277,145 +318,163 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str

 // selectFromList looks into <fkbt> index to find list of addresses to add in
 // resulting cache.
-func (db *DB) selectFromFKBT(
-    tx *bbolt.Tx,
-    name []byte, // fkbt root bucket name
+func (db *DB) selectByAttribute(
+    ctx context.Context,
+    tx *badger.Txn,
+    cnr cid.ID,
     f objectSDK.SearchFilter, // filter for operation and value
     to map[string]int, // resulting cache
     fNum int, // index of filter
 ) { //
-    matchFunc, ok := db.matchers[f.Operation()]
-    if !ok {
-        db.log.Debug(logs.MetabaseMissingMatcher, zap.Uint32("operation", uint32(f.Operation())))
-        return
-    }
-
-    fkbtRoot := tx.Bucket(name)
-    if fkbtRoot == nil {
-        return
-    }
-
-    err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error {
-        fkbtLeaf := fkbtRoot.Bucket(k)
-        if fkbtLeaf == nil {
-            return nil
-        }
-
-        return fkbtLeaf.ForEach(func(k, _ []byte) error {
-            markAddressInCache(to, fNum, string(k))
-
-            return nil
-        })
-    })
-    if err != nil {
-        db.log.Debug(logs.MetabaseErrorInFKBTSelection, zap.String("error", err.Error()))
-    }
+    var prefix []byte
+    var condition func([]byte) bool
+    switch f.Operation() {
+    default:
+        db.log.Debug(logs.MetabaseMissingMatcher, zap.Uint32("operation", uint32(f.Operation())))
+        return
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchStringEqual, objectSDK.MatchCommonPrefix:
+        prefix = attributeKeyPrefix(cnr, f.Header(), f.Value())
+        condition = func([]byte) bool { return true }
+    case objectSDK.MatchStringNotEqual:
+        prefix = attributeKeyPrefix(cnr, f.Header(), "")
+        fromRequestValue := []byte(f.Value())
+        condition = func(fromDBValue []byte) bool {
+            return !bytes.Equal(fromDBValue, fromRequestValue)
+        }
+    }
+
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            attrValue, err := attributeValueFromAttributeKey(kv.Key, f.Header())
+            if err != nil {
+                db.log.Debug(logs.FailedToParseAttributeValueFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+
+            if condition(attrValue) {
+                addr, err := addressFromAttributeKey(kv.Key, f.Header())
+                if err != nil {
+                    db.log.Debug(logs.FailedToParseAddressFromKey,
+                        zap.ByteString("key", kv.Key),
+                        zap.Error(err),
+                    )
+                    continue
+                }
+                markAddressInCache(to, fNum, addr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
 }

 // selectOutsideFKBT looks into all incl buckets to find list of addresses outside <fkbt> to add in
 // resulting cache.
-func selectOutsideFKBT(
-    tx *bbolt.Tx,
-    incl [][]byte, // buckets
-    name []byte, // fkbt root bucket name
+func (db *DB) selectWithoutAttribute(
+    ctx context.Context,
+    tx *badger.Txn,
+    cnr cid.ID,
+    attributeKey string, // fkbt root bucket name
     to map[string]int, // resulting cache
     fNum int, // index of filter
 ) {
-    mExcl := make(map[string]struct{})
-
-    bktExcl := tx.Bucket(name)
-    if bktExcl != nil {
-        _ = bktExcl.ForEachBucket(func(k []byte) error {
-            exclBktLeaf := bktExcl.Bucket(k)
-            return exclBktLeaf.ForEach(func(k, _ []byte) error {
-                mExcl[string(k)] = struct{}{}
-
-                return nil
-            })
-        })
-    }
-
-    for i := range incl {
-        bktIncl := tx.Bucket(incl[i])
-        if bktIncl == nil {
-            continue
-        }
-
-        _ = bktIncl.ForEach(func(k, _ []byte) error {
-            if _, ok := mExcl[string(k)]; !ok {
-                markAddressInCache(to, fNum, string(k))
-            }
-
-            return nil
-        })
-    }
-}
-
-// selectFromList looks into <list> index to find list of addresses to add in
-// resulting cache.
-func (db *DB) selectFromList(
-    tx *bbolt.Tx,
-    name []byte, // list root bucket name
-    f objectSDK.SearchFilter, // filter for operation and value
-    to map[string]int, // resulting cache
-    fNum int, // index of filter
-) { //
-    bkt := tx.Bucket(name)
-    if bkt == nil {
-        return
-    }
-
-    var (
-        lst [][]byte
-        err error
-    )
-
-    switch op := f.Operation(); op {
-    case objectSDK.MatchStringEqual:
-        lst, err = decodeList(bkt.Get(bucketKeyHelper(f.Header(), f.Value())))
-        if err != nil {
-            db.log.Debug(logs.MetabaseCantDecodeListBucketLeaf, zap.String("error", err.Error()))
-            return
-        }
-    default:
-        fMatch, ok := db.matchers[op]
-        if !ok {
-            db.log.Debug(logs.MetabaseUnknownOperation, zap.Uint32("operation", uint32(op)))
-
-            return
-        }
-
-        if err = fMatch.matchBucket(bkt, f.Header(), f.Value(), func(key, val []byte) error {
-            l, err := decodeList(val)
-            if err != nil {
-                db.log.Debug(logs.MetabaseCantDecodeListBucketLeaf,
-                    zap.String("error", err.Error()),
-                )
-
-                return err
-            }
-
-            lst = append(lst, l...)
-
-            return nil
-        }); err != nil {
-            db.log.Debug(logs.MetabaseCantIterateOverTheBucket,
-                zap.String("error", err.Error()),
-            )
-
-            return
-        }
-    }
-
-    for i := range lst {
-        markAddressInCache(to, fNum, string(lst[i]))
-    }
-}
-
-// selectObjectID processes objectID filter with in-place optimizations.
+    for _, prefixers := range typeToPrefix {
+        for _, prefixer := range prefixers {
+            db.selectWithoutAttributeForPrexier(ctx, prefixer, cnr, tx, attributeKey, to, fNum)
+        }
+    }
+}
+
+func (db *DB) selectWithoutAttributeForPrexier(ctx context.Context, prefixer prefixer, cnr cid.ID, tx *badger.Txn, attributeKey string, to map[string]int, fNum int) {
+    prefix := prefixer.prefixer(cnr)
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, true, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            addr, err := prefixer.keyParser(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParseAddressFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+
+            obj := objectSDK.New()
+            if err := obj.Unmarshal(kv.Value); err != nil {
+                db.log.Debug(logs.ShardCouldNotUnmarshalObject, zap.Stringer("address", addr), zap.Error(err))
+                continue
+            }
+
+            var hasAttribute bool
+            for _, attr := range obj.Attributes() {
+                if attr.Key() == attributeKey {
+                    hasAttribute = true
+                    break
+                }
+            }
+            if hasAttribute {
+                continue
+            }
+            markAddressInCache(to, fNum, addr.EncodeToString())
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
+}
+
+type matcher struct {
+    matchSlow   func(string, []byte, string) bool
+    matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error
+}
+
+var matchers map[objectSDK.SearchMatchType]matcher = map[objectSDK.SearchMatchType]matcher{
+    objectSDK.MatchUnknown: {
+        matchSlow:   unknownMatcher,
+        matchBucket: unknownMatcherBucket,
+    },
+    objectSDK.MatchStringEqual: {
+        matchSlow:   stringEqualMatcher,
+        matchBucket: stringEqualMatcherBucket,
+    },
+    objectSDK.MatchStringNotEqual: {
+        matchSlow:   stringNotEqualMatcher,
+        matchBucket: stringNotEqualMatcherBucket,
+    },
+    objectSDK.MatchCommonPrefix: {
+        matchSlow:   stringCommonPrefixMatcher,
+        matchBucket: stringCommonPrefixMatcherBucket,
+    },
+}
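A sketch of how the package-level matchers table is consumed (see matchSlowFilters below). The matchSlow argument order, header name then stored value then filter value, is inferred from the func signature and is an assumption:

```go
// Inside a filter-evaluation loop; f is an objectSDK.SearchFilter and
// headerValue is the []byte value extracted from the object header.
m, ok := matchers[f.Operation()]
if !ok {
	return false // no matcher registered for this operation
}
if !m.matchSlow(f.Header(), headerValue, f.Value()) {
	return false // header value does not satisfy the filter
}
```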
 func (db *DB) selectObjectID(
-    tx *bbolt.Tx,
+    ctx context.Context,
+    tx *badger.Txn,
     f objectSDK.SearchFilter,
     cnr cid.ID,
     to map[string]int, // resulting cache

@@ -428,67 +487,358 @@ func (db *DB) selectObjectID(
         addr.SetObject(id)

         var splitInfoError *objectSDK.SplitInfoError
-        ok, err := db.exists(tx, addr, currEpoch)
+        ok, err := exists(ctx, tx, addr, currEpoch)
         if (err == nil && ok) || errors.As(err, &splitInfoError) {
-            raw := make([]byte, objectKeySize)
-            id.Encode(raw)
-            markAddressInCache(to, fNum, string(raw))
+            markAddressInCache(to, fNum, addr.EncodeToString())
         }
     }

+    var condition func(oid.Address) bool
     switch op := f.Operation(); op {
     case objectSDK.MatchStringEqual:
         var id oid.ID
-        if err := id.DecodeString(f.Value()); err == nil {
-            appendOID(id)
-        }
+        if err := id.DecodeString(f.Value()); err != nil {
+            return
+        }
+        appendOID(id)
+        return
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchStringNotEqual:
+        var id oid.ID
+        if err := id.DecodeString(f.Value()); err != nil {
+            return
+        }
+        condition = func(a oid.Address) bool {
+            return !a.Container().Equals(cnr) || !a.Object().Equals(id)
+        }
+    case objectSDK.MatchCommonPrefix:
+        condition = func(a oid.Address) bool {
+            return a.Container().Equals(cnr) && strings.HasPrefix(
+                a.Object().EncodeToString(),
+                f.Value(),
+            )
+        }
     default:
-        fMatch, ok := db.matchers[op]
-        if !ok {
-            db.log.Debug(logs.MetabaseUnknownOperation,
-                zap.Uint32("operation", uint32(f.Operation())),
-            )
-
-            return
-        }
-
-        for _, bucketName := range bucketNamesForType(cnr, objectSDK.MatchStringNotEqual, "") {
-            // copy-paste from DB.selectAllFrom
-            bkt := tx.Bucket(bucketName)
-            if bkt == nil {
-                return
-            }
-
-            err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error {
-                var id oid.ID
-                if err := id.Decode(k); err == nil {
-                    appendOID(id)
-                }
-                return nil
-            })
-            if err != nil {
-                db.log.Debug(logs.MetabaseCouldNotIterateOverTheBuckets,
-                    zap.String("error", err.Error()),
-                )
-            }
-        }
-    }
-}
+        db.log.Debug(logs.MetabaseUnknownOperation,
+            zap.Uint32("operation", uint32(f.Operation())),
+        )
+        return
+    }
+
+    for _, prefix := range prefixesForType(cnr, objectSDK.MatchStringNotEqual, "") {
+        db.selectWithPrefix(ctx, tx, prefix.prefix, prefix.keyParser, condition, to, fNum)
+    }
+}
+
+func (db *DB) selectOwnerID(ctx context.Context, tx *badger.Txn, f objectSDK.SearchFilter, cnr cid.ID, to map[string]int, fNum int) {
+    var condition func([]byte) bool
+    var prefix []byte
+    switch op := f.Operation(); op {
+    case objectSDK.MatchCommonPrefix, objectSDK.MatchStringEqual:
+        prefix = ownerKeyLongPrefix(cnr, []byte(f.Value()))
+        condition = func([]byte) bool { return true }
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchStringNotEqual:
+        prefix = ownerKeyShortPrefix(cnr)
+        ownerID := []byte(f.Value())
+        condition = func(fromDB []byte) bool { return !bytes.Equal(fromDB, ownerID) }
+    default:
+        db.log.Debug(logs.MetabaseUnknownOperation,
+            zap.Uint32("operation", uint32(f.Operation())),
+        )
+        return
+    }
+
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            owner, err := ownerFromOwnerKey(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParseOwnerFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+            if condition(owner) {
+                addr, err := addressFromOwnerKey(kv.Key)
+                if err != nil {
+                    db.log.Debug(logs.FailedToParseAddressFromKey,
+                        zap.ByteString("key", kv.Key),
+                        zap.Error(err),
+                    )
+                    continue
+                }
+                markAddressInCache(to, fNum, addr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
+}
+
+func (db *DB) selectPayloadHash(ctx context.Context, tx *badger.Txn, f objectSDK.SearchFilter, cnr cid.ID, to map[string]int, fNum int) {
+    var condition func([]byte) bool
+    var prefix []byte
+    switch op := f.Operation(); op {
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchCommonPrefix:
+        value, checkLast, ok := destringifyValue(f.Header(), f.Value(), true)
+        if !ok {
+            return
+        }
+        prefixValue := value
+        if checkLast {
+            prefixValue = value[:len(value)-1]
+        }
+        if len(value) == 0 {
+            condition = func([]byte) bool { return true }
+            prefix = payloadHashKeyShortPrefix(cnr)
+        } else {
+            prefix = payloadHashKeyLongPrefix(cnr, prefixValue)
+            condition = func(fromDB []byte) bool {
+                if checkLast && (len(fromDB) == len(prefixValue) || fromDB[len(prefixValue)]>>4 != value[len(value)-1]) {
+                    return false
+                }
+                return true
+            }
+        }
+    case objectSDK.MatchStringEqual:
+        value, _, ok := destringifyValue(f.Header(), f.Value(), false)
+        if !ok {
+            return
+        }
+        prefix = payloadHashKeyLongPrefix(cnr, value)
+        condition = func([]byte) bool { return true }
+    case objectSDK.MatchStringNotEqual:
+        value, _, ok := destringifyValue(f.Header(), f.Value(), false)
+        prefix = payloadHashKeyShortPrefix(cnr)
+        condition = func(fromDB []byte) bool { return !ok || !bytes.Equal(fromDB, value) }
+    default:
+        db.log.Debug(logs.MetabaseUnknownOperation, zap.Uint32("operation", uint32(f.Operation())))
+        return
+    }
+
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            hash, err := payloadHashFromPayloadHashKey(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParsePayloadHashFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+            if condition(hash) {
+                addr, err := addressFromPayloadHashKey(kv.Key)
+                if err != nil {
+                    db.log.Debug(logs.FailedToParseAddressFromKey,
+                        zap.ByteString("key", kv.Key),
+                        zap.Error(err),
+                    )
+                    continue
+                }
+                markAddressInCache(to, fNum, addr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
+}
+
+func (db *DB) selectParent(
+    ctx context.Context,
+    tx *badger.Txn,
+    f objectSDK.SearchFilter,
+    cnr cid.ID,
+    to map[string]int, // resulting cache
+    fNum int, // index of filter
+) {
+    var condition func(oid.ID) bool
+    var prefix []byte
+    switch op := f.Operation(); op {
+    case objectSDK.MatchStringEqual:
+        var parentObjID oid.ID
+        if err := parentObjID.DecodeString(f.Value()); err != nil {
+            return
+        }
+        prefix = parentKeyLongPrefix(cnr, parentObjID)
+        condition = func(oid.ID) bool { return true }
+    case objectSDK.MatchCommonPrefix:
+        v, err := base58.Decode(f.Value())
+        if err != nil {
+            return
+        }
+        prefix = append(parentKeyShortPrefix(cnr), v...)
+        condition = func(oid.ID) bool { return true }
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchStringNotEqual:
+        var parentObjID oid.ID
+        if err := parentObjID.DecodeString(f.Value()); err != nil {
+            return
+        }
+
+        prefix = parentKeyShortPrefix(cnr)
+        condition = func(parentFromDB oid.ID) bool { return !parentFromDB.Equals(parentObjID) }
+    default:
+        db.log.Debug(logs.MetabaseUnknownOperation,
+            zap.Uint32("operation", uint32(f.Operation())),
+        )
+        return
+    }
+
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            parentAddr, err := addressOfParentFromParentKey(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParseAddressFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+            if condition(parentAddr.Object()) {
+                targetAddr, err := addressOfTargetFromParentKey(kv.Key)
+                if err != nil {
+                    db.log.Debug(logs.FailedToParseAddressFromKey,
+                        zap.ByteString("key", kv.Key),
+                        zap.Error(err),
+                    )
+                    continue
+                }
+                markAddressInCache(to, fNum, targetAddr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
+}
+
+func (db *DB) selectSplitID(
+    ctx context.Context,
+    tx *badger.Txn,
+    f objectSDK.SearchFilter,
+    cnr cid.ID,
+    to map[string]int, // resulting cache
+    fNum int, // index of filter
+) {
+    var condition func([]byte) bool
+    var prefix []byte
+    switch op := f.Operation(); op {
+    case objectSDK.MatchStringEqual:
+        s := objectSDK.NewSplitID()
+        err := s.Parse(f.Value())
+        if err != nil {
+            return
+        }
+        prefix = splitKeyLongPrefix(cnr, s.ToV2())
+        condition = func([]byte) bool { return true }
+    case objectSDK.MatchCommonPrefix:
+        prefix = splitKeyLongPrefix(cnr, []byte(f.Value()))
+        condition = func([]byte) bool { return true }
+    case objectSDK.MatchUnknown:
+        return
+    case objectSDK.MatchStringNotEqual:
+        prefix = splitKeyShortPrefix(cnr)
+        splitIDFromRequest := []byte(f.Value())
+        condition = func(splitIDFromDB []byte) bool { return !bytes.Equal(splitIDFromRequest, splitIDFromDB) }
+    default:
+        db.log.Debug(logs.MetabaseUnknownOperation,
+            zap.Uint32("operation", uint32(f.Operation())),
+        )
+        return
+    }
+
+    var lastSeen []byte
+    for {
+        kvs, err := selectByPrefixAndSeek(ctx, tx, prefix, lastSeen, false, batchSize)
+        if err != nil {
+            db.log.Debug(logs.MetabaseCouldNotIterateOverThePrefix,
+                zap.ByteString("prefix", prefix),
+                zap.Error(err),
+            )
+            return
+        }
+
+        for _, kv := range kvs {
+            lastSeen = kv.Key
+            splitID, err := splitIDFromSplitKey(kv.Key)
+            if err != nil {
+                db.log.Debug(logs.FailedToParseSplitIDFromKey,
+                    zap.ByteString("key", kv.Key),
+                    zap.Error(err),
+                )
+                continue
+            }
+            if condition(splitID) {
+                addr, err := addressFromSplitKey(kv.Key)
+                if err != nil {
+                    db.log.Debug(logs.FailedToParseAddressFromKey,
+                        zap.ByteString("key", kv.Key),
+                        zap.Error(err),
+                    )
+                    continue
+                }
+                markAddressInCache(to, fNum, addr.EncodeToString())
+            }
+        }
+        if len(kvs) < batchSize {
+            break
+        }
+    }
+}

 // matchSlowFilters return true if object header is matched by all slow filters.
-func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f objectSDK.SearchFilters, currEpoch uint64) bool {
+func (db *DB) matchSlowFilters(ctx context.Context, tx *badger.Txn, addr oid.Address, f objectSDK.SearchFilters, currEpoch uint64) bool {
     if len(f) == 0 {
         return true
     }

-    buf := make([]byte, addressKeySize)
-    obj, err := db.get(tx, addr, buf, true, false, currEpoch)
+    obj, err := get(ctx, tx, addr, true, false, currEpoch)
     if err != nil {
         return false
     }

     for i := range f {
-        matchFunc, ok := db.matchers[f[i].Operation()]
+        matchFunc, ok := matchers[f[i].Operation()]
         if !ok {
             return false
         }
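The MatchCommonPrefix branch of selectPayloadHash above compares half-bytes when the hex filter value has odd length. A worked example, assuming destringifyValue hex-decodes the filter value and flags the dangling nibble via checkLast:

```go
// Filter value "abc" (odd length) is assumed to decode to value = [0xab, 0x0c]
// with checkLast = true; the scan prefix keeps only the whole bytes (0xab)
// and the dangling nibble is checked against the next byte's high half.
hashFromDB := []byte{0xab, 0xcd, 0x01} // stored payload hash
prefixValue := []byte{0xab}            // whole-byte part of the filter
lastNibble := byte(0x0c)               // dangling half-byte

matches := len(hashFromDB) > len(prefixValue) &&
	hashFromDB[len(prefixValue)]>>4 == lastNibble
fmt.Println(matches) // true: 0xcd>>4 == 0x0c
```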
@@ -23,6 +23,7 @@ func TestDB_SelectUserAttributes(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -147,6 +148,7 @@ func TestDB_SelectRootPhyParent(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -300,6 +302,7 @@ func TestDB_SelectInhume(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -334,6 +337,7 @@ func TestDB_SelectPayloadHash(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -404,6 +408,7 @@ func TestDB_SelectWithSlowFilters(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -511,6 +516,7 @@ func TestDB_SelectObjectID(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -626,6 +632,7 @@ func TestDB_SelectSplitID(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -682,6 +689,7 @@ func TestDB_SelectContainerID(t *testing.T) {
     t.Parallel()

     db := newDB(t)
+    defer func() { require.NoError(t, db.Close()) }()

     cnr := cidtest.ID()

@@ -729,6 +737,8 @@ func TestDB_SelectContainerID(t *testing.T) {
 func BenchmarkSelect(b *testing.B) {
     const objCount = 1000
     db := newDB(b)
+    defer func() { require.NoError(b, db.Close()) }()

     cid := cidtest.ID()

     for i := 0; i < objCount; i++ {

@@ -769,6 +779,7 @@ func TestExpiredObjects(t *testing.T) {
     t.Parallel()

     db := newDB(t, meta.WithEpochState(epochState{currEpoch}))
+    defer func() { require.NoError(t, db.Close()) }()

     checkExpiredObjects(t, db, func(exp, nonExp *objectSDK.Object) {
         cidExp, _ := exp.ContainerID()
@@ -1,53 +1,85 @@
 package meta
 
 import (
+	"bytes"
+	"errors"
+	"fmt"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
-	"github.com/nspcc-dev/neo-go/pkg/util/slice"
-	"go.etcd.io/bbolt"
+	metamode "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
+	"github.com/dgraph-io/badger/v4"
 )
 
-var (
-	shardInfoBucket = []byte{shardInfoPrefix}
-	shardIDKey      = []byte("id")
-)
+var shardIDKey = []byte("id")
 
+// GetShardID sets metabase operation mode
+// and reads shard id from db.
+// If id is missing, returns nil, nil.
+//
+// GetShardID does not report any metrics.
+func (db *DB) GetShardID(mode metamode.Mode) ([]byte, error) {
+	db.modeMtx.Lock()
+	defer db.modeMtx.Unlock()
+	db.mode = mode
+
+	if err := db.openDB(mode); err != nil {
+		return nil, fmt.Errorf("failed to open metabase: %w", err)
+	}
+
+	id, err := db.readShardID()
+
+	if cErr := metaerr.Wrap(db.database.Close()); cErr != nil {
+		err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr))
+	}
+
+	return id, metaerr.Wrap(err)
+}
+
 // ReadShardID reads shard id from db.
 // If id is missing, returns nil, nil.
-func (db *DB) ReadShardID() ([]byte, error) {
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
-
-	if db.mode.NoMetabase() {
-		return nil, ErrDegradedMode
-	}
-
+func (db *DB) readShardID() ([]byte, error) {
 	var id []byte
-	err := db.boltDB.View(func(tx *bbolt.Tx) error {
-		b := tx.Bucket(shardInfoBucket)
-		if b != nil {
-			id = slice.Copy(b.Get(shardIDKey))
+	err := db.database.View(func(tx *badger.Txn) error {
+		v, err := valueOrNil(tx, shardInfoKey(shardIDKey))
+		if err != nil {
+			return err
 		}
+		id = bytes.Clone(v)
 		return nil
 	})
 	return id, metaerr.Wrap(err)
 }
 
-// WriteShardID writes shard it to db.
-func (db *DB) WriteShardID(id []byte) error {
-	db.modeMtx.RLock()
-	defer db.modeMtx.RUnlock()
+// SetShardID sets metabase operation mode
+// and writes shard id to db.
+func (db *DB) SetShardID(id []byte, mode metamode.Mode) error {
+	db.modeMtx.Lock()
+	defer db.modeMtx.Unlock()
+	db.mode = mode
 
-	if db.mode.NoMetabase() {
-		return ErrDegradedMode
-	} else if db.mode.ReadOnly() {
+	if mode.ReadOnly() {
 		return ErrReadOnlyMode
 	}
 
-	return metaerr.Wrap(db.boltDB.Update(func(tx *bbolt.Tx) error {
-		b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
-		if err != nil {
-			return err
-		}
-		return b.Put(shardIDKey, id)
+	if err := db.openDB(mode); err != nil {
+		return fmt.Errorf("failed to open metabase: %w", err)
+	}
+
+	err := db.writeShardID(id)
+	if err == nil {
+		db.metrics.SetMode(mode)
+	}
+
+	if cErr := metaerr.Wrap(db.database.Close()); cErr != nil {
+		err = errors.Join(err, fmt.Errorf("failed to close metabase: %w", cErr))
+	}
+
+	return metaerr.Wrap(err)
+}
+
+// writeShardID writes shard id to db.
+func (db *DB) writeShardID(id []byte) error {
+	return metaerr.Wrap(db.database.Update(func(tx *badger.Txn) error {
+		return tx.Set(shardInfoKey(shardIDKey), id)
 	}))
 }
 
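Both GetShardID and readShardID above lean on a valueOrNil helper that this hunk does not define. A minimal sketch of what such a helper looks like against Badger v4's transaction API, assuming its only job is to map a missing key to nil instead of an error:

    // valueOrNil returns a copy of the value stored under key, or nil if
    // the key is absent; any other lookup error is passed through.
    func valueOrNil(tx *badger.Txn, key []byte) ([]byte, error) {
        item, err := tx.Get(key)
        if err != nil {
            if errors.Is(err, badger.ErrKeyNotFound) {
                return nil, nil
            }
            return nil, err
        }
        // ValueCopy matters: the raw value is only valid while the
        // transaction is open.
        return item.ValueCopy(nil)
    }

The copy is what lets callers like readShardID hand the bytes out after the transaction has been discarded.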
@@ -1,15 +1,15 @@
 package meta
 
 import (
+	"bytes"
 	"context"
 	"errors"
+	"time"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/internal/metaerr"
-	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"github.com/nspcc-dev/neo-go/pkg/util/slice"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 )

@@ -37,6 +37,14 @@ func (r StorageIDRes) StorageID() []byte {
 // StorageID returns storage descriptor for objects from the blobstor.
 // It is put together with the object can makes get/delete operation faster.
 func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes, err error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("StorageID", time.Since(startedAt), success)
+	}()
+
 	_, span := tracing.StartSpanFromContext(ctx, "metabase.StorageID",
 		trace.WithAttributes(
 			attribute.String("address", prm.addr.EncodeToString()),

@@ -50,28 +58,25 @@ func (db *DB) StorageID(ctx context.Context, prm StorageIDPrm) (res StorageIDRes
 		return res, ErrDegradedMode
 	}
 
-	err = db.boltDB.View(func(tx *bbolt.Tx) error {
+	err = db.database.View(func(tx *badger.Txn) error {
 		res.id, err = db.storageID(tx, prm.addr)
 		return err
 	})
+	success = err == nil
 	return res, metaerr.Wrap(err)
 }
 
-func (db *DB) storageID(tx *bbolt.Tx, addr oid.Address) ([]byte, error) {
-	key := make([]byte, bucketKeySize)
-	smallBucket := tx.Bucket(smallBucketName(addr.Container(), key))
-	if smallBucket == nil {
-		return nil, nil
+func (db *DB) storageID(tx *badger.Txn, addr oid.Address) ([]byte, error) {
+	storageID, err := valueOrNil(tx, smallKey(addr.Container(), addr.Object()))
+	if err != nil {
+		return nil, err
 	}
 
-	storageID := smallBucket.Get(objectKey(addr.Object(), key))
 	if storageID == nil {
 		return nil, nil
 	}
 
-	return slice.Copy(storageID), nil
+	return bytes.Clone(storageID), nil
 }
 
 // UpdateStorageIDPrm groups the parameters of UpdateStorageID operation.

@@ -95,6 +100,14 @@ func (p *UpdateStorageIDPrm) SetStorageID(id []byte) {
 
 // UpdateStorageID updates storage descriptor for objects from the blobstor.
 func (db *DB) UpdateStorageID(prm UpdateStorageIDPrm) (res UpdateStorageIDRes, err error) {
+	var (
+		startedAt = time.Now()
+		success   = false
+	)
+	defer func() {
+		db.metrics.AddMethodDuration("UpdateStorageID", time.Since(startedAt), success)
+	}()
+
 	db.modeMtx.RLock()
 	defer db.modeMtx.RUnlock()
 

@@ -104,16 +117,33 @@ func (db *DB) UpdateStorageID(prm UpdateStorageIDPrm) (res UpdateStorageIDRes, e
 		return res, ErrReadOnlyMode
 	}
 
-	currEpoch := db.epochState.CurrentEpoch()
-
-	err = db.boltDB.Batch(func(tx *bbolt.Tx) error {
-		exists, err := db.exists(tx, prm.addr, currEpoch)
-		if err == nil && exists || errors.As(err, new(logicerr.Logical)) {
-			err = updateStorageID(tx, prm.addr, prm.id)
-		}
-		return err
-	})
+	for i := 0; i < retryCount; i++ {
+		err = db.database.Update(func(tx *badger.Txn) error {
+			return setStorageID(tx, prm.addr, prm.id, true)
+		})
+		if errors.Is(err, badger.ErrConflict) {
+			time.Sleep(retryTimeout)
+			continue
+		}
+		break
+	}
+	success = err == nil
 	return res, metaerr.Wrap(err)
 }
+
+// setStorageID for existing objects if they were moved from one
+// storage location to another.
+func setStorageID(tx *badger.Txn, addr oid.Address, id []byte, override bool) error {
+	key := smallKey(addr.Container(), addr.Object())
+	if override {
+		return tx.Set(key, id)
+	}
+	v, err := valueOrNil(tx, key)
+	if err != nil {
+		return err
+	}
+	if v == nil {
+		return tx.Set(key, id)
+	}
+	return nil
+}
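Unlike bbolt's single-writer Batch, Badger's Update runs an optimistic transaction, so concurrent writers can surface badger.ErrConflict; the loop above retries in that case. retryCount and retryTimeout are presumably package-level constants introduced elsewhere in this change. The same shape can be factored into a small helper; a sketch under that assumption:

    // updateWithRetry reruns an optimistic Badger transaction while it
    // keeps failing with badger.ErrConflict (hypothetical helper).
    func updateWithRetry(db *badger.DB, attempts int, pause time.Duration, fn func(*badger.Txn) error) error {
        var err error
        for i := 0; i < attempts; i++ {
            err = db.Update(fn)
            if !errors.Is(err, badger.ErrConflict) {
                break // success, or an error retrying will not fix
            }
            time.Sleep(pause)
        }
        return err
    }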
@@ -15,9 +15,11 @@ func TestDB_StorageID(t *testing.T) {
 	t.Parallel()
 
 	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
 
 	raw1 := testutil.GenerateObject()
 	raw2 := testutil.GenerateObject()
+	deleted := testutil.GenerateObject()
 
 	storageID := []byte{1, 2, 3, 4}
 

@@ -34,6 +36,15 @@ func TestDB_StorageID(t *testing.T) {
 	err = putBig(db, raw2)
 	require.NoError(t, err)
 
+	// put object with storageID and delete it
+	err = metaPut(db, deleted, storageID)
+	require.NoError(t, err)
+
+	cnrID, ok := deleted.ContainerID()
+	require.True(t, ok)
+	ts := testutil.GenerateObjectWithCID(cnrID)
+	require.NoError(t, metaInhume(db, object.AddressOf(deleted), object.AddressOf(ts)))
+
 	// check StorageID for object without storageID
 	fetchedStorageID, err = metaStorageID(db, object.AddressOf(raw2))
 	require.NoError(t, err)

@@ -44,15 +55,52 @@ func TestDB_StorageID(t *testing.T) {
 	require.NoError(t, err)
 	require.Equal(t, storageID, fetchedStorageID)
 
+	// check StorageID for deleted object with storageID
+	fetchedStorageID, err = metaStorageID(db, object.AddressOf(deleted))
+	require.NoError(t, err)
+	require.Equal(t, storageID, fetchedStorageID)
+
 	t.Run("update", func(t *testing.T) {
+		storageID := []byte{1, 2, 3, 4, 5}
 		require.NoError(t, metaUpdateStorageID(db, object.AddressOf(raw2), storageID))
+		require.NoError(t, metaUpdateStorageID(db, object.AddressOf(deleted), storageID))
 
 		fetchedStorageID, err = metaStorageID(db, object.AddressOf(raw2))
 		require.NoError(t, err)
 		require.Equal(t, storageID, fetchedStorageID)
+
+		fetchedStorageID, err = metaStorageID(db, object.AddressOf(deleted))
+		require.NoError(t, err)
+		require.Equal(t, storageID, fetchedStorageID)
 	})
 }
+
+func TestPutWritecacheDataRace(t *testing.T) {
+	t.Parallel()
+
+	db := newDB(t)
+	defer func() { require.NoError(t, db.Close()) }()
+
+	putStorageID := []byte{1, 2, 3}
+	wcStorageID := []byte{1, 2, 3, 4, 5}
+	o := testutil.GenerateObject()
+
+	fetchedStorageID, err := metaStorageID(db, object.AddressOf(o))
+	require.NoError(t, err)
+	require.Nil(t, fetchedStorageID)
+
+	// writecache flushes object and updates storageID before object actually saved to the metabase
+	metaUpdateStorageID(db, object.AddressOf(o), wcStorageID)
+
+	// put object completes with writecache's storageID
+	err = metaPut(db, o, putStorageID)
+	require.NoError(t, err)
+
+	fetchedStorageID, err = metaStorageID(db, object.AddressOf(o))
+	require.NoError(t, err)
+	require.Equal(t, wcStorageID, fetchedStorageID)
+}
 
 func metaUpdateStorageID(db *meta.DB, addr oid.Address, id []byte) error {
 	var sidPrm meta.UpdateStorageIDPrm
 	sidPrm.SetAddress(addr)
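TestPutWritecacheDataRace pins down ordering between the write-cache and Put: when the write-cache flushes an object and records its storage ID before the object itself reaches the metabase, the later Put must not clobber that ID. Judging by the override flag on setStorageID above, the Put path presumably writes the descriptor with override=false, so an ID already recorded by the write-cache wins. A sketch of that interaction, assuming an open Badger-backed metabase:

    // demonstrateOverride mirrors the race in the test: the write-cache
    // path overwrites unconditionally, the put path only fills a missing
    // ID (hypothetical illustration of the assumed call order).
    func demonstrateOverride(db *badger.DB, addr oid.Address) error {
        return db.Update(func(tx *badger.Txn) error {
            // write-cache flush: override=true, unconditional overwrite
            if err := setStorageID(tx, addr, []byte{1, 2, 3, 4, 5}, true); err != nil {
                return err
            }
            // later put: override=false, so the ID above is kept
            return setStorageID(tx, addr, []byte{1, 2, 3}, false)
        })
    }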
@@ -3,28 +3,16 @@ package meta
 import (
 	"bytes"
 	"crypto/sha256"
+	"encoding/binary"
 	"fmt"
 
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 )
 
-var (
-	// graveyardBucketName stores rows with the objects that have been
-	// covered with Tombstone objects. That objects should not be returned
-	// from the node and should not be accepted by the node from other
-	// nodes.
-	graveyardBucketName = []byte{graveyardPrefix}
-	// garbageBucketName stores rows with the objects that should be physically
-	// deleted by the node (Garbage Collector routine).
-	garbageBucketName         = []byte{garbagePrefix}
-	toMoveItBucketName        = []byte{toMoveItPrefix}
-	containerVolumeBucketName = []byte{containerVolumePrefix}
-
-	zeroValue = []byte{0xFF}
-)
+var zeroValue = []byte{0xFF}
 
 // Prefix bytes for database keys. All ids and addresses are encoded in binary
 // unless specified otherwise.

@@ -39,13 +27,13 @@ const (
 	// Key: object address
 	// Value: dummy value
 	garbagePrefix
-	// toMoveItPrefix is used for bucket containing IDs of objects that are candidates for moving
+	// _ Previous usage was for bucket containing IDs of objects that are candidates for moving
 	// to another shard.
-	toMoveItPrefix
-	// containerVolumePrefix is used for storing container size estimations.
+	_
+	// containerSizePrefix is used for storing container size estimations.
 	// Key: container ID
 	// Value: container size in bytes as little-endian uint64
-	containerVolumePrefix
+	containerSizePrefix
 	// lockedPrefix is used for storing locked objects information.
 	// Key: container ID
 	// Value: bucket mapping objects locked to the list of corresponding LOCK objects.

@@ -111,6 +99,14 @@ const (
 	// Key: split ID
 	// Value: list of object IDs
 	splitPrefix
+
+	// containerCountersPrefix is used for storing container object counters.
+	// Key: container ID + type
+	// Value: container size in bytes as little-endian uint64
+	containerCountersPrefix
+
+	// expiredPrefix used to store expiration info.
+	expiredPrefix
 )
 
 const (

@@ -120,134 +116,538 @@ const (
 	addressKeySize = cidSize + objectKeySize
 )
 
-func bucketName(cnr cid.ID, prefix byte, key []byte) []byte {
-	key[0] = prefix
-	cnr.Encode(key[1:])
-	return key[:bucketKeySize]
-}
-
-// primaryBucketName returns <CID>.
-func primaryBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, primaryPrefix, key)
-}
-
-// tombstoneBucketName returns <CID>_TS.
-func tombstoneBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, tombstonePrefix, key)
-}
-
-// smallBucketName returns <CID>_small.
-func smallBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, smallPrefix, key)
-}
-
-// attributeBucketName returns <CID>_attr_<attributeKey>.
-func attributeBucketName(cnr cid.ID, attributeKey string, key []byte) []byte {
-	key[0] = userAttributePrefix
-	cnr.Encode(key[1:])
-	return append(key[:bucketKeySize], attributeKey...)
-}
-
-// returns <CID> from attributeBucketName result, nil otherwise.
-func cidFromAttributeBucket(val []byte, attributeKey string) []byte {
-	if len(val) < bucketKeySize || val[0] != userAttributePrefix || !bytes.Equal(val[bucketKeySize:], []byte(attributeKey)) {
-		return nil
-	}
-	return val[1:bucketKeySize]
-}
-
-// payloadHashBucketName returns <CID>_payloadhash.
-func payloadHashBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, payloadHashPrefix, key)
-}
-
-// rootBucketName returns <CID>_root.
-func rootBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, rootPrefix, key)
-}
-
-// ownerBucketName returns <CID>_ownerid.
-func ownerBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, ownerPrefix, key)
-}
-
-// parentBucketName returns <CID>_parent.
-func parentBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, parentPrefix, key)
-}
-
-// splitBucketName returns <CID>_splitid.
-func splitBucketName(cnr cid.ID, key []byte) []byte {
-	return bucketName(cnr, splitPrefix, key)
-}
-
-// addressKey returns key for K-V tables when key is a whole address.
-func addressKey(addr oid.Address, key []byte) []byte {
-	addr.Container().Encode(key)
-	addr.Object().Encode(key[cidSize:])
-	return key[:addressKeySize]
-}
-
-// parses object address formed by addressKey.
-func decodeAddressFromKey(dst *oid.Address, k []byte) error {
-	if len(k) != addressKeySize {
-		return fmt.Errorf("invalid length")
-	}
-	var cnr cid.ID
-	if err := cnr.Decode(k[:cidSize]); err != nil {
-		return err
-	}
-	var obj oid.ID
-	if err := obj.Decode(k[cidSize:]); err != nil {
-		return err
-	}
-	dst.SetObject(obj)
-	dst.SetContainer(cnr)
-	return nil
-}
-
-// objectKey returns key for K-V tables when key is an object id.
-func objectKey(obj oid.ID, key []byte) []byte {
-	obj.Encode(key)
-	return key[:objectKeySize]
-}
+func keyPrefix(cnr cid.ID, prefix byte) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = prefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+func keyObject(prefix byte, cnr cid.ID, objID oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize)
+	result[0] = prefix
+	cnr.Encode(result[1:])
+	objID.Encode(result[1+cidSize:])
+	return result
+}
+
+func addressFromKey(prefix byte, key []byte) (oid.Address, error) {
+	if len(key) != 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != prefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cont cid.ID
+	if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[1+cidSize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
+// primaryKeyPrefix returns primaryPrefix_<CID>.
+func primaryKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, primaryPrefix)
+}
+
+func primaryKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(primaryPrefix, cnr, objID)
+}
+
+func addressFromPrimaryKey(v []byte) (oid.Address, error) {
+	return addressFromKey(primaryPrefix, v)
+}
+
+// tombstoneKeyPrefix returns tombstonePrefix_<CID>.
+func tombstoneKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, tombstonePrefix)
+}
+
+func tombstoneKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(tombstonePrefix, cnr, objID)
+}
+
+func addressFromTombstoneKey(v []byte) (oid.Address, error) {
+	return addressFromKey(tombstonePrefix, v)
+}
+
+func garbageKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(garbagePrefix, cnr, objID)
+}
+
+func addressFromGarbageKey(v []byte) (oid.Address, error) {
+	return addressFromKey(garbagePrefix, v)
+}
+
+func graveyardKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(graveyardPrefix, cnr, objID)
+}
+
+func addressFromGraveyardKey(v []byte) (oid.Address, error) {
+	return addressFromKey(graveyardPrefix, v)
+}
+
+func smallKey(cnr cid.ID, obj oid.ID) []byte {
+	return keyObject(smallPrefix, cnr, obj)
+}
+
+// attributeKeyPrefix returns userAttributePrefix_<attributeKey>_<CID>_<attributeValue>.
+func attributeKeyPrefix(cnr cid.ID, attributeKey, attributeValue string) []byte {
+	result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue))
+	result[0] = userAttributePrefix
+	copy(result[1:], []byte(attributeKey))
+	cnr.Encode(result[1+len(attributeKey):])
+	copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
+	return result
+}
+
+// attributeKey returns userAttributePrefix+attributeKey+<CID>+attributeValue+<OID>.
+func attributeKey(cnr cid.ID, objID oid.ID, attributeKey, attributeValue string) []byte {
+	result := make([]byte, 1+len(attributeKey)+cidSize+len(attributeValue)+objectKeySize)
+	result[0] = userAttributePrefix
+	copy(result[1:], []byte(attributeKey))
+	cnr.Encode(result[1+len(attributeKey):])
+	copy(result[1+len(attributeKey)+cidSize:], []byte(attributeValue))
+	objID.Encode(result[1+cidSize+len(attributeKey)+len(attributeValue):])
+	return result
+}
+
+// returns attributeValue from attributeKey result, nil otherwise.
+func attributeValueFromAttributeKey(key []byte, attributeKey string) ([]byte, error) {
+	if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if key[0] != userAttributePrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
+		return nil, errInvalidAttributeKey
+	}
+	return key[1+len(attributeKey)+cidSize : len(key)-objectKeySize], nil
+}
+
+func addressFromAttributeKey(key []byte, attributeKey string) (oid.Address, error) {
+	if len(key) < 1+len(attributeKey)+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != userAttributePrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	if !bytes.Equal(key[1:1+len(attributeKey)], []byte(attributeKey)) {
+		return oid.Address{}, errInvalidAttributeKey
+	}
+	var cnrID cid.ID
+	if err := cnrID.Decode(key[1+len(attributeKey) : 1+len(attributeKey)+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var objID oid.ID
+	if err := objID.Decode(key[len(key)-objectKeySize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cnrID)
+	result.SetObject(objID)
+	return result, nil
+}
+
+// payloadHashKeyLongPrefix returns payloadHashPrefix_<CID>_hash.
+func payloadHashKeyLongPrefix(cnr cid.ID, hash []byte) []byte {
+	result := make([]byte, 1+cidSize+len(hash))
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], hash)
+	return result
+}
+
+// payloadHashKeyShortPrefix returns payloadHashPrefix_<CID>.
+func payloadHashKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// payloadHashKey returns payloadHashPrefix_<CID>_hash_<OID>.
+func payloadHashKey(cnr cid.ID, obj oid.ID, hash []byte) []byte {
+	result := make([]byte, 1+cidSize+len(hash)+objectKeySize)
+	result[0] = payloadHashPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], hash)
+	obj.Encode(result[1+cidSize+len(hash):])
+	return result
+}
+
+func addressFromPayloadHashKey(k []byte) (oid.Address, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if k[0] != payloadHashPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+	var obj oid.ID
+	if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
+
+func payloadHashFromPayloadHashKey(k []byte) ([]byte, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if k[0] != payloadHashPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
+}
+
+// rootKeyPrefix returns rootPrefix_<CID>.
+func rootKeyPrefix(cnr cid.ID) []byte {
+	return keyPrefix(cnr, rootPrefix)
+}
+
+func rootKey(cnr cid.ID, objID oid.ID) []byte {
+	return keyObject(rootPrefix, cnr, objID)
+}
+
+func addressFromRootKey(key []byte) (oid.Address, error) {
+	return addressFromKey(rootPrefix, key)
+}
+
+// ownerKey returns ownerPrefix_<CID>_owner_<OID>.
+func ownerKey(cnr cid.ID, obj oid.ID, owner []byte) []byte {
+	result := make([]byte, 1+cidSize+len(owner)+objectKeySize)
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], owner)
+	obj.Encode(result[1+cidSize+len(owner):])
+	return result
+}
+
+// ownerKeyShortPrefix returns ownerPrefix_<CID>.
+func ownerKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// ownerKeyLongPrefix returns ownerPrefix_<CID>_owner.
+func ownerKeyLongPrefix(cnr cid.ID, owner []byte) []byte {
+	result := make([]byte, 1+cidSize+len(owner))
+	result[0] = ownerPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], owner)
+	return result
+}
+
+func addressFromOwnerKey(k []byte) (oid.Address, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if k[0] != ownerPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(k[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+	var obj oid.ID
+	if err := obj.Decode(k[len(k)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
+
+func ownerFromOwnerKey(k []byte) ([]byte, error) {
+	if len(k) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if k[0] != ownerPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	return bytes.Clone(k[1+cidSize : len(k)-objectKeySize]), nil
+}
+
+// parentKeyShortPrefix returns parentPrefix_<CID>.
+func parentKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+func addressOfParentFromParentKey(key []byte) (oid.Address, error) {
+	return addressFromKey(parentPrefix, key[:1+cidSize+objectKeySize])
+}
+
+func addressOfTargetFromParentKey(key []byte) (oid.Address, error) {
+	if len(key) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != parentPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cont cid.ID
+	if err := cont.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode container ID: %w", err)
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[1+cidSize+objectKeySize:]); err != nil {
+		return oid.Address{}, fmt.Errorf("failed to decode object ID: %w", err)
+	}
+	var result oid.Address
+	result.SetContainer(cont)
+	result.SetObject(obj)
+	return result, nil
+}
+
+// parentKeyLongPrefix returns parentPrefix_<CID>_<parent_OID>.
+func parentKeyLongPrefix(cnr cid.ID, parentObj oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	parentObj.Encode(result[bucketKeySize:])
+	return result
+}
+
+func parentKey(cnr cid.ID, parentObj, obj oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
+	result[0] = parentPrefix
+	cnr.Encode(result[1:])
+	parentObj.Encode(result[1+cidSize:])
+	obj.Encode(result[1+cidSize+objectKeySize:])
+	return result
+}
+
+// splitKeyLongPrefix returns splitPrefix_<CID>_splitID.
+func splitKeyLongPrefix(cnr cid.ID, splitID []byte) []byte {
+	result := make([]byte, 1+cidSize+len(splitID))
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], splitID)
+	return result
+}
+
+// splitKeyShortPrefix returns splitPrefix_<CID>.
+func splitKeyShortPrefix(cnr cid.ID) []byte {
+	result := make([]byte, 1+cidSize)
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	return result
+}
+
+// splitKey returns splitPrefix_<CID>_splitID_<OID>.
+func splitKey(cnr cid.ID, obj oid.ID, splitID []byte) []byte {
+	result := make([]byte, 1+cidSize+len(splitID)+objectKeySize)
+	result[0] = splitPrefix
+	cnr.Encode(result[1:])
+	copy(result[1+cidSize:], splitID)
+	obj.Encode(result[1+cidSize+len(splitID):])
+	return result
+}
+
+func addressFromSplitKey(key []byte) (oid.Address, error) {
+	if len(key) < 1+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != splitPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(key[1 : 1+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
+
+func splitIDFromSplitKey(key []byte) ([]byte, error) {
+	if len(key) < 1+cidSize+objectKeySize {
+		return nil, errInvalidKeyLenght
+	}
+	if key[0] != splitPrefix {
+		return nil, errInvalidKeyPrefix
+	}
+	return bytes.Clone(key[1+cidSize : len(key)-objectKeySize]), nil
+}
+
+// returns prefix of the keys with objects of type LOCK for specified container.
+func lockersKeyPrefix(idCnr cid.ID) []byte {
+	return keyPrefix(idCnr, lockersPrefix)
+}
+
+func lockersKey(cnrID cid.ID, objID oid.ID) []byte {
+	return keyObject(lockersPrefix, cnrID, objID)
+}
+
+func addressFromLockersKey(v []byte) (oid.Address, error) {
+	return addressFromKey(lockersPrefix, v)
+}
+
+// returns lockedPrefix_<CID>_<OID>.
+func lockedKeyLongPrefix(cnrID cid.ID, objID oid.ID) []byte {
+	prefix := make([]byte, 1+cidSize+objectKeySize)
+	prefix[0] = lockedPrefix
+	cnrID.Encode(prefix[1:])
+	objID.Encode(prefix[1+cidSize:])
+	return prefix
+}
+
+// returns lockedPrefix_<CID>.
+func lockedKeyShortPrefix(cnrID cid.ID) []byte {
+	prefix := make([]byte, 1+cidSize)
+	prefix[0] = lockedPrefix
+	cnrID.Encode(prefix[1:])
+	return prefix
+}
+
+// returns lockedPrefix_<CID>_<OID>_<LOCKER_OID>.
+func lockedKey(cnrID cid.ID, objID, lockerObjID oid.ID) []byte {
+	result := make([]byte, 1+cidSize+objectKeySize+objectKeySize)
+	result[0] = lockedPrefix
+	cnrID.Encode(result[1:])
+	objID.Encode(result[1+cidSize:])
+	lockerObjID.Encode(result[1+cidSize+objectKeySize:])
+	return result
+}
+
+func lockerObjectIDFromLockedKey(k []byte) (oid.ID, error) {
+	if len(k) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.ID{}, errInvalidKeyLenght
+	}
+	if k[0] != lockedPrefix {
+		return oid.ID{}, errInvalidKeyPrefix
+	}
+	var result oid.ID
+	if err := result.Decode(k[1+cidSize+objectKeySize:]); err != nil {
+		return oid.ID{}, fmt.Errorf("failed to decode lockers object ID: %w", err)
+	}
+	return result, nil
+}
+
+func objectIDFromLockedKey(k []byte) (oid.ID, error) {
+	if len(k) != 1+cidSize+objectKeySize+objectKeySize {
+		return oid.ID{}, errInvalidKeyLenght
+	}
+	if k[0] != lockedPrefix {
+		return oid.ID{}, errInvalidKeyPrefix
+	}
+	var result oid.ID
+	if err := result.Decode(k[1+cidSize : 1+cidSize+objectKeySize]); err != nil {
+		return oid.ID{}, fmt.Errorf("failed to decode locked object ID: %w", err)
+	}
+	return result, nil
+}
 
 // if meets irregular object container in objs - returns its type, otherwise returns object.TypeRegular.
 //
 // firstIrregularObjectType(tx, cnr, obj) usage allows getting object type.
-func firstIrregularObjectType(tx *bbolt.Tx, idCnr cid.ID, objs ...[]byte) objectSDK.Type {
-	if len(objs) == 0 {
-		panic("empty object list in firstIrregularObjectType")
-	}
-
-	var keys [2][1 + cidSize]byte
-
-	irregularTypeBuckets := [...]struct {
-		typ  objectSDK.Type
-		name []byte
-	}{
-		{objectSDK.TypeTombstone, tombstoneBucketName(idCnr, keys[0][:])},
-		{objectSDK.TypeLock, bucketNameLockers(idCnr, keys[1][:])},
-	}
-
-	for i := range objs {
-		for j := range irregularTypeBuckets {
-			if inBucket(tx, irregularTypeBuckets[j].name, objs[i]) {
-				return irregularTypeBuckets[j].typ
-			}
-		}
-	}
-
-	return objectSDK.TypeRegular
-}
+func firstIrregularObjectType(tx *badger.Txn, idCnr cid.ID, objs ...oid.ID) (objectSDK.Type, error) {
+	for _, objID := range objs {
+		key := tombstoneKey(idCnr, objID)
+		v, err := valueOrNil(tx, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeTombstone, nil
+		}
+		key = lockersKey(idCnr, objID)
+		v, err = valueOrNil(tx, key)
+		if err != nil {
+			return objectSDK.TypeRegular, err
+		}
+		if v != nil {
+			return objectSDK.TypeLock, nil
+		}
+	}
+
+	return objectSDK.TypeRegular, nil
+}
 
 // return true if provided object is of LOCK type.
-func isLockObject(tx *bbolt.Tx, idCnr cid.ID, obj oid.ID) bool {
-	return inBucket(tx,
-		bucketNameLockers(idCnr, make([]byte, bucketKeySize)),
-		objectKey(obj, make([]byte, objectKeySize)))
-}
+func isLockObject(tx *badger.Txn, idCnr cid.ID, obj oid.ID) (bool, error) {
+	key := lockersKey(idCnr, obj)
+	v, err := valueOrNil(tx, key)
+	if err != nil {
+		return false, err
+	}
+	return v != nil, nil
+}
+
+func expiredKey(cnr cid.ID, obj oid.ID, epoch uint64) []byte {
+	result := make([]byte, 1+8+cidSize+objectKeySize)
+	result[0] = expiredPrefix
+	// BigEndian is important for iteration order
+	binary.BigEndian.PutUint64(result[1:1+8], epoch)
+	cnr.Encode(result[1+8 : 1+8+cidSize])
+	obj.Encode(result[1+8+cidSize:])
+	return result
+}
+
+func expirationEpochFromExpiredKey(key []byte) (uint64, error) {
+	if len(key) != 1+8+cidSize+objectKeySize {
+		return 0, errInvalidKeyLenght
+	}
+	if key[0] != expiredPrefix {
+		return 0, errInvalidKeyPrefix
+	}
+	// BigEndian is important for iteration order
+	return binary.BigEndian.Uint64(key[1 : 1+8]), nil
+}
+
+func addressFromExpiredKey(key []byte) (oid.Address, error) {
+	if len(key) != 1+8+cidSize+objectKeySize {
+		return oid.Address{}, errInvalidKeyLenght
+	}
+	if key[0] != expiredPrefix {
+		return oid.Address{}, errInvalidKeyPrefix
+	}
+	var cnr cid.ID
+	if err := cnr.Decode(key[1+8 : 1+8+cidSize]); err != nil {
+		return oid.Address{}, err
+	}
+	var obj oid.ID
+	if err := obj.Decode(key[len(key)-objectKeySize:]); err != nil {
+		return oid.Address{}, err
+	}
+	var result oid.Address
+	result.SetObject(obj)
+	result.SetContainer(cnr)
+	return result, nil
+}
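Badger has no analogue of bbolt's nested buckets, so the per-container grouping that buckets used to provide is recovered here through shared key prefixes: every key starts with a one-byte type prefix followed, where applicable, by the container ID, and the big-endian epoch in expiredKey keeps expiration keys sorted by epoch during iteration. A hedged sketch of how a caller might list every primary object of one container with Badger's prefix iterator, assuming only the helpers from this hunk:

    // listPrimary collects addresses of all primary objects in a container
    // by scanning the primaryPrefix_<CID> key range (illustrative only).
    func listPrimary(tx *badger.Txn, cnr cid.ID) ([]oid.Address, error) {
        prefix := primaryKeyPrefix(cnr)

        opts := badger.DefaultIteratorOptions
        opts.Prefix = prefix
        opts.PrefetchValues = false // keys alone are enough here

        it := tx.NewIterator(opts)
        defer it.Close()

        var result []oid.Address
        for it.Rewind(); it.ValidForPrefix(prefix); it.Next() {
            // decode immediately: the key slice is only valid until Next
            addr, err := addressFromPrimaryKey(it.Item().Key())
            if err != nil {
                return nil, err
            }
            result = append(result, addr)
        }
        return result, nil
    }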
@@ -5,7 +5,7 @@ import (
 	"fmt"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/util/logicerr"
-	"go.etcd.io/bbolt"
+	"github.com/dgraph-io/badger/v4"
 )
 
 // version contains current metabase version.

@@ -18,19 +18,19 @@ var versionKey = []byte("version")
 // the current code version.
 var ErrOutdatedVersion = logicerr.New("invalid version, resynchronization is required")
 
-func checkVersion(tx *bbolt.Tx, initialized bool) error {
+func checkVersion(tx *badger.Txn, initialized bool) error {
 	var knownVersion bool
 
-	b := tx.Bucket(shardInfoBucket)
-	if b != nil {
-		data := b.Get(versionKey)
-		if len(data) == 8 {
-			knownVersion = true
+	data, err := valueOrNil(tx, shardInfoKey(versionKey))
+	if err != nil {
+		return err
+	}
+	if len(data) == 8 {
+		knownVersion = true
 
 		stored := binary.LittleEndian.Uint64(data)
 		if stored != version {
 			return fmt.Errorf("%w: expected=%d, stored=%d", ErrOutdatedVersion, version, stored)
-		}
 		}
 	}

@@ -49,13 +49,15 @@ func checkVersion(tx *bbolt.Tx, initialized bool) error {
 	return nil
 }
 
-func updateVersion(tx *bbolt.Tx, version uint64) error {
+func shardInfoKey(key []byte) []byte {
+	result := make([]byte, len(key)+1)
+	result[0] = shardInfoPrefix
+	copy(result[1:], key)
+	return result
+}
+
+func updateVersion(tx *badger.Txn, version uint64) error {
 	data := make([]byte, 8)
 	binary.LittleEndian.PutUint64(data, version)
-	b, err := tx.CreateBucketIfNotExists(shardInfoBucket)
-	if err != nil {
-		return fmt.Errorf("can't create auxiliary bucket: %w", err)
-	}
-	return b.Put(versionKey, data)
+	return tx.Set(shardInfoKey(versionKey), data)
 }
@@ -8,8 +8,9 @@ import (
 	"path/filepath"
 	"testing"
 
+	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
+	"github.com/dgraph-io/badger/v4"
 	"github.com/stretchr/testify/require"
-	"go.etcd.io/bbolt"
 )
 
 type epochStateImpl struct{}

@@ -23,22 +24,23 @@ func TestVersion(t *testing.T) {
 
 	newDB := func(t *testing.T) *DB {
 		return New(WithPath(filepath.Join(dir, t.Name())),
-			WithPermissions(0600), WithEpochState(epochStateImpl{}))
+			WithPermissions(0o600), WithEpochState(epochStateImpl{}))
 	}
 	check := func(t *testing.T, db *DB) {
-		require.NoError(t, db.boltDB.View(func(tx *bbolt.Tx) error {
-			b := tx.Bucket(shardInfoBucket)
-			if b == nil {
-				return errors.New("shard info bucket not found")
+		require.NoError(t, db.database.View(func(tx *badger.Txn) error {
+			item, err := tx.Get(shardInfoKey(versionKey))
+			if err != nil {
+				return err
 			}
-			data := b.Get(versionKey)
-			if len(data) != 8 {
-				return errors.New("invalid version data")
-			}
-			if stored := binary.LittleEndian.Uint64(data); stored != version {
-				return fmt.Errorf("invalid version: %d != %d", stored, version)
-			}
-			return nil
+			return item.Value(func(val []byte) error {
+				if len(val) != 8 {
+					return errors.New("invalid version data")
+				}
+				if stored := binary.LittleEndian.Uint64(val); stored != version {
+					return fmt.Errorf("invalid version: %d != %d", stored, version)
+				}
+				return nil
+			})
 		}))
 	}
 	t.Run("simple", func(t *testing.T) {

@@ -57,9 +59,7 @@ func TestVersion(t *testing.T) {
 	})
 	t.Run("old data", func(t *testing.T) {
 		db := newDB(t)
-		require.NoError(t, db.Open(context.Background(), false))
-		require.NoError(t, db.WriteShardID([]byte{1, 2, 3, 4}))
-		require.NoError(t, db.Close())
+		require.NoError(t, db.SetShardID([]byte{1, 2, 3, 4}, mode.ReadWrite))
 
 		require.NoError(t, db.Open(context.Background(), false))
 		require.NoError(t, db.Init())

@@ -69,7 +69,7 @@ func TestVersion(t *testing.T) {
 	t.Run("invalid version", func(t *testing.T) {
 		db := newDB(t)
 		require.NoError(t, db.Open(context.Background(), false))
-		require.NoError(t, db.boltDB.Update(func(tx *bbolt.Tx) error {
+		require.NoError(t, db.database.Update(func(tx *badger.Txn) error {
 			return updateVersion(tx, version+1)
 		}))
 		require.NoError(t, db.Close())

@@ -80,7 +80,7 @@ func TestVersion(t *testing.T) {
 
 	t.Run("reset", func(t *testing.T) {
 		require.NoError(t, db.Open(context.Background(), false))
-		require.NoError(t, db.Reset())
+		require.NoError(t, db.Reset(context.Background()))
 		check(t, db)
 		require.NoError(t, db.Close())
 	})
@@ -1,6 +1,7 @@
 package shard
 
 import (
+	"context"
 	"fmt"
 
 	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"

@@ -30,7 +31,7 @@ func (s *Shard) ContainerSize(prm ContainerSizePrm) (ContainerSizeRes, error) {
 		return ContainerSizeRes{}, ErrDegradedMode
 	}
 
-	size, err := s.metaBase.ContainerSize(prm.cnr)
+	size, err := s.metaBase.ContainerSize(context.Background(), prm.cnr)
 	if err != nil {
 		return ContainerSizeRes{}, fmt.Errorf("could not get container size: %w", err)
 	}
 
@@ -15,6 +15,7 @@ import (
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
 	"go.uber.org/zap"
+	"golang.org/x/sync/errgroup"
 )
 
 func (s *Shard) handleMetabaseFailure(stage string, err error) error {

@@ -169,49 +170,66 @@ func (s *Shard) Init(ctx context.Context) error {
 }
 
 func (s *Shard) refillMetabase(ctx context.Context) error {
-	err := s.metaBase.Reset()
+	err := s.metaBase.Reset(ctx)
 	if err != nil {
 		return fmt.Errorf("could not reset metabase: %w", err)
 	}
 
-	obj := objectSDK.New()
+	eg, egCtx := errgroup.WithContext(ctx)
+	if s.cfg.refillMetabaseWorkersCount > 0 {
+		eg.SetLimit(s.cfg.refillMetabaseWorkersCount)
+	}
 
-	err = blobstor.IterateBinaryObjects(ctx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error {
-		if err := obj.Unmarshal(data); err != nil {
-			s.log.Warn(logs.ShardCouldNotUnmarshalObject,
-				zap.Stringer("address", addr),
-				zap.String("err", err.Error()))
-			return nil
-		}
+	itErr := blobstor.IterateBinaryObjects(egCtx, s.blobStor, func(addr oid.Address, data []byte, descriptor []byte) error {
+		eg.Go(func() error {
+			obj := objectSDK.New()
+			if err := obj.Unmarshal(data); err != nil {
+				s.log.Warn(logs.ShardCouldNotUnmarshalObject,
+					zap.Stringer("address", addr),
+					zap.String("err", err.Error()))
+				return nil
+			}
 
-		var err error
-		switch obj.Type() {
-		case objectSDK.TypeTombstone:
-			err = s.refillTombstoneObject(ctx, obj)
-		case objectSDK.TypeLock:
-			err = s.refillLockObject(ctx, obj)
-		default:
-		}
-		if err != nil {
-			return err
-		}
+			var err error
+			switch obj.Type() {
+			case objectSDK.TypeTombstone:
+				err = s.refillTombstoneObject(egCtx, obj)
+			case objectSDK.TypeLock:
+				err = s.refillLockObject(egCtx, obj)
+			default:
+			}
+			if err != nil {
+				return err
+			}
 
-		var mPrm meta.PutPrm
-		mPrm.SetObject(obj)
-		mPrm.SetStorageID(descriptor)
+			var mPrm meta.PutPrm
+			mPrm.SetObject(obj)
+			mPrm.SetStorageID(descriptor)
 
-		_, err = s.metaBase.Put(ctx, mPrm)
-		if err != nil && !client.IsErrObjectAlreadyRemoved(err) && !errors.Is(err, meta.ErrObjectIsExpired) {
-			return err
-		}
+			_, err = s.metaBase.Put(egCtx, mPrm)
+			if err != nil && !client.IsErrObjectAlreadyRemoved(err) && !errors.Is(err, meta.ErrObjectIsExpired) {
+				return err
+			}
 
-		return nil
+			return nil
+		})
+
+		select {
+		case <-egCtx.Done():
+			return egCtx.Err()
+		default:
+			return nil
+		}
 	})
+
+	egErr := eg.Wait()
+
+	err = errors.Join(egErr, itErr)
 	if err != nil {
 		return fmt.Errorf("could not put objects to the meta: %w", err)
 	}
 
-	err = s.metaBase.SyncCounters()
+	err = s.metaBase.SyncCounters(ctx)
 	if err != nil {
 		return fmt.Errorf("could not sync object counters: %w", err)
 	}
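The refill path now fans each decoded object out to an errgroup worker: SetLimit bounds the number of in-flight eg.Go calls (eg.Go blocks once the limit is reached, which naturally throttles the blobstor iterator), and the select on egCtx.Done() stops producing as soon as any worker fails. A stripped-down sketch of the same producer/worker shape, with the per-item work passed in as a callback:

    // processAll bounds concurrency with errgroup and stops producing
    // once any worker has failed (minimal sketch of the pattern above).
    func processAll(ctx context.Context, items [][]byte, workers int,
        handle func(context.Context, []byte) error,
    ) error {
        eg, egCtx := errgroup.WithContext(ctx)
        if workers > 0 {
            eg.SetLimit(workers) // eg.Go blocks while `workers` goroutines are busy
        }

        for _, data := range items {
            data := data
            eg.Go(func() error {
                return handle(egCtx, data) // per-item work
            })

            select {
            case <-egCtx.Done(): // a worker already failed: stop producing
                return errors.Join(eg.Wait(), egCtx.Err())
            default:
            }
        }
        return eg.Wait()
    }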
@@ -2,7 +2,6 @@ package shard
 
 import (
 	"context"
-	"io/fs"
 	"math"
 	"os"
 	"path/filepath"

@@ -29,7 +28,6 @@ import (
 	oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
 	objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test"
 	"github.com/stretchr/testify/require"
-	"go.etcd.io/bbolt"
 )
 
 type objAddr struct {

@@ -50,13 +48,6 @@ func TestShardOpen(t *testing.T) {
 	))
 
 	var allowedMode atomic.Int64
-	openFileMetabase := func(p string, f int, perm fs.FileMode) (*os.File, error) {
-		const modeMask = os.O_RDONLY | os.O_RDWR | os.O_WRONLY
-		if int64(f&modeMask) == allowedMode.Load() {
-			return os.OpenFile(p, f, perm)
-		}
-		return nil, fs.ErrPermission
-	}
 
 	wcOpts := writecacheconfig.Options{
 		Type: writecacheconfig.TypeBBolt,

@@ -76,7 +67,6 @@ func TestShardOpen(t *testing.T) {
 		WithMetaBaseOptions(
 			meta.WithPath(metaPath),
 			meta.WithEpochState(epochState{}),
-			meta.WithBoltDBOptions(&bbolt.Options{OpenFile: openFileMetabase}),
 		),
 		WithPiloramaOptions(
 			pilorama.WithPath(filepath.Join(dir, "pilorama"))),

@@ -344,11 +334,11 @@ func TestRefillMetabase(t *testing.T) {
 	checkTombMembers(true)
 	checkLocked(t, cnrLocked, locked)
 
-	c, err := sh.metaBase.ObjectCounters()
+	c, err := sh.metaBase.ObjectCounters(context.Background())
 	require.NoError(t, err)
 
-	phyBefore := c.Phy()
-	logicalBefore := c.Logic()
+	phyBefore := c.Phy
+	logicalBefore := c.Logic
 
 	err = sh.Close()
 	require.NoError(t, err)

@@ -379,11 +369,11 @@ func TestRefillMetabase(t *testing.T) {
 	err = sh.refillMetabase(context.Background())
 	require.NoError(t, err)
 
-	c, err = sh.metaBase.ObjectCounters()
+	c, err = sh.metaBase.ObjectCounters(context.Background())
 	require.NoError(t, err)
 
-	require.Equal(t, phyBefore, c.Phy())
-	require.Equal(t, logicalBefore, c.Logic())
+	require.Equal(t, phyBefore, c.Phy)
+	require.Equal(t, logicalBefore, c.Logic)
 
 	checkAllObjs(true)
 	checkObj(object.AddressOf(tombObj), tombObj)
@@ -23,9 +23,9 @@ func (s *Shard) LogicalObjectsCount(ctx context.Context) (uint64, error) {
 		return 0, ErrDegradedMode
 	}
 
-	cc, err := s.metaBase.ObjectCounters()
+	cc, err := s.metaBase.ObjectCounters(ctx)
 	if err != nil {
 		return 0, err
 	}
-	return cc.Logic(), nil
+	return cc.Logic, nil
 }
@@ -132,16 +132,16 @@ func (s *Shard) deleteFromBlobstor(ctx context.Context, addr oid.Address) error
 
 func (s *Shard) deleteFromMetabase(ctx context.Context, addr oid.Address) error {
 	var delPrm meta.DeletePrm
-	delPrm.SetAddresses(addr)
+	delPrm.Address = addr
 
 	res, err := s.metaBase.Delete(ctx, delPrm)
 	if err != nil {
 		return err
 	}
-	s.decObjectCounterBy(physical, res.RawObjectsRemoved())
-	s.decObjectCounterBy(logical, res.AvailableObjectsRemoved())
-	removedPayload := res.RemovedPhysicalObjectSizes()[0]
-	logicalRemovedPayload := res.RemovedLogicalObjectSizes()[0]
+	s.decObjectCounterBy(physical, res.PhyCount)
+	s.decObjectCounterBy(logical, res.LogicCount)
+	removedPayload := res.PhySize
+	logicalRemovedPayload := res.LogicSize
 	if logicalRemovedPayload > 0 {
 		s.addToContainerSize(addr.Container().EncodeToString(), -int64(logicalRemovedPayload))
 	}
 
@@ -414,8 +414,8 @@ func (s *Shard) handleExpiredObjects(ctx context.Context, expired []oid.Address)
 		return
 	}

-	s.gc.metrics.AddInhumedObjectCount(res.AvailableInhumed(), objectTypeRegular)
-	s.decObjectCounterBy(logical, res.AvailableInhumed())
+	s.gc.metrics.AddInhumedObjectCount(res.LogicInhumed(), objectTypeRegular)
+	s.decObjectCounterBy(logical, res.LogicInhumed())

 	i := 0
 	for i < res.GetDeletionInfoLength() {
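Reviewer note: this hunk, together with the matching tombstone, lock, and Inhume hunks below, renames InhumeRes.AvailableInhumed to LogicInhumed. Judging from the diff it is a pure rename with unchanged semantics; a stub sketch:

package main

import "fmt"

// InhumeRes is a stub; LogicInhumed is the renamed accessor.
type InhumeRes struct{ logicInhumed uint64 }

// LogicInhumed reports how many objects were logically inhumed,
// replacing the former AvailableInhumed name.
func (r InhumeRes) LogicInhumed() uint64 { return r.logicInhumed }

func main() {
	res := InhumeRes{logicInhumed: 3}
	fmt.Println(res.LogicInhumed())
}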
@@ -627,8 +627,8 @@ func (s *Shard) HandleExpiredTombstones(ctx context.Context, tss []meta.Tombston
 		return
 	}

-	s.gc.metrics.AddInhumedObjectCount(res.AvailableInhumed(), objectTypeTombstone)
-	s.decObjectCounterBy(logical, res.AvailableInhumed())
+	s.gc.metrics.AddInhumedObjectCount(res.LogicInhumed(), objectTypeTombstone)
+	s.decObjectCounterBy(logical, res.LogicInhumed())

 	i := 0
 	for i < res.GetDeletionInfoLength() {
@@ -651,7 +651,7 @@ func (s *Shard) HandleExpiredLocks(ctx context.Context, epoch uint64, lockers []
 	if s.GetMode().NoMetabase() {
 		return
 	}
-	unlocked, err := s.metaBase.FreeLockedBy(lockers)
+	unlocked, err := s.metaBase.FreeLockedBy(ctx, lockers)
 	if err != nil {
 		s.log.Warn(logs.ShardFailureToUnlockObjects,
 			zap.String("error", err.Error()),
@@ -673,8 +673,8 @@ func (s *Shard) HandleExpiredLocks(ctx context.Context, epoch uint64, lockers []
 		return
 	}

-	s.gc.metrics.AddInhumedObjectCount(res.AvailableInhumed(), objectTypeLock)
-	s.decObjectCounterBy(logical, res.AvailableInhumed())
+	s.gc.metrics.AddInhumedObjectCount(res.LogicInhumed(), objectTypeLock)
+	s.decObjectCounterBy(logical, res.LogicInhumed())

 	i := 0
 	for i < res.GetDeletionInfoLength() {
@@ -706,7 +706,7 @@ func (s *Shard) HandleDeletedLocks(lockers []oid.Address) {
 		return
 	}

-	_, err := s.metaBase.FreeLockedBy(lockers)
+	_, err := s.metaBase.FreeLockedBy(context.Background(), lockers)
 	if err != nil {
 		s.log.Warn(logs.ShardFailureToUnlockObjects,
 			zap.String("error", err.Error()),
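Reviewer note: FreeLockedBy now takes a context as its first argument. Callers that already have one thread it through (HandleExpiredLocks), while HandleDeletedLocks, which has no context parameter, falls back to context.Background(). A stub sketch of the new signature; lockDB and the string locker IDs are illustrative, not the real metabase API:

package main

import (
	"context"
	"fmt"
)

// lockDB is a stub for the metabase's lock index.
type lockDB struct{}

// FreeLockedBy now takes a context first; a real implementation could
// honor cancellation while scanning lock records.
func (db *lockDB) FreeLockedBy(ctx context.Context, lockers []string) ([]string, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	return lockers, nil // pretend every locker released its objects
}

func main() {
	db := &lockDB{}
	unlocked, err := db.FreeLockedBy(context.Background(), []string{"lock-1"})
	fmt.Println(unlocked, err)
}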
@@ -3,6 +3,7 @@ package shard
 import (
 	"context"

+	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
 	"github.com/mr-tron/base58"
 	"go.uber.org/zap"
@@ -39,7 +40,7 @@ func (s *Shard) UpdateID(ctx context.Context) (err error) {
 			err = cErr
 		}
 	}()
-	id, err := s.metaBase.ReadShardID()
+	id, err := s.metaBase.GetShardID(mode.ReadWrite)
 	if err != nil {
 		return err
 	}
@@ -66,5 +67,5 @@ func (s *Shard) UpdateID(ctx context.Context) (err error) {
 	if len(id) != 0 {
 		return nil
 	}
-	return s.metaBase.WriteShardID(*s.info.ID)
+	return s.metaBase.SetShardID(*s.info.ID, mode.ReadWrite)
 }
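Reviewer note: the shard-ID accessors become mode-aware, so ReadShardID()/WriteShardID(id) turn into GetShardID(mode)/SetShardID(id, mode), which is why the mode package import appears above. A stub sketch of the UpdateID flow under that signature; Mode, ReadWrite, and the metabase stub here are stand-ins, not the real packages:

package main

import (
	"errors"
	"fmt"
)

// Mode stands in for the shard/mode package's Mode type.
type Mode uint32

const ReadWrite Mode = 0

// metabase is a stub; GetShardID/SetShardID are parameterized by the mode
// the metabase should be opened with.
type metabase struct{ id []byte }

func (m *metabase) GetShardID(_ Mode) ([]byte, error) { return m.id, nil }

func (m *metabase) SetShardID(id []byte, _ Mode) error {
	if len(id) == 0 {
		return errors.New("empty shard ID")
	}
	m.id = id
	return nil
}

func main() {
	db := &metabase{}
	// Mirrors UpdateID: keep a persisted ID if present, otherwise store one.
	if id, _ := db.GetShardID(ReadWrite); len(id) == 0 {
		_ = db.SetShardID([]byte("shard-1"), ReadWrite)
	}
	id, _ := db.GetShardID(ReadWrite)
	fmt.Printf("shard ID: %s\n", id)
}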
@@ -119,7 +119,7 @@ func (s *Shard) Inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) {

 	s.m.RUnlock()

-	s.decObjectCounterBy(logical, res.AvailableInhumed())
+	s.decObjectCounterBy(logical, res.LogicInhumed())

 	i := 0
 	for i < res.GetDeletionInfoLength() {
@@ -1,60 +0,0 @@
-package shard
-
-import (
-	"context"
-
-	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
-	meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
-	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
-	oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
-	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/trace"
-	"go.uber.org/zap"
-)
-
-// ToMoveItPrm encapsulates parameters for ToMoveIt operation.
-type ToMoveItPrm struct {
-	addr oid.Address
-}
-
-// ToMoveItRes encapsulates results of ToMoveIt operation.
-type ToMoveItRes struct{}
-
-// SetAddress sets object address that should be marked to move into another
-// shard.
-func (p *ToMoveItPrm) SetAddress(addr oid.Address) {
-	p.addr = addr
-}
-
-// ToMoveIt calls metabase.ToMoveIt method to mark object as relocatable to
-// another shard.
-func (s *Shard) ToMoveIt(ctx context.Context, prm ToMoveItPrm) (ToMoveItRes, error) {
-	ctx, span := tracing.StartSpanFromContext(ctx, "Shard.ToMoveIt",
-		trace.WithAttributes(
-			attribute.String("shard_id", s.ID().String()),
-			attribute.String("address", prm.addr.EncodeToString()),
-		))
-	defer span.End()
-
-	s.m.RLock()
-	defer s.m.RUnlock()
-
-	m := s.info.Mode
-	if m.ReadOnly() {
-		return ToMoveItRes{}, ErrReadOnlyMode
-	} else if m.NoMetabase() {
-		return ToMoveItRes{}, ErrDegradedMode
-	}
-
-	var toMovePrm meta.ToMoveItPrm
-	toMovePrm.SetAddress(prm.addr)
-
-	_, err := s.metaBase.ToMoveIt(ctx, toMovePrm)
-	if err != nil {
-		s.log.Debug(logs.ShardCouldNotMarkObjectForShardRelocationInMetabase,
-			zap.String("error", err.Error()),
-		)
-	}
-
-	return ToMoveItRes{}, nil
-}
@@ -92,7 +92,8 @@ type MetricsWriter interface {
 type cfg struct {
 	m sync.RWMutex

 	refillMetabase bool
+	refillMetabaseWorkersCount int

 	rmBatchSize int

@@ -308,6 +309,13 @@ func WithRefillMetabase(v bool) Option {
 	}
 }

+// WithRefillMetabaseWorkersCount returns option to set count of workers to refill the Metabase on Shard's initialization step.
+func WithRefillMetabaseWorkersCount(v int) Option {
+	return func(c *cfg) {
+		c.refillMetabaseWorkersCount = v
+	}
+}
+
 // WithMode returns option to set shard's mode. Mode must be one of the predefined:
 // - mode.ReadWrite;
 // - mode.ReadOnly.
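Reviewer note: the new option plugs into the shard package's existing functional-option pattern and wires the refill-workers count from the node config (see refillMetabaseWorkersCount in shardCfg) into the shard's cfg. A minimal self-contained sketch of that pattern; the cfg stub and the worker count of 100 are illustrative, not the real shard.New:

package main

import "fmt"

type cfg struct {
	refillMetabase             bool
	refillMetabaseWorkersCount int
}

// Option follows the shard package's functional-option pattern.
type Option func(*cfg)

func WithRefillMetabase(v bool) Option {
	return func(c *cfg) { c.refillMetabase = v }
}

// WithRefillMetabaseWorkersCount mirrors the option added in this hunk.
func WithRefillMetabaseWorkersCount(v int) Option {
	return func(c *cfg) { c.refillMetabaseWorkersCount = v }
}

func main() {
	c := &cfg{}
	for _, opt := range []Option{
		WithRefillMetabase(true),
		WithRefillMetabaseWorkersCount(100), // worker count is illustrative
	} {
		opt(c)
	}
	fmt.Printf("%+v\n", *c)
}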
@@ -405,7 +413,7 @@ const (

 func (s *Shard) updateMetrics(ctx context.Context) {
 	if s.cfg.metricsWriter != nil && !s.GetMode().NoMetabase() {
-		cc, err := s.metaBase.ObjectCounters()
+		cc, err := s.metaBase.ObjectCounters(ctx)
 		if err != nil {
 			s.log.Warn(logs.ShardMetaObjectCounterRead,
 				zap.Error(err),
@@ -414,8 +422,8 @@ func (s *Shard) updateMetrics(ctx context.Context) {
 			return
 		}

-		s.cfg.metricsWriter.SetObjectCounter(physical, cc.Phy())
-		s.cfg.metricsWriter.SetObjectCounter(logical, cc.Logic())
+		s.cfg.metricsWriter.SetObjectCounter(physical, cc.Phy)
+		s.cfg.metricsWriter.SetObjectCounter(logical, cc.Logic)

 		cnrList, err := s.metaBase.Containers(ctx)
 		if err != nil {
@@ -426,7 +434,7 @@ func (s *Shard) updateMetrics(ctx context.Context) {
 		var totalPayload uint64

 		for i := range cnrList {
-			size, err := s.metaBase.ContainerSize(cnrList[i])
+			size, err := s.metaBase.ContainerSize(ctx, cnrList[i])
 			if err != nil {
 				s.log.Warn(logs.ShardMetaCantReadContainerSize,
 					zap.String("cid", cnrList[i].EncodeToString()),