[#1223] scripts: Add script to populate metabase

Signed-off-by: Aleksey Savchuk <a.savchuk@yadro.com>
This commit is contained in:
Aleksey Savchuk 2024-07-15 14:07:32 +03:00 committed by Evgenii Stratonikov
parent a4fb7f085b
commit b9043433a0
3 changed files with 554 additions and 0 deletions

View file

@ -0,0 +1,132 @@
package internal
import (
"crypto/sha256"
"fmt"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/checksum"
cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test"
objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user"
usertest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user/test"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/version"
"git.frostfs.info/TrueCloudLab/tzhash/tz"
"golang.org/x/exp/rand"
)
// GeneratePayloadPool builds a pool of count payloads, each made of size
// random bytes. The returned slice is never nil.
func GeneratePayloadPool(count uint, size uint) [][]byte {
	payloads := make([][]byte, count)
	for idx := range payloads {
		buf := make([]byte, size)
		// Both results of Read are intentionally discarded.
		_, _ = rand.Read(buf)
		payloads[idx] = buf
	}
	return payloads
}
// GenerateAttributePool returns every pairing of the keys "key0".."key<count-1>"
// with the values "value0".."value<count-1>", i.e. count*count attributes
// ordered by key first and by value second.
func GenerateAttributePool(count uint) []objectSDK.Attribute {
	attrs := make([]objectSDK.Attribute, 0, count*count)
	for keyIdx := uint(0); keyIdx < count; keyIdx++ {
		for valueIdx := uint(0); valueIdx < count; valueIdx++ {
			a := objectSDK.NewAttribute()
			a.SetKey(fmt.Sprintf("key%d", keyIdx))
			a.SetValue(fmt.Sprintf("value%d", valueIdx))
			attrs = append(attrs, *a)
		}
	}
	return attrs
}
// GenerateOwnerPool returns count owner IDs, each obtained from usertest.ID.
// The returned slice is never nil.
func GenerateOwnerPool(count uint) []user.ID {
	owners := make([]user.ID, count)
	for idx := range owners {
		owners[idx] = usertest.ID()
	}
	return owners
}
// ObjectOption adjusts an object in place. GenerateObject applies the options
// it is given, in order, after filling in its own defaults.
type ObjectOption func(obj *objectSDK.Object)
// GenerateObject creates an object with random object, owner and container
// IDs, an EC header taken from a test object (with a random parent ID),
// version 2.1 and an empty payload with matching size and checksums. The given
// options are then applied in order and may override any of these defaults.
func GenerateObject(options ...ObjectOption) *objectSDK.Object {
	var apiVersion version.Version
	apiVersion.SetMajor(2)
	apiVersion.SetMinor(1)

	// The defaults describe an empty payload.
	emptyPayload := []byte{}

	var sha checksum.Checksum
	sha.SetSHA256(sha256.Sum256(emptyPayload))

	// The homomorphic hash is computed over the SHA-256 value.
	var homomorphic checksum.Checksum
	homomorphic.SetTillichZemor(tz.Sum(sha.Value()))

	result := objectSDK.New()
	result.SetID(oidtest.ID())
	result.SetOwnerID(usertest.ID())
	result.SetContainerID(cidtest.ID())

	ecHeader := objecttest.Object().GetECHeader()
	ecHeader.SetParent(oidtest.ID())
	result.SetECHeader(ecHeader)

	result.SetVersion(&apiVersion)
	result.SetPayload(emptyPayload)
	result.SetPayloadSize(uint64(len(emptyPayload)))
	result.SetPayloadChecksum(sha)
	result.SetPayloadHomomorphicHash(homomorphic)

	for idx := range options {
		options[idx](result)
	}
	return result
}
// WithContainerID returns an option that sets the object's container ID to cid.
func WithContainerID(cid cid.ID) ObjectOption {
	setContainer := func(o *objectSDK.Object) {
		o.SetContainerID(cid)
	}
	return setContainer
}
// WithType returns an option that sets the object's type to typ.
func WithType(typ objectSDK.Type) ObjectOption {
	setType := func(o *objectSDK.Object) {
		o.SetType(typ)
	}
	return setType
}
// WithPayloadFromPool returns an option that gives an object a payload picked
// at random from pool, together with the matching payload size, SHA-256
// checksum and homomorphic hash (computed over the SHA-256 value).
//
// The payload is drawn every time the option is applied, so objects produced
// with the same option are spread over the whole pool, in the same way as
// WithAttributesFromPool and WithOwnerIDFromPool draw their values. Objects
// share the pool's byte slices; the payload is not copied.
//
// pool must not be empty: rand.Intn panics when its argument is not positive,
// which happens when the option is applied.
func WithPayloadFromPool(pool [][]byte) ObjectOption {
	return func(obj *objectSDK.Object) {
		payload := pool[rand.Intn(len(pool))]

		var csum checksum.Checksum
		csum.SetSHA256(sha256.Sum256(payload))

		var csumTZ checksum.Checksum
		csumTZ.SetTillichZemor(tz.Sum(csum.Value()))

		obj.SetPayload(payload)
		obj.SetPayloadSize(uint64(len(payload)))
		obj.SetPayloadChecksum(csum)
		obj.SetPayloadHomomorphicHash(csumTZ)
	}
}
// WithAttributesFromPool returns an option that replaces the object's
// attributes with count attributes drawn at random from pool. Every draw is
// independent, so the same pool entry may be picked more than once.
func WithAttributesFromPool(pool []objectSDK.Attribute, count uint) ObjectOption {
	return func(o *objectSDK.Object) {
		picked := make([]objectSDK.Attribute, count)
		for idx := range picked {
			picked[idx] = pool[rand.Intn(len(pool))]
		}
		o.SetAttributes(picked...)
	}
}
// WithOwnerIDFromPool returns an option that sets the object's owner to an ID
// drawn at random from pool each time the option is applied.
func WithOwnerIDFromPool(pool []user.ID) ObjectOption {
	return func(o *objectSDK.Object) {
		owner := pool[rand.Intn(len(pool))]
		o.SetOwnerID(owner)
	}
}

View file

@ -0,0 +1,263 @@
package internal
import (
"context"
"fmt"
"math/rand"
"sync"
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer"
"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
"golang.org/x/sync/errgroup"
)
// EpochState is a stateless epoch source that always reports epoch zero.
type EpochState struct{}

// CurrentEpoch returns 0.
func (EpochState) CurrentEpoch() uint64 {
	const epoch uint64 = 0
	return epoch
}
// PopulateWithObjects submits count put tasks to group. Each task stores one
// object produced by factory, tagged with a storage ID of the form "d/d/d"
// where every d is a random decimal digit. Failures are reported through group.
func PopulateWithObjects(
	ctx context.Context,
	db *meta.DB,
	group *errgroup.Group,
	count uint,
	factory func() *objectSDK.Object,
) {
	const digits = "0123456789"
	randomDigit := func() byte {
		return digits[rand.Int()%len(digits)]
	}

	for remaining := count; remaining > 0; remaining-- {
		obj := factory()
		storageID := fmt.Sprintf("%c/%c/%c", randomDigit(), randomDigit(), randomDigit())

		var putPrm meta.PutPrm
		putPrm.SetObject(obj)
		putPrm.SetStorageID([]byte(storageID))

		group.Go(func() error {
			_, err := db.Put(ctx, putPrm)
			if err == nil {
				return nil
			}
			return fmt.Errorf("couldn't put an object: %w", err)
		})
	}
}
// PopulateWithBigObjects submits count tasks to group, each of which writes
// one object produced by factory through populateWithBigObject. Failures are
// reported through group.
func PopulateWithBigObjects(
	ctx context.Context,
	db *meta.DB,
	group *errgroup.Group,
	count uint,
	factory func() *objectSDK.Object,
) {
	// Every task is identical, so a single closure is shared by all of them.
	putBigObject := func() error {
		err := populateWithBigObject(ctx, db, factory)
		if err == nil {
			return nil
		}
		return fmt.Errorf("couldn't put a big object: %w", err)
	}

	for remaining := count; remaining > 0; remaining-- {
		group.Go(putBigObject)
	}
}
// populateWithBigObject writes one object produced by factory through a
// payload size limiter whose output is stored in db.
//
// The payload (30 zero bytes) is larger than the limiter's maximum object size
// (10 bytes), so the limiter has to split it; the exact set of objects it
// emits is decided by the transformer package.
//
// It returns an error if the signing key cannot be generated or if any of the
// header/payload/close steps fails.
func populateWithBigObject(
	ctx context.Context,
	db *meta.DB,
	factory func() *objectSDK.Object,
) error {
	const (
		maxObjectSize = 10
		payloadSize   = 30
	)

	t := &target{db: db}

	// The key is dereferenced below, so a generation failure must not be ignored.
	pk, err := keys.NewPrivateKey()
	if err != nil {
		return fmt.Errorf("couldn't generate a private key: %w", err)
	}

	p := transformer.NewPayloadSizeLimiter(transformer.Params{
		Key:            &pk.PrivateKey,
		NextTargetInit: func() transformer.ObjectWriter { return t },
		NetworkState:   EpochState{},
		MaxSize:        maxObjectSize,
	})

	obj := factory()
	payload := make([]byte, payloadSize)

	if err := p.WriteHeader(ctx, obj); err != nil {
		return err
	}
	if _, err := p.Write(ctx, payload); err != nil {
		return err
	}
	_, err = p.Close(ctx)
	return err
}
// target is the transformer.ObjectWriter used by populateWithBigObject: every
// object written to it is put into the metabase.
type target struct {
	db *meta.DB
}

// WriteObject stores obj in the metabase and returns the error of the put, if any.
func (t *target) WriteObject(ctx context.Context, obj *objectSDK.Object) error {
	var putPrm meta.PutPrm
	putPrm.SetObject(obj)

	if _, err := t.db.Put(ctx, putPrm); err != nil {
		return err
	}
	return nil
}
// PopulateGraveyard stores one tombstone and count objects produced by factory
// and inhumes every successfully stored object with the tombstone's address.
//
// All puts and inhumes run as tasks of group, which also collects their
// errors. Addresses of stored objects travel through a channel buffered with
// workBufferSize to a helper goroutine that submits the inhume tasks.
//
// The function returns only after every task has been submitted to group, so
// a group.Wait called afterwards covers all of them. Submitting tasks from
// goroutines that outlive this call would let group.Wait return while inhumes
// are still being scheduled.
func PopulateGraveyard(
	ctx context.Context,
	db *meta.DB,
	group *errgroup.Group,
	workBufferSize int,
	count uint,
	factory func() *objectSDK.Object,
) {
	ts := factory()
	ts.SetType(objectSDK.TypeTombstone)

	tsPrm := meta.PutPrm{}
	tsPrm.SetObject(ts)

	group.Go(func() error {
		if _, err := db.Put(ctx, tsPrm); err != nil {
			return fmt.Errorf("couldn't put a tombstone object: %w", err)
		}
		return nil
	})

	cID, _ := ts.ContainerID()
	oID, _ := ts.ID()

	var tsAddr oid.Address
	tsAddr.SetContainer(cID)
	tsAddr.SetObject(oID)

	addrs := make(chan oid.Address, workBufferSize)

	// The helper turns every received address into an inhume task and signals
	// through inhumesSubmitted once addrs is closed and drained.
	inhumesSubmitted := make(chan struct{})
	go func() {
		defer close(inhumesSubmitted)

		for addr := range addrs {
			inhumePrm := meta.InhumePrm{}
			inhumePrm.SetAddresses(addr)
			inhumePrm.SetTombstoneAddress(tsAddr)

			group.Go(func() error {
				if _, err := db.Inhume(ctx, inhumePrm); err != nil {
					return fmt.Errorf("couldn't inhume an object: %w", err)
				}
				return nil
			})
		}
	}()

	var puts sync.WaitGroup
	for i := uint(0); i < count; i++ {
		obj := factory()

		putPrm := meta.PutPrm{}
		putPrm.SetObject(obj)

		puts.Add(1)
		group.Go(func() error {
			defer puts.Done()

			if _, err := db.Put(ctx, putPrm); err != nil {
				return fmt.Errorf("couldn't put an object: %w", err)
			}

			cID, _ := obj.ContainerID()
			oID, _ := obj.ID()

			var addr oid.Address
			addr.SetContainer(cID)
			addr.SetObject(oID)

			addrs <- addr
			return nil
		})
	}

	// No put task can send after this point, so the channel may be closed;
	// then wait until the helper has submitted the remaining inhumes.
	puts.Wait()
	close(addrs)
	<-inhumesSubmitted
}
// PopulateLocked stores one lock object and count objects produced by factory
// and locks every successfully stored object with the lock object's container
// and object IDs.
//
// All puts and locks run as tasks of group, which also collects their errors.
// IDs of stored objects travel through a channel buffered with workBufferSize
// to a helper goroutine that submits the lock tasks.
//
// The function returns only after every task has been submitted to group, so
// a group.Wait called afterwards covers all of them. Each put task marks
// itself done when it finishes, which lets the ID channel be closed and the
// helper goroutine terminate.
func PopulateLocked(
	ctx context.Context,
	db *meta.DB,
	group *errgroup.Group,
	workBufferSize int,
	count uint,
	factory func() *objectSDK.Object,
) {
	locker := factory()
	locker.SetType(objectSDK.TypeLock)

	lockerPrm := meta.PutPrm{}
	lockerPrm.SetObject(locker)

	group.Go(func() error {
		if _, err := db.Put(ctx, lockerPrm); err != nil {
			return fmt.Errorf("couldn't put a locker object: %w", err)
		}
		return nil
	})

	// The locker's IDs do not change, so they are read once.
	lockerCID, _ := locker.ContainerID()
	lockerOID, _ := locker.ID()

	ids := make(chan oid.ID, workBufferSize)

	// The helper turns every received ID into a lock task and signals through
	// locksSubmitted once ids is closed and drained.
	locksSubmitted := make(chan struct{})
	go func() {
		defer close(locksSubmitted)

		for id := range ids {
			// A per-iteration slice keeps the task independent of the range
			// variable, which is shared between iterations before Go 1.22.
			locked := []oid.ID{id}

			group.Go(func() error {
				if err := db.Lock(ctx, lockerCID, lockerOID, locked); err != nil {
					return fmt.Errorf("couldn't lock an object: %w", err)
				}
				return nil
			})
		}
	}()

	var puts sync.WaitGroup
	for i := uint(0); i < count; i++ {
		obj := factory()

		putPrm := meta.PutPrm{}
		putPrm.SetObject(obj)

		puts.Add(1)
		group.Go(func() error {
			// Done belongs to the task: deferring it in the scheduling loop
			// would postpone it until after the Wait below.
			defer puts.Done()

			if _, err := db.Put(ctx, putPrm); err != nil {
				return fmt.Errorf("couldn't put an object: %w", err)
			}

			id, _ := obj.ID()
			ids <- id
			return nil
		})
	}

	// No put task can send after this point, so the channel may be closed;
	// then wait until the helper has submitted the remaining locks.
	puts.Wait()
	close(ids)
	<-locksSubmitted
}

View file

@ -0,0 +1,159 @@
package main
import (
"context"
"errors"
"flag"
"fmt"
"os"
meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode"
"git.frostfs.info/TrueCloudLab/frostfs-node/scripts/populate-metabase/internal"
cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test"
objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
"golang.org/x/sync/errgroup"
)
// Command-line settings; main binds each of them to a flag.
var (
	// path is the metabase file location (-path).
	path string
	// force allows an existing file at path to be replaced (-force).
	force bool
	// jobs is used as the errgroup limit and as the work buffer size (-j).
	jobs uint

	// numContainers is the number of containers to create (-containers).
	numContainers uint
	// numObjects is the number of objects per container (-objects).
	numObjects uint
	// numAttributesPerObj is the number of attributes per object (-attributes).
	numAttributesPerObj uint
	// numOwners is the size of the owner pool (-distinct-owners).
	numOwners uint
	// numPayloads is the size of the payload pool (-distinct-payloads).
	numPayloads uint
	// numAttributes is the argument of the attribute pool generator
	// (-distinct-attributes).
	numAttributes uint
)
// main parses and validates the command-line flags, removes an existing file
// at -path when -force is given, and then populates a new metabase there. Any
// validation or runtime failure is printed to stderr and ends the process
// with exit code 1.
func main() {
	flag.StringVar(&path, "path", "", "Path to metabase")
	flag.BoolVar(&force, "force", false, "Rewrite existing database")
	flag.UintVar(&jobs, "j", 10000, "Number of jobs to run")
	flag.UintVar(&numContainers, "containers", 0, "Number of containers to be created")
	flag.UintVar(&numObjects, "objects", 0, "Number of objects per container")
	flag.UintVar(&numAttributesPerObj, "attributes", 0, "Number of attributes per object")
	flag.UintVar(&numOwners, "distinct-owners", 10, "Number of distinct owners to be used")
	flag.UintVar(&numPayloads, "distinct-payloads", 10, "Number of distinct payloads to be used")
	flag.UintVar(&numAttributes, "distinct-attributes", 10, "Number of distinct attributes to be used")

	flag.Parse()

	// jobs becomes the errgroup limit; a limit of zero prevents any task from
	// being started, so the population would block forever.
	exitIf(jobs == 0, "must have at least one job\n")
	exitIf(numPayloads == 0, "must have payloads\n")
	exitIf(numAttributes == 0, "must have attributes\n")
	exitIf(numOwners == 0, "must have owners\n")
	exitIf(len(path) == 0, "path to metabase not specified\n")
	exitIf(
		numAttributesPerObj > numAttributes,
		"object can't have more attributes than available\n",
	)

	info, err := os.Stat(path)
	exitIf(
		err != nil && !errors.Is(err, os.ErrNotExist),
		"couldn't get path info: %s\n", err,
	)

	// Path exists.
	if err == nil {
		exitIf(info.IsDir(), "path is a directory\n")
		exitIf(!force, "couldn't rewrite existing file, use '-force' flag\n")

		err = os.Remove(path)
		exitIf(err != nil, "couldn't remove existing file: %s\n", err)
	}

	err = populate()
	exitIf(err != nil, "couldn't populate the metabase: %s\n", err)
}
// getObjectFactory returns a function that generates a new object with opts
// applied on every call.
func getObjectFactory(opts ...internal.ObjectOption) func() *objectSDK.Object {
	factory := func() *objectSDK.Object {
		return internal.GenerateObject(opts...)
	}
	return factory
}
// populate opens (creating if needed) the metabase at path, fills it and
// closes it again.
//
// For every container it stores numObjects objects of each of the regular,
// lock and tombstone types, then numObjects big objects, then a graveyard and
// a set of locked objects of numObjects objects each. All work runs on an
// error group limited to jobs concurrent tasks.
//
// The returned error is the first task error, joined with the error of closing
// the metabase if closing fails.
func populate() (err error) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	db := meta.New([]meta.Option{
		meta.WithPath(path),
		meta.WithPermissions(0o600),
		meta.WithEpochState(internal.EpochState{}),
	}...)

	if err = db.Open(ctx, mode.ReadWrite); err != nil {
		return fmt.Errorf("couldn't open the metabase: %w", err)
	}
	defer func() {
		// Close exactly once and report the error of that very call.
		if errOnClose := db.Close(); errOnClose != nil {
			err = errors.Join(
				err,
				fmt.Errorf("couldn't close the metabase: %w", errOnClose),
			)
		}
	}()

	if err = db.Init(); err != nil {
		return fmt.Errorf("couldn't init the metabase: %w", err)
	}

	payloads := internal.GeneratePayloadPool(numPayloads, 32)
	attributes := internal.GenerateAttributePool(numAttributes)
	owners := internal.GenerateOwnerPool(numOwners)

	types := []objectSDK.Type{
		objectSDK.TypeRegular,
		objectSDK.TypeLock,
		objectSDK.TypeTombstone,
	}

	eg, ctx := errgroup.WithContext(ctx)
	eg.SetLimit(int(jobs))

	for i := uint(0); i < numContainers; i++ {
		cid := cidtest.ID()

		for _, typ := range types {
			internal.PopulateWithObjects(ctx, db, eg, numObjects, getObjectFactory(
				internal.WithContainerID(cid),
				internal.WithType(typ),
				internal.WithPayloadFromPool(payloads),
				internal.WithOwnerIDFromPool(owners),
				internal.WithAttributesFromPool(attributes, numAttributesPerObj),
			))
		}
		internal.PopulateWithBigObjects(ctx, db, eg, numObjects, getObjectFactory(
			internal.WithContainerID(cid),
			internal.WithType(objectSDK.TypeRegular),
			internal.WithAttributesFromPool(attributes, numAttributesPerObj),
			internal.WithOwnerIDFromPool(owners),
		))
		internal.PopulateGraveyard(ctx, db, eg, int(jobs), numObjects, getObjectFactory(
			internal.WithContainerID(cid),
			internal.WithType(objectSDK.TypeRegular),
			internal.WithAttributesFromPool(attributes, numAttributesPerObj),
			internal.WithOwnerIDFromPool(owners),
		))
		internal.PopulateLocked(ctx, db, eg, int(jobs), numObjects, getObjectFactory(
			internal.WithContainerID(cid),
			internal.WithType(objectSDK.TypeRegular),
			internal.WithAttributesFromPool(attributes, numAttributesPerObj),
			internal.WithOwnerIDFromPool(owners),
		))
	}

	return eg.Wait()
}
// exitIf does nothing when cond is false. Otherwise it prints the formatted
// message to stderr and terminates the process with exit code 1.
func exitIf(cond bool, format string, args ...any) {
	if !cond {
		return
	}
	fmt.Fprintf(os.Stderr, format, args...)
	os.Exit(1)
}