From b9043433a098ae242c559a79afe6f1ced5068e68 Mon Sep 17 00:00:00 2001 From: Aleksey Savchuk Date: Mon, 15 Jul 2024 14:07:32 +0300 Subject: [PATCH] [#1223] scripts: Add script to populate metabase Signed-off-by: Aleksey Savchuk --- .../populate-metabase/internal/generate.go | 132 +++++++++ .../populate-metabase/internal/populate.go | 263 ++++++++++++++++++ scripts/populate-metabase/main.go | 159 +++++++++++ 3 files changed, 554 insertions(+) create mode 100644 scripts/populate-metabase/internal/generate.go create mode 100644 scripts/populate-metabase/internal/populate.go create mode 100644 scripts/populate-metabase/main.go diff --git a/scripts/populate-metabase/internal/generate.go b/scripts/populate-metabase/internal/generate.go new file mode 100644 index 000000000..d2004b673 --- /dev/null +++ b/scripts/populate-metabase/internal/generate.go @@ -0,0 +1,132 @@ +package internal + +import ( + "crypto/sha256" + "fmt" + + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/checksum" + cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id" + cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + oidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id/test" + objecttest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/test" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user" + usertest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/user/test" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/version" + "git.frostfs.info/TrueCloudLab/tzhash/tz" + "golang.org/x/exp/rand" +) + +func GeneratePayloadPool(count uint, size uint) [][]byte { + pool := [][]byte{} + for i := uint(0); i < count; i++ { + payload := make([]byte, size) + _, _ = rand.Read(payload) + + pool = append(pool, payload) + } + return pool +} + +func GenerateAttributePool(count uint) []objectSDK.Attribute { + pool := []objectSDK.Attribute{} + for i := uint(0); i < count; i++ { + for j := uint(0); j < count; j++ { + attr := *objectSDK.NewAttribute() + attr.SetKey(fmt.Sprintf("key%d", i)) + attr.SetValue(fmt.Sprintf("value%d", j)) + pool = append(pool, attr) + } + } + return pool +} + +func GenerateOwnerPool(count uint) []user.ID { + pool := []user.ID{} + for i := uint(0); i < count; i++ { + pool = append(pool, usertest.ID()) + } + return pool +} + +type ObjectOption func(obj *objectSDK.Object) + +func GenerateObject(options ...ObjectOption) *objectSDK.Object { + var ver version.Version + ver.SetMajor(2) + ver.SetMinor(1) + + payload := make([]byte, 0) + + var csum checksum.Checksum + csum.SetSHA256(sha256.Sum256(payload)) + + var csumTZ checksum.Checksum + csumTZ.SetTillichZemor(tz.Sum(csum.Value())) + + obj := objectSDK.New() + obj.SetID(oidtest.ID()) + obj.SetOwnerID(usertest.ID()) + obj.SetContainerID(cidtest.ID()) + + header := objecttest.Object().GetECHeader() + header.SetParent(oidtest.ID()) + obj.SetECHeader(header) + + obj.SetVersion(&ver) + obj.SetPayload(payload) + obj.SetPayloadSize(uint64(len(payload))) + obj.SetPayloadChecksum(csum) + obj.SetPayloadHomomorphicHash(csumTZ) + + for _, option := range options { + option(obj) + } + + return obj +} + +func WithContainerID(cid cid.ID) ObjectOption { + return func(obj *objectSDK.Object) { + obj.SetContainerID(cid) + } +} + +func WithType(typ objectSDK.Type) ObjectOption { + return func(obj *objectSDK.Object) { + obj.SetType(typ) + } +} + +func WithPayloadFromPool(pool [][]byte) ObjectOption { + payload := pool[rand.Intn(len(pool))] + + var csum checksum.Checksum + csum.SetSHA256(sha256.Sum256(payload)) + + var csumTZ checksum.Checksum + csumTZ.SetTillichZemor(tz.Sum(csum.Value())) + + return func(obj *objectSDK.Object) { + obj.SetPayload(payload) + obj.SetPayloadSize(uint64(len(payload))) + obj.SetPayloadChecksum(csum) + obj.SetPayloadHomomorphicHash(csumTZ) + } +} + +func WithAttributesFromPool(pool []objectSDK.Attribute, count uint) ObjectOption { + return func(obj *objectSDK.Object) { + attrs := []objectSDK.Attribute{} + for i := uint(0); i < count; i++ { + attrs = append(attrs, pool[rand.Intn(len(pool))]) + } + obj.SetAttributes(attrs...) + } +} + +func WithOwnerIDFromPool(pool []user.ID) ObjectOption { + return func(obj *objectSDK.Object) { + obj.SetOwnerID(pool[rand.Intn(len(pool))]) + } +} diff --git a/scripts/populate-metabase/internal/populate.go b/scripts/populate-metabase/internal/populate.go new file mode 100644 index 000000000..390c1cdc0 --- /dev/null +++ b/scripts/populate-metabase/internal/populate.go @@ -0,0 +1,263 @@ +package internal + +import ( + "context" + "fmt" + "math/rand" + "sync" + + meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase" + objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + oid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/id" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object/transformer" + "github.com/nspcc-dev/neo-go/pkg/crypto/keys" + "golang.org/x/sync/errgroup" +) + +type EpochState struct{} + +func (s EpochState) CurrentEpoch() uint64 { + return 0 +} + +func PopulateWithObjects( + ctx context.Context, + db *meta.DB, + group *errgroup.Group, + count uint, + factory func() *objectSDK.Object, +) { + digits := "0123456789" + + for i := uint(0); i < count; i++ { + obj := factory() + + id := []byte(fmt.Sprintf( + "%c/%c/%c", + digits[rand.Int()%len(digits)], + digits[rand.Int()%len(digits)], + digits[rand.Int()%len(digits)], + )) + + prm := meta.PutPrm{} + prm.SetObject(obj) + prm.SetStorageID(id) + + group.Go(func() error { + if _, err := db.Put(ctx, prm); err != nil { + return fmt.Errorf("couldn't put an object: %w", err) + } + return nil + }) + } +} + +func PopulateWithBigObjects( + ctx context.Context, + db *meta.DB, + group *errgroup.Group, + count uint, + factory func() *objectSDK.Object, +) { + for i := uint(0); i < count; i++ { + group.Go(func() error { + if err := populateWithBigObject(ctx, db, factory); err != nil { + return fmt.Errorf("couldn't put a big object: %w", err) + } + return nil + }) + } +} + +func populateWithBigObject( + ctx context.Context, + db *meta.DB, + factory func() *objectSDK.Object, +) error { + t := &target{db: db} + + pk, _ := keys.NewPrivateKey() + p := transformer.NewPayloadSizeLimiter(transformer.Params{ + Key: &pk.PrivateKey, + NextTargetInit: func() transformer.ObjectWriter { return t }, + NetworkState: EpochState{}, + MaxSize: 10, + }) + + obj := factory() + payload := make([]byte, 30) + + err := p.WriteHeader(ctx, obj) + if err != nil { + return err + } + + _, err = p.Write(ctx, payload) + if err != nil { + return err + } + + _, err = p.Close(ctx) + if err != nil { + return err + } + + return nil +} + +type target struct { + db *meta.DB +} + +func (t *target) WriteObject(ctx context.Context, obj *objectSDK.Object) error { + prm := meta.PutPrm{} + prm.SetObject(obj) + + _, err := t.db.Put(ctx, prm) + return err +} + +func PopulateGraveyard( + ctx context.Context, + db *meta.DB, + group *errgroup.Group, + workBufferSize int, + count uint, + factory func() *objectSDK.Object, +) { + ts := factory() + ts.SetType(objectSDK.TypeTombstone) + + prm := meta.PutPrm{} + prm.SetObject(ts) + + group.Go(func() error { + if _, err := db.Put(ctx, prm); err != nil { + return fmt.Errorf("couldn't put a tombstone object: %w", err) + } + return nil + }) + + cID, _ := ts.ContainerID() + oID, _ := ts.ID() + + var tsAddr oid.Address + + tsAddr.SetContainer(cID) + tsAddr.SetObject(oID) + + addrs := make(chan oid.Address, workBufferSize) + + go func() { + defer close(addrs) + + wg := &sync.WaitGroup{} + wg.Add(int(count)) + + for i := uint(0); i < count; i++ { + obj := factory() + + prm := meta.PutPrm{} + prm.SetObject(obj) + + group.Go(func() error { + defer wg.Done() + + if _, err := db.Put(ctx, prm); err != nil { + return fmt.Errorf("couldn't put an object: %w", err) + } + + cID, _ := obj.ContainerID() + oID, _ := obj.ID() + + var addr oid.Address + addr.SetContainer(cID) + addr.SetObject(oID) + + addrs <- addr + return nil + }) + } + wg.Wait() + }() + + go func() { + for addr := range addrs { + prm := meta.InhumePrm{} + prm.SetAddresses(addr) + prm.SetTombstoneAddress(tsAddr) + + group.Go(func() error { + if _, err := db.Inhume(ctx, prm); err != nil { + return fmt.Errorf("couldn't inhume an object: %w", err) + } + return nil + }) + } + }() +} + +func PopulateLocked( + ctx context.Context, + db *meta.DB, + group *errgroup.Group, + workBufferSize int, + count uint, + factory func() *objectSDK.Object, +) { + locker := factory() + locker.SetType(objectSDK.TypeLock) + + prm := meta.PutPrm{} + prm.SetObject(locker) + + group.Go(func() error { + if _, err := db.Put(ctx, prm); err != nil { + return fmt.Errorf("couldn't put a locker object: %w", err) + } + return nil + }) + + ids := make(chan oid.ID, workBufferSize) + + go func() { + defer close(ids) + + wg := &sync.WaitGroup{} + wg.Add(int(count)) + + for i := uint(0); i < count; i++ { + defer wg.Done() + + obj := factory() + + prm := meta.PutPrm{} + prm.SetObject(obj) + + group.Go(func() error { + if _, err := db.Put(ctx, prm); err != nil { + return fmt.Errorf("couldn't put an object: %w", err) + } + + id, _ := obj.ID() + ids <- id + return nil + }) + } + wg.Wait() + }() + + go func() { + for id := range ids { + lockerCID, _ := locker.ContainerID() + lockerOID, _ := locker.ID() + + group.Go(func() error { + if err := db.Lock(ctx, lockerCID, lockerOID, []oid.ID{id}); err != nil { + return fmt.Errorf("couldn't lock an object: %w", err) + } + return nil + }) + } + }() +} diff --git a/scripts/populate-metabase/main.go b/scripts/populate-metabase/main.go new file mode 100644 index 000000000..2bc7a5553 --- /dev/null +++ b/scripts/populate-metabase/main.go @@ -0,0 +1,159 @@ +package main + +import ( + "context" + "errors" + "flag" + "fmt" + "os" + + meta "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/metabase" + "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" + "git.frostfs.info/TrueCloudLab/frostfs-node/scripts/populate-metabase/internal" + cidtest "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id/test" + objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object" + "golang.org/x/sync/errgroup" +) + +var ( + path string + force bool + jobs uint + + numContainers, + numObjects, + numAttributesPerObj, + numOwners, + numPayloads, + numAttributes uint +) + +func main() { + flag.StringVar(&path, "path", "", "Path to metabase") + flag.BoolVar(&force, "force", false, "Rewrite existing database") + flag.UintVar(&jobs, "j", 10000, "Number of jobs to run") + + flag.UintVar(&numContainers, "containers", 0, "Number of containers to be created") + flag.UintVar(&numObjects, "objects", 0, "Number of objects per container") + flag.UintVar(&numAttributesPerObj, "attributes", 0, "Number of attributes per object") + + flag.UintVar(&numOwners, "distinct-owners", 10, "Number of distinct owners to be used") + flag.UintVar(&numPayloads, "distinct-payloads", 10, "Number of distinct payloads to be used") + flag.UintVar(&numAttributes, "distinct-attributes", 10, "Number of distinct attributes to be used") + + flag.Parse() + + exitIf(numPayloads == 0, "must have payloads\n") + exitIf(numAttributes == 0, "must have attributes\n") + exitIf(numOwners == 0, "must have owners\n") + exitIf(len(path) == 0, "path to metabase not specified\n") + exitIf( + numAttributesPerObj > numAttributes, + "object can't have more attributes than available\n", + ) + + info, err := os.Stat(path) + exitIf( + err != nil && !errors.Is(err, os.ErrNotExist), + "couldn't get path info: %s\n", err, + ) + + // Path exits. + if err == nil { + exitIf(info.IsDir(), "path is a directory\n") + exitIf(!force, "couldn't rewrite existing file, use '-force' flag\n") + + err = os.Remove(path) + exitIf(err != nil, "couldn't remove existing file: %s\n", err) + } + + err = populate() + exitIf(err != nil, "couldn't populate the metabase: %s\n", err) +} + +func getObjectFactory(opts ...internal.ObjectOption) func() *objectSDK.Object { + return func() *objectSDK.Object { + return internal.GenerateObject(opts...) + } +} + +func populate() (err error) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + db := meta.New([]meta.Option{ + meta.WithPath(path), + meta.WithPermissions(0o600), + meta.WithEpochState(internal.EpochState{}), + }...) + + if err = db.Open(ctx, mode.ReadWrite); err != nil { + return fmt.Errorf("couldn't open the metabase: %w", err) + } + defer func() { + if errOnClose := db.Close(); errOnClose != nil { + err = errors.Join( + err, + fmt.Errorf("couldn't close the metabase: %w", db.Close()), + ) + } + }() + + if err = db.Init(); err != nil { + return fmt.Errorf("couldn't init the metabase: %w", err) + } + + payloads := internal.GeneratePayloadPool(numPayloads, 32) + attributes := internal.GenerateAttributePool(numAttributes) + owners := internal.GenerateOwnerPool(numOwners) + + types := []objectSDK.Type{ + objectSDK.TypeRegular, + objectSDK.TypeLock, + objectSDK.TypeTombstone, + } + + eg, ctx := errgroup.WithContext(ctx) + eg.SetLimit(int(jobs)) + + for i := uint(0); i < numContainers; i++ { + cid := cidtest.ID() + + for _, typ := range types { + internal.PopulateWithObjects(ctx, db, eg, numObjects, getObjectFactory( + internal.WithContainerID(cid), + internal.WithType(typ), + internal.WithPayloadFromPool(payloads), + internal.WithOwnerIDFromPool(owners), + internal.WithAttributesFromPool(attributes, numAttributesPerObj), + )) + } + internal.PopulateWithBigObjects(ctx, db, eg, numObjects, getObjectFactory( + internal.WithContainerID(cid), + internal.WithType(objectSDK.TypeRegular), + internal.WithAttributesFromPool(attributes, numAttributesPerObj), + internal.WithOwnerIDFromPool(owners), + )) + internal.PopulateGraveyard(ctx, db, eg, int(jobs), numObjects, getObjectFactory( + internal.WithContainerID(cid), + internal.WithType(objectSDK.TypeRegular), + internal.WithAttributesFromPool(attributes, numAttributesPerObj), + internal.WithOwnerIDFromPool(owners), + )) + internal.PopulateLocked(ctx, db, eg, int(jobs), numObjects, getObjectFactory( + internal.WithContainerID(cid), + internal.WithType(objectSDK.TypeRegular), + internal.WithAttributesFromPool(attributes, numAttributesPerObj), + internal.WithOwnerIDFromPool(owners), + )) + } + + return eg.Wait() +} + +func exitIf(cond bool, format string, args ...any) { + if cond { + fmt.Fprintf(os.Stderr, format, args...) + os.Exit(1) + } +}