From 22d7996f795b2e62092ead0fb48c2eb944294307 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 23 Mar 2023 16:32:25 +0300 Subject: [PATCH] [#42] registry: Optimize `ObjectInfo` marshaling 1. Get rid of JSON in the database. 2. Store `CreatedAt` as int64. It decreases JSON marshaling time by about 25% with no changes for the native scheme. Signed-off-by: Evgenii Stratonikov --- internal/registry/obj_info.go | 53 ++++++++++++++ internal/registry/obj_info_test.go | 107 +++++++++++++++++++++++++++++ internal/registry/obj_registry.go | 24 ++----- internal/registry/obj_selector.go | 9 ++- 4 files changed, 169 insertions(+), 24 deletions(-) create mode 100644 internal/registry/obj_info.go create mode 100644 internal/registry/obj_info_test.go diff --git a/internal/registry/obj_info.go b/internal/registry/obj_info.go new file mode 100644 index 0000000..b3cae4a --- /dev/null +++ b/internal/registry/obj_info.go @@ -0,0 +1,53 @@ +package registry + +import ( + "github.com/nspcc-dev/neo-go/pkg/io" +) + +// ObjectInfo represents information about FrostFS object that has been created +// via gRPC/HTTP/S3 API. 
+type ObjectInfo struct { + Id uint64 // Identifier in bolt DB + CreatedAt int64 // UTC seconds from epoch when the object was created + CID string // Container ID in gRPC/HTTP + OID string // Object ID in gRPC/HTTP + S3Bucket string // Bucket name in S3 + S3Key string // Object key in S3 + Status string // Status of the object + PayloadHash string // SHA256 hash of object payload that can be used for verification +} + +func (o ObjectInfo) EncodeBinary(w *io.BinWriter) { + w.WriteU64LE(o.Id) + w.WriteU64LE(uint64(o.CreatedAt)) + w.WriteString(o.CID) + w.WriteString(o.OID) + w.WriteString(o.S3Bucket) + w.WriteString(o.S3Key) + w.WriteString(o.Status) + w.WriteString(o.PayloadHash) +} + +func (o *ObjectInfo) DecodeBinary(r *io.BinReader) { + o.Id = r.ReadU64LE() + o.CreatedAt = int64(r.ReadU64LE()) + o.CID = r.ReadString() + o.OID = r.ReadString() + o.S3Bucket = r.ReadString() + o.S3Key = r.ReadString() + o.Status = r.ReadString() + o.PayloadHash = r.ReadString() +} + +func (o ObjectInfo) Marshal() ([]byte, error) { + w := io.NewBufBinWriter() + o.EncodeBinary(w.BinWriter) + err := w.Err // Bytes() sets Err to ErrDrained + return w.Bytes(), err +} + +func (o *ObjectInfo) Unmarshal(data []byte) error { + r := io.NewBinReaderFromBuf(data) + o.DecodeBinary(r) + return r.Err +} diff --git a/internal/registry/obj_info_test.go b/internal/registry/obj_info_test.go new file mode 100644 index 0000000..2c5bdc5 --- /dev/null +++ b/internal/registry/obj_info_test.go @@ -0,0 +1,107 @@ +package registry + +import ( + "encoding/json" + "math/rand" + "strings" + "testing" + + "github.com/nspcc-dev/neo-go/pkg/io" + "github.com/stretchr/testify/require" +) + +func BenchmarkObjectInfoMarshal(b *testing.B) { + obj := randomObjectInfo() + b.Run("json", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := json.Marshal(obj) + if err != nil { + b.FailNow() + } + } + }) + b.Run("native", func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := obj.Marshal() + if err != nil { + 
b.FailNow() + } + } + }) +} + +func BenchmarkObjectInfoUnmarshal(b *testing.B) { + obj := randomObjectInfo() + + b.Run("json", func(b *testing.B) { + data, err := json.Marshal(obj) + require.NoError(b, err) + + for i := 0; i < b.N; i++ { + var obj ObjectInfo + err := json.Unmarshal(data, &obj) + if err != nil { + b.FailNow() + } + } + }) + b.Run("native", func(b *testing.B) { + data, err := obj.Marshal() + require.NoError(b, err) + + for i := 0; i < b.N; i++ { + err := obj.Unmarshal(data) + if err != nil { + b.FailNow() + } + } + }) +} + +func TestObjectInfoMarshal(t *testing.T) { + expected := randomObjectInfo() + + data, err := expected.Marshal() + require.NoError(t, err) + + var actual ObjectInfo + require.NoError(t, actual.Unmarshal(data)) + require.Equal(t, expected, actual) +} + +func TestObjectInfoEncodeBinary(t *testing.T) { + expected := randomObjectInfo() + + w := io.NewBufBinWriter() + expected.EncodeBinary(w.BinWriter) + require.NoError(t, w.Err) + + data := w.Bytes() + r := io.NewBinReaderFromBuf(data) + + var actual ObjectInfo + actual.DecodeBinary(r) + require.NoError(t, r.Err) + require.Equal(t, expected, actual) +} + +func randomObjectInfo() ObjectInfo { + return ObjectInfo{ + Id: rand.Uint64(), + CreatedAt: int64(rand.Uint64()), + CID: randString(32), + OID: randString(32), + S3Bucket: randString(32), + S3Key: randString(32), + Status: "created", + PayloadHash: randString(64), + } +} + +func randString(n int) string { + var sb strings.Builder + for i := 0; i < n; i++ { + sb.WriteRune('a' + rune(rand.Int())%('z'-'a'+1)) + } + return sb.String() +} diff --git a/internal/registry/obj_registry.go b/internal/registry/obj_registry.go index e7c451d..e2368f8 100644 --- a/internal/registry/obj_registry.go +++ b/internal/registry/obj_registry.go @@ -3,7 +3,6 @@ package registry import ( "context" "encoding/binary" - "encoding/json" "errors" "os" "time" @@ -24,19 +23,6 @@ const ( const bucketName = "_object" -// ObjectInfo represents information about 
FrostFS object that has been created -// via gRPC/HTTP/S3 API. -type ObjectInfo struct { - Id uint64 // Identifier in bolt DB - CreatedAt time.Time // UTC date&time when the object was created - CID string // Container ID in gRPC/HTTP - OID string // Object ID in gRPC/HTTP - S3Bucket string // Bucket name in S3 - S3Key string // Object key in S3 - Status string // Status of the object - PayloadHash string // SHA256 hash of object payload that can be used for verification -} - // NewObjRegistry creates a new instance of object registry that stores information // about objects in the specified bolt database. As registry uses read-write // connection to the database, there may be only one instance of object registry @@ -72,7 +58,7 @@ func (o *ObjRegistry) AddObject(cid, oid, s3Bucket, s3Key, payloadHash string) e object := ObjectInfo{ Id: id, - CreatedAt: time.Now().UTC(), + CreatedAt: time.Now().UTC().Unix(), CID: cid, OID: oid, S3Bucket: s3Bucket, @@ -80,12 +66,12 @@ func (o *ObjRegistry) AddObject(cid, oid, s3Bucket, s3Key, payloadHash string) e PayloadHash: payloadHash, Status: statusCreated, } - objectJson, err := json.Marshal(object) + objBytes, err := object.Marshal() if err != nil { return err } - return b.Put(encodeId(id), objectJson) + return b.Put(encodeId(id), objBytes) }) } @@ -102,12 +88,12 @@ func (o *ObjRegistry) SetObjectStatus(id uint64, newStatus string) error { } obj := new(ObjectInfo) - if err := json.Unmarshal(objBytes, &obj); err != nil { + if err := obj.Unmarshal(objBytes); err != nil { return err } obj.Status = newStatus - objBytes, err = json.Marshal(obj) + objBytes, err = obj.Marshal() if err != nil { return err } diff --git a/internal/registry/obj_selector.go b/internal/registry/obj_selector.go index f983f6f..f46368d 100644 --- a/internal/registry/obj_selector.go +++ b/internal/registry/obj_selector.go @@ -2,7 +2,6 @@ package registry import ( "context" - "encoding/json" "fmt" "time" @@ -67,7 +66,7 @@ func (o *ObjSelector) Count() (int, 
error) { return b.ForEach(func(_, objBytes []byte) error { if objBytes != nil { var obj ObjectInfo - if err := json.Unmarshal(objBytes, &obj); err != nil { + if err := obj.Unmarshal(objBytes); err != nil { // Ignore malformed objects return nil } @@ -120,7 +119,7 @@ func (o *ObjSelector) selectLoop() { for ; keyBytes != nil && len(cache) != o.cacheSize; keyBytes, objBytes = c.Next() { if objBytes != nil { var obj ObjectInfo - if err := json.Unmarshal(objBytes, &obj); err != nil { + if err := obj.Unmarshal(objBytes); err != nil { // Ignore malformed objects for now. Maybe it should be panic? continue } @@ -168,8 +167,8 @@ func (f *ObjFilter) match(o ObjectInfo) bool { return false } if f.Age != 0 { - objAge := time.Now().UTC().Sub(o.CreatedAt).Seconds() - if objAge < float64(f.Age) { + objAge := time.Now().UTC().Unix() - o.CreatedAt + if objAge < int64(f.Age) { return false } }