From 77e80f517f7194ee6cf43dd8fc2b49f18eb2c77a Mon Sep 17 00:00:00 2001 From: Leonard Lyubich Date: Tue, 3 Nov 2020 15:12:56 +0300 Subject: [PATCH] [#142] metabase: Store header value index in a tree leaf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the previous implementation of the metabase, each unique value of a header was assigned its own bucket, the elements of which were leaves with the object address as the key and an empty value. This approach was relatively efficient in terms of write speed. However, the large number of buckets led to a rapid increase in the database volume (~4GB for 100K objects with unique attributes). This change instead stores the header value index in the leaves of the tree, where the keys are the unique values of the header and the values are serialized lists of addresses (gob encoding is temporarily used for serialization). The new approach gives a good result in saving space (~350MB); however, it significantly reduces the write speed as the number of objects increases (~80x after 100K objects). 
Signed-off-by: Leonard Lyubich --- pkg/local_object_storage/metabase/put.go | 48 +++++++++++++++++---- pkg/local_object_storage/metabase/select.go | 23 ++++++---- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/pkg/local_object_storage/metabase/put.go b/pkg/local_object_storage/metabase/put.go index 7c9a430f0..bd676cbdc 100644 --- a/pkg/local_object_storage/metabase/put.go +++ b/pkg/local_object_storage/metabase/put.go @@ -1,6 +1,9 @@ package meta import ( + "bytes" + "encoding/gob" + objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object" v2object "github.com/nspcc-dev/neofs-api-go/v2/object" "github.com/nspcc-dev/neofs-node/pkg/core/object" @@ -59,19 +62,19 @@ func (db *DB) Put(obj *object.Object) error { return errors.Wrapf(err, "(%T) could not create bucket for header key", db) } - // FIXME: here we can get empty slice that could not be the key - // Possible solutions: - // 1. add prefix byte (0 if empty); - v := []byte(indices[i].val) + v := nonEmptyKeyBytes([]byte(indices[i].val)) - // create address bucket for the value - valBucket, err := keyBucket.CreateBucketIfNotExists(nonEmptyKeyBytes(v)) + strs, err := decodeAddressList(keyBucket.Get(v)) if err != nil { - return errors.Wrapf(err, "(%T) could not create bucket for header value", db) + return errors.Wrapf(err, "(%T) could not decode address list", db) } - // put object address to value bucket - if err := valBucket.Put(addrKey, nil); err != nil { + data, err := encodeAddressList(append(strs, string(addrKey))) + if err != nil { + return errors.Wrapf(err, "(%T) could not encode address list", db) + } + + if err := keyBucket.Put(v, data); err != nil { return errors.Wrapf(err, "(%T) could not put item to header bucket", db) } } @@ -151,3 +154,30 @@ func objectIndices(obj *object.Object, parent bool) []bucketItem { return res } + +// FIXME: gob is a temporary solution, use protobuf. 
+func decodeAddressList(data []byte) ([]string, error) { + if len(data) == 0 { + return nil, nil + } + + var strs []string + + decoder := gob.NewDecoder(bytes.NewReader(data)) + if err := decoder.Decode(&strs); err != nil { + return nil, err + } + + return strs, nil +} + +func encodeAddressList(l []string) ([]byte, error) { + buf := bytes.NewBuffer(nil) + encoder := gob.NewEncoder(buf) + + if err := encoder.Encode(l); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} diff --git a/pkg/local_object_storage/metabase/select.go b/pkg/local_object_storage/metabase/select.go index aaec37239..b42461790 100644 --- a/pkg/local_object_storage/metabase/select.go +++ b/pkg/local_object_storage/metabase/select.go @@ -66,18 +66,23 @@ func (db *DB) Select(fs object.SearchFilters) ([]*object.Address, error) { fVal := f.Value() // iterate over all existing values for the key - if err := keyBucket.ForEach(func(k, _ []byte) error { + if err := keyBucket.ForEach(func(k, v []byte) error { include := matchFunc(string(key), string(cutKeyBytes(k)), fVal) - return keyBucket.Bucket(k).ForEach(func(k, _ []byte) error { - if include { - mAddr[string(k)] = struct{}{} - } else { - delete(mAddr, string(k)) - } + strs, err := decodeAddressList(v) + if err != nil { + return errors.Wrapf(err, "(%T) could not decode address list", db) + } - return nil - }) + for i := range strs { + if include { + mAddr[strs[i]] = struct{}{} + } else { + delete(mAddr, strs[i]) + } + } + + return nil }); err != nil { return errors.Wrapf(err, "(%T) could not iterate bucket %s", db, key) }