[#142] metabase: Store header value index in a tree leaf

In the previous implementation of the metabase, each unique header value
was assigned its own bucket, whose elements were leaves with an address as
the key and an empty value. This approach was relatively efficient in terms
of write speed. However, the large number of buckets led to a rapid
increase in the database size (~4GB for 100K objects with unique
attributes). This commit stores the index of header values in the leaves of
the tree instead: the keys are the unique values of the header, and the
values are serialized lists of addresses (gob encoding is temporarily used
for serialization).

The new approach gives a good result in terms of space (~350MB); however,
it significantly reduces the write speed as the number of objects grows
(~80x after 100K objects).

Signed-off-by: Leonard Lyubich <leonard@nspcc.ru>
This commit is contained in:
Leonard Lyubich 2020-11-03 15:12:56 +03:00 committed by Alex Vanin
parent e48f8a189e
commit 77e80f517f
2 changed files with 53 additions and 18 deletions

View file

@ -1,6 +1,9 @@
package meta
import (
"bytes"
"encoding/gob"
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
v2object "github.com/nspcc-dev/neofs-api-go/v2/object"
"github.com/nspcc-dev/neofs-node/pkg/core/object"
@ -59,19 +62,19 @@ func (db *DB) Put(obj *object.Object) error {
return errors.Wrapf(err, "(%T) could not create bucket for header key", db)
}
// FIXME: here we can get empty slice that could not be the key
// Possible solutions:
// 1. add prefix byte (0 if empty);
v := []byte(indices[i].val)
v := nonEmptyKeyBytes([]byte(indices[i].val))
// create address bucket for the value
valBucket, err := keyBucket.CreateBucketIfNotExists(nonEmptyKeyBytes(v))
strs, err := decodeAddressList(keyBucket.Get(v))
if err != nil {
return errors.Wrapf(err, "(%T) could not create bucket for header value", db)
return errors.Wrapf(err, "(%T) could not decode address list", db)
}
// put object address to value bucket
if err := valBucket.Put(addrKey, nil); err != nil {
data, err := encodeAddressList(append(strs, string(addrKey)))
if err != nil {
return errors.Wrapf(err, "(%T) could not encode address list", db)
}
if err := keyBucket.Put(v, data); err != nil {
return errors.Wrapf(err, "(%T) could not put item to header bucket", db)
}
}
@ -151,3 +154,30 @@ func objectIndices(obj *object.Object, parent bool) []bucketItem {
return res
}
// decodeAddressList restores a list of address strings from its gob-encoded
// binary form.
//
// Empty (or nil) data is treated as an empty list: nil is returned together
// with a nil error. Any failure of the gob decoder is returned as is, with a
// nil list.
//
// FIXME: gob is a temporary solution, use protobuf.
func decodeAddressList(data []byte) ([]string, error) {
	// nothing stored yet: no need to involve the decoder
	if len(data) == 0 {
		return nil, nil
	}

	var addrs []string

	err := gob.NewDecoder(bytes.NewReader(data)).Decode(&addrs)
	if err != nil {
		return nil, err
	}

	return addrs, nil
}
// encodeAddressList serializes a list of address strings into its gob-encoded
// binary form.
//
// On encoder failure a nil slice is returned together with the encoder's
// error.
func encodeAddressList(l []string) ([]byte, error) {
	var out bytes.Buffer

	err := gob.NewEncoder(&out).Encode(l)
	if err != nil {
		return nil, err
	}

	return out.Bytes(), nil
}

View file

@ -66,18 +66,23 @@ func (db *DB) Select(fs object.SearchFilters) ([]*object.Address, error) {
fVal := f.Value()
// iterate over all existing values for the key
if err := keyBucket.ForEach(func(k, _ []byte) error {
if err := keyBucket.ForEach(func(k, v []byte) error {
include := matchFunc(string(key), string(cutKeyBytes(k)), fVal)
return keyBucket.Bucket(k).ForEach(func(k, _ []byte) error {
if include {
mAddr[string(k)] = struct{}{}
} else {
delete(mAddr, string(k))
}
strs, err := decodeAddressList(v)
if err != nil {
return errors.Wrapf(err, "(%T) could not decode address list", db)
}
return nil
})
for i := range strs {
if include {
mAddr[strs[i]] = struct{}{}
} else {
delete(mAddr, strs[i])
}
}
return nil
}); err != nil {
return errors.Wrapf(err, "(%T) could not iterate bucket %s", db, key)
}