forked from TrueCloudLab/frostfs-node
[#142] metabase: Store header value index in a tree leaf
In the previous metabase implementation, each unique header value was assigned its own bucket, whose elements were leaves with the object address as the key and an empty value. This approach was relatively efficient in terms of write speed. However, the large number of buckets led to a rapid increase in the database size (~4GB for 100K objects with unique attributes). This commit instead stores the header value index in the leaves of the tree: the keys are the unique header values, and the values are serialized lists of addresses (gob encoding is temporarily used for serialization). The new approach gave a good result in terms of space (~350MB); however, it significantly reduced the write speed as the number of objects grows (~80x after 100K objects). Signed-off-by: Leonard Lyubich <leonard@nspcc.ru>
This commit is contained in:
parent
e48f8a189e
commit
77e80f517f
2 changed files with 53 additions and 18 deletions
|
@ -1,6 +1,9 @@
|
||||||
package meta
|
package meta
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/gob"
|
||||||
|
|
||||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||||
v2object "github.com/nspcc-dev/neofs-api-go/v2/object"
|
v2object "github.com/nspcc-dev/neofs-api-go/v2/object"
|
||||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||||
|
@ -59,19 +62,19 @@ func (db *DB) Put(obj *object.Object) error {
|
||||||
return errors.Wrapf(err, "(%T) could not create bucket for header key", db)
|
return errors.Wrapf(err, "(%T) could not create bucket for header key", db)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: here we can get empty slice that could not be the key
|
v := nonEmptyKeyBytes([]byte(indices[i].val))
|
||||||
// Possible solutions:
|
|
||||||
// 1. add prefix byte (0 if empty);
|
|
||||||
v := []byte(indices[i].val)
|
|
||||||
|
|
||||||
// create address bucket for the value
|
strs, err := decodeAddressList(keyBucket.Get(v))
|
||||||
valBucket, err := keyBucket.CreateBucketIfNotExists(nonEmptyKeyBytes(v))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrapf(err, "(%T) could not create bucket for header value", db)
|
return errors.Wrapf(err, "(%T) could not decode address list", db)
|
||||||
}
|
}
|
||||||
|
|
||||||
// put object address to value bucket
|
data, err := encodeAddressList(append(strs, string(addrKey)))
|
||||||
if err := valBucket.Put(addrKey, nil); err != nil {
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "(%T) could not encode address list", db)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := keyBucket.Put(v, data); err != nil {
|
||||||
return errors.Wrapf(err, "(%T) could not put item to header bucket", db)
|
return errors.Wrapf(err, "(%T) could not put item to header bucket", db)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -151,3 +154,30 @@ func objectIndices(obj *object.Object, parent bool) []bucketItem {
|
||||||
|
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: gob is a temporary solution, use protobuf.
|
||||||
|
// decodeAddressList restores a list of address strings from its gob-encoded
// form, i.e. the inverse of encodeAddressList.
//
// Empty (or nil) data yields a nil list and a nil error rather than a decode
// failure. Put passes the result of keyBucket.Get(v) straight into this
// function, so this branch is presumably what handles a header value that has
// no stored list yet (verify against the bucket API).
//
// Any error reported by the gob decoder is returned unchanged, together with
// a nil list.
func decodeAddressList(data []byte) ([]string, error) {
|
||||||
|
// Nothing stored: report an empty list instead of feeding gob an empty stream.
if len(data) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var strs []string
|
||||||
|
|
||||||
|
decoder := gob.NewDecoder(bytes.NewReader(data))
|
||||||
|
if err := decoder.Decode(&strs); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return strs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeAddressList serializes a list of address strings with gob into a
// freshly allocated buffer and returns the resulting bytes.
//
// Any error reported by the gob encoder is returned unchanged, together with
// a nil byte slice.
func encodeAddressList(l []string) ([]byte, error) {
|
||||||
|
// A new buffer and encoder are created per call.
buf := bytes.NewBuffer(nil)
|
||||||
|
encoder := gob.NewEncoder(buf)
|
||||||
|
|
||||||
|
if err := encoder.Encode(l); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
|
@ -66,18 +66,23 @@ func (db *DB) Select(fs object.SearchFilters) ([]*object.Address, error) {
|
||||||
fVal := f.Value()
|
fVal := f.Value()
|
||||||
|
|
||||||
// iterate over all existing values for the key
|
// iterate over all existing values for the key
|
||||||
if err := keyBucket.ForEach(func(k, _ []byte) error {
|
if err := keyBucket.ForEach(func(k, v []byte) error {
|
||||||
include := matchFunc(string(key), string(cutKeyBytes(k)), fVal)
|
include := matchFunc(string(key), string(cutKeyBytes(k)), fVal)
|
||||||
|
|
||||||
return keyBucket.Bucket(k).ForEach(func(k, _ []byte) error {
|
strs, err := decodeAddressList(v)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "(%T) could not decode address list", db)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range strs {
|
||||||
if include {
|
if include {
|
||||||
mAddr[string(k)] = struct{}{}
|
mAddr[strs[i]] = struct{}{}
|
||||||
} else {
|
} else {
|
||||||
delete(mAddr, string(k))
|
delete(mAddr, strs[i])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return errors.Wrapf(err, "(%T) could not iterate bucket %s", db, key)
|
return errors.Wrapf(err, "(%T) could not iterate bucket %s", db, key)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue