[#1433] metabase: Optimize Select

For some filters we can scan only a subset of keys instead of checking
each key.

```
name                       old time/op  new time/op  delta
Select/string_equal-8      49.3µs ± 4%  11.0µs ± 4%  -77.68%  (p=0.000 n=10+10)
Select/string_not_equal-8  7.01ms ± 5%  7.06ms ±10%     ~     (p=0.971 n=10+10)
Select/common_prefix-8      118µs ± 6%    79µs ± 5%  -33.04%  (p=0.000 n=10+9)
Select/unknown-8           21.3µs ± 4%   3.2µs ± 4%  -84.88%  (p=0.000 n=10+9)
```

Signed-off-by: Evgenii Stratonikov <evgeniy@nspcc.ru>
This commit is contained in:
Evgenii Stratonikov 2022-05-25 15:39:51 +03:00 committed by LeL
parent 5c48588c64
commit 5073a37930
3 changed files with 171 additions and 31 deletions

View file

@ -1,6 +1,7 @@
package meta
import (
"bytes"
"encoding/binary"
"encoding/hex"
"io/fs"
@ -16,11 +17,16 @@ import (
"go.uber.org/zap"
)
// matcher groups the two implementations registered for a single search match type.
type matcher struct {
// matchSlow checks a single value: it receives the header key, the raw stored
// value and the filter value, and reports whether the value satisfies the filter.
matchSlow func(string, []byte, string) bool
// matchBucket walks a bucket: it receives the bucket, the header key, the filter
// value and a callback, and invokes the callback with the key and value of every
// entry that satisfies the filter.
matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error
}
// DB represents local metabase of storage node.
type DB struct {
// Embedded configuration; its fields are promoted onto DB.
*cfg
// matchers holds the matching routines registered for each search match type.
matchers map[object.SearchMatchType]func(string, []byte, string) bool
matchers map[object.SearchMatchType]matcher
// boltDB is the handle of the bbolt database backing the metabase.
boltDB *bbolt.DB
}
@ -60,11 +66,23 @@ func New(opts ...Option) *DB {
return &DB{
cfg: c,
matchers: map[object.SearchMatchType]func(string, []byte, string) bool{
object.MatchUnknown: unknownMatcher,
object.MatchStringEqual: stringEqualMatcher,
object.MatchStringNotEqual: stringNotEqualMatcher,
object.MatchCommonPrefix: stringCommonPrefixMatcher,
matchers: map[object.SearchMatchType]matcher{
object.MatchUnknown: {
matchSlow: unknownMatcher,
matchBucket: unknownMatcherBucket,
},
object.MatchStringEqual: {
matchSlow: stringEqualMatcher,
matchBucket: stringEqualMatcherBucket,
},
object.MatchStringNotEqual: {
matchSlow: stringNotEqualMatcher,
matchBucket: stringNotEqualMatcherBucket,
},
object.MatchCommonPrefix: {
matchSlow: stringCommonPrefixMatcher,
matchBucket: stringCommonPrefixMatcherBucket,
},
},
}
}
@ -80,22 +98,132 @@ func stringifyValue(key string, objVal []byte) string {
}
}
// fromHexChar decodes a single hexadecimal digit (upper or lower case).
// It returns the digit's numeric value (0..15) and true, or 0 and false
// when c is not a hexadecimal digit.
func fromHexChar(c byte) (byte, bool) {
	if c >= '0' && c <= '9' {
		return c - '0', true
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a'), true
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A'), true
	}
	return 0, false
}
// destringifyValue is the reverse operation for stringifyValue: it converts
// a textual filter value into the raw bytes stored for the given header key.
//
// Results, in order:
//   - the raw value;
//   - true iff prefix is set and value is an odd-length hex string. In that case
//     the last byte of the raw value holds the numeric value of the final hex
//     digit, and only the highest 4 bits of the corresponding key byte should be
//     compared against it;
//   - false when value cannot be converted for this key, i.e. the filter
//     cannot correspond to any stored value.
func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) {
	switch key {
	case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash:
		decoded, decodeErr := hex.DecodeString(value)
		if decodeErr == nil {
			return decoded, false, true
		}

		// Only an odd-length prefix still has a chance to be meaningful.
		if !(prefix && len(value)%2 != 0) {
			return decoded, false, false
		}

		// To match the old behaviour we need to process odd length hex strings, such as 'abc'
		fullBytes, tail := value[:len(value)-1], value[len(value)-1]
		nibble, isHex := fromHexChar(tail)
		if !isHex {
			return decoded, false, false
		}

		raw := make([]byte, hex.DecodedLen(len(fullBytes))+1)
		if _, err := hex.Decode(raw, []byte(fullBytes)); err != nil {
			return nil, false, false
		}
		raw[len(raw)-1] = nibble
		return raw, true, true
	case v2object.FilterHeaderCreationEpoch, v2object.FilterHeaderPayloadLength:
		num, err := strconv.ParseUint(value, 10, 64)
		if err != nil {
			return nil, false, false
		}

		// Numbers are converted to fixed-width 8-byte little-endian form.
		buf := make([]byte, 8)
		binary.LittleEndian.PutUint64(buf, num)
		return buf, false, true
	default:
		// Any other header: the value bytes are used as is.
		return []byte(value), false, true
	}
}
// stringEqualMatcher reports whether the string form of objVal for the given
// header key is exactly equal to filterVal.
func stringEqualMatcher(key string, objVal []byte, filterVal string) bool {
	actual := stringifyValue(key, objVal)
	return actual == filterVal
}
// stringEqualMatcherBucket looks up the single key of b that equals the filter
// value fValue (converted to raw form by destringifyValue) and calls f for it.
// A plain key is passed to f together with its value, a nested bucket with a
// nil value. If the filter value cannot be converted or the key is absent,
// f is not called and nil is returned.
func stringEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error {
	// The half-byte flag is irrelevant here because we check for strict equality.
	target, _, valid := destringifyValue(fKey, fValue, false)
	if !valid {
		return nil
	}

	data := b.Get(target)
	switch {
	case data != nil:
		return f(target, data)
	case b.Bucket(target) != nil:
		return f(target, nil)
	default:
		return nil
	}
}
// stringNotEqualMatcher reports whether the string form of objVal for the given
// header key differs from filterVal.
func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool {
	actual := stringifyValue(key, objVal)
	return actual != filterVal
}
// stringNotEqualMatcherBucket calls f for every entry of b whose key differs
// from the filter value fValue (converted to raw form by destringifyValue).
// If the filter value cannot be converted, no key can be equal to it, so f is
// called for every entry.
func stringNotEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error {
	// The half-byte flag is irrelevant here because we check for strict inequality.
	excluded, _, valid := destringifyValue(fKey, fValue, false)
	if !valid {
		return b.ForEach(f)
	}

	return b.ForEach(func(k, v []byte) error {
		if bytes.Equal(excluded, k) {
			return nil
		}
		return f(k, v)
	})
}
// stringCommonPrefixMatcher reports whether the string form of objVal for the
// given header key starts with filterVal.
func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool {
	actual := stringifyValue(key, objVal)
	return strings.HasPrefix(actual, filterVal)
}
// stringCommonPrefixMatcherBucket calls f for every entry of b whose key starts
// with the filter value fVal, converted to its raw key form by destringifyValue.
//
// Keys are visited through a cursor positioned at the first candidate, so only
// the matching key range is scanned. If fVal cannot be converted, no key can
// match and f is never called. An empty prefix matches every key of b.
//
// When destringifyValue reports a trailing half-byte (odd-length hex string),
// all bytes of the converted value but the last form the full-byte prefix, and
// the last byte holds the value of the remaining hex digit, which is compared
// against the highest 4 bits of the key byte following the prefix.
//
// The first error returned by f stops the iteration and is returned.
func stringCommonPrefixMatcherBucket(b *bbolt.Bucket, fKey string, fVal string, f func([]byte, []byte) error) error {
	val, checkLast, ok := destringifyValue(fKey, fVal, true)
	if !ok {
		return nil
	}

	prefix, seek := val, val
	var nibble byte
	if checkLast {
		prefix = val[:len(val)-1]
		nibble = val[len(val)-1]

		// Matching keys continue the prefix with a byte in the range
		// [nibble<<4, nibble<<4|0x0f]. Seeking to val itself (whose last byte is
		// the bare nibble) could land on a key whose next byte is below nibble<<4;
		// the half-byte check in the loop would then stop the scan before the
		// matching range is reached.
		seek = make([]byte, len(val))
		copy(seek, prefix)
		seek[len(prefix)] = nibble << 4
	}

	c := b.Cursor()
	// A nil key marks the end of the bucket. It has to be checked explicitly:
	// bytes.HasPrefix(nil, prefix) is true for an empty prefix, which would
	// otherwise keep the loop running forever.
	for k, v := c.Seek(seek); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() {
		if checkLast && (len(k) == len(prefix) || k[len(prefix)]>>4 != nibble) {
			// The cursor yields keys in sorted order, so the first key with
			// a different half-byte ends the matching range.
			break
		}
		if err := f(k, v); err != nil {
			return err
		}
	}
	return nil
}
// unknownMatcher is the value matcher for an unknown match type:
// it ignores all of its arguments and never reports a match.
func unknownMatcher(_ string, _ []byte, _ string) bool {
	const matched = false
	return matched
}
// unknownMatcherBucket is the bucket matcher for an unknown match type:
// it ignores all of its arguments, never invokes the callback and
// always returns a nil error.
func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []byte) error) error {
	var noErr error
	return noErr
}
// bucketKeyHelper returns byte representation of val that is used as a key
// in boltDB. Useful for getting filter values from unique and list indexes.
func bucketKeyHelper(hdr string, val string) []byte {

View file

@ -288,22 +288,18 @@ func (db *DB) selectFromFKBT(
return
}
err := fkbtRoot.ForEach(func(k, _ []byte) error {
if matchFunc(f.Header(), k, f.Value()) {
fkbtLeaf := fkbtRoot.Bucket(k)
if fkbtLeaf == nil {
return nil
}
return fkbtLeaf.ForEach(func(k, _ []byte) error {
addr := prefix + string(k)
markAddressInCache(to, fNum, addr)
return nil
})
err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error {
fkbtLeaf := fkbtRoot.Bucket(k)
if fkbtLeaf == nil {
return nil
}
return nil
return fkbtLeaf.ForEach(func(k, _ []byte) error {
addr := prefix + string(k)
markAddressInCache(to, fNum, addr)
return nil
})
})
if err != nil {
db.log.Debug("error in FKBT selection", zap.String("error", err.Error()))
@ -393,11 +389,7 @@ func (db *DB) selectFromList(
return
}
if err = bkt.ForEach(func(key, val []byte) error {
if !fMatch(f.Header(), key, f.Value()) {
return nil
}
if err = fMatch.matchBucket(bkt, f.Header(), f.Value(), func(key, val []byte) error {
l, err := decodeList(val)
if err != nil {
db.log.Debug("can't decode list bucket leaf",
@ -474,11 +466,8 @@ func (db *DB) selectObjectID(
return
}
err := bkt.ForEach(func(k, v []byte) error {
if obj := string(k); fMatch(f.Header(), k, f.Value()) {
appendOID(obj)
}
err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error {
appendOID(string(k))
return nil
})
if err != nil {
@ -525,7 +514,7 @@ func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f object.SearchFi
continue // ignore unknown search attributes
}
if !matchFunc(f[i].Header(), data, f[i].Value()) {
if !matchFunc.matchSlow(f[i].Header(), data, f[i].Value()) {
return false
}
}

View file

@ -396,6 +396,29 @@ func TestDB_SelectPayloadHash(t *testing.T) {
objectSDK.MatchNotPresent)
testSelect(t, db, cnr, fs)
t.Run("invalid hashes", func(t *testing.T) {
fs = objectSDK.SearchFilters{}
fs.AddFilter(v2object.FilterHeaderPayloadHash,
payloadHash[:len(payloadHash)-1],
objectSDK.MatchStringNotEqual)
testSelect(t, db, cnr, fs, object.AddressOf(raw1), object.AddressOf(raw2))
fs = objectSDK.SearchFilters{}
fs.AddFilter(v2object.FilterHeaderPayloadHash,
payloadHash[:len(payloadHash)-2]+"x",
objectSDK.MatchCommonPrefix)
testSelect(t, db, cnr, fs)
fs = objectSDK.SearchFilters{}
fs.AddFilter(v2object.FilterHeaderPayloadHash,
payloadHash[:len(payloadHash)-3]+"x0",
objectSDK.MatchCommonPrefix)
testSelect(t, db, cnr, fs)
})
}
func TestDB_SelectWithSlowFilters(t *testing.T) {