From 5073a379300e016c78f30305484b460b6923183b Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Wed, 25 May 2022 15:39:51 +0300 Subject: [PATCH] [#1433] metabase: Optimize `Select` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some filters we can scan only a subset of keys instead of checking each key. ``` name old time/op new time/op delta Select/string_equal-8 49.3µs ± 4% 11.0µs ± 4% -77.68% (p=0.000 n=10+10) Select/string_not_equal-8 7.01ms ± 5% 7.06ms ±10% ~ (p=0.971 n=10+10) Select/common_prefix-8 118µs ± 6% 79µs ± 5% -33.04% (p=0.000 n=10+9) Select/unknown-8 21.3µs ± 4% 3.2µs ± 4% -84.88% (p=0.000 n=10+9) ``` Signed-off-by: Evgenii Stratonikov --- pkg/local_object_storage/metabase/db.go | 140 +++++++++++++++++- pkg/local_object_storage/metabase/select.go | 39 ++--- .../metabase/select_test.go | 23 +++ 3 files changed, 171 insertions(+), 31 deletions(-) diff --git a/pkg/local_object_storage/metabase/db.go b/pkg/local_object_storage/metabase/db.go index 70469a4769..bd11302767 100644 --- a/pkg/local_object_storage/metabase/db.go +++ b/pkg/local_object_storage/metabase/db.go @@ -1,6 +1,7 @@ package meta import ( + "bytes" "encoding/binary" "encoding/hex" "io/fs" @@ -16,11 +17,16 @@ import ( "go.uber.org/zap" ) +type matcher struct { + matchSlow func(string, []byte, string) bool + matchBucket func(*bbolt.Bucket, string, string, func([]byte, []byte) error) error +} + // DB represents local metabase of storage node. type DB struct { *cfg - matchers map[object.SearchMatchType]func(string, []byte, string) bool + matchers map[object.SearchMatchType]matcher boltDB *bbolt.DB } @@ -60,11 +66,23 @@ func New(opts ...Option) *DB { return &DB{ cfg: c, - matchers: map[object.SearchMatchType]func(string, []byte, string) bool{ - object.MatchUnknown: unknownMatcher, - object.MatchStringEqual: stringEqualMatcher, - object.MatchStringNotEqual: stringNotEqualMatcher, - object.MatchCommonPrefix: stringCommonPrefixMatcher, + matchers: map[object.SearchMatchType]matcher{ + object.MatchUnknown: { + matchSlow: unknownMatcher, + matchBucket: unknownMatcherBucket, + }, + object.MatchStringEqual: { + matchSlow: stringEqualMatcher, + matchBucket: stringEqualMatcherBucket, + }, + object.MatchStringNotEqual: { + matchSlow: stringNotEqualMatcher, + matchBucket: stringNotEqualMatcherBucket, + }, + object.MatchCommonPrefix: { + matchSlow: stringCommonPrefixMatcher, + matchBucket: stringCommonPrefixMatcherBucket, + }, }, } } @@ -80,22 +98,132 @@ func stringifyValue(key string, objVal []byte) string { } } +// fromHexChar converts a hex character into its value and a success flag. +func fromHexChar(c byte) (byte, bool) { + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + + return 0, false +} + +// destringifyValue is the reverse operation for stringify value. +// The last return value returns true if the filter CAN match any value. +// The second return value is true iff prefix is true and the filter value is considered +// a hex-encoded string. In this case only the first (highest) bits of the last byte should be checked. +func destringifyValue(key, value string, prefix bool) ([]byte, bool, bool) { + switch key { + default: + return []byte(value), false, true + case v2object.FilterHeaderPayloadHash, v2object.FilterHeaderHomomorphicHash: + v, err := hex.DecodeString(value) + if err != nil { + if !prefix || len(value)%2 == 0 { + return v, false, false + } + // To match the old behaviour we need to process odd length hex strings, such as 'abc' + last, ok := fromHexChar(value[len(value)-1]) + if !ok { + return v, false, false + } + + v := make([]byte, hex.DecodedLen(len(value)-1)+1) + _, err := hex.Decode(v, []byte(value[:len(value)-1])) + if err != nil { + return nil, false, false + } + v[len(v)-1] = last + + return v, true, true + } + return v, false, err == nil + case v2object.FilterHeaderCreationEpoch, v2object.FilterHeaderPayloadLength: + u, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return nil, false, false + } + raw := make([]byte, 8) + binary.LittleEndian.PutUint64(raw, u) + return raw, false, true + } +} + func stringEqualMatcher(key string, objVal []byte, filterVal string) bool { return stringifyValue(key, objVal) == filterVal } +func stringEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error { + // Ignore the second return value because we check for strict equality. + val, _, ok := destringifyValue(fKey, fValue, false) + if !ok { + return nil + } + if data := b.Get(val); data != nil { + return f(val, data) + } + if b.Bucket(val) != nil { + return f(val, nil) + } + return nil +} + func stringNotEqualMatcher(key string, objVal []byte, filterVal string) bool { return stringifyValue(key, objVal) != filterVal } +func stringNotEqualMatcherBucket(b *bbolt.Bucket, fKey string, fValue string, f func([]byte, []byte) error) error { + // Ignore the second return value because we check for strict inequality. + val, _, ok := destringifyValue(fKey, fValue, false) + return b.ForEach(func(k, v []byte) error { + if !ok || !bytes.Equal(val, k) { + return f(k, v) + } + return nil + }) +} + func stringCommonPrefixMatcher(key string, objVal []byte, filterVal string) bool { return strings.HasPrefix(stringifyValue(key, objVal), filterVal) } +func stringCommonPrefixMatcherBucket(b *bbolt.Bucket, fKey string, fVal string, f func([]byte, []byte) error) error { + val, checkLast, ok := destringifyValue(fKey, fVal, true) + if !ok { + return nil + } + + prefix := val + if checkLast { + prefix = val[:len(val)-1] + } + + c := b.Cursor() + for k, v := c.Seek(val); bytes.HasPrefix(k, prefix); k, v = c.Next() { + if checkLast && (len(k) == len(prefix) || k[len(prefix)]>>4 != val[len(val)-1]) { + // If the last byte doesn't match, this means the prefix does no longer match, + // so we need to break here. + break + } + if err := f(k, v); err != nil { + return err + } + } + return nil +} + func unknownMatcher(_ string, _ []byte, _ string) bool { return false } +func unknownMatcherBucket(_ *bbolt.Bucket, _ string, _ string, _ func([]byte, []byte) error) error { + return nil +} + // bucketKeyHelper returns byte representation of val that is used as a key // in boltDB. Useful for getting filter values from unique and list indexes. func bucketKeyHelper(hdr string, val string) []byte { diff --git a/pkg/local_object_storage/metabase/select.go b/pkg/local_object_storage/metabase/select.go index 2a7206a1ee..a18ca662e0 100644 --- a/pkg/local_object_storage/metabase/select.go +++ b/pkg/local_object_storage/metabase/select.go @@ -288,22 +288,18 @@ func (db *DB) selectFromFKBT( return } - err := fkbtRoot.ForEach(func(k, _ []byte) error { - if matchFunc(f.Header(), k, f.Value()) { - fkbtLeaf := fkbtRoot.Bucket(k) - if fkbtLeaf == nil { - return nil - } - - return fkbtLeaf.ForEach(func(k, _ []byte) error { - addr := prefix + string(k) - markAddressInCache(to, fNum, addr) - - return nil - }) + err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error { + fkbtLeaf := fkbtRoot.Bucket(k) + if fkbtLeaf == nil { + return nil } - return nil + return fkbtLeaf.ForEach(func(k, _ []byte) error { + addr := prefix + string(k) + markAddressInCache(to, fNum, addr) + + return nil + }) }) if err != nil { db.log.Debug("error in FKBT selection", zap.String("error", err.Error())) @@ -393,11 +389,7 @@ func (db *DB) selectFromList( return } - if err = bkt.ForEach(func(key, val []byte) error { - if !fMatch(f.Header(), key, f.Value()) { - return nil - } - + if err = fMatch.matchBucket(bkt, f.Header(), f.Value(), func(key, val []byte) error { l, err := decodeList(val) if err != nil { db.log.Debug("can't decode list bucket leaf", @@ -474,11 +466,8 @@ func (db *DB) selectObjectID( return } - err := bkt.ForEach(func(k, v []byte) error { - if obj := string(k); fMatch(f.Header(), k, f.Value()) { - appendOID(obj) - } - + err := fMatch.matchBucket(bkt, f.Header(), f.Value(), func(k, v []byte) error { + appendOID(string(k)) return nil }) if err != nil { @@ -525,7 +514,7 @@ func (db *DB) matchSlowFilters(tx *bbolt.Tx, addr oid.Address, f object.SearchFi continue // ignore unknown search attributes } - if !matchFunc(f[i].Header(), data, f[i].Value()) { + if !matchFunc.matchSlow(f[i].Header(), data, f[i].Value()) { return false } } diff --git a/pkg/local_object_storage/metabase/select_test.go b/pkg/local_object_storage/metabase/select_test.go index 6f67a88731..db97379034 100644 --- a/pkg/local_object_storage/metabase/select_test.go +++ b/pkg/local_object_storage/metabase/select_test.go @@ -396,6 +396,29 @@ func TestDB_SelectPayloadHash(t *testing.T) { objectSDK.MatchNotPresent) testSelect(t, db, cnr, fs) + + t.Run("invalid hashes", func(t *testing.T) { + fs = objectSDK.SearchFilters{} + fs.AddFilter(v2object.FilterHeaderPayloadHash, + payloadHash[:len(payloadHash)-1], + objectSDK.MatchStringNotEqual) + + testSelect(t, db, cnr, fs, object.AddressOf(raw1), object.AddressOf(raw2)) + + fs = objectSDK.SearchFilters{} + fs.AddFilter(v2object.FilterHeaderPayloadHash, + payloadHash[:len(payloadHash)-2]+"x", + objectSDK.MatchCommonPrefix) + + testSelect(t, db, cnr, fs) + + fs = objectSDK.SearchFilters{} + fs.AddFilter(v2object.FilterHeaderPayloadHash, + payloadHash[:len(payloadHash)-3]+"x0", + objectSDK.MatchCommonPrefix) + + testSelect(t, db, cnr, fs) + }) } func TestDB_SelectWithSlowFilters(t *testing.T) {