From 1efa64ee72851e53ca5eb2bf643e74141b41ca46 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Wed, 2 Oct 2024 13:04:29 +0300 Subject: [PATCH] [#1412] metabase: Add search by indexed attributes Signed-off-by: Dmitrii Stepanov --- pkg/local_object_storage/metabase/db_test.go | 12 +- pkg/local_object_storage/metabase/select.go | 115 ++++++++++++++++-- .../metabase/select_test.go | 81 +++++++----- 3 files changed, 169 insertions(+), 39 deletions(-) diff --git a/pkg/local_object_storage/metabase/db_test.go b/pkg/local_object_storage/metabase/db_test.go index 01e1ed2bc..0abb5ea89 100644 --- a/pkg/local_object_storage/metabase/db_test.go +++ b/pkg/local_object_storage/metabase/db_test.go @@ -32,7 +32,17 @@ func putBig(db *meta.DB, obj *objectSDK.Object) error { } func testSelect(t *testing.T, db *meta.DB, cnr cid.ID, fs objectSDK.SearchFilters, exp ...oid.Address) { - res, err := metaSelect(db, cnr, fs) + res, err := metaSelect(db, cnr, fs, false) + require.NoError(t, err) + require.Len(t, res, len(exp)) + + for i := range exp { + require.Contains(t, res, exp[i]) + } +} + +func testSelect2(t *testing.T, db *meta.DB, cnr cid.ID, fs objectSDK.SearchFilters, useAttrIndex bool, exp ...oid.Address) { + res, err := metaSelect(db, cnr, fs, useAttrIndex) require.NoError(t, err) require.Len(t, res, len(exp)) diff --git a/pkg/local_object_storage/metabase/select.go b/pkg/local_object_storage/metabase/select.go index 85d1b08ba..88ef7d5a4 100644 --- a/pkg/local_object_storage/metabase/select.go +++ b/pkg/local_object_storage/metabase/select.go @@ -37,8 +37,9 @@ type ( // SelectPrm groups the parameters of Select operation. type SelectPrm struct { - cnr cid.ID - filters objectSDK.SearchFilters + cnr cid.ID + filters objectSDK.SearchFilters + useAttributeIndex bool } // SelectRes groups the resulting values of Select operation. @@ -56,6 +57,10 @@ func (p *SelectPrm) SetFilters(fs objectSDK.SearchFilters) { p.filters = fs } +func (p *SelectPrm) SetUseAttributeIndex(v bool) { + p.useAttributeIndex = v +} + // AddressList returns list of addresses of the selected objects. func (r SelectRes) AddressList() []oid.Address { return r.addrList @@ -92,14 +97,14 @@ func (db *DB) Select(ctx context.Context, prm SelectPrm) (res SelectRes, err err currEpoch := db.epochState.CurrentEpoch() return res, metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error { - res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch) + res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch, prm.useAttributeIndex) success = err == nil return err })) } -func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) { - group, err := groupFilters(fs) +func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64, useAttributeIndex bool) ([]oid.Address, error) { + group, err := groupFilters(fs, useAttributeIndex) if err != nil { return nil, err } @@ -218,7 +223,13 @@ func (db *DB) selectFastFilter( selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum) selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum) selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum) - default: + default: // user attribute + bucketName := attributeBucketName(cnr, f.Header(), bucketName) + if f.Operation() == objectSDK.MatchNotPresent { + selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum) + } else { + db.selectFromFKBT(tx, bucketName, f, to, fNum) + } } } @@ -228,6 +239,15 @@ var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{ v2object.TypeLock.String(): {bucketNameLockers}, } +func allBucketNames(cnr cid.ID) (names [][]byte) { + for _, fns := range mBucketNaming { + for _, fn := range fns { + names = append(names, fn(cnr, make([]byte, bucketKeySize))) + } + } + return +} + func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (names [][]byte) { appendNames := func(key string) { fns, ok := mBucketNaming[key] @@ -259,6 +279,81 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str return } +func (db *DB) selectFromFKBT( + tx *bbolt.Tx, + name []byte, // fkbt root bucket name + f objectSDK.SearchFilter, // filter for operation and value + to map[string]int, // resulting cache + fNum int, // index of filter +) { // + matchFunc, ok := db.matchers[f.Operation()] + if !ok { + db.log.Debug(logs.MetabaseMissingMatcher, zap.Stringer("operation", f.Operation())) + + return + } + + fkbtRoot := tx.Bucket(name) + if fkbtRoot == nil { + return + } + + err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error { + fkbtLeaf := fkbtRoot.Bucket(k) + if fkbtLeaf == nil { + return nil + } + + return fkbtLeaf.ForEach(func(k, _ []byte) error { + markAddressInCache(to, fNum, string(k)) + + return nil + }) + }) + if err != nil { + db.log.Debug(logs.MetabaseErrorInFKBTSelection, zap.String("error", err.Error())) + } +} + +// selectOutsideFKBT looks into all incl buckets to find list of addresses outside to add in +// resulting cache. +func selectOutsideFKBT( + tx *bbolt.Tx, + incl [][]byte, // buckets + name []byte, // fkbt root bucket name + to map[string]int, // resulting cache + fNum int, // index of filter +) { + mExcl := make(map[string]struct{}) + + bktExcl := tx.Bucket(name) + if bktExcl != nil { + _ = bktExcl.ForEachBucket(func(k []byte) error { + exclBktLeaf := bktExcl.Bucket(k) + return exclBktLeaf.ForEach(func(k, _ []byte) error { + mExcl[string(k)] = struct{}{} + + return nil + }) + }) + } + + for i := range incl { + bktIncl := tx.Bucket(incl[i]) + if bktIncl == nil { + continue + } + + _ = bktIncl.ForEach(func(k, _ []byte) error { + if _, ok := mExcl[string(k)]; !ok { + markAddressInCache(to, fNum, string(k)) + } + + return nil + }) + } +} + // selectFromList looks into index to find list of addresses to add in // resulting cache. func (db *DB) selectFromList( @@ -486,7 +581,7 @@ func attributeValue(obj *objectSDK.Object, attribute string) (string, bool) { // groupFilters divides filters in two groups: fast and slow. Fast filters // processed by indexes and slow filters processed after by unmarshaling // object headers. -func groupFilters(filters objectSDK.SearchFilters) (filterGroup, error) { +func groupFilters(filters objectSDK.SearchFilters, useAttributeIndex bool) (filterGroup, error) { res := filterGroup{ fastFilters: make(objectSDK.SearchFilters, 0, len(filters)), slowFilters: make(objectSDK.SearchFilters, 0, len(filters)), @@ -511,7 +606,11 @@ func groupFilters(filters objectSDK.SearchFilters) (filterGroup, error) { v2object.FilterPropertyPhy: res.fastFilters = append(res.fastFilters, filters[i]) default: - res.slowFilters = append(res.slowFilters, filters[i]) + if useAttributeIndex && IsAtrributeIndexed(filters[i].Header()) { + res.fastFilters = append(res.fastFilters, filters[i]) + } else { + res.slowFilters = append(res.slowFilters, filters[i]) + } } } diff --git a/pkg/local_object_storage/metabase/select_test.go b/pkg/local_object_storage/metabase/select_test.go index bee778e2b..5cc25a9f6 100644 --- a/pkg/local_object_storage/metabase/select_test.go +++ b/pkg/local_object_storage/metabase/select_test.go @@ -26,6 +26,16 @@ import ( func TestDB_SelectUserAttributes(t *testing.T) { t.Parallel() + t.Run("with_index", func(t *testing.T) { + testSelectUserAttributes(t, true) + }) + t.Run("without_index", func(t *testing.T) { + testSelectUserAttributes(t, false) + }) +} + +func testSelectUserAttributes(t *testing.T, index bool) { + t.Parallel() db := newDB(t) defer func() { require.NoError(t, db.Close()) }() @@ -36,44 +46,52 @@ func TestDB_SelectUserAttributes(t *testing.T) { testutil.AddAttribute(raw1, "foo", "bar") testutil.AddAttribute(raw1, "x", "y") - err := putBig(db, raw1) + var putPrm meta.PutPrm + putPrm.SetIndexAttributes(index) + putPrm.SetObject(raw1) + _, err := db.Put(context.Background(), putPrm) require.NoError(t, err) raw2 := testutil.GenerateObjectWithCID(cnr) testutil.AddAttribute(raw2, "foo", "bar") testutil.AddAttribute(raw2, "x", "z") - err = putBig(db, raw2) + putPrm.SetObject(raw2) + _, err = db.Put(context.Background(), putPrm) require.NoError(t, err) raw3 := testutil.GenerateObjectWithCID(cnr) testutil.AddAttribute(raw3, "a", "b") - err = putBig(db, raw3) + putPrm.SetObject(raw3) + _, err = db.Put(context.Background(), putPrm) require.NoError(t, err) raw4 := testutil.GenerateObjectWithCID(cnr) - testutil.AddAttribute(raw4, "path", "test/1/2") + testutil.AddAttribute(raw4, objectSDK.AttributeFilePath, "/test/1/2") - err = putBig(db, raw4) + putPrm.SetObject(raw4) + _, err = db.Put(context.Background(), putPrm) require.NoError(t, err) raw5 := testutil.GenerateObjectWithCID(cnr) - testutil.AddAttribute(raw5, "path", "test/1/3") + testutil.AddAttribute(raw5, objectSDK.AttributeFilePath, "/test/1/3") - err = putBig(db, raw5) + putPrm.SetObject(raw5) + _, err = db.Put(context.Background(), putPrm) require.NoError(t, err) raw6 := testutil.GenerateObjectWithCID(cnr) - testutil.AddAttribute(raw6, "path", "test/2/3") + testutil.AddAttribute(raw6, objectSDK.AttributeFilePath, "/test/2/3") - err = putBig(db, raw6) + putPrm.SetObject(raw6) + _, err = db.Put(context.Background(), putPrm) require.NoError(t, err) raw7 := testutil.GenerateObjectWithCID(cnr) var attr objectSDK.Attribute - attr.SetKey("path") - attr.SetValue("test/3/4") + attr.SetKey(objectSDK.AttributeFilePath) + attr.SetValue("/test/3/4") attrs := raw7.Attributes() attrs = append(attrs, attr) ech := objectSDK.NewECHeader(objectSDK.ECParentInfo{ @@ -81,37 +99,39 @@ func TestDB_SelectUserAttributes(t *testing.T) { Attributes: attrs, }, 0, 3, []byte{}, 0) raw7.SetECHeader(ech) - require.NoError(t, putBig(db, raw7)) + putPrm.SetObject(raw7) + _, err = db.Put(context.Background(), putPrm) + require.NoError(t, err) var raw7Parent oid.Address raw7Parent.SetContainer(cnr) raw7Parent.SetObject(ech.Parent()) fs := objectSDK.SearchFilters{} fs.AddFilter("foo", "bar", objectSDK.MatchStringEqual) - testSelect(t, db, cnr, fs, + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1), object.AddressOf(raw2), ) fs = objectSDK.SearchFilters{} fs.AddFilter("x", "y", objectSDK.MatchStringEqual) - testSelect(t, db, cnr, fs, object.AddressOf(raw1)) + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1)) fs = objectSDK.SearchFilters{} fs.AddFilter("x", "y", objectSDK.MatchStringNotEqual) - testSelect(t, db, cnr, fs, object.AddressOf(raw2)) + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw2)) fs = objectSDK.SearchFilters{} fs.AddFilter("a", "b", objectSDK.MatchStringEqual) - testSelect(t, db, cnr, fs, object.AddressOf(raw3)) + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw3)) fs = objectSDK.SearchFilters{} fs.AddFilter("c", "d", objectSDK.MatchStringEqual) - testSelect(t, db, cnr, fs) + testSelect2(t, db, cnr, fs, index) fs = objectSDK.SearchFilters{} fs.AddFilter("foo", "", objectSDK.MatchNotPresent) - testSelect(t, db, cnr, fs, + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw3), object.AddressOf(raw4), object.AddressOf(raw5), @@ -121,7 +141,7 @@ func TestDB_SelectUserAttributes(t *testing.T) { fs = objectSDK.SearchFilters{} fs.AddFilter("a", "", objectSDK.MatchNotPresent) - testSelect(t, db, cnr, fs, + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1), object.AddressOf(raw2), object.AddressOf(raw4), @@ -131,7 +151,7 @@ func TestDB_SelectUserAttributes(t *testing.T) { ) fs = objectSDK.SearchFilters{} - testSelect(t, db, cnr, fs, + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1), object.AddressOf(raw2), object.AddressOf(raw3), @@ -143,7 +163,7 @@ func TestDB_SelectUserAttributes(t *testing.T) { fs = objectSDK.SearchFilters{} fs.AddFilter("key", "", objectSDK.MatchNotPresent) - testSelect(t, db, cnr, fs, + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1), object.AddressOf(raw2), object.AddressOf(raw3), @@ -154,8 +174,8 @@ func TestDB_SelectUserAttributes(t *testing.T) { ) fs = objectSDK.SearchFilters{} - fs.AddFilter("path", "test", objectSDK.MatchCommonPrefix) - testSelect(t, db, cnr, fs, + fs.AddFilter(objectSDK.AttributeFilePath, "/test", objectSDK.MatchCommonPrefix) + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw4), object.AddressOf(raw5), object.AddressOf(raw6), @@ -163,15 +183,15 @@ func TestDB_SelectUserAttributes(t *testing.T) { ) fs = objectSDK.SearchFilters{} - fs.AddFilter("path", "test/1", objectSDK.MatchCommonPrefix) - testSelect(t, db, cnr, fs, + fs.AddFilter(objectSDK.AttributeFilePath, "/test/1", objectSDK.MatchCommonPrefix) + testSelect2(t, db, cnr, fs, index, object.AddressOf(raw4), object.AddressOf(raw5), ) fs = objectSDK.SearchFilters{} - fs.AddFilter("path", "test/3/4", objectSDK.MatchStringEqual) - testSelect(t, db, cnr, fs, + fs.AddFilter(objectSDK.AttributeFilePath, "/test/3/4", objectSDK.MatchStringEqual) + testSelect2(t, db, cnr, fs, index, raw7Parent, ) } @@ -1185,11 +1205,11 @@ func TestExpiredObjects(t *testing.T) { cidExp, _ := exp.ContainerID() cidNonExp, _ := nonExp.ContainerID() - objs, err := metaSelect(db, cidExp, objectSDK.SearchFilters{}) + objs, err := metaSelect(db, cidExp, objectSDK.SearchFilters{}, false) require.NoError(t, err) require.Empty(t, objs) // expired object should not be returned - objs, err = metaSelect(db, cidNonExp, objectSDK.SearchFilters{}) + objs, err = metaSelect(db, cidNonExp, objectSDK.SearchFilters{}, false) require.NoError(t, err) require.NotEmpty(t, objs) }) @@ -1211,10 +1231,11 @@ func benchmarkSelect(b *testing.B, db *meta.DB, cid cidSDK.ID, fs objectSDK.Sear } } -func metaSelect(db *meta.DB, cnr cidSDK.ID, fs objectSDK.SearchFilters) ([]oid.Address, error) { +func metaSelect(db *meta.DB, cnr cidSDK.ID, fs objectSDK.SearchFilters, useAttributeIndex bool) ([]oid.Address, error) { var prm meta.SelectPrm prm.SetFilters(fs) prm.SetContainerID(cnr) + prm.SetUseAttributeIndex(useAttributeIndex) res, err := db.Select(context.Background(), prm) return res.AddressList(), err