[#1412] metabase: Add search by indexed attributes

Signed-off-by: Dmitrii Stepanov <d.stepanov@yadro.com>
This commit is contained in:
Dmitrii Stepanov 2024-10-02 13:04:29 +03:00
parent be744ae3e6
commit 1efa64ee72
Signed by: dstepanov-yadro
GPG key ID: 237AF1A763293BC0
3 changed files with 169 additions and 39 deletions

View file

@ -32,7 +32,17 @@ func putBig(db *meta.DB, obj *objectSDK.Object) error {
}
func testSelect(t *testing.T, db *meta.DB, cnr cid.ID, fs objectSDK.SearchFilters, exp ...oid.Address) {
res, err := metaSelect(db, cnr, fs)
res, err := metaSelect(db, cnr, fs, false)
require.NoError(t, err)
require.Len(t, res, len(exp))
for i := range exp {
require.Contains(t, res, exp[i])
}
}
func testSelect2(t *testing.T, db *meta.DB, cnr cid.ID, fs objectSDK.SearchFilters, useAttrIndex bool, exp ...oid.Address) {
res, err := metaSelect(db, cnr, fs, useAttrIndex)
require.NoError(t, err)
require.Len(t, res, len(exp))

View file

@ -37,8 +37,9 @@ type (
// SelectPrm groups the parameters of Select operation.
type SelectPrm struct {
cnr cid.ID
filters objectSDK.SearchFilters
cnr cid.ID
filters objectSDK.SearchFilters
useAttributeIndex bool
}
// SelectRes groups the resulting values of Select operation.
@ -56,6 +57,10 @@ func (p *SelectPrm) SetFilters(fs objectSDK.SearchFilters) {
p.filters = fs
}
func (p *SelectPrm) SetUseAttributeIndex(v bool) {
p.useAttributeIndex = v
}
// AddressList returns list of addresses of the selected objects.
func (r SelectRes) AddressList() []oid.Address {
return r.addrList
@ -92,14 +97,14 @@ func (db *DB) Select(ctx context.Context, prm SelectPrm) (res SelectRes, err err
currEpoch := db.epochState.CurrentEpoch()
return res, metaerr.Wrap(db.boltDB.View(func(tx *bbolt.Tx) error {
res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch)
res.addrList, err = db.selectObjects(tx, prm.cnr, prm.filters, currEpoch, prm.useAttributeIndex)
success = err == nil
return err
}))
}
func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64) ([]oid.Address, error) {
group, err := groupFilters(fs)
func (db *DB) selectObjects(tx *bbolt.Tx, cnr cid.ID, fs objectSDK.SearchFilters, currEpoch uint64, useAttributeIndex bool) ([]oid.Address, error) {
group, err := groupFilters(fs, useAttributeIndex)
if err != nil {
return nil, err
}
@ -218,7 +223,13 @@ func (db *DB) selectFastFilter(
selectAllFromBucket(tx, primaryBucketName(cnr, bucketName), to, fNum)
selectAllFromBucket(tx, tombstoneBucketName(cnr, bucketName), to, fNum)
selectAllFromBucket(tx, bucketNameLockers(cnr, bucketName), to, fNum)
default:
default: // user attribute
bucketName := attributeBucketName(cnr, f.Header(), bucketName)
if f.Operation() == objectSDK.MatchNotPresent {
selectOutsideFKBT(tx, allBucketNames(cnr), bucketName, to, fNum)
} else {
db.selectFromFKBT(tx, bucketName, f, to, fNum)
}
}
}
@ -228,6 +239,15 @@ var mBucketNaming = map[string][]func(cid.ID, []byte) []byte{
v2object.TypeLock.String(): {bucketNameLockers},
}
func allBucketNames(cnr cid.ID) (names [][]byte) {
for _, fns := range mBucketNaming {
for _, fn := range fns {
names = append(names, fn(cnr, make([]byte, bucketKeySize)))
}
}
return
}
func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal string) (names [][]byte) {
appendNames := func(key string) {
fns, ok := mBucketNaming[key]
@ -259,6 +279,81 @@ func bucketNamesForType(cnr cid.ID, mType objectSDK.SearchMatchType, typeVal str
return
}
func (db *DB) selectFromFKBT(
tx *bbolt.Tx,
name []byte, // fkbt root bucket name
f objectSDK.SearchFilter, // filter for operation and value
to map[string]int, // resulting cache
fNum int, // index of filter
) { //
matchFunc, ok := db.matchers[f.Operation()]
if !ok {
db.log.Debug(logs.MetabaseMissingMatcher, zap.Stringer("operation", f.Operation()))
return
}
fkbtRoot := tx.Bucket(name)
if fkbtRoot == nil {
return
}
err := matchFunc.matchBucket(fkbtRoot, f.Header(), f.Value(), func(k, _ []byte) error {
fkbtLeaf := fkbtRoot.Bucket(k)
if fkbtLeaf == nil {
return nil
}
return fkbtLeaf.ForEach(func(k, _ []byte) error {
markAddressInCache(to, fNum, string(k))
return nil
})
})
if err != nil {
db.log.Debug(logs.MetabaseErrorInFKBTSelection, zap.String("error", err.Error()))
}
}
// selectOutsideFKBT looks into all incl buckets to find list of addresses outside <fkbt> to add in
// resulting cache.
func selectOutsideFKBT(
tx *bbolt.Tx,
incl [][]byte, // buckets
name []byte, // fkbt root bucket name
to map[string]int, // resulting cache
fNum int, // index of filter
) {
mExcl := make(map[string]struct{})
bktExcl := tx.Bucket(name)
if bktExcl != nil {
_ = bktExcl.ForEachBucket(func(k []byte) error {
exclBktLeaf := bktExcl.Bucket(k)
return exclBktLeaf.ForEach(func(k, _ []byte) error {
mExcl[string(k)] = struct{}{}
return nil
})
})
}
for i := range incl {
bktIncl := tx.Bucket(incl[i])
if bktIncl == nil {
continue
}
_ = bktIncl.ForEach(func(k, _ []byte) error {
if _, ok := mExcl[string(k)]; !ok {
markAddressInCache(to, fNum, string(k))
}
return nil
})
}
}
// selectFromList looks into <list> index to find list of addresses to add in
// resulting cache.
func (db *DB) selectFromList(
@ -486,7 +581,7 @@ func attributeValue(obj *objectSDK.Object, attribute string) (string, bool) {
// groupFilters divides filters in two groups: fast and slow. Fast filters
// processed by indexes and slow filters processed after by unmarshaling
// object headers.
func groupFilters(filters objectSDK.SearchFilters) (filterGroup, error) {
func groupFilters(filters objectSDK.SearchFilters, useAttributeIndex bool) (filterGroup, error) {
res := filterGroup{
fastFilters: make(objectSDK.SearchFilters, 0, len(filters)),
slowFilters: make(objectSDK.SearchFilters, 0, len(filters)),
@ -511,7 +606,11 @@ func groupFilters(filters objectSDK.SearchFilters) (filterGroup, error) {
v2object.FilterPropertyPhy:
res.fastFilters = append(res.fastFilters, filters[i])
default:
res.slowFilters = append(res.slowFilters, filters[i])
if useAttributeIndex && IsAtrributeIndexed(filters[i].Header()) {
res.fastFilters = append(res.fastFilters, filters[i])
} else {
res.slowFilters = append(res.slowFilters, filters[i])
}
}
}

View file

@ -26,6 +26,16 @@ import (
func TestDB_SelectUserAttributes(t *testing.T) {
t.Parallel()
t.Run("with_index", func(t *testing.T) {
testSelectUserAttributes(t, true)
})
t.Run("without_index", func(t *testing.T) {
testSelectUserAttributes(t, false)
})
}
func testSelectUserAttributes(t *testing.T, index bool) {
t.Parallel()
db := newDB(t)
defer func() { require.NoError(t, db.Close()) }()
@ -36,44 +46,52 @@ func TestDB_SelectUserAttributes(t *testing.T) {
testutil.AddAttribute(raw1, "foo", "bar")
testutil.AddAttribute(raw1, "x", "y")
err := putBig(db, raw1)
var putPrm meta.PutPrm
putPrm.SetIndexAttributes(index)
putPrm.SetObject(raw1)
_, err := db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw2 := testutil.GenerateObjectWithCID(cnr)
testutil.AddAttribute(raw2, "foo", "bar")
testutil.AddAttribute(raw2, "x", "z")
err = putBig(db, raw2)
putPrm.SetObject(raw2)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw3 := testutil.GenerateObjectWithCID(cnr)
testutil.AddAttribute(raw3, "a", "b")
err = putBig(db, raw3)
putPrm.SetObject(raw3)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw4 := testutil.GenerateObjectWithCID(cnr)
testutil.AddAttribute(raw4, "path", "test/1/2")
testutil.AddAttribute(raw4, objectSDK.AttributeFilePath, "/test/1/2")
err = putBig(db, raw4)
putPrm.SetObject(raw4)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw5 := testutil.GenerateObjectWithCID(cnr)
testutil.AddAttribute(raw5, "path", "test/1/3")
testutil.AddAttribute(raw5, objectSDK.AttributeFilePath, "/test/1/3")
err = putBig(db, raw5)
putPrm.SetObject(raw5)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw6 := testutil.GenerateObjectWithCID(cnr)
testutil.AddAttribute(raw6, "path", "test/2/3")
testutil.AddAttribute(raw6, objectSDK.AttributeFilePath, "/test/2/3")
err = putBig(db, raw6)
putPrm.SetObject(raw6)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
raw7 := testutil.GenerateObjectWithCID(cnr)
var attr objectSDK.Attribute
attr.SetKey("path")
attr.SetValue("test/3/4")
attr.SetKey(objectSDK.AttributeFilePath)
attr.SetValue("/test/3/4")
attrs := raw7.Attributes()
attrs = append(attrs, attr)
ech := objectSDK.NewECHeader(objectSDK.ECParentInfo{
@ -81,37 +99,39 @@ func TestDB_SelectUserAttributes(t *testing.T) {
Attributes: attrs,
}, 0, 3, []byte{}, 0)
raw7.SetECHeader(ech)
require.NoError(t, putBig(db, raw7))
putPrm.SetObject(raw7)
_, err = db.Put(context.Background(), putPrm)
require.NoError(t, err)
var raw7Parent oid.Address
raw7Parent.SetContainer(cnr)
raw7Parent.SetObject(ech.Parent())
fs := objectSDK.SearchFilters{}
fs.AddFilter("foo", "bar", objectSDK.MatchStringEqual)
testSelect(t, db, cnr, fs,
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw1),
object.AddressOf(raw2),
)
fs = objectSDK.SearchFilters{}
fs.AddFilter("x", "y", objectSDK.MatchStringEqual)
testSelect(t, db, cnr, fs, object.AddressOf(raw1))
testSelect2(t, db, cnr, fs, index, object.AddressOf(raw1))
fs = objectSDK.SearchFilters{}
fs.AddFilter("x", "y", objectSDK.MatchStringNotEqual)
testSelect(t, db, cnr, fs, object.AddressOf(raw2))
testSelect2(t, db, cnr, fs, index, object.AddressOf(raw2))
fs = objectSDK.SearchFilters{}
fs.AddFilter("a", "b", objectSDK.MatchStringEqual)
testSelect(t, db, cnr, fs, object.AddressOf(raw3))
testSelect2(t, db, cnr, fs, index, object.AddressOf(raw3))
fs = objectSDK.SearchFilters{}
fs.AddFilter("c", "d", objectSDK.MatchStringEqual)
testSelect(t, db, cnr, fs)
testSelect2(t, db, cnr, fs, index)
fs = objectSDK.SearchFilters{}
fs.AddFilter("foo", "", objectSDK.MatchNotPresent)
testSelect(t, db, cnr, fs,
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw3),
object.AddressOf(raw4),
object.AddressOf(raw5),
@ -121,7 +141,7 @@ func TestDB_SelectUserAttributes(t *testing.T) {
fs = objectSDK.SearchFilters{}
fs.AddFilter("a", "", objectSDK.MatchNotPresent)
testSelect(t, db, cnr, fs,
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw1),
object.AddressOf(raw2),
object.AddressOf(raw4),
@ -131,7 +151,7 @@ func TestDB_SelectUserAttributes(t *testing.T) {
)
fs = objectSDK.SearchFilters{}
testSelect(t, db, cnr, fs,
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw1),
object.AddressOf(raw2),
object.AddressOf(raw3),
@ -143,7 +163,7 @@ func TestDB_SelectUserAttributes(t *testing.T) {
fs = objectSDK.SearchFilters{}
fs.AddFilter("key", "", objectSDK.MatchNotPresent)
testSelect(t, db, cnr, fs,
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw1),
object.AddressOf(raw2),
object.AddressOf(raw3),
@ -154,8 +174,8 @@ func TestDB_SelectUserAttributes(t *testing.T) {
)
fs = objectSDK.SearchFilters{}
fs.AddFilter("path", "test", objectSDK.MatchCommonPrefix)
testSelect(t, db, cnr, fs,
fs.AddFilter(objectSDK.AttributeFilePath, "/test", objectSDK.MatchCommonPrefix)
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw4),
object.AddressOf(raw5),
object.AddressOf(raw6),
@ -163,15 +183,15 @@ func TestDB_SelectUserAttributes(t *testing.T) {
)
fs = objectSDK.SearchFilters{}
fs.AddFilter("path", "test/1", objectSDK.MatchCommonPrefix)
testSelect(t, db, cnr, fs,
fs.AddFilter(objectSDK.AttributeFilePath, "/test/1", objectSDK.MatchCommonPrefix)
testSelect2(t, db, cnr, fs, index,
object.AddressOf(raw4),
object.AddressOf(raw5),
)
fs = objectSDK.SearchFilters{}
fs.AddFilter("path", "test/3/4", objectSDK.MatchStringEqual)
testSelect(t, db, cnr, fs,
fs.AddFilter(objectSDK.AttributeFilePath, "/test/3/4", objectSDK.MatchStringEqual)
testSelect2(t, db, cnr, fs, index,
raw7Parent,
)
}
@ -1185,11 +1205,11 @@ func TestExpiredObjects(t *testing.T) {
cidExp, _ := exp.ContainerID()
cidNonExp, _ := nonExp.ContainerID()
objs, err := metaSelect(db, cidExp, objectSDK.SearchFilters{})
objs, err := metaSelect(db, cidExp, objectSDK.SearchFilters{}, false)
require.NoError(t, err)
require.Empty(t, objs) // expired object should not be returned
objs, err = metaSelect(db, cidNonExp, objectSDK.SearchFilters{})
objs, err = metaSelect(db, cidNonExp, objectSDK.SearchFilters{}, false)
require.NoError(t, err)
require.NotEmpty(t, objs)
})
@ -1211,10 +1231,11 @@ func benchmarkSelect(b *testing.B, db *meta.DB, cid cidSDK.ID, fs objectSDK.Sear
}
}
func metaSelect(db *meta.DB, cnr cidSDK.ID, fs objectSDK.SearchFilters) ([]oid.Address, error) {
func metaSelect(db *meta.DB, cnr cidSDK.ID, fs objectSDK.SearchFilters, useAttributeIndex bool) ([]oid.Address, error) {
var prm meta.SelectPrm
prm.SetFilters(fs)
prm.SetContainerID(cnr)
prm.SetUseAttributeIndex(useAttributeIndex)
res, err := db.Select(context.Background(), prm)
return res.AddressList(), err