[#142] metabase: Replace exclusive select with the inclusive one

The previous metabase implementation took an exclusionary approach: filters
narrowed the set of all objects down to those that match every filter. This
commit introduces an inclusive approach instead. While traversing the indexed
headers, an object that matches a filter becomes a candidate for selection.
If the object fails at least one of the subsequent filters, it ceases to be
a candidate. At the end of the traversal, the remaining candidates are added
to the resulting selection. The borderline case of no filters is handled
specially: all stored objects are added to the resulting selection.

The presented inclusive approach showed better performance in most scenarios
(although not in all of them).

Signed-off-by: Leonard Lyubich <leonard@nspcc.ru>
This commit is contained in:
Leonard Lyubich 2020-11-03 15:08:54 +03:00 committed by Alex Vanin
parent 3bbf1d6a60
commit e48f8a189e

View file

@ -18,7 +18,34 @@ func (db *DB) Select(fs object.SearchFilters) ([]*object.Address, error) {
return nil
}
// keep addresses that does not match some filter
if len(fs) == 0 {
// get primary bucket
primaryBucket := tx.Bucket(primaryBucket)
if primaryBucket == nil {
// empty storage
return nil
}
// iterate over all stored addresses
return primaryBucket.ForEach(func(k, v []byte) error {
// check if object marked as deleted
if objectRemoved(tx, k) {
return nil
}
addr := object.NewAddress()
if err := addr.Parse(string(k)); err != nil {
// TODO: storage was broken, so we need to handle it
return err
}
res = append(res, addr)
return nil
})
}
// keep processed addresses
mAddr := make(map[string]struct{})
for _, f := range fs {
@ -40,50 +67,39 @@ func (db *DB) Select(fs object.SearchFilters) ([]*object.Address, error) {
// iterate over all existing values for the key
if err := keyBucket.ForEach(func(k, _ []byte) error {
if !matchFunc(string(key), string(cutKeyBytes(k)), fVal) {
// exclude all addresses with this value
return keyBucket.Bucket(k).ForEach(func(k, _ []byte) error {
mAddr[string(k)] = struct{}{}
include := matchFunc(string(key), string(cutKeyBytes(k)), fVal)
return nil
})
return keyBucket.Bucket(k).ForEach(func(k, _ []byte) error {
if include {
mAddr[string(k)] = struct{}{}
} else {
delete(mAddr, string(k))
}
return nil
})
}); err != nil {
return errors.Wrapf(err, "(%T) could not iterate bucket %s", db, key)
}
}
// get primary bucket
primaryBucket := tx.Bucket(primaryBucket)
if primaryBucket == nil {
// empty storage
return nil
}
// iterate over all stored addresses
return primaryBucket.ForEach(func(k, v []byte) error {
if _, ok := mAddr[string(k)]; ok {
return nil
}
for a := range mAddr {
// check if object marked as deleted
if objectRemoved(tx, k) {
if objectRemoved(tx, []byte(a)) {
return nil
}
addr := object.NewAddress()
if err := addr.Parse(string(k)); err != nil {
if err := addr.Parse(a); err != nil {
// TODO: storage was broken, so we need to handle it
return err
}
res = append(res, addr)
}
return nil
})
})
return res, err
}