[#1318] metabase: Separate buckets with TS and GC marks

It allows storing information about object in both ways at the same time:
1. Metabase should know if an object is covered by a tombstone (that is
not expired yet);
2. It should be possible to physically delete objects covered by a
tombstone immediately (mark with GC) but keep tombstone knowledge.

Signed-off-by: Pavel Karpy <carpawell@nspcc.ru>
This commit is contained in:
Pavel Karpy 2021-04-08 17:19:31 +03:00 committed by LeL
parent b74fb2b932
commit 8107c8d1a9
9 changed files with 163 additions and 84 deletions

View file

@ -49,6 +49,7 @@ func (db *DB) init(reset bool) error {
string(containerVolumeBucketName): {},
string(graveyardBucketName): {},
string(toMoveItBucketName): {},
string(garbageBucketName): {},
}
return db.boltDB.Update(func(tx *bbolt.Tx) error {

View file

@ -86,12 +86,12 @@ func (db *DB) deleteGroup(tx *bbolt.Tx, addrs []*addressSDK.Address) error {
}
func (db *DB) delete(tx *bbolt.Tx, addr *addressSDK.Address, refCounter referenceCounter) error {
// remove record from graveyard
graveyard := tx.Bucket(graveyardBucketName)
if graveyard != nil {
err := graveyard.Delete(addressKey(addr))
// remove record from the garbage bucket
garbageBKT := tx.Bucket(garbageBucketName)
if garbageBKT != nil {
err := garbageBKT.Delete(addressKey(addr))
if err != nil {
return fmt.Errorf("could not remove from graveyard: %w", err)
return fmt.Errorf("could not remove from garbage bucket: %w", err)
}
}

View file

@ -6,6 +6,7 @@ import (
"github.com/nspcc-dev/neofs-node/pkg/core/object"
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
apistatus "github.com/nspcc-dev/neofs-sdk-go/client/status"
cidtest "github.com/nspcc-dev/neofs-sdk-go/container/id/test"
objectSDK "github.com/nspcc-dev/neofs-sdk-go/object"
"github.com/stretchr/testify/require"
@ -45,9 +46,6 @@ func TestDB_Delete(t *testing.T) {
err = meta.Inhume(db, object.AddressOf(child), object.AddressOf(ts))
require.NoError(t, err)
err = meta.Inhume(db, object.AddressOf(child), object.AddressOf(ts))
require.NoError(t, err)
// delete object
err = meta.Delete(db, object.AddressOf(child))
require.NoError(t, err)
@ -57,13 +55,14 @@ func TestDB_Delete(t *testing.T) {
require.NoError(t, err)
require.Len(t, l, 0)
// check if they removed from graveyard
// check if they marked as already removed
ok, err := meta.Exists(db, object.AddressOf(child))
require.NoError(t, err)
require.Error(t, apistatus.ObjectAlreadyRemoved{})
require.False(t, ok)
ok, err = meta.Exists(db, object.AddressOf(parent))
require.NoError(t, err)
require.Error(t, apistatus.ObjectAlreadyRemoved{})
require.False(t, ok)
}

View file

@ -1,7 +1,6 @@
package meta
import (
"bytes"
"errors"
"fmt"
@ -101,24 +100,37 @@ func (db *DB) exists(tx *bbolt.Tx, addr *addressSDK.Address) (exists bool, err e
}
// inGraveyard returns:
// * 0 if object is not in graveyard;
// * 1 if object is in graveyard with GC mark;
// * 2 if object is in graveyard with tombstone.
// * 0 if object is not marked for deletion;
// * 1 if object with GC mark;
// * 2 if object is covered with tombstone.
func inGraveyard(tx *bbolt.Tx, addr *addressSDK.Address) uint8 {
graveyard := tx.Bucket(graveyardBucketName)
if graveyard == nil {
// incorrect metabase state, does not make
// sense to check garbage bucket
return 0
}
val := graveyard.Get(addressKey(addr))
if val == nil {
garbageBCK := tx.Bucket(garbageBucketName)
if garbageBCK == nil {
// incorrect node state
return 0
}
val = garbageBCK.Get(addressKey(addr))
if val != nil {
// object has been marked with GC
return 1
}
// neither in the graveyard
// nor was marked with GC mark
return 0
}
if bytes.Equal(val, []byte(inhumeGCMarkValue)) {
return 1
}
// object in the graveyard
return 2
}

View file

@ -1,7 +1,6 @@
package meta
import (
"bytes"
"errors"
"fmt"
@ -9,53 +8,74 @@ import (
"go.etcd.io/bbolt"
)
// Grave represents descriptor of DB's graveyard item.
type Grave struct {
gcMark bool
// DeletedObject represents descriptor of the object that was
// marked to be deleted.
type DeletedObject struct {
addr *addressSDK.Address
}
// WithGCMark returns true if grave marked for GC to be removed.
func (g *Grave) WithGCMark() bool {
return g.gcMark
}
// Address returns buried object address.
func (g *Grave) Address() *addressSDK.Address {
func (g *DeletedObject) Address() *addressSDK.Address {
return g.addr
}
// GraveHandler is a Grave handling function.
type GraveHandler func(*Grave) error
// Handler is a DeletedObject handling function.
type Handler func(*DeletedObject) error
// IterateOverGarbage iterates over all objects
// marked with GC mark.
//
// If h returns ErrInterruptIterator, nil returns immediately.
// Returns other errors of h directly.
func (db *DB) IterateOverGarbage(h Handler) error {
return db.boltDB.View(func(tx *bbolt.Tx) error {
return db.iterateDeletedObj(tx, withGC, h)
})
}
// IterateOverGraveyard iterates over all graves in DB.
//
// If h returns ErrInterruptIterator, nil returns immediately.
// Returns other errors of h directly.
func (db *DB) IterateOverGraveyard(h GraveHandler) error {
func (db *DB) IterateOverGraveyard(h Handler) error {
return db.boltDB.View(func(tx *bbolt.Tx) error {
return db.iterateOverGraveyard(tx, h)
return db.iterateDeletedObj(tx, grave, h)
})
}
func (db *DB) iterateOverGraveyard(tx *bbolt.Tx, h GraveHandler) error {
// get graveyard bucket
bktGraveyard := tx.Bucket(graveyardBucketName)
if bktGraveyard == nil {
type deletedType uint8
const (
_ deletedType = iota
grave
withGC
)
func (db *DB) iterateDeletedObj(tx *bbolt.Tx, t deletedType, h Handler) error {
var bkt *bbolt.Bucket
switch t {
case grave:
bkt = tx.Bucket(graveyardBucketName)
case withGC:
bkt = tx.Bucket(garbageBucketName)
default:
panic(fmt.Sprintf("metabase: unknown iteration object type: %d", t))
}
if bkt == nil {
return nil
}
// iterate over all graves
err := bktGraveyard.ForEach(func(k, v []byte) error {
// parse Grave
g, err := graveFromKV(k, v)
// iterate over all deleted objects
err := bkt.ForEach(func(k, v []byte) error {
// parse deleted object
delObj, err := deletedObjectFromKV(k, v)
if err != nil {
return fmt.Errorf("could not parse Grave: %w", err)
}
// handler Grave
return h(g)
// handler object
return h(delObj)
})
if errors.Is(err, ErrInterruptIterator) {
@ -65,14 +85,13 @@ func (db *DB) iterateOverGraveyard(tx *bbolt.Tx, h GraveHandler) error {
return err
}
func graveFromKV(k, v []byte) (*Grave, error) {
func deletedObjectFromKV(k, _ []byte) (*DeletedObject, error) {
addr, err := addressFromKey(k)
if err != nil {
return nil, fmt.Errorf("could not parse address: %w", err)
}
return &Grave{
gcMark: bytes.Equal(v, []byte(inhumeGCMarkValue)),
addr: addr,
return &DeletedObject{
addr: addr,
}, nil
}

View file

@ -9,12 +9,14 @@ import (
"github.com/stretchr/testify/require"
)
func TestDB_IterateOverGraveyard(t *testing.T) {
func TestDB_IterateDeletedObjects(t *testing.T) {
db := newDB(t)
// generate and put 2 objects
obj1 := generateObject(t)
obj2 := generateObject(t)
obj3 := generateObject(t)
obj4 := generateObject(t)
var err error
@ -24,20 +26,26 @@ func TestDB_IterateOverGraveyard(t *testing.T) {
err = putBig(db, obj2)
require.NoError(t, err)
err = putBig(db, obj3)
require.NoError(t, err)
err = putBig(db, obj4)
require.NoError(t, err)
inhumePrm := new(meta.InhumePrm)
// inhume with tombstone
addrTombstone := generateAddress()
_, err = db.Inhume(inhumePrm.
WithAddresses(object.AddressOf(obj1)).
WithAddresses(object.AddressOf(obj1), object.AddressOf(obj2)).
WithTombstoneAddress(addrTombstone),
)
require.NoError(t, err)
// inhume with GC mark
_, err = db.Inhume(inhumePrm.
WithAddresses(object.AddressOf(obj2)).
WithAddresses(object.AddressOf(obj3), object.AddressOf(obj4)).
WithGCMark(),
)
@ -46,13 +54,15 @@ func TestDB_IterateOverGraveyard(t *testing.T) {
buriedTS, buriedGC []*addressSDK.Address
)
err = db.IterateOverGraveyard(func(g *meta.Grave) error {
if g.WithGCMark() {
buriedGC = append(buriedGC, g.Address())
} else {
buriedTS = append(buriedTS, g.Address())
}
err = db.IterateOverGraveyard(func(deletedObject *meta.DeletedObject) error {
buriedTS = append(buriedTS, deletedObject.Address())
counterAll++
return nil
})
err = db.IterateOverGarbage(func(deletedObject *meta.DeletedObject) error {
buriedGC = append(buriedGC, deletedObject.Address())
counterAll++
return nil
@ -60,7 +70,30 @@ func TestDB_IterateOverGraveyard(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 2, counterAll)
require.Equal(t, []*addressSDK.Address{object.AddressOf(obj1)}, buriedTS)
require.Equal(t, []*addressSDK.Address{object.AddressOf(obj2)}, buriedGC)
require.Equal(t, 4, counterAll)
require.True(t, equalAddresses([]*addressSDK.Address{object.AddressOf(obj1), object.AddressOf(obj2)}, buriedTS))
require.True(t, equalAddresses([]*addressSDK.Address{object.AddressOf(obj3), object.AddressOf(obj4)}, buriedGC))
}
func equalAddresses(aa1 []*addressSDK.Address, aa2 []*addressSDK.Address) bool {
if len(aa1) != len(aa2) {
return false
}
for _, a1 := range aa1 {
found := false
for _, a2 := range aa2 {
if a1.String() == a2.String() {
found = true
break
}
}
if !found {
return false
}
}
return true
}

View file

@ -65,8 +65,6 @@ func Inhume(db *DB, target, tomb *addressSDK.Address) error {
return err
}
const inhumeGCMarkValue = "GCMARK"
var errBreakBucketForEach = errors.New("bucket ForEach break")
// Inhume marks objects as removed but not removes it from metabase.
@ -75,28 +73,40 @@ var errBreakBucketForEach = errors.New("bucket ForEach break")
// if at least one object is locked.
func (db *DB) Inhume(prm *InhumePrm) (res *InhumeRes, err error) {
err = db.boltDB.Update(func(tx *bbolt.Tx) error {
graveyard, err := tx.CreateBucketIfNotExists(graveyardBucketName)
if err != nil {
return err
}
var (
// target bucket of the operation, one of the:
// 1. Graveyard if Inhume was called with a Tombstone
// 2. Garbage if Inhume was called with a GC mark
bkt *bbolt.Bucket
// value that will be put in the bucket, one of the:
// 1. tombstone address if Inhume was called with
// a Tombstone
// 2. zeroValue if Inhume was called with a GC mark
value []byte
)
var tombKey []byte
if prm.tomb != nil {
tombKey = addressKey(prm.tomb)
bkt = tx.Bucket(graveyardBucketName)
tombKey := addressKey(prm.tomb)
// it is forbidden to have a tomb-on-tomb in NeoFS,
// so graveyard keys must not be addresses of tombstones
// tombstones can be marked for GC in graveyard, so exclude this case
data := graveyard.Get(tombKey)
if data != nil && !bytes.Equal(data, []byte(inhumeGCMarkValue)) {
err := graveyard.Delete(tombKey)
data := bkt.Get(tombKey)
if data != nil {
err := bkt.Delete(tombKey)
if err != nil {
return fmt.Errorf("could not remove grave with tombstone key: %w", err)
}
}
value = tombKey
} else {
tombKey = []byte(inhumeGCMarkValue)
bkt, err = tx.CreateBucketIfNotExists(garbageBucketName)
if err != nil {
return err
}
value = zeroValue
}
for i := range prm.target {
@ -128,7 +138,7 @@ func (db *DB) Inhume(prm *InhumePrm) (res *InhumeRes, err error) {
// iterate over graveyard and check if target address
// is the address of tombstone in graveyard.
err = graveyard.ForEach(func(k, v []byte) error {
err = bkt.ForEach(func(k, v []byte) error {
// check if graveyard has record with key corresponding
// to tombstone address (at least one)
targetIsTomb = bytes.Equal(v, targetKey)
@ -158,7 +168,7 @@ func (db *DB) Inhume(prm *InhumePrm) (res *InhumeRes, err error) {
}
// consider checking if target is already in graveyard?
err = graveyard.Put(targetKey, tombKey)
err = bkt.Put(targetKey, value)
if err != nil {
return err
}

View file

@ -21,7 +21,14 @@ bytes. Check it later.
const invalidBase58String = "_"
var (
graveyardBucketName = []byte(invalidBase58String + "Graveyard")
// graveyardBucketName stores rows with the objects that have been
// covered with Tombstone objects. That objects should not be returned
// from the node and should not be accepted by the node from other
// nodes.
graveyardBucketName = []byte(invalidBase58String + "Graveyard")
// garbageBucketName stores rows with the objects that should be physically
// deleted by the node (Garbage Collector routine).
garbageBucketName = []byte(invalidBase58String + "Garbage")
toMoveItBucketName = []byte(invalidBase58String + "ToMoveIt")
containerVolumeBucketName = []byte(invalidBase58String + "ContainerSize")

View file

@ -166,7 +166,7 @@ func (gc *gc) stop() {
})
}
// iterates over metabase graveyard and deletes objects
// iterates over metabase and deletes objects
// with GC-marked graves.
// Does nothing if shard is in "read-only" mode.
func (s *Shard) removeGarbage() {
@ -176,12 +176,10 @@ func (s *Shard) removeGarbage() {
buf := make([]*addressSDK.Address, 0, s.rmBatchSize)
// iterate over metabase graveyard and accumulate
// objects with GC mark (no more the s.rmBatchSize objects)
err := s.metaBase.IterateOverGraveyard(func(g *meta.Grave) error {
if g.WithGCMark() {
buf = append(buf, g.Address())
}
// iterate over metabase's objects with GC mark
// (no more than s.rmBatchSize objects)
err := s.metaBase.IterateOverGarbage(func(g *meta.DeletedObject) error {
buf = append(buf, g.Address())
if len(buf) == s.rmBatchSize {
return meta.ErrInterruptIterator