forked from TrueCloudLab/frostfs-node
[#568] writecache: Improve flushing scheme for badger
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
This commit is contained in:
parent 806cc13d9f
commit 4dff9555f1
4 changed files with 89 additions and 65 deletions
go.mod (2 changed lines)

@@ -11,6 +11,7 @@ require (
 	git.frostfs.info/TrueCloudLab/tzhash v1.8.0
 	github.com/cheggaaa/pb v1.0.29
 	github.com/chzyer/readline v1.5.1
+	github.com/dgraph-io/ristretto v0.1.1
 	github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/golang-lru/v2 v2.0.4

@@ -42,7 +43,6 @@ require (
 )

 require (
-	github.com/dgraph-io/ristretto v0.1.1 // indirect
 	github.com/dustin/go-humanize v1.0.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/glog v1.1.0 // indirect

@@ -296,6 +296,7 @@ const (
 	WritecacheCantParseAddress = "can't parse address"
 	WritecacheCantRemoveObjectFromWritecache = "can't remove object from write-cache"
 	WritecacheDBValueLogGCRunCompleted = "value log GC run completed"
+	WritecacheBadgerObjAlreadyScheduled = "object already scheduled for flush"
 	BlobovniczatreeCouldNotGetObjectFromLevel = "could not get object from level"
 	BlobovniczatreeCouldNotReadPayloadRangeFromOpenedBlobovnicza = "could not read payload range from opened blobovnicza"
 	BlobovniczatreeCouldNotReadPayloadRangeFromActiveBlobovnicza = "could not read payload range from active blobovnicza"

@@ -21,6 +21,10 @@ type cache struct {
 	// flushCh is a channel with objects to flush.
 	flushCh chan *objectSDK.Object
+	// scheduled4Flush contains objects scheduled for flush via flushCh;
+	// it helps to avoid flushing the same object more than once.
+	scheduled4Flush    map[oid.Address]struct{}
+	scheduled4FlushMtx sync.RWMutex
 	// closeCh is close channel, protected by modeMtx.
 	closeCh chan struct{}
 	// wg is a wait group for flush workers.

@@ -47,8 +51,9 @@ const (
 // New creates new writecache instance.
 func New(opts ...Option) writecache.Cache {
 	c := &cache{
 		flushCh: make(chan *objectSDK.Object),
 		mode:    mode.ReadWrite,
+		scheduled4Flush: map[oid.Address]struct{}{},

 		options: options{
 			log: &logger.Logger{Logger: zap.NewNop()},
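
To illustrate the pattern these fields introduce: scheduled4Flush plus scheduled4FlushMtx act as a "check, then mark" guard around flushCh, so the same object is not queued for flushing twice while an earlier flush of it is still in flight. Below is a minimal, self-contained sketch of that guard, not code from this patch; the scheduler type and the tryStart/finish helpers are invented names.

package main

import (
	"fmt"
	"sync"
)

// scheduler mimics the role of scheduled4Flush/scheduled4FlushMtx: it
// remembers which keys are already queued so the same key is not queued
// again before a worker finishes with it.
type scheduler struct {
	mtx       sync.RWMutex
	scheduled map[string]struct{}
}

// tryStart reports whether the key was newly scheduled; it returns false
// if the key is already in flight.
func (s *scheduler) tryStart(key string) bool {
	s.mtx.RLock()
	_, inFlight := s.scheduled[key]
	s.mtx.RUnlock()
	if inFlight {
		return false
	}
	s.mtx.Lock()
	s.scheduled[key] = struct{}{}
	s.mtx.Unlock()
	return true
}

// finish clears the mark once the key has been processed.
func (s *scheduler) finish(key string) {
	s.mtx.Lock()
	delete(s.scheduled, key)
	s.mtx.Unlock()
}

func main() {
	s := &scheduler{scheduled: map[string]struct{}{}}
	fmt.Println(s.tryStart("obj-1")) // true: scheduled for the first time
	fmt.Println(s.tryStart("obj-1")) // false: still in flight, skipped
	s.finish("obj-1")
	fmt.Println(s.tryStart("obj-1")) // true: can be scheduled again
}

In the patch the check is taken under a read lock and the insert under a write lock; that split works because only the single stream goroutine inserts entries (the code comments note that all calls to Send come from one goroutine), while flush workers only delete entries after an object has been flushed.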

@@ -18,7 +18,7 @@ import (
 	"git.frostfs.info/TrueCloudLab/frostfs-observability/tracing"
 	objectSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/object"
 	"github.com/dgraph-io/badger/v4"
-	"github.com/mr-tron/base58"
+	"github.com/dgraph-io/ristretto/z"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"

@@ -35,6 +35,65 @@ const (
 	defaultFlushInterval = time.Second
 )

+type collector struct {
+	cache     *cache
+	scheduled int
+	processed int
+	cancel    func()
+}
+
+func (c *collector) Send(buf *z.Buffer) error {
+	list, err := badger.BufferToKVList(buf)
+	if err != nil {
+		return err
+	}
+	for _, kv := range list.Kv {
+		select {
+		case <-c.cache.closeCh:
+			c.cancel()
+			return nil
+		default:
+		}
+		if kv.StreamDone {
+			return nil
+		}
+		if c.scheduled >= flushBatchSize {
+			c.cancel()
+			return nil
+		}
+		if got, want := len(kv.Key), len(internalKey{}); got != want {
+			c.cache.log.Debug(
+				fmt.Sprintf("not expected db key len: got %d, want %d", got, want))
+			continue
+		}
+		c.processed++
+		obj := objectSDK.New()
+		val := bytes.Clone(kv.Value)
+		if err = obj.Unmarshal(val); err != nil {
+			continue
+		}
+		addr := objectCore.AddressOf(obj)
+		c.cache.scheduled4FlushMtx.RLock()
+		_, ok := c.cache.scheduled4Flush[addr]
+		c.cache.scheduled4FlushMtx.RUnlock()
+		if ok {
+			c.cache.log.Debug(logs.WritecacheBadgerObjAlreadyScheduled, zap.Stringer("obj", addr))
+			continue
+		}
+		c.cache.scheduled4FlushMtx.Lock()
+		c.cache.scheduled4Flush[addr] = struct{}{}
+		c.cache.scheduled4FlushMtx.Unlock()
+		c.scheduled++
+		select {
+		case c.cache.flushCh <- obj:
+		case <-c.cache.closeCh:
+			c.cancel()
+			return nil
+		}
+	}
+	return nil
+}
+
 // runFlushLoop starts background workers which periodically flush objects to the blobstor.
 func (c *cache) runFlushLoop() {
 	for i := 0; i < c.workersCount; i++ {

@@ -62,17 +121,12 @@ func (c *cache) runFlushLoop() {
 }

 func (c *cache) flushSmallObjects() {
-	var lastKey internalKey
-	var m []objectInfo
 	for {
 		select {
 		case <-c.closeCh:
 			return
 		default:
 		}

-		m = m[:0]

 		c.modeMtx.RLock()
 		if c.readOnly() {
 			c.modeMtx.RUnlock()

@@ -86,61 +140,24 @@ func (c *cache) flushSmallObjects() {
 			c.modeMtx.RUnlock()
 			return
 		}
-		_ = c.db.View(func(tx *badger.Txn) error {
-			it := tx.NewIterator(badger.DefaultIteratorOptions)
-			defer it.Close()
-			if len(lastKey) == 0 {
-				it.Rewind()
-			} else {
-				it.Seek(lastKey[:])
-				if it.Valid() && bytes.Equal(it.Item().Key(), lastKey[:]) {
-					it.Next()
-				}
-			}
-			for ; it.Valid() && len(m) < flushBatchSize; it.Next() {
-				if got, want := int(it.Item().KeySize()), len(lastKey); got != want {
-					return fmt.Errorf("invalid db key len: got %d, want %d", got, want)
-				}
-				it.Item().KeyCopy(lastKey[:])
-				value, err := it.Item().ValueCopy(nil)
-				if err != nil {
-					return err
-				}
-				m = append(m, objectInfo{
-					addr: lastKey.address(),
-					data: value,
-				})
-			}
-			return nil
-		})
-
-		var count int
-		for i := range m {
-			obj := objectSDK.New()
-			if err := obj.Unmarshal(m[i].data); err != nil {
-				continue
-			}
-
-			count++
-			select {
-			case c.flushCh <- obj:
-			case <-c.closeCh:
-				c.modeMtx.RUnlock()
-				return
-			}
-		}
-		if count == 0 {
-			c.modeMtx.RUnlock()
+		ctx, cancel := context.WithCancel(context.TODO())
+		coll := collector{
+			cache:  c,
+			cancel: cancel,
+		}
+		stream := c.db.NewStream()
+		// All calls to Send are done by a single goroutine
+		stream.Send = coll.Send
+		if err := stream.Orchestrate(ctx); err != nil {
+			c.log.Debug(fmt.Sprintf(
+				"error during flushing object from wc: %s", err))
+		}
+		c.modeMtx.RUnlock()
+		if coll.scheduled == 0 {
 			break
 		}
-
-		c.modeMtx.RUnlock()
-
 		c.log.Debug(logs.WritecacheTriedToFlushItemsFromWritecache,
-			zap.Int("count", count),
-			zap.String("start", base58.Encode(lastKey[:])))
+			zap.Int("scheduled", coll.scheduled), zap.Int("processed", coll.processed))
 	}
 }
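
The rewritten loop above replaces the manual badger iterator with badger's Stream framework: NewStream walks the keyspace, each batch reaches the Send callback as a z.Buffer that BufferToKVList decodes, and Orchestrate drives the run until it finishes or its context is cancelled, which is how the collector caps a pass at flushBatchSize. The sketch below is a self-contained illustration of that API, not part of the patch; the in-memory database, the seeded keys, and the limit of 3 are assumptions made for the example.

package main

import (
	"context"
	"errors"
	"fmt"
	"log"

	"github.com/dgraph-io/badger/v4"
	"github.com/dgraph-io/ristretto/z"
)

func main() {
	// Throwaway in-memory database, used only to demonstrate the Stream API.
	db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Seed a few entries so the stream has something to send.
	if err := db.Update(func(txn *badger.Txn) error {
		for i := 0; i < 10; i++ {
			if err := txn.Set([]byte(fmt.Sprintf("key-%02d", i)), []byte("value")); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		log.Fatal(err)
	}

	const limit = 3 // stand-in for flushBatchSize
	collected := 0

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	stream := db.NewStream()
	// Send is invoked serially by the stream with batches of key-value pairs.
	stream.Send = func(buf *z.Buffer) error {
		list, err := badger.BufferToKVList(buf)
		if err != nil {
			return err
		}
		for _, kv := range list.Kv {
			if kv.StreamDone { // end-of-stream marker, not real data
				return nil
			}
			if collected >= limit {
				cancel() // stop Orchestrate early, like the collector at flushBatchSize
				return nil
			}
			collected++
			fmt.Printf("got %s (%d bytes)\n", kv.Key, len(kv.Value))
		}
		return nil
	}

	// Orchestrate runs the iteration; a deliberate cancel surfaces as context.Canceled.
	if err := stream.Orchestrate(ctx); err != nil && !errors.Is(err, context.Canceled) {
		log.Fatal(err)
	}
	fmt.Println("collected:", collected)
}

Compared with a single View transaction and a hand-maintained lastKey cursor, the stream handles keyspace traversal itself, and early termination falls out of plain context cancellation rather than cursor bookkeeping.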

@@ -167,13 +184,14 @@ func (c *cache) workerFlushSmall() {
 			return
 		}

+		addr := objectCore.AddressOf(obj)
 		err := c.flushObject(context.TODO(), obj, nil, writecache.StorageTypeDB)
-		if err != nil {
-			// Error is handled in flushObject.
-			continue
+		if err == nil {
+			c.deleteFromDB([]internalKey{addr2key(addr)})
 		}
-
-		c.deleteFromDB([]internalKey{addr2key(objectCore.AddressOf(obj))})
+		c.scheduled4FlushMtx.Lock()
+		delete(c.scheduled4Flush, addr)
+		c.scheduled4FlushMtx.Unlock()
 	}
 }