forked from TrueCloudLab/frostfs-node
[#472] blobstor: implement write-cache
Signed-off-by: Evgenii Stratonikov <evgeniy@nspcc.ru>
This commit is contained in:
parent
96a8ee7c83
commit
59de521fd1
24 changed files with 1011 additions and 116 deletions
|
@ -24,6 +24,7 @@ import (
|
|||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/writecache"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/metrics"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/morph/client"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/morph/client/container/wrapper"
|
||||
|
@ -131,6 +132,11 @@ const (
|
|||
|
||||
cfgBlobStorSection = "blobstor"
|
||||
cfgWriteCacheSection = "writecache"
|
||||
cfgWriteCacheMemSize = "mem_size"
|
||||
cfgWriteCacheDBSize = "db_size"
|
||||
cfgWriteCacheSmallSize = "small_size"
|
||||
cfgWriteCacheMaxSize = "max_size"
|
||||
cfgWriteCacheWrkCount = "workers_count"
|
||||
cfgBlobStorCompress = "compress"
|
||||
cfgBlobStorShallowDepth = "shallow_depth"
|
||||
cfgBlobStorTreePath = "path"
|
||||
|
@ -560,6 +566,11 @@ func initShardOptions(c *cfg) {
|
|||
c.log.Warn("incorrect writeCache path, ignore shard")
|
||||
break
|
||||
}
|
||||
writeCacheMemSize := c.viper.GetUint64(configPath(writeCachePrefix, cfgWriteCacheMemSize))
|
||||
writeCacheDBSize := c.viper.GetUint64(configPath(writeCachePrefix, cfgWriteCacheDBSize))
|
||||
writeCacheSmallSize := c.viper.GetUint64(configPath(writeCachePrefix, cfgWriteCacheSmallSize))
|
||||
writeCacheMaxSize := c.viper.GetUint64(configPath(writeCachePrefix, cfgWriteCacheMaxSize))
|
||||
writeCacheWrkCount := c.viper.GetInt(configPath(writeCachePrefix, cfgWriteCacheWrkCount))
|
||||
|
||||
blobPrefix := configPath(prefix, cfgBlobStorSection)
|
||||
|
||||
|
@ -589,6 +600,9 @@ func initShardOptions(c *cfg) {
|
|||
if smallSzLimit == 0 {
|
||||
smallSzLimit = 1 << 20 // 1MB
|
||||
}
|
||||
if writeCacheMaxSize <= 0 {
|
||||
writeCacheSmallSize = smallSzLimit
|
||||
}
|
||||
|
||||
blzPrefix := configPath(blobPrefix, cfgBlobStorBlzSection)
|
||||
|
||||
|
@ -657,9 +671,13 @@ func initShardOptions(c *cfg) {
|
|||
),
|
||||
shard.WithWriteCache(useCache),
|
||||
shard.WithWriteCacheOptions(
|
||||
blobstor.WithRootPath(writeCachePath),
|
||||
blobstor.WithBlobovniczaShallowDepth(0),
|
||||
blobstor.WithBlobovniczaShallowWidth(1),
|
||||
writecache.WithPath(writeCachePath),
|
||||
writecache.WithLogger(c.log),
|
||||
writecache.WithMaxMemSize(writeCacheMemSize),
|
||||
writecache.WithMaxObjectSize(writeCacheMaxSize),
|
||||
writecache.WithSmallObjectSize(writeCacheSmallSize),
|
||||
writecache.WithMaxDBSize(writeCacheDBSize),
|
||||
writecache.WithFlushWorkersCount(writeCacheWrkCount),
|
||||
),
|
||||
shard.WithRemoverBatchSize(rmBatchSize),
|
||||
shard.WithGCRemoverSleepInterval(rmSleepInterval),
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
)
|
||||
|
@ -44,6 +45,52 @@ func stringifyAddress(addr *objectSDK.Address) string {
|
|||
return hex.EncodeToString(h[:])
|
||||
}
|
||||
|
||||
// Iterate iterates over all stored objects.
|
||||
func (t *FSTree) Iterate(f func(addr *objectSDK.Address, data []byte) error) error {
|
||||
return t.iterate(0, []string{t.RootPath}, f)
|
||||
}
|
||||
|
||||
func (t *FSTree) iterate(depth int, curPath []string, f func(*objectSDK.Address, []byte) error) error {
|
||||
curName := strings.Join(curPath[1:], "")
|
||||
des, err := ioutil.ReadDir(curName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
isLast := depth >= t.Depth
|
||||
l := len(curPath)
|
||||
curPath = append(curPath, "")
|
||||
|
||||
for i := range des {
|
||||
curPath[l] = des[i].Name()
|
||||
|
||||
if !isLast && des[i].IsDir() {
|
||||
err := t.iterate(depth+1, curPath, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
addr := objectSDK.NewAddress()
|
||||
err := addr.Parse(curName + des[i].Name())
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
curPath = append(curPath, des[i].Name())
|
||||
data, err := ioutil.ReadFile(path.Join(curPath...))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := f(addr, data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *FSTree) treePath(addr *objectSDK.Address) string {
|
||||
sAddr := stringifyAddress(addr)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ package blobstor
|
|||
|
||||
import "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree"
|
||||
|
||||
// DumpInfo returns information about the BlobStor.
|
||||
// FSTree returns file-system tree for big object store.
|
||||
func (b *BlobStor) DumpInfo() fstree.Info {
|
||||
return b.fsTree.Info
|
||||
}
|
||||
|
|
|
@ -47,10 +47,9 @@ func (p *PutPrm) WithBlobovniczaID(id *blobovnicza.ID) *PutPrm {
|
|||
}
|
||||
|
||||
var (
|
||||
ErrUnknownObjectType = errors.New("unknown object type")
|
||||
ErrIncorrectBlobovniczaUpdate = errors.New("updating blobovnicza id on object without it")
|
||||
ErrIncorrectSplitInfoUpdate = errors.New("updating split info on object without it")
|
||||
ErrIncorrectRootObject = errors.New("invalid root object")
|
||||
ErrUnknownObjectType = errors.New("unknown object type")
|
||||
ErrIncorrectSplitInfoUpdate = errors.New("updating split info on object without it")
|
||||
ErrIncorrectRootObject = errors.New("invalid root object")
|
||||
)
|
||||
|
||||
// Put saves the object in DB.
|
||||
|
@ -379,20 +378,12 @@ func decodeList(data []byte) (lst [][]byte, err error) {
|
|||
// updateBlobovniczaID for existing objects if they were moved from from
|
||||
// one blobovnicza to another.
|
||||
func updateBlobovniczaID(tx *bbolt.Tx, addr *objectSDK.Address, id *blobovnicza.ID) error {
|
||||
bkt := tx.Bucket(smallBucketName(addr.ContainerID()))
|
||||
if bkt == nil {
|
||||
// if object exists, don't have blobovniczaID and we want to update it
|
||||
// then ignore, this should never happen
|
||||
return ErrIncorrectBlobovniczaUpdate
|
||||
bkt, err := tx.CreateBucketIfNotExists(smallBucketName(addr.ContainerID()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
objectKey := objectKey(addr.ObjectID())
|
||||
|
||||
if len(bkt.Get(objectKey)) == 0 {
|
||||
return ErrIncorrectBlobovniczaUpdate
|
||||
}
|
||||
|
||||
return bkt.Put(objectKey, *id)
|
||||
return bkt.Put(objectKey(addr.ObjectID()), *id)
|
||||
}
|
||||
|
||||
// updateSpliInfo for existing objects if storage filled with extra information
|
||||
|
|
|
@ -42,8 +42,5 @@ func TestDB_PutBlobovnicaUpdate(t *testing.T) {
|
|||
fetchedBlobovniczaID, err := meta.IsSmall(db, raw2.Object().Address())
|
||||
require.NoError(t, err)
|
||||
require.Nil(t, fetchedBlobovniczaID)
|
||||
|
||||
err = meta.Put(db, raw2.Object(), &blobovniczaID)
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -2,9 +2,11 @@ package shard
|
|||
|
||||
import (
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobovnicza"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/pkg/errors"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
|
@ -37,13 +39,10 @@ func (s *Shard) Delete(prm *DeletePrm) (*DeleteRes, error) {
|
|||
smalls := make(map[*objectSDK.Address]*blobovnicza.ID, ln)
|
||||
|
||||
for i := range prm.addr {
|
||||
delSmallPrm.SetAddress(prm.addr[i])
|
||||
delBigPrm.SetAddress(prm.addr[i])
|
||||
|
||||
if s.hasWriteCache() {
|
||||
_, err := s.writeCache.DeleteSmall(delSmallPrm)
|
||||
if err != nil {
|
||||
_, _ = s.writeCache.DeleteBig(delBigPrm)
|
||||
err := s.writeCache.Delete(prm.addr[i])
|
||||
if err != nil && !errors.Is(err, object.ErrNotFound) {
|
||||
s.log.Error("can't delete object from write cache", zap.String("error", err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,24 +9,19 @@ import (
|
|||
)
|
||||
|
||||
func TestShard_Delete(t *testing.T) {
|
||||
sh := newShard(t, false)
|
||||
shWC := newShard(t, true)
|
||||
|
||||
defer func() {
|
||||
releaseShard(sh, t)
|
||||
releaseShard(shWC, t)
|
||||
}()
|
||||
|
||||
t.Run("without write cache", func(t *testing.T) {
|
||||
testShardDelete(t, sh)
|
||||
testShardDelete(t, false)
|
||||
})
|
||||
|
||||
t.Run("with write cache", func(t *testing.T) {
|
||||
testShardDelete(t, shWC)
|
||||
testShardDelete(t, true)
|
||||
})
|
||||
}
|
||||
|
||||
func testShardDelete(t *testing.T, sh *shard.Shard) {
|
||||
func testShardDelete(t *testing.T, hasWriteCache bool) {
|
||||
sh := newShard(t, hasWriteCache)
|
||||
defer releaseShard(sh, t)
|
||||
|
||||
cid := generateCID()
|
||||
|
||||
obj := generateRawObjectWithCID(t, cid)
|
||||
|
@ -47,7 +42,7 @@ func testShardDelete(t *testing.T, sh *shard.Shard) {
|
|||
_, err := sh.Put(putPrm)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = sh.Get(getPrm)
|
||||
_, err = testGet(t, sh, getPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = sh.Delete(delPrm)
|
||||
|
|
|
@ -8,6 +8,8 @@ import (
|
|||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobovnicza"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/pkg/errors"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// storFetcher is a type to unify object fetching mechanism in `fetchObjectData`
|
||||
|
@ -89,19 +91,16 @@ func (s *Shard) fetchObjectData(addr *objectSDK.Address, big, small storFetcher)
|
|||
)
|
||||
|
||||
if s.hasWriteCache() {
|
||||
res, err = small(s.writeCache, nil)
|
||||
res, err = s.writeCache.Get(addr)
|
||||
if err == nil {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
s.log.Debug("miss in writeCache blobovnicza")
|
||||
|
||||
res, err = big(s.writeCache, nil)
|
||||
if err == nil {
|
||||
return res, nil
|
||||
if errors.Is(err, object.ErrNotFound) {
|
||||
s.log.Debug("object is missing in write-cache")
|
||||
} else {
|
||||
s.log.Error("failed to fetch object from write-cache", zap.Error(err))
|
||||
}
|
||||
|
||||
s.log.Debug("miss in writeCache shallow dir")
|
||||
}
|
||||
|
||||
exists, err := meta.Exists(s.metaBase, addr)
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
|
@ -12,31 +13,25 @@ import (
|
|||
)
|
||||
|
||||
func TestShard_Get(t *testing.T) {
|
||||
sh := newShard(t, false)
|
||||
shWC := newShard(t, true)
|
||||
|
||||
defer func() {
|
||||
releaseShard(sh, t)
|
||||
releaseShard(shWC, t)
|
||||
}()
|
||||
|
||||
t.Run("without write cache", func(t *testing.T) {
|
||||
testShardGet(t, sh)
|
||||
testShardGet(t, false)
|
||||
})
|
||||
|
||||
t.Run("with write cache", func(t *testing.T) {
|
||||
testShardGet(t, shWC)
|
||||
testShardGet(t, true)
|
||||
})
|
||||
}
|
||||
|
||||
func testShardGet(t *testing.T, sh *shard.Shard) {
|
||||
obj := generateRawObject(t)
|
||||
addAttribute(obj, "foo", "bar")
|
||||
func testShardGet(t *testing.T, hasWriteCache bool) {
|
||||
sh := newShard(t, hasWriteCache)
|
||||
defer releaseShard(sh, t)
|
||||
|
||||
putPrm := new(shard.PutPrm)
|
||||
getPrm := new(shard.GetPrm)
|
||||
|
||||
t.Run("small object", func(t *testing.T) {
|
||||
obj := generateRawObject(t)
|
||||
addAttribute(obj, "foo", "bar")
|
||||
addPayload(obj, 1<<5)
|
||||
|
||||
putPrm.WithObject(obj.Object())
|
||||
|
@ -46,12 +41,14 @@ func testShardGet(t *testing.T, sh *shard.Shard) {
|
|||
|
||||
getPrm.WithAddress(obj.Object().Address())
|
||||
|
||||
res, err := sh.Get(getPrm)
|
||||
res, err := testGet(t, sh, getPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, obj.Object(), res.Object())
|
||||
})
|
||||
|
||||
t.Run("big object", func(t *testing.T) {
|
||||
obj := generateRawObject(t)
|
||||
addAttribute(obj, "foo", "bar")
|
||||
obj.SetID(generateOID())
|
||||
addPayload(obj, 1<<20) // big obj
|
||||
|
||||
|
@ -62,12 +59,14 @@ func testShardGet(t *testing.T, sh *shard.Shard) {
|
|||
|
||||
getPrm.WithAddress(obj.Object().Address())
|
||||
|
||||
res, err := sh.Get(getPrm)
|
||||
res, err := testGet(t, sh, getPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, obj.Object(), res.Object())
|
||||
})
|
||||
|
||||
t.Run("parent object", func(t *testing.T) {
|
||||
obj := generateRawObject(t)
|
||||
addAttribute(obj, "foo", "bar")
|
||||
cid := generateCID()
|
||||
splitID := objectSDK.NewSplitID()
|
||||
|
||||
|
@ -87,13 +86,13 @@ func testShardGet(t *testing.T, sh *shard.Shard) {
|
|||
|
||||
getPrm.WithAddress(child.Object().Address())
|
||||
|
||||
res, err := sh.Get(getPrm)
|
||||
res, err := testGet(t, sh, getPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
require.True(t, binaryEqual(child.Object(), res.Object()))
|
||||
|
||||
getPrm.WithAddress(parent.Object().Address())
|
||||
|
||||
_, err = sh.Get(getPrm)
|
||||
_, err = testGet(t, sh, getPrm, hasWriteCache)
|
||||
|
||||
var expectedErr *objectSDK.SplitInfoError
|
||||
require.True(t, errors.As(err, &expectedErr))
|
||||
|
@ -106,6 +105,19 @@ func testShardGet(t *testing.T, sh *shard.Shard) {
|
|||
})
|
||||
}
|
||||
|
||||
func testGet(t *testing.T, sh *shard.Shard, getPrm *shard.GetPrm, hasWriteCache bool) (*shard.GetRes, error) {
|
||||
res, err := sh.Get(getPrm)
|
||||
if hasWriteCache {
|
||||
require.Eventually(t, func() bool {
|
||||
if errors.Is(err, object.ErrNotFound) {
|
||||
res, err = sh.Get(getPrm)
|
||||
}
|
||||
return !errors.Is(err, object.ErrNotFound)
|
||||
}, time.Second, time.Millisecond*100)
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
||||
// binary equal is used when object contains empty lists in the structure and
|
||||
// requre.Equal fails on comparing <nil> and []{} lists.
|
||||
func binaryEqual(a, b *object.Object) bool {
|
||||
|
|
|
@ -3,31 +3,28 @@ package shard_test
|
|||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestShard_Head(t *testing.T) {
|
||||
sh := newShard(t, false)
|
||||
shWC := newShard(t, true)
|
||||
|
||||
defer func() {
|
||||
releaseShard(sh, t)
|
||||
releaseShard(shWC, t)
|
||||
}()
|
||||
|
||||
t.Run("without write cache", func(t *testing.T) {
|
||||
testShardHead(t, sh)
|
||||
testShardHead(t, false)
|
||||
})
|
||||
|
||||
t.Run("with write cache", func(t *testing.T) {
|
||||
testShardHead(t, shWC)
|
||||
testShardHead(t, true)
|
||||
})
|
||||
}
|
||||
|
||||
func testShardHead(t *testing.T, sh *shard.Shard) {
|
||||
func testShardHead(t *testing.T, hasWriteCache bool) {
|
||||
sh := newShard(t, hasWriteCache)
|
||||
defer releaseShard(sh, t)
|
||||
|
||||
putPrm := new(shard.PutPrm)
|
||||
headPrm := new(shard.HeadPrm)
|
||||
|
||||
|
@ -42,7 +39,7 @@ func testShardHead(t *testing.T, sh *shard.Shard) {
|
|||
|
||||
headPrm.WithAddress(obj.Object().Address())
|
||||
|
||||
res, err := sh.Head(headPrm)
|
||||
res, err := testHead(t, sh, headPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, obj.Object(), res.Object())
|
||||
})
|
||||
|
@ -69,7 +66,7 @@ func testShardHead(t *testing.T, sh *shard.Shard) {
|
|||
|
||||
var siErr *objectSDK.SplitInfoError
|
||||
|
||||
_, err = sh.Head(headPrm)
|
||||
_, err = testHead(t, sh, headPrm, hasWriteCache)
|
||||
require.True(t, errors.As(err, &siErr))
|
||||
|
||||
headPrm.WithAddress(parent.Object().Address())
|
||||
|
@ -80,3 +77,16 @@ func testShardHead(t *testing.T, sh *shard.Shard) {
|
|||
require.Equal(t, parent.Object(), head.Object())
|
||||
})
|
||||
}
|
||||
|
||||
func testHead(t *testing.T, sh *shard.Shard, headPrm *shard.HeadPrm, hasWriteCache bool) (*shard.HeadRes, error) {
|
||||
res, err := sh.Head(headPrm)
|
||||
if hasWriteCache {
|
||||
require.Eventually(t, func() bool {
|
||||
if errors.Is(err, object.ErrNotFound) {
|
||||
res, err = sh.Head(headPrm)
|
||||
}
|
||||
return !errors.Is(err, object.ErrNotFound)
|
||||
}, time.Second, time.Millisecond*100)
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
|
|
@ -44,6 +44,12 @@ func (p *InhumePrm) MarkAsGarbage(addr ...*objectSDK.Address) *InhumePrm {
|
|||
// Inhume calls metabase. Inhume method to mark object as removed. It won't be
|
||||
// removed physically from blobStor and metabase until `Delete` operation.
|
||||
func (s *Shard) Inhume(prm *InhumePrm) (*InhumeRes, error) {
|
||||
if s.hasWriteCache() {
|
||||
for i := range prm.target {
|
||||
_ = s.writeCache.Delete(prm.target[i])
|
||||
}
|
||||
}
|
||||
|
||||
metaPrm := new(meta.InhumePrm).WithAddresses(prm.target...)
|
||||
|
||||
if prm.tombstone != nil {
|
||||
|
|
|
@ -9,24 +9,19 @@ import (
|
|||
)
|
||||
|
||||
func TestShard_Inhume(t *testing.T) {
|
||||
sh := newShard(t, false)
|
||||
shWC := newShard(t, true)
|
||||
|
||||
defer func() {
|
||||
releaseShard(sh, t)
|
||||
releaseShard(shWC, t)
|
||||
}()
|
||||
|
||||
t.Run("without write cache", func(t *testing.T) {
|
||||
testShardInhume(t, sh)
|
||||
testShardInhume(t, false)
|
||||
})
|
||||
|
||||
t.Run("with write cache", func(t *testing.T) {
|
||||
testShardInhume(t, shWC)
|
||||
testShardInhume(t, true)
|
||||
})
|
||||
}
|
||||
|
||||
func testShardInhume(t *testing.T, sh *shard.Shard) {
|
||||
func testShardInhume(t *testing.T, hasWriteCache bool) {
|
||||
sh := newShard(t, hasWriteCache)
|
||||
defer releaseShard(sh, t)
|
||||
|
||||
cid := generateCID()
|
||||
|
||||
obj := generateRawObjectWithCID(t, cid)
|
||||
|
@ -46,7 +41,7 @@ func testShardInhume(t *testing.T, sh *shard.Shard) {
|
|||
_, err := sh.Put(putPrm)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = sh.Get(getPrm)
|
||||
_, err = testGet(t, sh, getPrm, hasWriteCache)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = sh.Inhume(inhPrm)
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/pkg/errors"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// PutPrm groups the parameters of Put operation.
|
||||
|
@ -34,26 +35,24 @@ func (s *Shard) Put(prm *PutPrm) (*PutRes, error) {
|
|||
|
||||
// exist check are not performed there, these checks should be executed
|
||||
// ahead of `Put` by storage engine
|
||||
if s.hasWriteCache() {
|
||||
err := s.writeCache.Put(prm.obj)
|
||||
if err == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
s.log.Debug("can't put message to writeCache, trying to blobStor",
|
||||
zap.String("err", err.Error()))
|
||||
}
|
||||
|
||||
var (
|
||||
err error
|
||||
res *blobstor.PutRes
|
||||
)
|
||||
|
||||
if s.hasWriteCache() {
|
||||
res, err = s.writeCache.Put(putPrm)
|
||||
if err != nil {
|
||||
s.log.Debug("can't put message to writeCache, trying to blobStor")
|
||||
|
||||
res = nil // just in case
|
||||
}
|
||||
}
|
||||
|
||||
// res == nil if there is no writeCache or writeCache.Put has been failed
|
||||
if res == nil {
|
||||
if res, err = s.blobStor.Put(putPrm); err != nil {
|
||||
return nil, errors.Wrap(err, "could not put object to BLOB storage")
|
||||
}
|
||||
if res, err = s.blobStor.Put(putPrm); err != nil {
|
||||
return nil, errors.Wrap(err, "could not put object to BLOB storage")
|
||||
}
|
||||
|
||||
// put to metabase
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/writecache"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/util"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/util/logger"
|
||||
"go.uber.org/atomic"
|
||||
|
@ -19,7 +20,7 @@ type Shard struct {
|
|||
|
||||
mode *atomic.Uint32
|
||||
|
||||
writeCache *blobstor.BlobStor
|
||||
writeCache writecache.Cache
|
||||
|
||||
blobStor *blobstor.BlobStor
|
||||
|
||||
|
@ -43,7 +44,7 @@ type cfg struct {
|
|||
|
||||
metaOpts []meta.Option
|
||||
|
||||
writeCacheOpts []blobstor.Option
|
||||
writeCacheOpts []writecache.Option
|
||||
|
||||
log *logger.Logger
|
||||
|
||||
|
@ -68,19 +69,22 @@ func New(opts ...Option) *Shard {
|
|||
opts[i](c)
|
||||
}
|
||||
|
||||
var writeCache *blobstor.BlobStor
|
||||
bs := blobstor.New(c.blobOpts...)
|
||||
mb := meta.New(c.metaOpts...)
|
||||
|
||||
var writeCache writecache.Cache
|
||||
if c.useWriteCache {
|
||||
writeCache = blobstor.New(
|
||||
append(c.blobOpts, c.writeCacheOpts...)...,
|
||||
)
|
||||
writeCache = writecache.New(
|
||||
append(c.writeCacheOpts,
|
||||
writecache.WithBlobstor(bs),
|
||||
writecache.WithMetabase(mb))...)
|
||||
}
|
||||
|
||||
return &Shard{
|
||||
cfg: c,
|
||||
mode: atomic.NewUint32(0), // TODO: init with particular mode
|
||||
blobStor: blobstor.New(c.blobOpts...),
|
||||
metaBase: meta.New(c.metaOpts...),
|
||||
blobStor: bs,
|
||||
metaBase: mb,
|
||||
writeCache: writeCache,
|
||||
}
|
||||
}
|
||||
|
@ -107,7 +111,7 @@ func WithMetaBaseOptions(opts ...meta.Option) Option {
|
|||
}
|
||||
|
||||
// WithMetaBaseOptions returns option to set internal metabase options.
|
||||
func WithWriteCacheOptions(opts ...blobstor.Option) Option {
|
||||
func WithWriteCacheOptions(opts ...writecache.Option) Option {
|
||||
return func(c *cfg) {
|
||||
c.writeCacheOpts = opts
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/writecache"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/util/test"
|
||||
"github.com/nspcc-dev/tzhash/tz"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
@ -41,9 +42,8 @@ func newShard(t testing.TB, enableWriteCache bool) *shard.Shard {
|
|||
),
|
||||
shard.WithWriteCache(enableWriteCache),
|
||||
shard.WithWriteCacheOptions(
|
||||
blobstor.WithRootPath(path.Join(rootPath, "wcache")),
|
||||
blobstor.WithBlobovniczaShallowWidth(1),
|
||||
blobstor.WithBlobovniczaShallowDepth(0),
|
||||
writecache.WithMaxMemSize(0), // disable memory batches
|
||||
writecache.WithPath(path.Join(rootPath, "wcache")),
|
||||
),
|
||||
}
|
||||
|
||||
|
|
49
pkg/local_object_storage/writecache/delete.go
Normal file
49
pkg/local_object_storage/writecache/delete.go
Normal file
|
@ -0,0 +1,49 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// Delete removes object from write-cache.
|
||||
func (c *cache) Delete(addr *objectSDK.Address) error {
|
||||
saddr := addr.String()
|
||||
|
||||
// Check memory cache.
|
||||
c.mtx.Lock()
|
||||
for i := range c.mem {
|
||||
if saddr == c.mem[i].addr {
|
||||
copy(c.mem[i:], c.mem[i+1:])
|
||||
c.mem = c.mem[:len(c.mem)-1]
|
||||
c.mtx.Unlock()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
c.mtx.Unlock()
|
||||
|
||||
// Check disk cache.
|
||||
has := false
|
||||
_ = c.db.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
has = b.Get([]byte(saddr)) != nil
|
||||
return nil
|
||||
})
|
||||
|
||||
if has {
|
||||
return c.db.Update(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
return b.Delete([]byte(saddr))
|
||||
})
|
||||
}
|
||||
|
||||
err := c.fsTree.Delete(addr)
|
||||
if errors.Is(err, fstree.ErrFileNotFound) {
|
||||
err = object.ErrNotFound
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
20
pkg/local_object_storage/writecache/doc.go
Normal file
20
pkg/local_object_storage/writecache/doc.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
// Package writecache implements write-cache for objects.
|
||||
//
|
||||
// It contains in-memory cache of fixed size and underlying database
|
||||
// (usually on SSD) for storing small objects.
|
||||
// There are 3 places where object can be:
|
||||
// 1. In-memory cache.
|
||||
// 2. On-disk cache DB.
|
||||
// 3. Main storage (blobstor).
|
||||
//
|
||||
// There are 2 types of background jobs:
|
||||
// 1. Persisting objects from in-memory cache to database.
|
||||
// 2. Flushing objects from database to blobstor.
|
||||
// On flushing object address is put in in-memory LRU cache.
|
||||
// The actual deletion from the DB is done when object
|
||||
// is evicted from this cache.
|
||||
//
|
||||
// Putting objects to the main storage is done by multiple workers.
|
||||
// Some of them prioritize flushing items, others prioritize putting new objects.
|
||||
// The current ration is 50/50. This helps to make some progress even under load.
|
||||
package writecache
|
196
pkg/local_object_storage/writecache/flush.go
Normal file
196
pkg/local_object_storage/writecache/flush.go
Normal file
|
@ -0,0 +1,196 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mr-tron/base58"
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobovnicza"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
const (
|
||||
// flushBatchSize is amount of keys which will be read from cache to be flushed
|
||||
// to the main storage. It is used to reduce contention between cache put
|
||||
// and cache persist.
|
||||
flushBatchSize = 512
|
||||
// flushWorkersCount is number of workers for putting objects in main storage.
|
||||
flushWorkersCount = 20
|
||||
// defaultFlushInterval is default time interval between successive flushes.
|
||||
defaultFlushInterval = time.Second
|
||||
)
|
||||
|
||||
// flushLoop periodically flushes changes from the database to memory.
|
||||
func (c *cache) flushLoop() {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i := 0; i < c.workersCount; i++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
c.flushWorker(i)
|
||||
}(i)
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
c.flushBigObjects()
|
||||
}()
|
||||
|
||||
tick := time.NewTicker(defaultFlushInterval)
|
||||
for {
|
||||
select {
|
||||
case <-tick.C:
|
||||
c.flush()
|
||||
case <-c.closeCh:
|
||||
c.log.Debug("waiting for workers to quit")
|
||||
wg.Wait()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cache) flush() {
|
||||
lastKey := []byte{}
|
||||
var m []objectInfo
|
||||
for {
|
||||
m = m[:0]
|
||||
sz := 0
|
||||
|
||||
// We put objects in batches of fixed size to not interfere with main put cycle a lot.
|
||||
_ = c.db.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
cs := b.Cursor()
|
||||
for k, v := cs.Seek(lastKey); k != nil && len(m) < flushBatchSize; k, v = cs.Next() {
|
||||
if _, ok := c.flushed.Peek(string(k)); ok {
|
||||
continue
|
||||
}
|
||||
|
||||
sz += len(k) + len(v)
|
||||
m = append(m, objectInfo{
|
||||
addr: string(k),
|
||||
data: cloneBytes(v),
|
||||
})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
for i := range m {
|
||||
obj := object.New()
|
||||
if err := obj.Unmarshal(m[i].data); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case c.flushCh <- obj:
|
||||
case <-c.closeCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
c.evictObjects(len(m))
|
||||
for i := range m {
|
||||
c.flushed.Add(m[i].addr, true)
|
||||
}
|
||||
|
||||
c.dbSize.Sub(uint64(sz))
|
||||
|
||||
c.log.Debug("flushed items from write-cache",
|
||||
zap.Int("count", len(m)),
|
||||
zap.String("start", base58.Encode(lastKey)))
|
||||
|
||||
if len(m) > 0 {
|
||||
lastKey = append([]byte(m[len(m)-1].addr), 0)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cache) flushBigObjects() {
|
||||
tick := time.NewTicker(defaultFlushInterval * 10)
|
||||
for {
|
||||
select {
|
||||
case <-tick.C:
|
||||
_ = c.fsTree.Iterate(func(addr *objectSDK.Address, data []byte) error {
|
||||
if _, ok := c.store.flushed.Peek(addr.String()); ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := c.blobstor.PutRaw(addr, data); err != nil {
|
||||
c.log.Error("cant flush object to blobstor", zap.Error(err))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
case <-c.closeCh:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flushWorker runs in a separate goroutine and write objects to the main storage.
|
||||
// If flushFirst is true, flushing objects from cache database takes priority over
|
||||
// putting new objects.
|
||||
func (c *cache) flushWorker(num int) {
|
||||
priorityCh := c.directCh
|
||||
switch num % 3 {
|
||||
case 0:
|
||||
priorityCh = c.flushCh
|
||||
case 1:
|
||||
priorityCh = c.metaCh
|
||||
}
|
||||
|
||||
var obj *object.Object
|
||||
for {
|
||||
metaOnly := false
|
||||
|
||||
// Give priority to direct put.
|
||||
// TODO(fyrchik): do this once in N iterations depending on load
|
||||
select {
|
||||
case obj = <-priorityCh:
|
||||
default:
|
||||
select {
|
||||
case obj = <-c.directCh:
|
||||
case obj = <-c.flushCh:
|
||||
case obj = <-c.metaCh:
|
||||
metaOnly = true
|
||||
case <-c.closeCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err := c.writeObject(obj, metaOnly)
|
||||
if err != nil {
|
||||
c.log.Error("can't flush object to the main storage", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// writeObject is used to write object directly to the main storage.
|
||||
func (c *cache) writeObject(obj *object.Object, metaOnly bool) error {
|
||||
var id *blobovnicza.ID
|
||||
|
||||
if !metaOnly {
|
||||
prm := new(blobstor.PutPrm)
|
||||
prm.SetObject(obj)
|
||||
res, err := c.blobstor.Put(prm)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
id = res.BlobovniczaID()
|
||||
}
|
||||
|
||||
return meta.Put(c.metabase, obj, id)
|
||||
}
|
||||
|
||||
func cloneBytes(a []byte) []byte {
|
||||
b := make([]byte, len(a))
|
||||
copy(b, a)
|
||||
return b
|
||||
}
|
51
pkg/local_object_storage/writecache/get.go
Normal file
51
pkg/local_object_storage/writecache/get.go
Normal file
|
@ -0,0 +1,51 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// Get returns object from write-cache.
|
||||
func (c *cache) Get(addr *objectSDK.Address) (*object.Object, error) {
|
||||
saddr := addr.String()
|
||||
|
||||
c.mtx.RLock()
|
||||
for i := range c.mem {
|
||||
if saddr == c.mem[i].addr {
|
||||
obj := c.mem[i].obj
|
||||
c.mtx.RUnlock()
|
||||
return obj, nil
|
||||
}
|
||||
}
|
||||
c.mtx.RUnlock()
|
||||
|
||||
var value []byte
|
||||
_ = c.db.View(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
val := b.Get([]byte(saddr))
|
||||
if val != nil {
|
||||
value = cloneBytes(val)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if value != nil {
|
||||
obj := object.New()
|
||||
c.flushed.Get(saddr)
|
||||
return obj, obj.Unmarshal(value)
|
||||
}
|
||||
|
||||
data, err := c.fsTree.Get(addr)
|
||||
if err != nil {
|
||||
return nil, object.ErrNotFound
|
||||
}
|
||||
|
||||
obj := object.New()
|
||||
if err := obj.Unmarshal(data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c.flushed.Get(saddr)
|
||||
return obj, nil
|
||||
}
|
100
pkg/local_object_storage/writecache/options.go
Normal file
100
pkg/local_object_storage/writecache/options.go
Normal file
|
@ -0,0 +1,100 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
|
||||
meta "github.com/nspcc-dev/neofs-node/pkg/local_object_storage/metabase"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// Option represents write-cache configuration option.
|
||||
type Option func(*options)
|
||||
|
||||
type options struct {
|
||||
log *zap.Logger
|
||||
// path is a path to a directory for write-cache.
|
||||
path string
|
||||
// blobstor is the main persistent storage.
|
||||
blobstor *blobstor.BlobStor
|
||||
// metabase is the metabase instance.
|
||||
metabase *meta.DB
|
||||
// maxMemSize is the maximum total size of all objects cached in memory.
|
||||
// 1 GiB by default.
|
||||
maxMemSize uint64
|
||||
// maxDBSize is the maximum size of database in bytes.
|
||||
// Unrestricted by default.
|
||||
maxDBSize uint64
|
||||
// maxObjectSize is the maximum size of the object stored in the write-cache.
|
||||
maxObjectSize uint64
|
||||
// smallObjectSize is the maximum size of the object stored in the database.
|
||||
smallObjectSize uint64
|
||||
// workersCount is the number of workers flushing objects in parallel.
|
||||
workersCount int
|
||||
}
|
||||
|
||||
// WithLogger sets logger.
|
||||
func WithLogger(log *zap.Logger) Option {
|
||||
return func(o *options) {
|
||||
o.log = log
|
||||
}
|
||||
}
|
||||
|
||||
// WithPath sets path to writecache db.
|
||||
func WithPath(path string) Option {
|
||||
return func(o *options) {
|
||||
o.path = path
|
||||
}
|
||||
}
|
||||
|
||||
// WithBlobstor sets main object storage.
|
||||
func WithBlobstor(bs *blobstor.BlobStor) Option {
|
||||
return func(o *options) {
|
||||
o.blobstor = bs
|
||||
}
|
||||
}
|
||||
|
||||
// WithMetabase sets metabase.
|
||||
func WithMetabase(db *meta.DB) Option {
|
||||
return func(o *options) {
|
||||
o.metabase = db
|
||||
}
|
||||
}
|
||||
|
||||
// WithMaxMemSize sets maximum size for in-memory DB.
|
||||
func WithMaxMemSize(sz uint64) Option {
|
||||
return func(o *options) {
|
||||
o.maxMemSize = sz
|
||||
}
|
||||
}
|
||||
|
||||
// WithMaxDBSize sets maximum size for on-disk DB.
|
||||
func WithMaxDBSize(sz uint64) Option {
|
||||
return func(o *options) {
|
||||
o.maxDBSize = sz
|
||||
}
|
||||
}
|
||||
|
||||
// WithMaxObjectSize sets maximum object size to be stored in write-cache.
|
||||
func WithMaxObjectSize(sz uint64) Option {
|
||||
return func(o *options) {
|
||||
if sz > 0 {
|
||||
o.maxObjectSize = sz
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithSmallObjectSize sets maximum object size to be stored in write-cache.
|
||||
func WithSmallObjectSize(sz uint64) Option {
|
||||
return func(o *options) {
|
||||
if sz > 0 {
|
||||
o.smallObjectSize = sz
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func WithFlushWorkersCount(c int) Option {
|
||||
return func(o *options) {
|
||||
if c > 0 {
|
||||
o.workersCount = c
|
||||
}
|
||||
}
|
||||
}
|
124
pkg/local_object_storage/writecache/persist.go
Normal file
124
pkg/local_object_storage/writecache/persist.go
Normal file
|
@ -0,0 +1,124 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"go.etcd.io/bbolt"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
const defaultPersistInterval = time.Second
|
||||
|
||||
// persistLoop persists object accumulated in memory to the database.
|
||||
func (c *cache) persistLoop() {
|
||||
tick := time.NewTicker(defaultPersistInterval)
|
||||
defer tick.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-tick.C:
|
||||
c.mtx.RLock()
|
||||
m := c.mem
|
||||
c.mtx.RUnlock()
|
||||
|
||||
sort.Slice(m, func(i, j int) bool { return m[i].addr < m[j].addr })
|
||||
|
||||
start := time.Now()
|
||||
c.persistObjects(m)
|
||||
c.log.Debug("persisted items to disk",
|
||||
zap.Duration("took", time.Since(start)),
|
||||
zap.Int("total", len(m)))
|
||||
|
||||
c.mtx.Lock()
|
||||
n := copy(c.mem, c.mem[len(m):])
|
||||
c.mem = c.mem[:n]
|
||||
for i := range c.mem {
|
||||
c.curMemSize += uint64(len(c.mem[i].data))
|
||||
}
|
||||
c.mtx.Unlock()
|
||||
|
||||
sz := 0
|
||||
for i := range m {
|
||||
sz += len(m[i].addr) + m[i].obj.ToV2().StableSize()
|
||||
}
|
||||
c.dbSize.Add(uint64(sz))
|
||||
|
||||
case <-c.closeCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cache) persistToCache(objs []objectInfo) []int {
|
||||
var (
|
||||
failMem []int
|
||||
doneMem []int
|
||||
)
|
||||
|
||||
_ = c.db.Update(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
for i := range objs {
|
||||
if uint64(len(objs[i].data)) >= c.smallObjectSize {
|
||||
failMem = append(failMem, i)
|
||||
continue
|
||||
}
|
||||
|
||||
err := b.Put([]byte(objs[i].addr), objs[i].data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
doneMem = append(doneMem, i)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if len(doneMem) > 0 {
|
||||
c.evictObjects(len(doneMem))
|
||||
for _, i := range doneMem {
|
||||
c.flushed.Add(objs[i].addr, true)
|
||||
}
|
||||
}
|
||||
|
||||
var failDisk []int
|
||||
|
||||
for _, i := range failMem {
|
||||
if uint64(len(objs[i].data)) > c.maxObjectSize {
|
||||
failDisk = append(failDisk, i)
|
||||
continue
|
||||
}
|
||||
|
||||
err := c.fsTree.Put(objs[i].obj.Address(), objs[i].data)
|
||||
if err != nil {
|
||||
failDisk = append(failDisk, i)
|
||||
}
|
||||
}
|
||||
|
||||
return failDisk
|
||||
}
|
||||
|
||||
// persistObjects tries to write objects from memory to the persistent storage.
|
||||
// If tryCache is false, writing skips cache and is done directly to the main storage.
|
||||
func (c *cache) persistObjects(objs []objectInfo) {
|
||||
toDisk := c.persistToCache(objs)
|
||||
j := 0
|
||||
|
||||
for i := range objs {
|
||||
ch := c.metaCh
|
||||
if j < len(toDisk) {
|
||||
if i == toDisk[j] {
|
||||
ch = c.directCh
|
||||
} else {
|
||||
for ; j < len(toDisk) && i > toDisk[j]; j++ {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case ch <- objs[j].obj:
|
||||
case <-c.closeCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
44
pkg/local_object_storage/writecache/put.go
Normal file
44
pkg/local_object_storage/writecache/put.go
Normal file
|
@ -0,0 +1,44 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
)
|
||||
|
||||
// ErrBigObject is returned when object is too big to be placed in cache.
|
||||
var ErrBigObject = errors.New("too big object")
|
||||
|
||||
// Put puts object to write-cache.
|
||||
func (c *cache) Put(o *object.Object) error {
|
||||
sz := uint64(o.ToV2().StableSize())
|
||||
if sz > c.maxObjectSize {
|
||||
return ErrBigObject
|
||||
}
|
||||
|
||||
data, err := o.Marshal(nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
oi := objectInfo{
|
||||
addr: o.Address().String(),
|
||||
obj: o,
|
||||
data: data,
|
||||
}
|
||||
|
||||
c.mtx.Lock()
|
||||
|
||||
if sz < c.smallObjectSize && c.curMemSize+sz <= c.maxMemSize {
|
||||
c.curMemSize += sz
|
||||
c.mem = append(c.mem, oi)
|
||||
|
||||
c.mtx.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
c.mtx.Unlock()
|
||||
|
||||
c.persistObjects([]objectInfo{oi})
|
||||
return nil
|
||||
}
|
130
pkg/local_object_storage/writecache/storage.go
Normal file
130
pkg/local_object_storage/writecache/storage.go
Normal file
|
@ -0,0 +1,130 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
lru "github.com/hashicorp/golang-lru"
|
||||
"github.com/hashicorp/golang-lru/simplelru"
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree"
|
||||
"go.etcd.io/bbolt"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// store represents persistent storage with in-memory LRU cache
|
||||
// for flushed items on top of it.
|
||||
type store struct {
|
||||
flushed simplelru.LRUCache
|
||||
db *bbolt.DB
|
||||
}
|
||||
|
||||
const lruKeysCount = 256 * 1024 * 8
|
||||
|
||||
const dbName = "small.bolt"
|
||||
|
||||
func (c *cache) openStore() error {
|
||||
if err := os.MkdirAll(c.path, os.ModePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
db, err := bbolt.Open(path.Join(c.path, dbName), os.ModePerm, &bbolt.Options{
|
||||
NoFreelistSync: true,
|
||||
NoSync: true,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.fsTree = &fstree.FSTree{
|
||||
Info: fstree.Info{
|
||||
Permissions: os.ModePerm,
|
||||
RootPath: c.path,
|
||||
},
|
||||
Depth: 1,
|
||||
DirNameLen: 1,
|
||||
}
|
||||
|
||||
_ = db.Update(func(tx *bbolt.Tx) error {
|
||||
_, err := tx.CreateBucketIfNotExists(defaultBucket)
|
||||
return err
|
||||
})
|
||||
|
||||
c.db = db
|
||||
c.flushed, _ = lru.New(lruKeysCount)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *store) removeFlushedKeys(n int) ([][]byte, [][]byte) {
|
||||
var keysMem, keysDisk [][]byte
|
||||
for i := 0; i < n; i++ {
|
||||
k, v, ok := s.flushed.RemoveOldest()
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
|
||||
if v.(bool) {
|
||||
keysMem = append(keysMem, []byte(k.(string)))
|
||||
} else {
|
||||
keysDisk = append(keysDisk, []byte(k.(string)))
|
||||
}
|
||||
}
|
||||
|
||||
return keysMem, keysDisk
|
||||
}
|
||||
|
||||
func (c *cache) evictObjects(putCount int) {
|
||||
sum := c.flushed.Len() + putCount
|
||||
if sum <= lruKeysCount {
|
||||
return
|
||||
}
|
||||
|
||||
keysMem, keysDisk := c.store.removeFlushedKeys(sum - lruKeysCount)
|
||||
|
||||
if err := c.deleteFromDB(keysMem); err != nil {
|
||||
c.log.Error("error while removing objects from write-cache (database)", zap.Error(err))
|
||||
}
|
||||
|
||||
if err := c.deleteFromDisk(keysDisk); err != nil {
|
||||
c.log.Error("error while removing objects from write-cache (disk)", zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cache) deleteFromDB(keys [][]byte) error {
|
||||
if len(keys) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return c.db.Update(func(tx *bbolt.Tx) error {
|
||||
b := tx.Bucket(defaultBucket)
|
||||
for i := range keys {
|
||||
if err := b.Delete(keys[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func (c *cache) deleteFromDisk(keys [][]byte) error {
|
||||
var lastErr error
|
||||
|
||||
for i := range keys {
|
||||
addr := objectSDK.NewAddress()
|
||||
addrStr := string(keys[i])
|
||||
|
||||
if err := addr.Parse(addrStr); err != nil {
|
||||
c.log.Error("can't parse address", zap.String("address", addrStr))
|
||||
continue
|
||||
}
|
||||
|
||||
if err := c.fsTree.Delete(addr); err != nil && !errors.Is(err, fstree.ErrFileNotFound) {
|
||||
lastErr = err
|
||||
c.log.Error("can't remove object from write-cache", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return lastErr
|
||||
}
|
109
pkg/local_object_storage/writecache/writecache.go
Normal file
109
pkg/local_object_storage/writecache/writecache.go
Normal file
|
@ -0,0 +1,109 @@
|
|||
package writecache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
objectSDK "github.com/nspcc-dev/neofs-api-go/pkg/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/core/object"
|
||||
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor/fstree"
|
||||
"go.uber.org/atomic"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// Cache represents write-cache for objects.
|
||||
type Cache interface {
|
||||
Get(*objectSDK.Address) (*object.Object, error)
|
||||
Delete(*objectSDK.Address) error
|
||||
Put(*object.Object) error
|
||||
|
||||
Init() error
|
||||
Open() error
|
||||
Close() error
|
||||
}
|
||||
|
||||
type cache struct {
|
||||
options
|
||||
|
||||
// mtx protects mem field, statistics and counters.
|
||||
mtx sync.RWMutex
|
||||
mem []objectInfo
|
||||
|
||||
// curMemSize is the current size of all objects cached in memory.
|
||||
curMemSize uint64
|
||||
|
||||
// flushCh is a channel with objects to flush.
|
||||
flushCh chan *object.Object
|
||||
// directCh is a channel with objects to put directly to the main storage.
|
||||
// it is prioritized over flushCh.
|
||||
directCh chan *object.Object
|
||||
// metaCh is a channel with objects for which only metadata needs to be written.
|
||||
metaCh chan *object.Object
|
||||
// closeCh is close channel.
|
||||
closeCh chan struct{}
|
||||
evictCh chan []byte
|
||||
// store contains underlying database.
|
||||
store
|
||||
// dbSize stores approximate database size. It is updated every flush/persist cycle.
|
||||
dbSize atomic.Uint64
|
||||
// fsTree contains big files stored directly on file-system.
|
||||
fsTree *fstree.FSTree
|
||||
}
|
||||
|
||||
type objectInfo struct {
|
||||
addr string
|
||||
data []byte
|
||||
obj *object.Object
|
||||
}
|
||||
|
||||
const (
|
||||
maxInMemorySizeBytes = 1024 * 1024 * 1024 // 1 GiB
|
||||
maxObjectSize = 64 * 1024 * 1024 // 64 MiB
|
||||
smallObjectSize = 32 * 1024 // 32 KiB
|
||||
)
|
||||
|
||||
var (
|
||||
defaultBucket = []byte{0}
|
||||
)
|
||||
|
||||
// New creates new writecache instance.
|
||||
func New(opts ...Option) Cache {
|
||||
c := &cache{
|
||||
flushCh: make(chan *object.Object),
|
||||
directCh: make(chan *object.Object),
|
||||
metaCh: make(chan *object.Object),
|
||||
closeCh: make(chan struct{}),
|
||||
evictCh: make(chan []byte),
|
||||
|
||||
options: options{
|
||||
log: zap.NewNop(),
|
||||
maxMemSize: maxInMemorySizeBytes,
|
||||
maxObjectSize: maxObjectSize,
|
||||
smallObjectSize: smallObjectSize,
|
||||
workersCount: flushWorkersCount,
|
||||
},
|
||||
}
|
||||
|
||||
for i := range opts {
|
||||
opts[i](&c.options)
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Open opens and initializes database.
|
||||
func (c *cache) Open() error {
|
||||
return c.openStore()
|
||||
}
|
||||
|
||||
// Init runs necessary services.
|
||||
func (c *cache) Init() error {
|
||||
go c.persistLoop()
|
||||
go c.flushLoop()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes db connection and stops services.
|
||||
func (c *cache) Close() error {
|
||||
close(c.closeCh)
|
||||
return c.db.Close()
|
||||
}
|
Loading…
Reference in a new issue