frostfs-node/pkg/local_object_storage/writecache/writecachebitcask/writecachebitcask.go
Alejandro Lopez 42e74d6aab [#610] Add bitcask-inspired writecache implementation
Signed-off-by: Alejandro Lopez <a.lopez@yadro.com>
2023-08-31 14:17:10 +03:00

84 lines
8.1 KiB
Go

package writecachebitcask
import (
"sync/atomic"
"time"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/writecache"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
"go.uber.org/zap"
)
/*
The cache operates as a hash table where the key space is split into regions, each with its own lock (a sync.Mutex).
Each region maintains a key directory and appends updates (PUTs and DELETEs) to a log file. A key directory
entry stores the address, the log file index and the offset within the log file. This structure is similar to
a bitcask (see https://en.wikipedia.org/wiki/Bitcask and https://riak.com/assets/bitcask-intro.pdf).
Since the file writes use O_SYNC, the updates are batched in a memory buffer first, so that fewer writes are issued
under heavy load. After a log file reaches maximum capacity, it's closed and a new one is started. The completed log
files are pushed to the flushing process which processes them one by one and deletes them after updating the underlying
storage. The flushing process doesn't poll periodically for new work; instead, there's a buffered channel to which the
log file indices are pushed. If this channel fills, it will block the sending process (a request) until there's room
to continue, providing backpressure to the client when the underlying storage is not able to keep up.
The lower bytes of the object addresses are used as region and bucket hashes, since they are already hashed values
by construction.
│ │
Regions │ In-Memory │ In-Disk
───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
┌─ ┌─ │ │ ┌────────────────────────────────────────┐ ┌───────────┐ ┌───────────┐
│ │ Bucket 1 │ KeyDir │ │ LogFile 0 │ │ LogFile 1 │ │ LogFile 2 │
│ Region 1 │ Bucket 2 │ ┌───────────┐ │ │┌──────────────┐ ┌──────────────┐ │ │ │ │ │
│ │ (...) │ │ MemBuffer │ │ ││Addr Size Data│ │Addr Size Data│ (...) │ │ (...) │ │ (...) │ (...)
│ (Mutex) │ Bucket K │ └───────────┘ │ │└──────────────┘ └──────────────┘ │ │ │ │ │
│ └─ │ │ └────────────────────────────────────────┘ └───────────┘ └───────────┘
│ ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
│ ┌─ │ │ ┌────────────────────────────────────────┐ ┌───────────┐ ┌───────────┐
│ │ Bucket K+1 │ KeyDir │ │ LogFile 0 │ │ LogFile 1 │ │ LogFile 2 │
│ Region 2 │ Bucket K+2 │ ┌───────────┐ │ │┌──────────────┐ ┌──────────────┐ │ │ │ │ │
│ │ (...) │ │ MemBuffer │ │ ││Addr Size Data│ │Addr Size Data│ (...) │ │ (...) │ │ (...) │ (...)
Key │ │ Bucket 2K │ └───────────┘ │ │└──────────────┘ └──────────────┘ │ │ │ │ │
Space │ └─ │ │ └────────────────────────────────────────┘ └───────────┘ └───────────┘
│ ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
│ (...) │ (...) │ (...)
│ ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
│ ┌─ │ │ ┌────────────────────────────────────────┐ ┌───────────┐ ┌───────────┐
│ │ Bucket (N-1)*K+1 │ KeyDir │ │ LogFile 0 │ │ LogFile 1 │ │ LogFile 2 │
│ Region N │ Bucket (N-1)*K+2 │ ┌───────────┐ │ │┌──────────────┐ ┌──────────────┐ │ │ │ │ │
│ │ (...) │ │ MemBuffer │ │ ││Addr Size Data│ │Addr Size Data│ (...) │ │ (...) │ │ (...) │ (...)
│ │ Bucket N*K │ └───────────┘ │ │└──────────────┘ └──────────────┘ │ │ │ │ │
└─ └─ │ │ └────────────────────────────────────────┘ └───────────┘ └───────────┘
───────────────────────┴───────────────┴─────────────────────────────────────────────────────────────────────────────────
*/
// cache is the bitcask-inspired write-cache state. It embeds the configuration
// (options) and keeps the per-region state described in the design comment
// in this file.
type cache struct {
	// options is embedded, so configuration fields are reachable directly on cache.
	options

	// mode is accessed atomically.
	// NOTE(review): presumably holds the current write-cache operating mode — confirm against its setter.
	mode atomic.Uint32

	// closed is accessed atomically.
	// NOTE(review): presumably flipped once the cache has been closed — confirm.
	closed atomic.Bool

	// regions holds one pointer per key-space region.
	regions []*region
}
// New builds a write-cache instance and returns it as a writecache.Cache.
//
// The cache starts from the built-in defaults below and then applies every
// supplied Option, in the order given, to the cache's options. Only the
// configuration is set up here: the regions slice is left at its zero value.
// NOTE(review): regions are presumably created during a later init/open step — confirm.
func New(opts ...Option) writecache.Cache {
	c := new(cache)

	// Defaults: no-op logger, default metrics, 128<<10 max object size,
	// 1<<16 buckets, 1<<2 regions, 64<<20 max log size, 1ms max batch delay
	// and up to 4 pending log-file flushes.
	// NOTE(review): the size values are presumably byte counts — confirm.
	c.options = options{
		log:                    &logger.Logger{Logger: zap.NewNop()},
		metrics:                writecache.DefaultMetrics(),
		maxObjectSize:          128 << 10,
		bucketCount:            1 << 16,
		regionCount:            1 << 2,
		maxLogSize:             64 << 20,
		maxBatchDelay:          1 * time.Millisecond,
		maxPendingLogFileFlush: 4,
	}

	// Apply caller overrides on top of the defaults, preserving argument order.
	for _, apply := range opts {
		apply(&c.options)
	}

	return c
}