Some checks failed
DCO action / DCO (pull_request) Successful in 3m8s
Vulncheck / Vulncheck (pull_request) Successful in 3m16s
Build / Build Components (1.20) (pull_request) Successful in 4m13s
Build / Build Components (1.21) (pull_request) Successful in 4m16s
Tests and linters / Staticcheck (pull_request) Successful in 5m11s
Tests and linters / Lint (pull_request) Successful in 5m58s
Tests and linters / Tests with -race (pull_request) Failing after 6m3s
Tests and linters / Tests (1.20) (pull_request) Successful in 7m29s
Tests and linters / Tests (1.21) (pull_request) Successful in 7m38s
Signed-off-by: Alejandro Lopez <a.lopez@yadro.com>
84 lines
8.1 KiB
Go
84 lines
8.1 KiB
Go
package writecachebitcask
|
|
|
|
import (
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/writecache"
|
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
/*
The cache operates as a hash table where the key space is split in regions, each with its own lock (a sync.Mutex).
Each region maintains a key directory and appends updates (PUTs and DELETEs) to a log file. A key directory
entry stores the address, the log file index and the offset within the log file. This structure is similar to
a bitcask (see https://en.wikipedia.org/wiki/Bitcask and https://riak.com/assets/bitcask-intro.pdf).

Since the file writes use O_SYNC, the updates are batched in a memory buffer first, so that fewer physical
writes are incurred under heavy load. After a log file reaches maximum capacity, it's closed and a new one is
started. The completed log files are pushed to the flushing process, which processes them one by one and deletes
them after updating the underlying storage. The flushing process doesn't poll periodically for new work; instead,
there's a buffered channel to which the log file indices are pushed. If this channel fills up, it will block the
sending process (a request) until there's room to continue, providing backpressure to the client when the
underlying storage is not able to keep up.

The lower bytes of the object addresses are used as region and bucket hashes, since they are already hashed values
by construction.

                                                    │               │
               Regions                              │   In-Memory   │   On-Disk
              ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
         ┌─   ┌─                     │               │   ┌────────────────────────────────────────┐  ┌───────────┐  ┌───────────┐
         │    │  Bucket 1            │    KeyDir     │   │ LogFile 0                              │  │ LogFile 1 │  │ LogFile 2 │
         │    │  Region 1 │ Bucket 2 │ ┌───────────┐ │   │┌──────────────┐ ┌──────────────┐       │  │           │  │           │
         │    │  (...)               │ │ MemBuffer │ │   ││Addr Size Data│ │Addr Size Data│ (...) │  │   (...)   │  │   (...)   │  (...)
         │    │  (Mutex) │ Bucket K  │ └───────────┘ │   │└──────────────┘ └──────────────┘       │  │           │  │           │
         │    └─                     │               │   └────────────────────────────────────────┘  └───────────┘  └───────────┘
         │    ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
         │    ┌─                     │               │   ┌────────────────────────────────────────┐  ┌───────────┐  ┌───────────┐
         │    │  Bucket K+1          │    KeyDir     │   │ LogFile 0                              │  │ LogFile 1 │  │ LogFile 2 │
         │    │  Region 2 │ Bucket K+2│ ┌───────────┐ │   │┌──────────────┐ ┌──────────────┐       │  │           │  │           │
         │    │  (...)               │ │ MemBuffer │ │   ││Addr Size Data│ │Addr Size Data│ (...) │  │   (...)   │  │   (...)   │  (...)
  Key    │    │  Bucket 2K           │ └───────────┘ │   │└──────────────┘ └──────────────┘       │  │           │  │           │
  Space  │    └─                     │               │   └────────────────────────────────────────┘  └───────────┘  └───────────┘
         │    ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
         │          (...)            │     (...)     │                                      (...)
         │    ───────────────────────┼───────────────┼─────────────────────────────────────────────────────────────────────────────────
         │    ┌─                     │               │   ┌────────────────────────────────────────┐  ┌───────────┐  ┌───────────┐
         │    │  Bucket (N-1)*K+1    │    KeyDir     │   │ LogFile 0                              │  │ LogFile 1 │  │ LogFile 2 │
         │    │  Region N │ Bucket (N-1)*K+2 │ ┌───────────┐ │ │┌──────────────┐ ┌──────────────┐ │  │           │  │           │
         │    │  (...)               │ │ MemBuffer │ │   ││Addr Size Data│ │Addr Size Data│ (...) │  │   (...)   │  │   (...)   │  (...)
         │    │  Bucket N*K          │ └───────────┘ │   │└──────────────┘ └──────────────┘       │  │           │  │           │
         └─   └─                     │               │   └────────────────────────────────────────┘  └───────────┘  └───────────┘
              ───────────────────────┴───────────────┴─────────────────────────────────────────────────────────────────────────────────
*/
|
|
|
|
// cache is a bitcask-style write-cache: the key space is partitioned into
// regions, each guarded by its own lock (see the package comment above for
// the full layout).
type cache struct {
	options

	// mode holds the current write-cache mode as an atomic word so it can
	// be read without taking region locks. NOTE(review): presumably this
	// stores a writecache.Mode value — confirm against the setter.
	mode   atomic.Uint32
	// closed is flipped once the cache is shut down; checked atomically
	// by concurrent operations.
	closed atomic.Bool

	// regions holds one shard per region; an address's lower hash bits
	// select the region (see package comment).
	regions []*region
}
|
|
|
|
func New(opts ...Option) writecache.Cache {
|
|
c := &cache{
|
|
options: options{
|
|
log: &logger.Logger{Logger: zap.NewNop()},
|
|
metrics: writecache.DefaultMetrics(),
|
|
|
|
maxObjectSize: 128 << 10,
|
|
bucketCount: 1 << 16,
|
|
regionCount: 1 << 2,
|
|
maxLogSize: 64 << 20,
|
|
maxBatchDelay: 1 * time.Millisecond,
|
|
maxPendingLogFileFlush: 4,
|
|
},
|
|
}
|
|
for i := range opts {
|
|
opts[i](&c.options)
|
|
}
|
|
return c
|
|
}
|