forked from TrueCloudLab/restic
Cache blobs for each snapshot
This commit is contained in:
parent
b2cce7f051
commit
14298fe232
6 changed files with 98 additions and 136 deletions
112
archiver.go
112
archiver.go
|
@ -29,6 +29,7 @@ const (
|
||||||
type Archiver struct {
|
type Archiver struct {
|
||||||
s Server
|
s Server
|
||||||
m *Map
|
m *Map
|
||||||
|
c *Cache
|
||||||
|
|
||||||
blobToken chan struct{}
|
blobToken chan struct{}
|
||||||
|
|
||||||
|
@ -51,6 +52,12 @@ func NewArchiver(s Server) (*Archiver, error) {
|
||||||
// create new map to store all blobs in
|
// create new map to store all blobs in
|
||||||
arch.m = NewMap()
|
arch.m = NewMap()
|
||||||
|
|
||||||
|
// init cache
|
||||||
|
arch.c, err = NewCache()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
// abort on all errors
|
// abort on all errors
|
||||||
arch.Error = func(string, os.FileInfo, error) error { return err }
|
arch.Error = func(string, os.FileInfo, error) error { return err }
|
||||||
// allow all files
|
// allow all files
|
||||||
|
@ -59,81 +66,40 @@ func NewArchiver(s Server) (*Archiver, error) {
|
||||||
return arch, nil
|
return arch, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Preload loads all tree objects from repository and adds all blobs that are
|
// Preload loads all blobs for all cached snapshots.
|
||||||
// still available to the map for deduplication.
|
func (arch *Archiver) Preload() error {
|
||||||
func (arch *Archiver) Preload(p *Progress) error {
|
// list snapshots first
|
||||||
cache, err := NewCache()
|
snapshots, err := arch.s.List(backend.Snapshot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
p.Start()
|
// TODO: track seen tree ids, load trees that aren't in the set
|
||||||
defer p.Done()
|
for _, id := range snapshots {
|
||||||
|
// try to load snapshot blobs from cache
|
||||||
|
rd, err := arch.c.Load(backend.Snapshot, "blobs", id)
|
||||||
|
if err != nil {
|
||||||
|
debug.Log("Archiver.Preload", "blobs for snapshot %v not cached: %v", id.Str(), err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
debug.Log("Archiver.Preload", "Start loading known blobs")
|
debug.Log("Archiver.Preload", "load cached blobs for snapshot %v", id.Str())
|
||||||
|
|
||||||
// load all trees, in parallel
|
|
||||||
worker := func(wg *sync.WaitGroup, c <-chan backend.ID) {
|
|
||||||
for id := range c {
|
|
||||||
var tree *Tree
|
|
||||||
|
|
||||||
// load from cache
|
|
||||||
var t Tree
|
|
||||||
rd, err := cache.Load(backend.Tree, id)
|
|
||||||
if err == nil {
|
|
||||||
debug.Log("Archiver.Preload", "tree %v cached", id.Str())
|
|
||||||
tree = &t
|
|
||||||
dec := json.NewDecoder(rd)
|
dec := json.NewDecoder(rd)
|
||||||
err = dec.Decode(&t)
|
|
||||||
|
|
||||||
|
m := &Map{}
|
||||||
|
err = dec.Decode(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
debug.Log("Archiver.Preload", "error loading cached blobs for snapshot %v: %v", id.Str(), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
debug.Log("Archiver.Preload", "tree %v not cached: %v", id.Str(), err)
|
|
||||||
|
|
||||||
tree, err = LoadTree(arch.s, id)
|
arch.m.Merge(m)
|
||||||
// ignore error and advance to next tree
|
|
||||||
if err != nil {
|
debug.Log("Archiver.Preload", "done loading cached blobs for snapshot %v", id.Str())
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debug.Log("Archiver.Preload", "load tree %v with %d blobs", id, tree.Map.Len())
|
debug.Log("Archiver.Preload", "Loaded %v blobs from %v snapshots", arch.m.Len(), len(snapshots))
|
||||||
arch.m.Merge(tree.Map)
|
return nil
|
||||||
p.Report(Stat{Trees: 1, Blobs: uint64(tree.Map.Len())})
|
|
||||||
}
|
|
||||||
wg.Done()
|
|
||||||
}
|
|
||||||
|
|
||||||
idCh := make(chan backend.ID)
|
|
||||||
|
|
||||||
// start workers
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
for i := 0; i < maxConcurrencyPreload; i++ {
|
|
||||||
wg.Add(1)
|
|
||||||
go worker(&wg, idCh)
|
|
||||||
}
|
|
||||||
|
|
||||||
// list ids
|
|
||||||
trees := 0
|
|
||||||
err = arch.s.EachID(backend.Tree, func(id backend.ID) {
|
|
||||||
trees++
|
|
||||||
|
|
||||||
if trees%1000 == 0 {
|
|
||||||
debug.Log("Archiver.Preload", "Loaded %v trees", trees)
|
|
||||||
}
|
|
||||||
idCh <- id
|
|
||||||
})
|
|
||||||
|
|
||||||
close(idCh)
|
|
||||||
|
|
||||||
// wait for workers
|
|
||||||
wg.Wait()
|
|
||||||
|
|
||||||
debug.Log("Archiver.Preload", "Loaded %v blobs from %v trees", arch.m.Len(), trees)
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (arch *Archiver) Save(t backend.Type, id backend.ID, length uint, rd io.Reader) (Blob, error) {
|
func (arch *Archiver) Save(t backend.Type, id backend.ID, length uint, rd io.Reader) (Blob, error) {
|
||||||
|
@ -787,7 +753,7 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, pid backend.ID) (*Sn
|
||||||
|
|
||||||
// receive the top-level tree
|
// receive the top-level tree
|
||||||
root := (<-resCh).(*Node)
|
root := (<-resCh).(*Node)
|
||||||
debug.Log("Archiver.Snapshot", "root node received: %#v", root.blobs[0])
|
debug.Log("Archiver.Snapshot", "root node received: %v", root.blobs[0])
|
||||||
sn.Tree = root.blobs[0]
|
sn.Tree = root.blobs[0]
|
||||||
|
|
||||||
// save snapshot
|
// save snapshot
|
||||||
|
@ -796,6 +762,26 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, pid backend.ID) (*Sn
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
debug.Log("Archiver.Snapshot", "saved snapshot %v", blob.Storage.Str())
|
||||||
|
|
||||||
|
// cache blobs
|
||||||
|
wr, err := arch.c.Store(backend.Snapshot, "blobs", blob.Storage)
|
||||||
|
if err != nil {
|
||||||
|
debug.Log("Archiver.Snapshot", "unable to cache blobs for snapshot %v: %v", blob.Storage.Str(), err)
|
||||||
|
fmt.Fprintf(os.Stderr, "unable to cache blobs for snapshot %v: %v\n", blob.Storage.Str(), err)
|
||||||
|
return sn, blob.Storage, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
enc := json.NewEncoder(wr)
|
||||||
|
err = enc.Encode(arch.m)
|
||||||
|
if err != nil {
|
||||||
|
debug.Log("Archiver.Snapshot", "error encoding map for snapshot %v: %v", blob.Storage.Str(), err)
|
||||||
|
} else {
|
||||||
|
debug.Log("Archiver.Snapshot", "cached %d blobs for snapshot %v", arch.m.Len(), blob.Storage.Str())
|
||||||
|
}
|
||||||
|
|
||||||
|
wr.Close()
|
||||||
|
|
||||||
return sn, blob.Storage, nil
|
return sn, blob.Storage, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -146,7 +146,7 @@ func BenchmarkArchiveDirectory(b *testing.B) {
|
||||||
func snapshot(t testing.TB, server restic.Server, path string) *restic.Snapshot {
|
func snapshot(t testing.TB, server restic.Server, path string) *restic.Snapshot {
|
||||||
arch, err := restic.NewArchiver(server)
|
arch, err := restic.NewArchiver(server)
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
ok(t, arch.Preload(nil))
|
ok(t, arch.Preload())
|
||||||
sn, _, err := arch.Snapshot(nil, []string{path}, nil)
|
sn, _, err := arch.Snapshot(nil, []string{path}, nil)
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
return sn
|
return sn
|
||||||
|
@ -226,7 +226,7 @@ func BenchmarkPreload(t *testing.B) {
|
||||||
// create new archiver and preload
|
// create new archiver and preload
|
||||||
arch2, err := restic.NewArchiver(server)
|
arch2, err := restic.NewArchiver(server)
|
||||||
ok(t, err)
|
ok(t, err)
|
||||||
ok(t, arch2.Preload(nil))
|
ok(t, arch2.Preload())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
39
cache.go
39
cache.go
|
@ -9,12 +9,15 @@ import (
|
||||||
"github.com/restic/restic/backend"
|
"github.com/restic/restic/backend"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// for testing
|
||||||
|
var getCacheDir = GetCacheDir
|
||||||
|
|
||||||
type Cache struct {
|
type Cache struct {
|
||||||
base string
|
base string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCache() (*Cache, error) {
|
func NewCache() (*Cache, error) {
|
||||||
dir, err := GetCacheDir()
|
dir, err := getCacheDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -22,9 +25,9 @@ func NewCache() (*Cache, error) {
|
||||||
return &Cache{base: dir}, nil
|
return &Cache{base: dir}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cache) Has(t backend.Type, id backend.ID) (bool, error) {
|
func (c *Cache) Has(t backend.Type, subtype string, id backend.ID) (bool, error) {
|
||||||
// try to open file
|
// try to open file
|
||||||
filename, err := c.filename(t, id)
|
filename, err := c.filename(t, subtype, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
|
@ -42,31 +45,29 @@ func (c *Cache) Has(t backend.Type, id backend.ID) (bool, error) {
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cache) Store(t backend.Type, id backend.ID, rd io.Reader) error {
|
func (c *Cache) Store(t backend.Type, subtype string, id backend.ID) (io.WriteCloser, error) {
|
||||||
filename, err := c.filename(t, id)
|
filename, err := c.filename(t, subtype, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
dirname := filepath.Dir(filename)
|
dirname := filepath.Dir(filename)
|
||||||
err = os.MkdirAll(dirname, 0700)
|
err = os.MkdirAll(dirname, 0700)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
defer file.Close()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = io.Copy(file, rd)
|
return file, nil
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cache) Load(t backend.Type, id backend.ID) (io.ReadCloser, error) {
|
func (c *Cache) Load(t backend.Type, subtype string, id backend.ID) (io.ReadCloser, error) {
|
||||||
// try to open file
|
// try to open file
|
||||||
filename, err := c.filename(t, id)
|
filename, err := c.filename(t, subtype, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -75,17 +76,17 @@ func (c *Cache) Load(t backend.Type, id backend.ID) (io.ReadCloser, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct file name for given Type.
|
// Construct file name for given Type.
|
||||||
func (c *Cache) filename(t backend.Type, id backend.ID) (string, error) {
|
func (c *Cache) filename(t backend.Type, subtype string, id backend.ID) (string, error) {
|
||||||
cachedir, err := GetCacheDir()
|
filename := id.String()
|
||||||
if err != nil {
|
if subtype != "" {
|
||||||
return "", err
|
filename += "." + subtype
|
||||||
}
|
}
|
||||||
|
|
||||||
switch t {
|
switch t {
|
||||||
case backend.Snapshot:
|
case backend.Snapshot:
|
||||||
return filepath.Join(cachedir, "snapshots", id.String()), nil
|
return filepath.Join(c.base, "snapshots", filename), nil
|
||||||
case backend.Tree:
|
case backend.Tree:
|
||||||
return filepath.Join(cachedir, "trees", id.String()), nil
|
return filepath.Join(c.base, "trees", filename), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return "", fmt.Errorf("cache not supported for type %v", t)
|
return "", fmt.Errorf("cache not supported for type %v", t)
|
||||||
|
|
|
@ -91,44 +91,6 @@ func newScanProgress() *restic.Progress {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
func newLoadBlobsProgress(s restic.Server) (*restic.Progress, error) {
|
|
||||||
if !terminal.IsTerminal(int(os.Stdout.Fd())) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
trees, err := s.Count(backend.Tree)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
eta := uint64(0)
|
|
||||||
tps := uint64(0) // trees per second
|
|
||||||
|
|
||||||
p := restic.NewProgress(time.Second)
|
|
||||||
p.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) {
|
|
||||||
sec := uint64(d / time.Second)
|
|
||||||
if trees > 0 && sec > 0 && ticker {
|
|
||||||
tps = uint64(s.Trees) / sec
|
|
||||||
if tps > 0 {
|
|
||||||
eta = (uint64(trees) - s.Trees) / tps
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("\x1b[2K\r[%s] %3.2f%% %d trees/s %d / %d trees, %d blobs ETA %s",
|
|
||||||
format_duration(d),
|
|
||||||
float64(s.Trees)/float64(trees)*100,
|
|
||||||
tps,
|
|
||||||
s.Trees, trees,
|
|
||||||
s.Blobs,
|
|
||||||
format_seconds(eta))
|
|
||||||
}
|
|
||||||
p.OnDone = func(s restic.Stat, d time.Duration, ticker bool) {
|
|
||||||
fmt.Printf("\nDone in %s\n", format_duration(d))
|
|
||||||
}
|
|
||||||
|
|
||||||
return p, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newArchiveProgress(todo restic.Stat) *restic.Progress {
|
func newArchiveProgress(todo restic.Stat) *restic.Progress {
|
||||||
if !terminal.IsTerminal(int(os.Stdout.Fd())) {
|
if !terminal.IsTerminal(int(os.Stdout.Fd())) {
|
||||||
return nil
|
return nil
|
||||||
|
@ -218,12 +180,7 @@ func (cmd CmdBackup) Execute(args []string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("loading blobs\n")
|
fmt.Printf("loading blobs\n")
|
||||||
pb, err := newLoadBlobsProgress(s)
|
err = arch.Preload()
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
err = arch.Preload(pb)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/restic/restic"
|
"github.com/restic/restic"
|
||||||
|
@ -49,7 +50,7 @@ func (cmd CmdCache) Execute(args []string) error {
|
||||||
treeCh := make(chan backend.ID)
|
treeCh := make(chan backend.ID)
|
||||||
worker := func(wg *sync.WaitGroup, ch chan backend.ID) {
|
worker := func(wg *sync.WaitGroup, ch chan backend.ID) {
|
||||||
for treeID := range ch {
|
for treeID := range ch {
|
||||||
cached, err := cache.Has(backend.Tree, treeID)
|
cached, err := cache.Has(backend.Tree, "", treeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("tree %v cache error: %v\n", treeID.Str(), err)
|
fmt.Printf("tree %v cache error: %v\n", treeID.Str(), err)
|
||||||
continue
|
continue
|
||||||
|
@ -72,12 +73,18 @@ func (cmd CmdCache) Execute(args []string) error {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err = cache.Store(backend.Tree, treeID, decRd)
|
wr, err := cache.Store(backend.Tree, "", treeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf(" store error: %v\n", err)
|
fmt.Printf(" store error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_, err = io.Copy(wr, decRd)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf(" Copy error: %v\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
err = decRd.Close()
|
err = decRd.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf(" close error: %v\n", err)
|
fmt.Printf(" close error: %v\n", err)
|
||||||
|
|
11
map.go
11
map.go
|
@ -140,6 +140,17 @@ func (bl *Map) Equals(other *Map) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each calls f for each blob in the Map. While Each is running, no other
|
||||||
|
// operation is possible, since a mutex is held for the whole time.
|
||||||
|
func (bl *Map) Each(f func(blob Blob)) {
|
||||||
|
bl.m.Lock()
|
||||||
|
defer bl.m.Unlock()
|
||||||
|
|
||||||
|
for _, blob := range bl.list {
|
||||||
|
f(blob)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Select returns a list of of blobs from the plaintext IDs given in list.
|
// Select returns a list of of blobs from the plaintext IDs given in list.
|
||||||
func (bl *Map) Select(list backend.IDs) (Blobs, error) {
|
func (bl *Map) Select(list backend.IDs) (Blobs, error) {
|
||||||
bl.m.Lock()
|
bl.m.Lock()
|
||||||
|
|
Loading…
Reference in a new issue