restic/src/restic/walk/walk.go

197 lines
4.2 KiB
Go
Raw Normal View History

2016-09-01 20:24:48 +00:00
package walk
2015-03-02 13:48:47 +00:00
import (
2015-12-28 23:26:29 +00:00
"fmt"
"os"
2015-03-02 13:48:47 +00:00
"path/filepath"
2016-09-01 20:24:48 +00:00
"restic"
2015-10-27 21:44:10 +00:00
"sync"
2015-03-02 13:48:47 +00:00
"restic/debug"
2015-03-02 13:48:47 +00:00
)
2016-09-01 20:24:48 +00:00
// TreeJob is a job sent from the tree walker.
type TreeJob struct {
2015-03-02 13:48:47 +00:00
Path string
Error error
2016-09-01 20:24:48 +00:00
Node *restic.Node
Tree *restic.Tree
2015-03-02 13:48:47 +00:00
}
2015-10-27 21:44:10 +00:00
// TreeWalker traverses a tree in the repository depth-first and sends a job
// for each item (file or dir) that it encounters.
type TreeWalker struct {
ch chan<- loadTreeJob
2016-09-01 20:24:48 +00:00
out chan<- TreeJob
2015-10-27 21:44:10 +00:00
}
// NewTreeWalker uses ch to load trees from the repository and sends jobs to
// out.
2016-09-01 20:24:48 +00:00
func NewTreeWalker(ch chan<- loadTreeJob, out chan<- TreeJob) *TreeWalker {
2015-10-27 21:44:10 +00:00
return &TreeWalker{ch: ch, out: out}
}
// Walk starts walking the tree given by id. When the channel done is closed,
// processing stops.
2016-09-01 20:24:48 +00:00
func (tw *TreeWalker) Walk(path string, id restic.ID, done chan struct{}) {
2015-10-27 21:44:10 +00:00
debug.Log("TreeWalker.Walk", "starting on tree %v for %v", id.Str(), path)
defer debug.Log("TreeWalker.Walk", "done walking tree %v for %v", id.Str(), path)
2015-10-27 21:44:10 +00:00
resCh := make(chan loadTreeResult, 1)
tw.ch <- loadTreeJob{
id: id,
res: resCh,
}
res := <-resCh
if res.err != nil {
2015-07-11 13:51:18 +00:00
select {
2016-09-01 20:24:48 +00:00
case tw.out <- TreeJob{Path: path, Error: res.err}:
2015-07-11 13:51:18 +00:00
case <-done:
return
}
2015-03-02 13:48:47 +00:00
return
}
2015-10-27 21:44:10 +00:00
tw.walk(path, res.tree, done)
select {
2016-09-01 20:24:48 +00:00
case tw.out <- TreeJob{Path: path, Tree: res.tree}:
2015-10-27 21:44:10 +00:00
case <-done:
return
}
}
2016-09-01 20:24:48 +00:00
func (tw *TreeWalker) walk(path string, tree *restic.Tree, done chan struct{}) {
2015-10-27 21:44:10 +00:00
debug.Log("TreeWalker.walk", "start on %q", path)
defer debug.Log("TreeWalker.walk", "done for %q", path)
2015-12-28 23:26:29 +00:00
debug.Log("TreeWalker.walk", "tree %#v", tree)
2015-10-27 21:44:10 +00:00
// load all subtrees in parallel
results := make([]<-chan loadTreeResult, len(tree.Nodes))
for i, node := range tree.Nodes {
2016-09-01 19:20:03 +00:00
if node.Type == "dir" {
2015-10-27 21:44:10 +00:00
resCh := make(chan loadTreeResult, 1)
tw.ch <- loadTreeJob{
id: *node.Subtree,
res: resCh,
}
results[i] = resCh
}
}
for i, node := range tree.Nodes {
2015-03-02 13:48:47 +00:00
p := filepath.Join(path, node.Name)
2016-09-01 20:24:48 +00:00
var job TreeJob
2015-10-27 21:44:10 +00:00
2016-09-01 19:20:03 +00:00
if node.Type == "dir" {
2015-10-27 21:44:10 +00:00
if results[i] == nil {
panic("result chan should not be nil")
}
res := <-results[i]
2015-12-28 23:26:29 +00:00
if res.err == nil {
tw.walk(p, res.tree, done)
} else {
fmt.Fprintf(os.Stderr, "error loading tree: %v\n", res.err)
}
2015-10-27 21:44:10 +00:00
2016-09-01 20:24:48 +00:00
job = TreeJob{Path: p, Tree: res.tree, Error: res.err}
2015-03-02 13:48:47 +00:00
} else {
2016-09-01 20:24:48 +00:00
job = TreeJob{Path: p, Node: node}
2015-10-27 21:44:10 +00:00
}
select {
case tw.out <- job:
case <-done:
return
}
}
}
type loadTreeResult struct {
2016-09-01 20:24:48 +00:00
tree *restic.Tree
2015-10-27 21:44:10 +00:00
err error
}
type loadTreeJob struct {
2016-09-01 20:24:48 +00:00
id restic.ID
2015-10-27 21:44:10 +00:00
res chan<- loadTreeResult
}
2016-09-01 20:24:48 +00:00
// treeLoader is a function that loads the tree with the given ID.
type treeLoader func(restic.ID) (*restic.Tree, error)
2015-10-27 21:44:10 +00:00
func loadTreeWorker(wg *sync.WaitGroup, in <-chan loadTreeJob, load treeLoader, done <-chan struct{}) {
debug.Log("loadTreeWorker", "start")
defer debug.Log("loadTreeWorker", "exit")
defer wg.Done()
for {
select {
case <-done:
debug.Log("loadTreeWorker", "done channel closed")
return
case job, ok := <-in:
if !ok {
debug.Log("loadTreeWorker", "input channel closed, exiting")
return
}
debug.Log("loadTreeWorker", "received job to load tree %v", job.id.Str())
tree, err := load(job.id)
debug.Log("loadTreeWorker", "tree %v loaded, error %v", job.id.Str(), err)
2015-07-11 13:51:18 +00:00
select {
2015-10-27 21:44:10 +00:00
case job.res <- loadTreeResult{tree, err}:
debug.Log("loadTreeWorker", "job result sent")
2015-07-11 13:51:18 +00:00
case <-done:
2015-10-27 21:44:10 +00:00
debug.Log("loadTreeWorker", "done channel closed before result could be sent")
2015-07-11 13:51:18 +00:00
return
}
2015-03-02 13:48:47 +00:00
}
}
}
// TreeLoader is implemented by anything that can load a tree object given its
// ID.
type TreeLoader interface {
	// LoadTree returns the tree stored under id.
	LoadTree(id restic.ID) (*restic.Tree, error)
}
2015-10-27 21:44:10 +00:00
// loadTreeWorkers is the number of loadTreeWorker goroutines started by Tree.
const loadTreeWorkers = 10
2016-09-01 20:24:48 +00:00
// Tree walks the tree specified by id recursively and sends a job for each
2015-03-02 13:48:47 +00:00
// file and directory it finds. When the channel done is closed, processing
// stops.
func Tree(repo TreeLoader, id restic.ID, done chan struct{}, jobCh chan<- TreeJob) {
2015-10-27 21:44:10 +00:00
debug.Log("WalkTree", "start on %v, start workers", id.Str())
2016-09-01 20:24:48 +00:00
load := func(id restic.ID) (*restic.Tree, error) {
tree, err := repo.LoadTree(id)
2015-10-27 21:44:10 +00:00
if err != nil {
return nil, err
}
return tree, nil
}
ch := make(chan loadTreeJob)
var wg sync.WaitGroup
for i := 0; i < loadTreeWorkers; i++ {
wg.Add(1)
go loadTreeWorker(&wg, ch, load, done)
}
tw := NewTreeWalker(ch, jobCh)
tw.Walk("", id, done)
2015-03-02 13:48:47 +00:00
close(jobCh)
2015-10-27 21:44:10 +00:00
close(ch)
wg.Wait()
2015-03-08 20:21:31 +00:00
debug.Log("WalkTree", "done")
2015-03-02 13:48:47 +00:00
}