2014-12-05 20:45:49 +00:00
|
|
|
package restic
|
2014-09-23 20:39:12 +00:00
|
|
|
|
|
|
|
import (
|
2015-01-10 22:40:10 +00:00
|
|
|
"encoding/json"
|
2014-11-30 21:49:14 +00:00
|
|
|
"fmt"
|
2014-11-17 22:28:51 +00:00
|
|
|
"io"
|
2014-09-23 20:39:12 +00:00
|
|
|
"os"
|
2015-02-21 13:23:49 +00:00
|
|
|
"path/filepath"
|
2015-03-02 13:48:47 +00:00
|
|
|
"sort"
|
2014-11-16 21:50:20 +00:00
|
|
|
"sync"
|
2014-09-23 20:39:12 +00:00
|
|
|
|
2014-12-05 20:45:49 +00:00
|
|
|
"github.com/restic/restic/backend"
|
2015-01-14 21:08:48 +00:00
|
|
|
"github.com/restic/restic/debug"
|
2015-04-26 15:44:38 +00:00
|
|
|
"github.com/restic/restic/pack"
|
2015-02-15 13:44:54 +00:00
|
|
|
"github.com/restic/restic/pipe"
|
2015-04-26 12:46:15 +00:00
|
|
|
"github.com/restic/restic/server"
|
2015-05-01 23:29:54 +00:00
|
|
|
|
|
|
|
"github.com/juju/errors"
|
2014-09-23 20:39:12 +00:00
|
|
|
)
|
|
|
|
|
2014-11-16 21:50:20 +00:00
|
|
|
const (
	// maxConcurrentBlobs bounds the number of blob-save goroutines started
	// by SaveFile; tokens are handed out via Archiver.blobToken.
	maxConcurrentBlobs = 32

	// maxConcurrency is the number of fileWorker/dirWorker pairs started by
	// Snapshot.
	maxConcurrency = 10

	// maxConcurrencyPreload is not referenced in this part of the file —
	// presumably bounds concurrent preload operations elsewhere in the
	// package; verify before changing.
	maxConcurrencyPreload = 20
)
|
|
|
|
|
2015-04-30 01:41:51 +00:00
|
|
|
// archiverAbortOnAllErrors is the default Archiver.Error callback: it returns
// every error unchanged, which aborts the operation.
var archiverAbortOnAllErrors = func(str string, fi os.FileInfo, err error) error { return err }

// archiverAllowAllFiles is the default Archiver.Filter: it accepts every item.
var archiverAllowAllFiles = func(string, os.FileInfo) bool { return true }
|
|
|
|
|
2014-09-23 20:39:12 +00:00
|
|
|
// Archiver saves a directory tree as a snapshot in a server-backed
// repository.
type Archiver struct {
	s *server.Server

	// blobToken is a counting semaphore: SaveFile takes a token before
	// starting each blob-save goroutine, limiting concurrency to
	// maxConcurrentBlobs.
	blobToken chan struct{}

	// Error is called when an error occurs while archiving an item;
	// returning a non-nil error aborts the operation.
	Error func(dir string, fi os.FileInfo, err error) error

	// Filter decides whether an item is included in the snapshot.
	Filter func(item string, fi os.FileInfo) bool
}
|
|
|
|
|
2015-04-30 01:41:51 +00:00
|
|
|
func NewArchiver(s *server.Server) *Archiver {
|
2014-11-16 21:50:20 +00:00
|
|
|
arch := &Archiver{
|
2014-12-21 16:02:49 +00:00
|
|
|
s: s,
|
2014-11-22 21:05:39 +00:00
|
|
|
blobToken: make(chan struct{}, maxConcurrentBlobs),
|
2014-11-16 21:50:20 +00:00
|
|
|
}
|
|
|
|
|
2014-11-22 21:05:39 +00:00
|
|
|
for i := 0; i < maxConcurrentBlobs; i++ {
|
|
|
|
arch.blobToken <- struct{}{}
|
|
|
|
}
|
|
|
|
|
2015-04-30 01:41:51 +00:00
|
|
|
arch.Error = archiverAbortOnAllErrors
|
|
|
|
arch.Filter = archiverAllowAllFiles
|
2014-09-23 20:39:12 +00:00
|
|
|
|
2015-04-30 01:41:51 +00:00
|
|
|
return arch
|
2015-01-10 22:40:10 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
func (arch *Archiver) Save(t pack.BlobType, id backend.ID, length uint, rd io.Reader) error {
|
2015-01-14 21:08:48 +00:00
|
|
|
debug.Log("Archiver.Save", "Save(%v, %v)\n", t, id.Str())
|
2015-01-10 22:40:10 +00:00
|
|
|
|
|
|
|
// test if this blob is already known
|
2015-04-26 15:44:38 +00:00
|
|
|
if arch.s.Index().Has(id) {
|
|
|
|
debug.Log("Archiver.Save", "(%v, %v) already saved\n", t, id.Str())
|
|
|
|
return nil
|
2015-01-05 20:40:43 +00:00
|
|
|
}
|
2014-09-23 20:39:12 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
// otherwise save blob
|
|
|
|
err := arch.s.SaveFrom(t, id, length, rd)
|
|
|
|
if err != nil {
|
|
|
|
debug.Log("Archiver.Save", "Save(%v, %v): error %v\n", t, id.Str(), err)
|
|
|
|
return err
|
2014-09-23 20:39:12 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
debug.Log("Archiver.Save", "Save(%v, %v): new blob\n", t, id.Str())
|
|
|
|
return nil
|
2014-09-23 20:39:12 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
// SaveTreeJSON marshals item to JSON and stores it as a tree blob, unless a
// blob with the same content hash already exists in the index. It returns
// the content ID of the (possibly pre-existing) blob.
func (arch *Archiver) SaveTreeJSON(item interface{}) (backend.ID, error) {
	data, err := json.Marshal(item)
	if err != nil {
		return nil, err
	}
	// trailing newline is included in the hash below; this presumably matches
	// the serialization produced by s.SaveJSON — TODO confirm, otherwise the
	// duplicate check would never match.
	data = append(data, '\n')

	// check if tree has been saved before
	id := backend.Hash(data)
	if arch.s.Index().Has(id) {
		return id, nil
	}

	// note: SaveJSON re-marshals item rather than reusing data
	return arch.s.SaveJSON(pack.Tree, item)
}
|
|
|
|
|
2014-11-17 22:28:51 +00:00
|
|
|
// SaveFile stores the content of the file on the backend as a Blob by calling
|
|
|
|
// Save for each chunk.
|
2015-04-26 15:44:38 +00:00
|
|
|
func (arch *Archiver) SaveFile(p *Progress, node *Node) error {
|
2015-04-25 00:36:54 +00:00
|
|
|
file, err := node.OpenForReading()
|
2014-11-17 22:28:51 +00:00
|
|
|
defer file.Close()
|
2014-09-23 20:39:12 +00:00
|
|
|
if err != nil {
|
2015-04-26 15:44:38 +00:00
|
|
|
return err
|
2015-01-04 21:39:30 +00:00
|
|
|
}
|
|
|
|
|
2015-04-30 01:41:51 +00:00
|
|
|
// check file again, since it could have disappeared by now
|
2015-01-04 21:39:30 +00:00
|
|
|
fi, err := file.Stat()
|
|
|
|
if err != nil {
|
2015-04-26 15:44:38 +00:00
|
|
|
return err
|
2015-01-04 21:39:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if fi.ModTime() != node.ModTime {
|
2015-01-10 22:40:10 +00:00
|
|
|
e2 := arch.Error(node.path, fi, errors.New("file was updated, using new version"))
|
2015-01-04 21:39:30 +00:00
|
|
|
|
|
|
|
if e2 == nil {
|
|
|
|
n, err := NodeFromFileInfo(node.path, fi)
|
|
|
|
if err != nil {
|
2015-03-21 13:43:33 +00:00
|
|
|
debug.Log("Archiver.SaveFile", "NodeFromFileInfo returned error for %v: %v", node.path, err)
|
2015-04-26 15:44:38 +00:00
|
|
|
return err
|
2015-01-04 21:39:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
*node = *n
|
|
|
|
}
|
2014-11-17 22:28:51 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
type result struct {
|
|
|
|
id backend.ID
|
|
|
|
bytes uint64
|
|
|
|
}
|
2014-11-17 22:28:51 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
// store all chunks
|
2015-02-09 22:38:50 +00:00
|
|
|
chnker := GetChunker("archiver.SaveFile")
|
2015-04-05 22:22:19 +00:00
|
|
|
chnker.Reset(file, arch.s.ChunkerPolynomial())
|
2015-04-26 15:44:38 +00:00
|
|
|
chans := [](<-chan result){}
|
2015-02-09 22:38:50 +00:00
|
|
|
defer FreeChunker("archiver.SaveFile", chnker)
|
2014-11-23 15:48:00 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
chunks := 0
|
2014-11-17 22:28:51 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
for {
|
|
|
|
chunk, err := chnker.Next()
|
2014-11-30 21:16:34 +00:00
|
|
|
if err == io.EOF {
|
2015-02-08 21:54:45 +00:00
|
|
|
break
|
2014-11-17 22:28:51 +00:00
|
|
|
}
|
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
if err != nil {
|
2015-05-01 23:29:54 +00:00
|
|
|
return errors.Annotate(err, "Chunker.Next()")
|
2015-02-08 21:54:45 +00:00
|
|
|
}
|
2014-11-17 22:28:51 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
chunks++
|
2014-11-30 21:49:14 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
// acquire token, start goroutine to save chunk
|
|
|
|
token := <-arch.blobToken
|
2015-04-26 15:44:38 +00:00
|
|
|
resCh := make(chan result, 1)
|
2014-11-22 21:05:39 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
go func(ch chan<- result) {
|
|
|
|
err := arch.Save(pack.Data, chunk.Digest, chunk.Length, chunk.Reader(file))
|
2015-02-08 21:54:45 +00:00
|
|
|
// TODO handle error
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2014-11-17 22:28:51 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
p.Report(Stat{Bytes: uint64(chunk.Length)})
|
2015-02-08 21:54:45 +00:00
|
|
|
arch.blobToken <- token
|
2015-04-26 15:44:38 +00:00
|
|
|
ch <- result{id: backend.ID(chunk.Digest), bytes: uint64(chunk.Length)}
|
2015-02-08 21:54:45 +00:00
|
|
|
}(resCh)
|
2014-11-22 21:05:39 +00:00
|
|
|
|
2015-02-08 21:54:45 +00:00
|
|
|
chans = append(chans, resCh)
|
|
|
|
}
|
2014-11-17 22:28:51 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
results := []result{}
|
2015-02-08 21:54:45 +00:00
|
|
|
for _, ch := range chans {
|
2015-04-26 15:44:38 +00:00
|
|
|
results = append(results, <-ch)
|
2015-02-08 21:54:45 +00:00
|
|
|
}
|
2014-11-30 21:49:14 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
if len(results) != chunks {
|
|
|
|
return fmt.Errorf("chunker returned %v chunks, but only %v blobs saved", chunks, len(results))
|
2014-09-23 20:39:12 +00:00
|
|
|
}
|
|
|
|
|
2014-12-07 12:30:16 +00:00
|
|
|
var bytes uint64
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
node.Content = make([]backend.ID, len(results))
|
2015-02-08 21:54:45 +00:00
|
|
|
debug.Log("Archiver.Save", "checking size for file %s", node.path)
|
2015-04-26 15:44:38 +00:00
|
|
|
for i, b := range results {
|
|
|
|
node.Content[i] = b.id
|
|
|
|
bytes += b.bytes
|
2015-02-08 21:54:45 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
debug.Log("Archiver.Save", " adding blob %s, %d bytes", b.id.Str(), b.bytes)
|
2014-12-07 12:30:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if bytes != node.Size {
|
2015-04-26 15:44:38 +00:00
|
|
|
return fmt.Errorf("errors saving node %q: saved %d bytes, wanted %d bytes", node.path, bytes, node.Size)
|
2014-09-23 20:39:12 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
debug.Log("Archiver.SaveFile", "SaveFile(%q): %v blobs\n", node.path, len(results))
|
2015-01-10 22:40:10 +00:00
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
return nil
|
2014-09-23 20:39:12 +00:00
|
|
|
}
|
|
|
|
|
2015-04-26 15:44:38 +00:00
|
|
|
// saveTree stores the given tree and all of its subtrees in the repository
// and returns the tree's content ID. Subtrees are saved recursively
// (serially); files whose content must be (re)read are saved concurrently
// via SaveFile, one goroutine per file.
func (arch *Archiver) saveTree(p *Progress, t *Tree) (backend.ID, error) {
	debug.Log("Archiver.saveTree", "saveTree(%v)\n", t)
	var wg sync.WaitGroup

	// TODO: do all this in parallel
	for _, node := range t.Nodes {
		if node.tree != nil {
			id, err := arch.saveTree(p, node.tree)
			if err != nil {
				return nil, err
			}
			node.Subtree = id
			p.Report(Stat{Dirs: 1})
		} else if node.Type == "file" {
			if len(node.Content) > 0 {
				removeContent := false

				// check content: every referenced blob must still resolve in
				// the index and its pack must still exist in the repository
				for _, id := range node.Content {
					packID, _, _, _, err := arch.s.Index().Lookup(id)
					if err != nil {
						debug.Log("Archiver.saveTree", "unable to find storage id for data blob %v: %v", id.Str(), err)
						arch.Error(node.path, nil, fmt.Errorf("unable to find storage id for data blob %v: %v", id.Str(), err))
						removeContent = true
						continue
					}

					if ok, err := arch.s.Test(backend.Data, packID.String()); !ok || err != nil {
						debug.Log("Archiver.saveTree", "pack %v of blob %v not in repository (error is %v)", packID, id, err)
						arch.Error(node.path, nil, fmt.Errorf("pack %v of blob %v not in repository (error is %v)", packID, id, err))
						removeContent = true
					}
				}

				// drop stale content so the file is re-read below
				if removeContent {
					debug.Log("Archiver.saveTree", "removing content for %s", node.path)
					node.Content = node.Content[:0]
				}
			}

			if len(node.Content) == 0 {
				// start goroutine to read and save the file's content
				wg.Add(1)
				go func(n *Node) {
					defer wg.Done()

					n.err = arch.SaveFile(p, n)
					p.Report(Stat{Files: 1})
				}(node)
			}
		}
	}

	wg.Wait()

	usedIDs := backend.NewIDSet()

	// check for invalid file nodes
	for _, node := range t.Nodes {
		if node.Type == "file" && node.Content == nil && node.err == nil {
			return nil, fmt.Errorf("node %v has empty content", node.Name)
		}

		// remember used hashes
		// NOTE(review): usedIDs is populated but not consumed in this
		// function — presumably intended for reference tracking; verify.
		if node.Type == "file" && node.Content != nil {
			for _, id := range node.Content {
				usedIDs.Insert(id)
			}
		}

		if node.Type == "dir" && node.Subtree != nil {
			usedIDs.Insert(node.Subtree)
		}

		if node.err != nil {
			err := arch.Error(node.path, nil, node.err)
			if err != nil {
				return nil, err
			}

			// save error message in node
			node.Error = node.err.Error()
		}
	}

	id, err := arch.SaveTreeJSON(t)
	if err != nil {
		return nil, err
	}

	return id, nil
}
|
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
// fileWorker consumes file entries from entCh until the channel is closed or
// done is signalled. For each entry it builds a Node; if old node data is
// attached and all of its blobs are still present in the repository, the old
// content is reused, otherwise the file is read and saved. The resulting node
// (or nil on error) is sent to the entry's result channel.
func (arch *Archiver) fileWorker(wg *sync.WaitGroup, p *Progress, done <-chan struct{}, entCh <-chan pipe.Entry) {
	defer func() {
		debug.Log("Archiver.fileWorker", "done")
		wg.Done()
	}()
	for {
		select {
		case e, ok := <-entCh:
			if !ok {
				// channel is closed
				return
			}

			debug.Log("Archiver.fileWorker", "got job %v", e)

			// check for errors
			if e.Error() != nil {
				debug.Log("Archiver.fileWorker", "job %v has errors: %v", e.Path(), e.Error())
				// TODO: integrate error reporting
				fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
				// ignore this file
				e.Result() <- nil
				p.Report(Stat{Errors: 1})
				continue
			}

			node, err := NodeFromFileInfo(e.Fullpath(), e.Info())
			if err != nil {
				// TODO: integrate error reporting
				// NOTE(review): node may be nil when err != nil; node.path in
				// the log below could then panic — verify NodeFromFileInfo.
				debug.Log("Archiver.fileWorker", "NodeFromFileInfo returned error for %v: %v", node.path, err)
				e.Result() <- nil
				p.Report(Stat{Errors: 1})
				continue
			}

			// try to use old node, if present
			if e.Node != nil {
				debug.Log("Archiver.fileWorker", " %v use old data", e.Path())

				oldNode := e.Node.(*Node)
				// check if all content is still available in the repository
				contentMissing := false
				for _, blob := range oldNode.blobs {
					if ok, err := arch.s.Test(backend.Data, blob.Storage.String()); !ok || err != nil {
						debug.Log("Archiver.fileWorker", " %v not using old data, %v (%v) is missing", e.Path(), blob.ID.Str(), blob.Storage.Str())
						contentMissing = true
						break
					}
				}

				if !contentMissing {
					// reuse the old node's content and blob list unchanged
					node.Content = oldNode.Content
					node.blobs = oldNode.blobs
					debug.Log("Archiver.fileWorker", " %v content is complete", e.Path())
				}
			} else {
				debug.Log("Archiver.fileWorker", " %v no old data", e.Path())
			}

			// otherwise read file normally
			if node.Type == "file" && len(node.Content) == 0 {
				debug.Log("Archiver.fileWorker", " read and save %v, content: %v", e.Path(), node.Content)
				err = arch.SaveFile(p, node)
				if err != nil {
					// TODO: integrate error reporting
					fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.path, err)
					// ignore this file
					e.Result() <- nil
					p.Report(Stat{Errors: 1})
					continue
				}
			} else {
				// report old data size
				p.Report(Stat{Bytes: node.Size})
			}

			debug.Log("Archiver.fileWorker", " processed %v, %d/%d blobs", e.Path(), len(node.Content), len(node.blobs))
			e.Result() <- node
			p.Report(Stat{Files: 1})

		case <-done:
			// pipeline was cancelled
			return
		}
	}
}
|
2014-12-21 16:20:49 +00:00
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
// dirWorker consumes directory jobs from dirCh until the channel is closed
// or done is signalled. It gathers the nodes produced for all of the
// directory's entries, saves the assembled tree, and sends the directory's
// node (with Subtree set to the saved tree's ID) to the job's result channel.
func (arch *Archiver) dirWorker(wg *sync.WaitGroup, p *Progress, done <-chan struct{}, dirCh <-chan pipe.Dir) {
	defer func() {
		debug.Log("Archiver.dirWorker", "done")
		wg.Done()
	}()
	for {
		select {
		case dir, ok := <-dirCh:
			if !ok {
				// channel is closed
				return
			}
			debug.Log("Archiver.dirWorker", "save dir %v\n", dir.Path())

			tree := NewTree()

			// wait for all content
			for _, ch := range dir.Entries {
				res := <-ch

				// if we get a nil pointer here, an error has happened while
				// processing this entry. Ignore it for now.
				if res == nil {
					continue
				}

				// else insert node
				node := res.(*Node)
				tree.Insert(node)

				if node.Type == "dir" {
					debug.Log("Archiver.dirWorker", "got tree node for %s: %v", node.path, node.blobs)
				}
			}

			var (
				node *Node
				err  error
			)
			if dir.Path() == "" {
				// if this is the top-level dir, only create a stub node
				node = &Node{}
			} else {
				// else create node from path and fi
				node, err = NodeFromFileInfo(dir.Path(), dir.Info())
				if err != nil {
					// NOTE(review): assumes NodeFromFileInfo returns a
					// non-nil node even on error — verify, otherwise this
					// assignment panics.
					node.Error = err.Error()
					dir.Result() <- node
					continue
				}
			}

			id, err := arch.SaveTreeJSON(tree)
			if err != nil {
				// a failed tree save currently aborts the whole process
				panic(err)
			}
			debug.Log("Archiver.dirWorker", "save tree for %s: %v", dir.Path(), id.Str())

			node.Subtree = id

			dir.Result() <- node
			// the top-level stub dir is not counted in the statistics
			if dir.Path() != "" {
				p.Report(Stat{Dirs: 1})
			}
		case <-done:
			// pipeline was cancelled
			return
		}
	}
}
|
2015-02-15 13:44:54 +00:00
|
|
|
|
2015-03-07 10:53:32 +00:00
|
|
|
// ArchivePipe holds the two job streams that compare merges: the walk of a
// previous (parent) snapshot and a fresh filesystem walk.
type ArchivePipe struct {
	// Old delivers jobs from walking the parent snapshot's tree.
	Old <-chan WalkTreeJob

	// New delivers jobs from walking the filesystem.
	New <-chan pipe.Job
}
|
|
|
|
|
|
|
|
// copyJobs forwards jobs from in to out until in is closed or done is
// signalled. It alternates between a "receive" and a "send" state by setting
// the inactive channel variable to nil — operations on a nil channel block
// forever, which disables that select case. This guarantees at most one job
// is held at a time and a held job is sent before the next is received.
func copyJobs(done <-chan struct{}, in <-chan pipe.Job, out chan<- pipe.Job) {
	i := in
	o := out

	// start in the "receive" state: disable the send case until a job is held
	o = nil

	var (
		j  pipe.Job
		ok bool
	)
	for {
		select {
		case <-done:
			return
		case j, ok = <-i:
			if !ok {
				// in ch closed, we're done
				debug.Log("copyJobs", "in channel closed, we're done")
				return
			}
			// job received: disable receive, enable send
			i = nil
			o = out
		case o <- j:
			// job sent: disable send, enable receive
			o = nil
			i = in
		}
	}
}
|
|
|
|
|
|
|
|
// archiveJob pairs a job from the current filesystem walk with the matching
// job from the parent snapshot's walk, if one exists.
type archiveJob struct {
	hasOld bool        // true when old holds a matching parent-snapshot job
	old    WalkTreeJob // job from the parent snapshot's tree walk
	new    pipe.Job    // job from the current filesystem walk
}
|
|
|
|
|
|
|
|
// compare merges the two path-sorted job streams a.Old and a.New and sends
// the merged jobs to out. Paths present in both streams are forwarded with
// their old data attached (via archiveJob.Copy); paths only in the new
// stream are forwarded as-is; paths only in the old stream were removed and
// are dropped. out is closed when compare returns.
func (a *ArchivePipe) compare(done <-chan struct{}, out chan<- pipe.Job) {
	defer func() {
		close(out)
		debug.Log("ArchivePipe.compare", "done")
	}()

	debug.Log("ArchivePipe.compare", "start")
	// loadOld/loadNew track which stream must deliver the next job before
	// another comparison can be made
	var (
		loadOld, loadNew bool = true, true
		ok               bool
		oldJob           WalkTreeJob
		newJob           pipe.Job
	)

	for {
		if loadOld {
			oldJob, ok = <-a.Old
			// if the old channel is closed, just pass through the new jobs
			if !ok {
				debug.Log("ArchivePipe.compare", "old channel is closed, copy from new channel")

				// handle remaining newJob
				if !loadNew {
					out <- archiveJob{new: newJob}.Copy()
				}

				copyJobs(done, a.New, out)
				return
			}

			loadOld = false
		}

		if loadNew {
			newJob, ok = <-a.New
			// if the new channel is closed, there are no more files in the current snapshot, return
			if !ok {
				debug.Log("ArchivePipe.compare", "new channel is closed, we're done")
				return
			}

			loadNew = false
		}

		debug.Log("ArchivePipe.compare", "old job: %v", oldJob.Path)
		debug.Log("ArchivePipe.compare", "new job: %v", newJob.Path())

		// at this point we have received an old job as well as a new job, compare paths
		file1 := oldJob.Path
		file2 := newJob.Path()

		dir1 := filepath.Dir(file1)
		dir2 := filepath.Dir(file2)

		if file1 == file2 {
			debug.Log("ArchivePipe.compare", " same filename %q", file1)

			// send job
			out <- archiveJob{hasOld: true, old: oldJob, new: newJob}.Copy()
			loadOld = true
			loadNew = true
			continue
		} else if dir1 < dir2 {
			debug.Log("ArchivePipe.compare", " %q < %q, file %q added", dir1, dir2, file2)
			// file is new, send new job and load new
			loadNew = true
			out <- archiveJob{new: newJob}.Copy()
			continue
		} else if dir1 == dir2 {
			if file1 < file2 {
				debug.Log("ArchivePipe.compare", " %q < %q, file %q removed", file1, file2, file1)
				// file has been removed, load new old
				loadOld = true
				continue
			} else {
				debug.Log("ArchivePipe.compare", " %q > %q, file %q added", file1, file2, file2)
				// file is new, send new job and load new
				loadNew = true
				out <- archiveJob{new: newJob}.Copy()
				continue
			}
		}

		debug.Log("ArchivePipe.compare", " %q > %q, file %q removed", file1, file2, file1)
		// file has been removed, throw away old job and load new
		loadOld = true
	}
}
|
|
|
|
|
|
|
|
// Copy returns the pipe.Job to process for this pair: the new job unchanged
// when there is no usable old data (no old job present, the old node's type
// changed, or the file on disk is newer), otherwise the new entry annotated
// with the old node so its content can be reused. Non-file jobs are always
// returned unchanged.
func (j archiveJob) Copy() pipe.Job {
	if !j.hasOld {
		return j.new
	}

	// handle files
	if isFile(j.new.Info()) {
		debug.Log("archiveJob.Copy", " job %v is file", j.new.Path())

		// if type has changed, return new job directly
		if j.old.Node == nil {
			return j.new
		}

		// if file is newer, return the new job
		if j.old.Node.isNewer(j.new.Fullpath(), j.new.Info()) {
			debug.Log("archiveJob.Copy", " job %v is newer", j.new.Path())
			return j.new
		}

		debug.Log("archiveJob.Copy", " job %v add old data", j.new.Path())
		// otherwise annotate job with old data
		e := j.new.(pipe.Entry)
		e.Node = j.old.Node
		return e
	}

	// dirs and other types are just returned
	return j.new
}
|
|
|
|
|
|
|
|
// Snapshot archives the given paths and returns the new snapshot together
// with its storage ID. When pid is non-nil it names a parent snapshot: the
// parent's tree is walked in parallel with the filesystem so unchanged files
// can reuse the parent's blobs. Progress is reported via p.
//
// Pipeline layout: pipe.Walk produces filesystem jobs, ArchivePipe.compare
// merges them with the parent-snapshot walk, pipe.Split fans the merged
// stream out into entry and dir channels, and maxConcurrency pairs of
// fileWorker/dirWorker consume them. Finally the snapshot, the buffered pack
// data and the index are written.
func (arch *Archiver) Snapshot(p *Progress, paths []string, pid backend.ID) (*Snapshot, backend.ID, error) {
	debug.Log("Archiver.Snapshot", "start for %v", paths)

	debug.Break("Archiver.Snapshot")
	sort.Strings(paths)

	// signal the whole pipeline to stop
	done := make(chan struct{})
	var err error

	p.Start()
	defer p.Done()

	// create new snapshot
	sn, err := NewSnapshot(paths)
	if err != nil {
		return nil, nil, err
	}

	jobs := ArchivePipe{}

	// use parent snapshot (if some was given)
	if pid != nil {
		sn.Parent = pid

		// load parent snapshot
		parent, err := LoadSnapshot(arch.s, pid)
		if err != nil {
			return nil, nil, err
		}

		// start walker on old tree
		ch := make(chan WalkTreeJob)
		go WalkTree(arch.s, parent.Tree, done, ch)
		jobs.Old = ch
	} else {
		// use closed channel so compare immediately falls back to copying
		// the new jobs through
		ch := make(chan WalkTreeJob)
		close(ch)
		jobs.Old = ch
	}

	// start walker
	pipeCh := make(chan pipe.Job)
	resCh := make(chan pipe.Result, 1)
	go func() {
		err := pipe.Walk(paths, done, pipeCh, resCh)
		if err != nil {
			debug.Log("Archiver.Snapshot", "pipe.Walk returned error %v", err)
			return
		}
		debug.Log("Archiver.Snapshot", "pipe.Walk done")
	}()
	jobs.New = pipeCh

	ch := make(chan pipe.Job)
	go jobs.compare(done, ch)

	var wg sync.WaitGroup
	entCh := make(chan pipe.Entry)
	dirCh := make(chan pipe.Dir)

	// split the merged stream into entries (files) and dirs; the splitter
	// closes both worker channels when the merged stream ends
	wg.Add(1)
	go func() {
		pipe.Split(ch, dirCh, entCh)
		debug.Log("Archiver.Snapshot", "split done")
		close(dirCh)
		close(entCh)
		wg.Done()
	}()

	// run workers
	for i := 0; i < maxConcurrency; i++ {
		wg.Add(2)
		go arch.fileWorker(&wg, p, done, entCh)
		go arch.dirWorker(&wg, p, done, dirCh)
	}

	// wait for all workers to terminate
	debug.Log("Archiver.Snapshot", "wait for workers")
	wg.Wait()

	debug.Log("Archiver.Snapshot", "workers terminated")

	// receive the top-level tree
	root := (<-resCh).(*Node)
	debug.Log("Archiver.Snapshot", "root node received: %v", root.Subtree.Str())
	sn.Tree = root.Subtree

	// save snapshot
	id, err := arch.s.SaveJSONUnpacked(backend.Snapshot, sn)
	if err != nil {
		return nil, nil, err
	}

	// store ID in snapshot struct
	sn.id = id
	debug.Log("Archiver.Snapshot", "saved snapshot %v", id.Str())

	// flush server
	err = arch.s.Flush()
	if err != nil {
		return nil, nil, err
	}

	// save index
	indexID, err := arch.s.SaveIndex()
	if err != nil {
		debug.Log("Archiver.Snapshot", "error saving index: %v", err)
		return nil, nil, err
	}

	debug.Log("Archiver.Snapshot", "saved index %v", indexID.Str())

	return sn, id, nil
}
|
2015-02-21 13:23:49 +00:00
|
|
|
|
|
|
|
// isFile reports whether fi describes a regular file. A nil FileInfo yields
// false, as do directories, symlinks, devices, pipes and sockets.
func isFile(fi os.FileInfo) bool {
	if fi == nil {
		return false
	}

	// regular files have none of the type bits (including char-device) set
	irregular := fi.Mode() & (os.ModeType | os.ModeCharDevice)
	return irregular == 0
}
|
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
func Scan(dirs []string, p *Progress) (Stat, error) {
|
2015-02-21 13:23:49 +00:00
|
|
|
p.Start()
|
|
|
|
defer p.Done()
|
|
|
|
|
|
|
|
var stat Stat
|
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
for _, dir := range dirs {
|
2015-03-15 11:20:30 +00:00
|
|
|
debug.Log("Scan", "Start for %v", dir)
|
2015-03-02 13:48:47 +00:00
|
|
|
err := filepath.Walk(dir, func(str string, fi os.FileInfo, err error) error {
|
2015-03-15 11:20:30 +00:00
|
|
|
debug.Log("Scan.Walk", "%v, fi: %v, err: %v", str, fi, err)
|
2015-03-21 13:43:33 +00:00
|
|
|
// TODO: integrate error reporting
|
|
|
|
if err != nil {
|
|
|
|
fmt.Fprintf(os.Stderr, "error for %v: %v\n", str, err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if fi == nil {
|
|
|
|
fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
|
|
|
|
return nil
|
|
|
|
}
|
2015-03-02 13:48:47 +00:00
|
|
|
s := Stat{}
|
|
|
|
if isFile(fi) {
|
|
|
|
s.Files++
|
|
|
|
s.Bytes += uint64(fi.Size())
|
|
|
|
} else if fi.IsDir() {
|
|
|
|
s.Dirs++
|
|
|
|
}
|
2015-02-21 13:23:49 +00:00
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
p.Report(s)
|
|
|
|
stat.Add(s)
|
2015-02-21 13:23:49 +00:00
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
// TODO: handle error?
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
2015-03-15 11:20:30 +00:00
|
|
|
debug.Log("Scan", "Done for %v, err: %v", dir, err)
|
2015-03-02 13:48:47 +00:00
|
|
|
if err != nil {
|
|
|
|
return Stat{}, err
|
|
|
|
}
|
|
|
|
}
|
2015-02-21 13:23:49 +00:00
|
|
|
|
2015-03-02 13:48:47 +00:00
|
|
|
return stat, nil
|
2015-02-21 13:23:49 +00:00
|
|
|
}
|