Refactor Archiver and backup command

Improve incremental backup by first scanning the tree, loading the old
tree and afterwards comparing both trees in memory.
This commit is contained in:
Alexander Neumann 2015-01-04 22:39:30 +01:00
parent 2a97e2b08a
commit e543f5926c
6 changed files with 149 additions and 36 deletions

View file

@ -1,6 +1,7 @@
package restic
import (
"errors"
"fmt"
"io"
"os"
@ -58,6 +59,7 @@ func NewArchiver(s Server, p *Progress) (*Archiver, error) {
arch.ch = NewContentHandler(s)
// load all blobs from all snapshots
// TODO: only use bloblist from old snapshot if available
err = arch.ch.LoadAllMaps()
if err != nil {
return nil, err
@ -96,7 +98,28 @@ func (arch *Archiver) SaveFile(node *Node) error {
file, err := os.Open(node.path)
defer file.Close()
if err != nil {
return arrar.Annotate(err, "SaveFile()")
return arrar.Annotatef(err, "SaveFile(%v)", node.path)
}
// check file again
fi, err := file.Stat()
if err != nil {
return err
}
if fi.ModTime() != node.ModTime {
e2 := arch.Error(node.path, fi, errors.New("file changed as we read it\n"))
if e2 == nil {
// create new node
n, err := NodeFromFileInfo(node.path, fi)
if err != nil {
return err
}
// copy node
*node = *n
}
}
var blobs Blobs
@ -203,8 +226,8 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) {
var wg sync.WaitGroup
for _, node := range *t {
if node.Tree != nil && node.Subtree == nil {
b, err := arch.saveTree(node.Tree)
if node.tree != nil && node.Subtree == nil {
b, err := arch.saveTree(node.tree)
if err != nil {
return Blob{}, err
}
@ -229,8 +252,6 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) {
}
arch.p.Report(Stat{Files: 1})
}(node)
} else {
arch.p.Report(Stat{Other: 1})
}
}

View file

@ -3,6 +3,7 @@ package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
@ -59,9 +60,9 @@ func format_duration(d time.Duration) string {
func print_tree2(indent int, t *restic.Tree) {
for _, node := range *t {
if node.Tree != nil {
if node.Tree() != nil {
fmt.Printf("%s%s/\n", strings.Repeat(" ", indent), node.Name)
print_tree2(indent+1, node.Tree)
print_tree2(indent+1, node.Tree())
} else {
fmt.Printf("%s%s\n", strings.Repeat(" ", indent), node.Name)
}
@ -94,7 +95,7 @@ func (cmd CmdBackup) Execute(args []string) error {
fmt.Printf("found parent snapshot %v\n", parentSnapshotID)
}
fmt.Printf("scanning %s\n", target)
fmt.Printf("scan %s\n", target)
scanProgress := restic.NewProgress(time.Second)
if terminal.IsTerminal(int(os.Stdout.Fd())) {
@ -113,29 +114,51 @@ func (cmd CmdBackup) Execute(args []string) error {
sc := restic.NewScanner(scanProgress)
t, err := sc.Scan(target)
newTree, err := sc.Scan(target)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
return err
}
if parentSnapshotID != nil {
fmt.Printf("load old snapshot\n")
ch := restic.NewContentHandler(s)
sn, err := ch.LoadSnapshot(parentSnapshotID)
if err != nil {
return err
}
oldTree, err := restic.LoadTreeRecursive(filepath.Dir(sn.Dir), ch, sn.Tree)
if err != nil {
return err
}
newTree.CopyFrom(oldTree)
}
archiveProgress := restic.NewProgress(time.Second)
targetStat := scanProgress.Current()
targetStat := newTree.StatTodo()
if terminal.IsTerminal(int(os.Stdout.Fd())) {
var bps, eta uint64
itemsTodo := targetStat.Files + targetStat.Dirs
archiveProgress.F = func(s restic.Stat, d time.Duration, ticker bool) {
sec := uint64(d / time.Second)
if sec > 0 && ticker {
if targetStat.Bytes > 0 && sec > 0 && ticker {
bps = s.Bytes / sec
eta = (targetStat.Bytes - s.Bytes) / bps
if bps > 0 {
eta = (targetStat.Bytes - s.Bytes) / bps
}
}
fmt.Printf("\x1b[2K\r[%s] %3.2f%% %s/s %s / %s ETA %s",
itemsDone := s.Files + s.Dirs
fmt.Printf("\x1b[2K\r[%s] %3.2f%% %s/s %s / %s %d / %d items ETA %s",
format_duration(d),
float64(s.Bytes)/float64(targetStat.Bytes)*100,
format_bytes(bps),
format_bytes(s.Bytes), format_bytes(targetStat.Bytes),
itemsDone, itemsTodo,
format_seconds(eta))
}
@ -154,11 +177,11 @@ func (cmd CmdBackup) Execute(args []string) error {
arch.Error = func(dir string, fi os.FileInfo, err error) error {
// TODO: make ignoring errors configurable
fmt.Fprintf(os.Stderr, "\nerror for %s: %v\n%v\n", dir, err, fi)
fmt.Fprintf(os.Stderr, "\nerror for %s: %v\n", dir, err)
return nil
}
_, id, err := arch.Snapshot(target, t, parentSnapshotID)
_, id, err := arch.Snapshot(target, newTree, parentSnapshotID)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
}

View file

@ -243,3 +243,8 @@ func (ch *ContentHandler) Test(t backend.Type, id backend.ID) (bool, error) {
return ch.s.Test(t, id)
}
// BlobList returns the current BlobList. The pointer stored in the
// ContentHandler is handed out directly; no copy is made.
func (ch *ContentHandler) BlobList() *BlobList {
return ch.bl
}

View file

@ -25,7 +25,6 @@ type Progress struct {
type Stat struct {
Files uint64
Dirs uint64
Other uint64
Bytes uint64
}
@ -162,7 +161,6 @@ func (s *Stat) Add(other Stat) {
s.Bytes += other.Bytes
s.Dirs += other.Dirs
s.Files += other.Files
s.Other += other.Other
}
func (s Stat) String() string {
@ -182,6 +180,6 @@ func (s Stat) String() string {
str = fmt.Sprintf("%dB", s.Bytes)
}
return fmt.Sprintf("Stat(%d files, %d dirs, %d other, %v)",
s.Files, s.Dirs, s.Other, str)
return fmt.Sprintf("Stat(%d files, %d dirs, %v)",
s.Files, s.Dirs, str)
}

View file

@ -66,7 +66,7 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) {
if entry.IsDir() {
// save all errors in node.err, sort out later
node.Tree, node.err = scan(filterFn, progress, path)
node.tree, node.err = scan(filterFn, progress, path)
}
}
@ -80,8 +80,6 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) {
progress.Report(Stat{Files: 1, Bytes: node.Size})
case "dir":
progress.Report(Stat{Dirs: 1})
default:
progress.Report(Stat{Other: 1})
}
}
@ -112,7 +110,7 @@ func (sc *Scanner) Scan(path string) (*Tree, error) {
sc.p.Report(Stat{Dirs: 1})
node.Tree, err = scan(sc.Filter, sc.p, path)
node.tree, err = scan(sc.Filter, sc.p, path)
if err != nil {
return nil, arrar.Annotate(err, "loadTree()")
}

94
tree.go
View file

@ -6,6 +6,8 @@ import (
"fmt"
"os"
"os/user"
"path/filepath"
"reflect"
"sort"
"strconv"
"strings"
@ -37,7 +39,7 @@ type Node struct {
Content []backend.ID `json:"content"`
Subtree backend.ID `json:"subtree,omitempty"`
Tree *Tree `json:"-"`
tree *Tree
path string
err error
@ -92,11 +94,33 @@ func LoadTree(ch *ContentHandler, id backend.ID) (Tree, error) {
return tree, nil
}
// PopulateFrom copies subtrees and content from other when it hasn't changed.
func (t Tree) PopulateFrom(other Tree) error {
// LoadTreeRecursive loads the tree identified by id via ch and then descends
// into every node of type "dir" that carries a subtree ID, loading those
// subtrees as well. Each node's path is set to path joined with the node's
// name, and a loaded subtree is attached to its node. The first error
// encountered aborts the load and is returned together with a nil tree.
func LoadTreeRecursive(path string, ch *ContentHandler, id backend.ID) (Tree, error) {
	root, err := LoadTree(ch, id)
	if err != nil {
		return nil, err
	}

	for _, node := range root {
		node.path = filepath.Join(path, node.Name)

		// only directories that reference a stored subtree are descended into
		if node.Type != "dir" || node.Subtree == nil {
			continue
		}

		subtree, err := LoadTreeRecursive(node.path, ch, node.Subtree)
		if err != nil {
			return nil, err
		}
		node.tree = &subtree
	}

	return root, nil
}
// CopyFrom recursively copies all content from other to t.
func (t Tree) CopyFrom(other Tree) {
for _, node := range t {
// only copy entries for files
if node.Type != "file" {
// only process files and dirs
if node.Type != "file" && node.Type != "dir" {
continue
}
@ -108,14 +132,32 @@ func (t Tree) PopulateFrom(other Tree) error {
continue
}
// compare content
if node.SameContent(oldNode) {
// copy Content
node.Content = oldNode.Content
if node.Type == "file" {
// compare content
if node.SameContent(oldNode) {
// copy Content
node.Content = oldNode.Content
}
} else {
// fill in all subtrees from old subtree
node.tree.CopyFrom(*oldNode.tree)
// check if tree has changed
if node.tree.Equals(*oldNode.tree) {
// if nothing has changed, copy subtree ID
node.Subtree = oldNode.Subtree
}
}
}
}
return nil
// Equals reports whether t and other contain exactly the same nodes. The
// lengths are compared first so that trees of different size are rejected
// without a deep comparison; otherwise the result is that of
// reflect.DeepEqual on both trees.
func (t Tree) Equals(other Tree) bool {
	return len(t) == len(other) && reflect.DeepEqual(t, other)
}
func (t *Tree) Insert(node *Node) error {
@ -160,15 +202,41 @@ func (t Tree) Stat() Stat {
s.Bytes += n.Size
case "dir":
s.Dirs++
s.Add(n.Tree.Stat())
default:
s.Other++
if n.tree != nil {
s.Add(n.tree.Stat())
}
}
}
return s
}
// StatTodo returns statistics for the nodes in t that have no stored
// reference yet: files whose Content is nil (counted with their size) and
// directories whose Subtree is nil. For such a directory the statistics of
// its attached tree, if any, are added recursively. Nodes of any other type
// are ignored.
func (t Tree) StatTodo() Stat {
	var todo Stat

	for _, node := range t {
		if node.Type == "file" && node.Content == nil {
			todo.Files++
			todo.Bytes += node.Size
			continue
		}

		if node.Type == "dir" && node.Subtree == nil {
			todo.Dirs++
			if sub := node.tree; sub != nil {
				todo.Add(sub.StatTodo())
			}
		}
	}

	return todo
}
// Tree returns the tree attached to the node via its unexported tree field,
// or nil if none has been set.
func (node Node) Tree() *Tree {
return node.tree
}
func (node *Node) fill_extra(path string, fi os.FileInfo) (err error) {
stat, ok := fi.Sys().(*syscall.Stat_t)
if !ok {