forked from TrueCloudLab/restic
backup: Add --ignore-ctime option and document change detection
This commit is contained in:
parent
43cb26010a
commit
6bd8a2faaa
5 changed files with 174 additions and 68 deletions
26
changelog/unreleased/pull-2823
Normal file
26
changelog/unreleased/pull-2823
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
Enhancement: Add option to let backup trust mtime without checking ctime
|
||||||
|
|
||||||
|
The backup command used to require that both ctime and mtime of a file matched
|
||||||
|
with a previously backed up version to determine that the file was unchanged.
|
||||||
|
In other words, if either ctime or mtime of the file had changed, it would be
|
||||||
|
considered changed and restic would read the file's content again to back up
|
||||||
|
the relevant (changed) parts of it.
|
||||||
|
|
||||||
|
The new option --ignore-ctime makes restic look at mtime only, such that ctime
|
||||||
|
changes for a file do not cause restic to read the file's contents again.
|
||||||
|
|
||||||
|
The check for both ctime and mtime was introduced in restic 0.9.6 to make
|
||||||
|
backups more reliable in the face of programs that reset mtime (some Unix
|
||||||
|
archivers do that), but it turned out to often be expensive because it made
|
||||||
|
restic read file contents even if only the metadata (owner, permissions) of
|
||||||
|
a file had changed. The new --ignore-ctime option lets the user restore the
|
||||||
|
0.9.5 behavior when needed. The existing --ignore-inode option already turned
|
||||||
|
off this behavior, but also removed a different check.
|
||||||
|
|
||||||
|
Please note that changes in files' metadata are still recorded, regardless of
|
||||||
|
the command line options provided to the backup command.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/2495
|
||||||
|
https://github.com/restic/restic/issues/2558
|
||||||
|
https://github.com/restic/restic/issues/2819
|
||||||
|
https://github.com/restic/restic/pull/2823
|
|
@ -90,6 +90,7 @@ type BackupOptions struct {
|
||||||
TimeStamp string
|
TimeStamp string
|
||||||
WithAtime bool
|
WithAtime bool
|
||||||
IgnoreInode bool
|
IgnoreInode bool
|
||||||
|
IgnoreCtime bool
|
||||||
UseFsSnapshot bool
|
UseFsSnapshot bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,6 +127,7 @@ func init() {
|
||||||
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
|
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
|
||||||
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
|
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
|
||||||
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
|
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
|
||||||
|
f.BoolVar(&backupOptions.IgnoreCtime, "ignore-ctime", false, "ignore ctime changes when checking for modified files")
|
||||||
if runtime.GOOS == "windows" {
|
if runtime.GOOS == "windows" {
|
||||||
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
|
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
|
||||||
}
|
}
|
||||||
|
@ -665,7 +667,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||||
arch.CompleteItem = p.CompleteItem
|
arch.CompleteItem = p.CompleteItem
|
||||||
arch.StartFile = p.StartFile
|
arch.StartFile = p.StartFile
|
||||||
arch.CompleteBlob = p.CompleteBlob
|
arch.CompleteBlob = p.CompleteBlob
|
||||||
arch.IgnoreInode = opts.IgnoreInode
|
|
||||||
|
if opts.IgnoreInode {
|
||||||
|
// --ignore-inode implies --ignore-ctime: on FUSE, the ctime is not
|
||||||
|
// reliable either.
|
||||||
|
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime | archiver.ChangeIgnoreInode
|
||||||
|
}
|
||||||
|
if opts.IgnoreCtime {
|
||||||
|
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime
|
||||||
|
}
|
||||||
|
|
||||||
if parentSnapshotID == nil {
|
if parentSnapshotID == nil {
|
||||||
parentSnapshotID = &restic.ID{}
|
parentSnapshotID = &restic.ID{}
|
||||||
|
|
|
@ -131,24 +131,62 @@ restic encounters:
|
||||||
In fact several hosts may use the same repository to back up directories
|
In fact several hosts may use the same repository to back up directories
|
||||||
and files leading to a greater de-duplication.
|
and files leading to a greater de-duplication.
|
||||||
|
|
||||||
Please be aware that when you backup different directories (or the
|
|
||||||
directories to be saved have a variable name component like a
|
|
||||||
time/date), restic always needs to read all files and only afterwards
|
|
||||||
can compute which parts of the files need to be saved. When you backup
|
|
||||||
the same directory again (maybe with new or changed files) restic will
|
|
||||||
find the old snapshot in the repo and by default only reads those files
|
|
||||||
that are new or have been modified since the last snapshot. This is
|
|
||||||
decided based on the following attributes of the file in the file system:
|
|
||||||
|
|
||||||
* Type (file, symlink, or directory?)
|
|
||||||
* Modification time
|
|
||||||
* Size
|
|
||||||
* Inode number (internal number used to reference a file in a file system)
|
|
||||||
|
|
||||||
Now is a good time to run ``restic check`` to verify that all data
|
Now is a good time to run ``restic check`` to verify that all data
|
||||||
is properly stored in the repository. You should run this command regularly
|
is properly stored in the repository. You should run this command regularly
|
||||||
to make sure the internal structure of the repository is free of errors.
|
to make sure the internal structure of the repository is free of errors.
|
||||||
|
|
||||||
|
File change detection
|
||||||
|
*********************
|
||||||
|
|
||||||
|
When restic encounters a file that has already been backed up, whether in the
|
||||||
|
current backup or a previous one, it makes sure the file's contents are only
|
||||||
|
stored once in the repository. To do so, it normally has to scan the entire
|
||||||
|
contents of every file. Because this can be very expensive, restic also uses a
|
||||||
|
change detection rule based on file metadata to determine whether a file is
|
||||||
|
likely unchanged since a previous backup. If it is, the file is not scanned
|
||||||
|
again.
|
||||||
|
|
||||||
|
Change detection is only performed for regular files (not special files,
|
||||||
|
symlinks or directories) that have the exact same path as they did in a
|
||||||
|
previous backup of the same location. If a file or one of its containing
|
||||||
|
directories was renamed, it is considered a different file and its entire
|
||||||
|
contents will be scanned again.
|
||||||
|
|
||||||
|
Metadata changes (permissions, ownership, etc.) are always included in the
|
||||||
|
backup, even if file contents are considered unchanged.
|
||||||
|
|
||||||
|
On **Unix** (including Linux and Mac), given that a file lives at the same
|
||||||
|
location as a file in a previous backup, the following file metadata
|
||||||
|
attributes have to match for its contents to be presumed unchanged:
|
||||||
|
|
||||||
|
* Modification timestamp (mtime).
|
||||||
|
* Metadata change timestamp (ctime).
|
||||||
|
* File size.
|
||||||
|
* Inode number (internal number used to reference a file in a filesystem).
|
||||||
|
|
||||||
|
The reason for requiring both mtime and ctime to match is that Unix programs
|
||||||
|
can freely change mtime (and some do). In such cases, a ctime change may be
|
||||||
|
the only hint that a file did change.
|
||||||
|
|
||||||
|
The following ``restic backup`` command line flags modify the change detection
|
||||||
|
rules:
|
||||||
|
|
||||||
|
* ``--force``: turn off change detection and rescan all files.
|
||||||
|
* ``--ignore-ctime``: require mtime to match, but allow ctime to differ.
|
||||||
|
* ``--ignore-inode``: require mtime to match, but allow inode number
|
||||||
|
and ctime to differ.
|
||||||
|
|
||||||
|
The option ``--ignore-inode`` exists to support FUSE-based filesystems and
|
||||||
|
pCloud, which do not assign stable inodes to files.
|
||||||
|
|
||||||
|
Note that the device ID of the containing mount point is never taken into
|
||||||
|
account. Device numbers are not stable for removable devices and ZFS snapshots.
|
||||||
|
If you want to force a re-scan in such a case, you can change the mount point.
|
||||||
|
|
||||||
|
On **Windows**, a file is considered unchanged when its path and modification
|
||||||
|
time match, and only ``--force`` has any effect. The other options are
|
||||||
|
recognized but ignored.
|
||||||
|
|
||||||
Excluding Files
|
Excluding Files
|
||||||
***************
|
***************
|
||||||
|
|
||||||
|
@ -372,10 +410,6 @@ written, and the next backup needs to write new metadata again. If you really
|
||||||
want to save the access time for files and directories, you can pass the
|
want to save the access time for files and directories, you can pass the
|
||||||
``--with-atime`` option to the ``backup`` command.
|
``--with-atime`` option to the ``backup`` command.
|
||||||
|
|
||||||
In filesystems that do not support inode consistency, like FUSE-based ones and pCloud, it is
|
|
||||||
possible to ignore inode on changed files comparison by passing ``--ignore-inode`` to
|
|
||||||
``backup`` command.
|
|
||||||
|
|
||||||
Reading data from stdin
|
Reading data from stdin
|
||||||
***********************
|
***********************
|
||||||
|
|
||||||
|
|
|
@ -78,10 +78,18 @@ type Archiver struct {
|
||||||
// WithAtime configures if the access time for files and directories should
|
// WithAtime configures if the access time for files and directories should
|
||||||
// be saved. Enabling it may result in much metadata, so it's off by
|
// be saved. Enabling it may result in much metadata, so it's off by
|
||||||
// default.
|
// default.
|
||||||
WithAtime bool
|
WithAtime bool
|
||||||
IgnoreInode bool
|
|
||||||
|
// Flags controlling change detection. See doc/040_backup.rst for details.
|
||||||
|
ChangeIgnoreFlags uint
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flags for the ChangeIgnoreFlags bitfield.
|
||||||
|
const (
|
||||||
|
ChangeIgnoreCtime = 1 << iota
|
||||||
|
ChangeIgnoreInode
|
||||||
|
)
|
||||||
|
|
||||||
// Options is used to configure the archiver.
|
// Options is used to configure the archiver.
|
||||||
type Options struct {
|
type Options struct {
|
||||||
// FileReadConcurrency sets how many files are read in concurrently. If
|
// FileReadConcurrency sets how many files are read in concurrently. If
|
||||||
|
@ -134,7 +142,6 @@ func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
|
||||||
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
||||||
StartFile: func(string) {},
|
StartFile: func(string) {},
|
||||||
CompleteBlob: func(string, uint64) {},
|
CompleteBlob: func(string, uint64) {},
|
||||||
IgnoreInode: false,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return arch
|
return arch
|
||||||
|
@ -379,7 +386,7 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
||||||
|
|
||||||
// check if the file has not changed before performing a fopen operation (more expensive, specially
|
// check if the file has not changed before performing a fopen operation (more expensive, specially
|
||||||
// in network filesystems)
|
// in network filesystems)
|
||||||
if previous != nil && !fileChanged(fi, previous, arch.IgnoreInode) {
|
if previous != nil && !fileChanged(fi, previous, arch.ChangeIgnoreFlags) {
|
||||||
if arch.allBlobsPresent(previous) {
|
if arch.allBlobsPresent(previous) {
|
||||||
debug.Log("%v hasn't changed, using old list of blobs", target)
|
debug.Log("%v hasn't changed, using old list of blobs", target)
|
||||||
arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start))
|
arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start))
|
||||||
|
@ -481,36 +488,30 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
||||||
return fn, false, nil
|
return fn, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// fileChanged returns true if the file's content has changed since the node
|
// fileChanged tries to detect whether a file's content has changed compared
|
||||||
// was created.
|
// to the contents of node, which describes the same path in the parent backup.
|
||||||
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreInode bool) bool {
|
// It should only be run for regular files.
|
||||||
if node == nil {
|
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreFlags uint) bool {
|
||||||
|
switch {
|
||||||
|
case node == nil:
|
||||||
|
return true
|
||||||
|
case node.Type != "file":
|
||||||
|
// We're only called for regular files, so this is a type change.
|
||||||
|
return true
|
||||||
|
case uint64(fi.Size()) != node.Size:
|
||||||
|
return true
|
||||||
|
case !fi.ModTime().Equal(node.ModTime):
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// check type change
|
checkCtime := ignoreFlags&ChangeIgnoreCtime == 0
|
||||||
if node.Type != "file" {
|
checkInode := ignoreFlags&ChangeIgnoreInode == 0
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// check modification timestamp
|
|
||||||
if !fi.ModTime().Equal(node.ModTime) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// check status change timestamp
|
|
||||||
extFI := fs.ExtendedStat(fi)
|
extFI := fs.ExtendedStat(fi)
|
||||||
if !ignoreInode && !extFI.ChangeTime.Equal(node.ChangeTime) {
|
switch {
|
||||||
|
case checkCtime && !extFI.ChangeTime.Equal(node.ChangeTime):
|
||||||
return true
|
return true
|
||||||
}
|
case checkInode && node.Inode != extFI.Inode:
|
||||||
|
|
||||||
// check size
|
|
||||||
if uint64(fi.Size()) != node.Size || uint64(extFI.Size) != node.Size {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// check inode
|
|
||||||
if !ignoreInode && node.Inode != extFI.Inode {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -505,6 +505,18 @@ func save(t testing.TB, filename string, data []byte) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func chmodTwice(t testing.TB, name string) {
|
||||||
|
// POSIX says that ctime is updated "even if the file status does not
|
||||||
|
// change", but let's make sure it does change, just in case.
|
||||||
|
err := os.Chmod(name, 0700)
|
||||||
|
restictest.OK(t, err)
|
||||||
|
|
||||||
|
sleep()
|
||||||
|
|
||||||
|
err = os.Chmod(name, 0600)
|
||||||
|
restictest.OK(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
func lstat(t testing.TB, name string) os.FileInfo {
|
func lstat(t testing.TB, name string) os.FileInfo {
|
||||||
fi, err := os.Lstat(name)
|
fi, err := os.Lstat(name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -533,6 +545,13 @@ func remove(t testing.TB, filename string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func rename(t testing.TB, oldname, newname string) {
|
||||||
|
err := os.Rename(oldname, newname)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
|
func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
|
||||||
node, err := restic.NodeFromFileInfo(filename, fi)
|
node, err := restic.NodeFromFileInfo(filename, fi)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -542,26 +561,26 @@ func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
|
||||||
return node
|
return node
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sleep sleeps long enough to ensure a timestamp change.
|
||||||
|
func sleep() {
|
||||||
|
d := 50 * time.Millisecond
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
// On older Darwin instances, the file system only supports one second
|
||||||
|
// granularity.
|
||||||
|
d = 1500 * time.Millisecond
|
||||||
|
}
|
||||||
|
time.Sleep(d)
|
||||||
|
}
|
||||||
|
|
||||||
func TestFileChanged(t *testing.T) {
|
func TestFileChanged(t *testing.T) {
|
||||||
var defaultContent = []byte("foobar")
|
var defaultContent = []byte("foobar")
|
||||||
|
|
||||||
var d = 50 * time.Millisecond
|
|
||||||
if runtime.GOOS == "darwin" {
|
|
||||||
// on older darwin instances the file system only supports one second
|
|
||||||
// granularity
|
|
||||||
d = time.Second
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep := func() {
|
|
||||||
time.Sleep(d)
|
|
||||||
}
|
|
||||||
|
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
Name string
|
Name string
|
||||||
SkipForWindows bool
|
SkipForWindows bool
|
||||||
Content []byte
|
Content []byte
|
||||||
Modify func(t testing.TB, filename string)
|
Modify func(t testing.TB, filename string)
|
||||||
IgnoreInode bool
|
ChangeIgnore uint
|
||||||
SameFile bool
|
SameFile bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
|
@ -618,17 +637,33 @@ func TestFileChanged(t *testing.T) {
|
||||||
save(t, filename, defaultContent)
|
save(t, filename, defaultContent)
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "ctime-change",
|
||||||
|
Modify: chmodTwice,
|
||||||
|
SameFile: false,
|
||||||
|
SkipForWindows: true, // No ctime on Windows, so this test would fail.
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "ignore-ctime-change",
|
||||||
|
Modify: chmodTwice,
|
||||||
|
ChangeIgnore: ChangeIgnoreCtime,
|
||||||
|
SameFile: true,
|
||||||
|
SkipForWindows: true, // No ctime on Windows, so this test is meaningless.
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Name: "ignore-inode",
|
Name: "ignore-inode",
|
||||||
Modify: func(t testing.TB, filename string) {
|
Modify: func(t testing.TB, filename string) {
|
||||||
fi := lstat(t, filename)
|
fi := lstat(t, filename)
|
||||||
remove(t, filename)
|
// First create the new file, then remove the old one,
|
||||||
sleep()
|
// so that the old file retains its inode number.
|
||||||
|
tempname := filename + ".old"
|
||||||
|
rename(t, filename, tempname)
|
||||||
save(t, filename, defaultContent)
|
save(t, filename, defaultContent)
|
||||||
|
remove(t, tempname)
|
||||||
setTimestamp(t, filename, fi.ModTime(), fi.ModTime())
|
setTimestamp(t, filename, fi.ModTime(), fi.ModTime())
|
||||||
},
|
},
|
||||||
IgnoreInode: true,
|
ChangeIgnore: ChangeIgnoreCtime | ChangeIgnoreInode,
|
||||||
SameFile: true,
|
SameFile: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -651,7 +686,7 @@ func TestFileChanged(t *testing.T) {
|
||||||
fiBefore := lstat(t, filename)
|
fiBefore := lstat(t, filename)
|
||||||
node := nodeFromFI(t, filename, fiBefore)
|
node := nodeFromFI(t, filename, fiBefore)
|
||||||
|
|
||||||
if fileChanged(fiBefore, node, false) {
|
if fileChanged(fiBefore, node, 0) {
|
||||||
t.Fatalf("unchanged file detected as changed")
|
t.Fatalf("unchanged file detected as changed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -661,12 +696,12 @@ func TestFileChanged(t *testing.T) {
|
||||||
|
|
||||||
if test.SameFile {
|
if test.SameFile {
|
||||||
// file should be detected as unchanged
|
// file should be detected as unchanged
|
||||||
if fileChanged(fiAfter, node, test.IgnoreInode) {
|
if fileChanged(fiAfter, node, test.ChangeIgnore) {
|
||||||
t.Fatalf("unmodified file detected as changed")
|
t.Fatalf("unmodified file detected as changed")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// file should be detected as changed
|
// file should be detected as changed
|
||||||
if !fileChanged(fiAfter, node, test.IgnoreInode) && !test.SameFile {
|
if !fileChanged(fiAfter, node, test.ChangeIgnore) && !test.SameFile {
|
||||||
t.Fatalf("modified file detected as unchanged")
|
t.Fatalf("modified file detected as unchanged")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -684,7 +719,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
|
||||||
|
|
||||||
t.Run("nil-node", func(t *testing.T) {
|
t.Run("nil-node", func(t *testing.T) {
|
||||||
fi := lstat(t, filename)
|
fi := lstat(t, filename)
|
||||||
if !fileChanged(fi, nil, false) {
|
if !fileChanged(fi, nil, 0) {
|
||||||
t.Fatal("nil node detected as unchanged")
|
t.Fatal("nil node detected as unchanged")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -693,7 +728,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
|
||||||
fi := lstat(t, filename)
|
fi := lstat(t, filename)
|
||||||
node := nodeFromFI(t, filename, fi)
|
node := nodeFromFI(t, filename, fi)
|
||||||
node.Type = "symlink"
|
node.Type = "symlink"
|
||||||
if !fileChanged(fi, node, false) {
|
if !fileChanged(fi, node, 0) {
|
||||||
t.Fatal("node with changed type detected as unchanged")
|
t.Fatal("node with changed type detected as unchanged")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in a new issue