forked from TrueCloudLab/restic
Merge pull request #2823 from greatroar/trust-mtime
Add --ignore-ctime flag to backup and document change detection
This commit is contained in:
commit
de7e3a0648
5 changed files with 174 additions and 68 deletions
26
changelog/unreleased/pull-2823
Normal file
26
changelog/unreleased/pull-2823
Normal file
|
@ -0,0 +1,26 @@
|
|||
Enhancement: Add option to let backup trust mtime without checking ctime
|
||||
|
||||
The backup command used to require that both ctime and mtime of a file matched
|
||||
with a previously backed up version to determine that the file was unchanged.
|
||||
In other words, if either ctime or mtime of the file had changed, it would be
|
||||
considered changed and restic would read the file's content again to back up
|
||||
the relevant (changed) parts of it.
|
||||
|
||||
The new option --ignore-ctime makes restic look at mtime only, such that ctime
|
||||
changes for a file do not cause restic to read the file's contents again.
|
||||
|
||||
The check for both ctime and mtime was introduced in restic 0.9.6 to make
|
||||
backups more reliable in the face of programs that reset mtime (some Unix
|
||||
archivers do that), but it turned out to often be expensive because it made
|
||||
restic read file contents even if only the metadata (owner, permissions) of
|
||||
a file had changed. The new --ignore-ctime option lets the user restore the
|
||||
0.9.5 behavior when needed. The existing --ignore-inode option already turned
|
||||
off this behavior, but also removed a different check.
|
||||
|
||||
Please note that changes in files' metadata are still recorded, regardless of
|
||||
the command line options provided to the backup command.
|
||||
|
||||
https://github.com/restic/restic/issues/2495
|
||||
https://github.com/restic/restic/issues/2558
|
||||
https://github.com/restic/restic/issues/2819
|
||||
https://github.com/restic/restic/pull/2823
|
|
@ -90,6 +90,7 @@ type BackupOptions struct {
|
|||
TimeStamp string
|
||||
WithAtime bool
|
||||
IgnoreInode bool
|
||||
IgnoreCtime bool
|
||||
UseFsSnapshot bool
|
||||
}
|
||||
|
||||
|
@ -126,6 +127,7 @@ func init() {
|
|||
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
|
||||
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
|
||||
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
|
||||
f.BoolVar(&backupOptions.IgnoreCtime, "ignore-ctime", false, "ignore ctime changes when checking for modified files")
|
||||
if runtime.GOOS == "windows" {
|
||||
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
|
||||
}
|
||||
|
@ -665,7 +667,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||
arch.CompleteItem = p.CompleteItem
|
||||
arch.StartFile = p.StartFile
|
||||
arch.CompleteBlob = p.CompleteBlob
|
||||
arch.IgnoreInode = opts.IgnoreInode
|
||||
|
||||
if opts.IgnoreInode {
|
||||
// --ignore-inode implies --ignore-ctime: on FUSE, the ctime is not
|
||||
// reliable either.
|
||||
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime | archiver.ChangeIgnoreInode
|
||||
}
|
||||
if opts.IgnoreCtime {
|
||||
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime
|
||||
}
|
||||
|
||||
if parentSnapshotID == nil {
|
||||
parentSnapshotID = &restic.ID{}
|
||||
|
|
|
@ -131,24 +131,62 @@ restic encounters:
|
|||
In fact, several hosts may use the same repository to back up directories
|
||||
and files leading to a greater de-duplication.
|
||||
|
||||
Please be aware that when you back up different directories (or the
|
||||
directories to be saved have a variable name component like a
|
||||
time/date), restic always needs to read all files and only afterwards
|
||||
can compute which parts of the files need to be saved. When you back up
|
||||
the same directory again (maybe with new or changed files) restic will
|
||||
find the old snapshot in the repo and by default only reads those files
|
||||
that are new or have been modified since the last snapshot. This is
|
||||
decided based on the following attributes of the file in the file system:
|
||||
|
||||
* Type (file, symlink, or directory?)
|
||||
* Modification time
|
||||
* Size
|
||||
* Inode number (internal number used to reference a file in a file system)
|
||||
|
||||
Now is a good time to run ``restic check`` to verify that all data
|
||||
is properly stored in the repository. You should run this command regularly
|
||||
to make sure the internal structure of the repository is free of errors.
|
||||
|
||||
File change detection
|
||||
*********************
|
||||
|
||||
When restic encounters a file that has already been backed up, whether in the
|
||||
current backup or a previous one, it makes sure the file's contents are only
|
||||
stored once in the repository. To do so, it normally has to scan the entire
|
||||
contents of every file. Because this can be very expensive, restic also uses a
|
||||
change detection rule based on file metadata to determine whether a file is
|
||||
likely unchanged since a previous backup. If it is, the file is not scanned
|
||||
again.
|
||||
|
||||
Change detection is only performed for regular files (not special files,
|
||||
symlinks or directories) that have the exact same path as they did in a
|
||||
previous backup of the same location. If a file or one of its containing
|
||||
directories was renamed, it is considered a different file and its entire
|
||||
contents will be scanned again.
|
||||
|
||||
Metadata changes (permissions, ownership, etc.) are always included in the
|
||||
backup, even if file contents are considered unchanged.
|
||||
|
||||
On **Unix** (including Linux and Mac), given that a file lives at the same
|
||||
location as a file in a previous backup, the following file metadata
|
||||
attributes have to match for its contents to be presumed unchanged:
|
||||
|
||||
* Modification timestamp (mtime).
|
||||
* Metadata change timestamp (ctime).
|
||||
* File size.
|
||||
* Inode number (internal number used to reference a file in a filesystem).
|
||||
|
||||
The reason for requiring both mtime and ctime to match is that Unix programs
|
||||
can freely change mtime (and some do). In such cases, a ctime change may be
|
||||
the only hint that a file did change.
|
||||
|
||||
The following ``restic backup`` command line flags modify the change detection
|
||||
rules:
|
||||
|
||||
* ``--force``: turn off change detection and rescan all files.
|
||||
* ``--ignore-ctime``: require mtime to match, but allow ctime to differ.
|
||||
* ``--ignore-inode``: require mtime to match, but allow inode number
|
||||
and ctime to differ.
|
||||
|
||||
The option ``--ignore-inode`` exists to support FUSE-based filesystems and
|
||||
pCloud, which do not assign stable inodes to files.
|
||||
|
||||
Note that the device id of the containing mount point is never taken into
|
||||
account. Device numbers are not stable for removable devices and ZFS snapshots.
|
||||
If you want to force a re-scan in such a case, you can change the mountpoint.
|
||||
|
||||
On **Windows**, a file is considered unchanged when its path and modification
|
||||
time match, and only ``--force`` has any effect. The other options are
|
||||
recognized but ignored.
|
||||
|
||||
Excluding Files
|
||||
***************
|
||||
|
||||
|
@ -372,10 +410,6 @@ written, and the next backup needs to write new metadata again. If you really
|
|||
want to save the access time for files and directories, you can pass the
|
||||
``--with-atime`` option to the ``backup`` command.
|
||||
|
||||
In filesystems that do not support inode consistency, like FUSE-based ones and pCloud, it is
|
||||
possible to ignore inode on changed files comparison by passing ``--ignore-inode`` to
|
||||
``backup`` command.
|
||||
|
||||
Reading data from stdin
|
||||
***********************
|
||||
|
||||
|
|
|
@ -78,10 +78,18 @@ type Archiver struct {
|
|||
// WithAtime configures if the access time for files and directories should
|
||||
// be saved. Enabling it may result in much metadata, so it's off by
|
||||
// default.
|
||||
WithAtime bool
|
||||
IgnoreInode bool
|
||||
WithAtime bool
|
||||
|
||||
// Flags controlling change detection. See doc/040_backup.rst for details.
|
||||
ChangeIgnoreFlags uint
|
||||
}
|
||||
|
||||
// Flags for the ChangeIgnoreFlags bitfield. Each flag is a distinct bit
// (1 << iota), so they can be combined with bitwise OR. A set bit turns the
// corresponding comparison off in fileChanged.
|
||||
const (
|
||||
// ChangeIgnoreCtime: do not compare the file's ctime with the node's.
ChangeIgnoreCtime = 1 << iota
|
||||
// ChangeIgnoreInode: do not compare the file's inode number with the node's.
ChangeIgnoreInode
|
||||
)
|
||||
|
||||
// Options is used to configure the archiver.
|
||||
type Options struct {
|
||||
// FileReadConcurrency sets how many files are read in concurrently. If
|
||||
|
@ -134,7 +142,6 @@ func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
|
|||
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
||||
StartFile: func(string) {},
|
||||
CompleteBlob: func(string, uint64) {},
|
||||
IgnoreInode: false,
|
||||
}
|
||||
|
||||
return arch
|
||||
|
@ -379,7 +386,7 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
|||
|
||||
// check if the file has not changed before performing a fopen operation (more expensive, especially
|
||||
// in network filesystems)
|
||||
if previous != nil && !fileChanged(fi, previous, arch.IgnoreInode) {
|
||||
if previous != nil && !fileChanged(fi, previous, arch.ChangeIgnoreFlags) {
|
||||
if arch.allBlobsPresent(previous) {
|
||||
debug.Log("%v hasn't changed, using old list of blobs", target)
|
||||
arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start))
|
||||
|
@ -481,36 +488,30 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
|||
return fn, false, nil
|
||||
}
|
||||
|
||||
// fileChanged returns true if the file's content has changed since the node
|
||||
// was created.
|
||||
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreInode bool) bool {
|
||||
if node == nil {
|
||||
// fileChanged tries to detect whether a file's content has changed compared
|
||||
// to the contents of node, which describes the same path in the parent backup.
|
||||
// It should only be run for regular files.
|
||||
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreFlags uint) bool {
|
||||
switch {
|
||||
case node == nil:
|
||||
return true
|
||||
case node.Type != "file":
|
||||
// We're only called for regular files, so this is a type change.
|
||||
return true
|
||||
case uint64(fi.Size()) != node.Size:
|
||||
return true
|
||||
case !fi.ModTime().Equal(node.ModTime):
|
||||
return true
|
||||
}
|
||||
|
||||
// check type change
|
||||
if node.Type != "file" {
|
||||
return true
|
||||
}
|
||||
checkCtime := ignoreFlags&ChangeIgnoreCtime == 0
|
||||
checkInode := ignoreFlags&ChangeIgnoreInode == 0
|
||||
|
||||
// check modification timestamp
|
||||
if !fi.ModTime().Equal(node.ModTime) {
|
||||
return true
|
||||
}
|
||||
|
||||
// check status change timestamp
|
||||
extFI := fs.ExtendedStat(fi)
|
||||
if !ignoreInode && !extFI.ChangeTime.Equal(node.ChangeTime) {
|
||||
switch {
|
||||
case checkCtime && !extFI.ChangeTime.Equal(node.ChangeTime):
|
||||
return true
|
||||
}
|
||||
|
||||
// check size
|
||||
if uint64(fi.Size()) != node.Size || uint64(extFI.Size) != node.Size {
|
||||
return true
|
||||
}
|
||||
|
||||
// check inode
|
||||
if !ignoreInode && node.Inode != extFI.Inode {
|
||||
case checkInode && node.Inode != extFI.Inode:
|
||||
return true
|
||||
}
|
||||
|
||||
|
|
|
@ -505,6 +505,18 @@ func save(t testing.TB, filename string, data []byte) {
|
|||
}
|
||||
}
|
||||
|
||||
// chmodTwice sets the mode of the file called name to 0700 and then, after a
// call to sleep, to 0600. It is meant to change the file's ctime without
// touching its contents. The error from each os.Chmod call is handed to
// restictest.OK.
func chmodTwice(t testing.TB, name string) {
|
||||
// POSIX says that ctime is updated "even if the file status does not
|
||||
// change", but let's make sure it does change, just in case.
|
||||
err := os.Chmod(name, 0700)
|
||||
restictest.OK(t, err)
|
||||
|
||||
// Let time pass between the two mode changes.
sleep()
|
||||
|
||||
err = os.Chmod(name, 0600)
|
||||
restictest.OK(t, err)
|
||||
}
|
||||
|
||||
func lstat(t testing.TB, name string) os.FileInfo {
|
||||
fi, err := os.Lstat(name)
|
||||
if err != nil {
|
||||
|
@ -533,6 +545,13 @@ func remove(t testing.TB, filename string) {
|
|||
}
|
||||
}
|
||||
|
||||
// rename renames oldname to newname using os.Rename and aborts the test with
// t.Fatal if that fails.
func rename(t testing.TB, oldname, newname string) {
|
||||
err := os.Rename(oldname, newname)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
|
||||
node, err := restic.NodeFromFileInfo(filename, fi)
|
||||
if err != nil {
|
||||
|
@ -542,26 +561,26 @@ func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
|
|||
return node
|
||||
}
|
||||
|
||||
// sleep sleeps long enough to ensure a timestamp change: 50ms by default,
// 1.5s when runtime.GOOS is "darwin".
|
||||
func sleep() {
|
||||
d := 50 * time.Millisecond
|
||||
if runtime.GOOS == "darwin" {
|
||||
// On older Darwin instances, the file system only supports one second
|
||||
// granularity, so wait for more than one second there.
|
||||
d = 1500 * time.Millisecond
|
||||
}
|
||||
time.Sleep(d)
|
||||
}
|
||||
|
||||
func TestFileChanged(t *testing.T) {
|
||||
var defaultContent = []byte("foobar")
|
||||
|
||||
var d = 50 * time.Millisecond
|
||||
if runtime.GOOS == "darwin" {
|
||||
// on older darwin instances the file system only supports one second
|
||||
// granularity
|
||||
d = time.Second
|
||||
}
|
||||
|
||||
sleep := func() {
|
||||
time.Sleep(d)
|
||||
}
|
||||
|
||||
var tests = []struct {
|
||||
Name string
|
||||
SkipForWindows bool
|
||||
Content []byte
|
||||
Modify func(t testing.TB, filename string)
|
||||
IgnoreInode bool
|
||||
ChangeIgnore uint
|
||||
SameFile bool
|
||||
}{
|
||||
{
|
||||
|
@ -618,17 +637,33 @@ func TestFileChanged(t *testing.T) {
|
|||
save(t, filename, defaultContent)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "ctime-change",
|
||||
Modify: chmodTwice,
|
||||
SameFile: false,
|
||||
SkipForWindows: true, // No ctime on Windows, so this test would fail.
|
||||
},
|
||||
{
|
||||
Name: "ignore-ctime-change",
|
||||
Modify: chmodTwice,
|
||||
ChangeIgnore: ChangeIgnoreCtime,
|
||||
SameFile: true,
|
||||
SkipForWindows: true, // No ctime on Windows, so this test is meaningless.
|
||||
},
|
||||
{
|
||||
Name: "ignore-inode",
|
||||
Modify: func(t testing.TB, filename string) {
|
||||
fi := lstat(t, filename)
|
||||
remove(t, filename)
|
||||
sleep()
|
||||
// First create the new file, then remove the old one,
|
||||
// so that the old file retains its inode number.
|
||||
tempname := filename + ".old"
|
||||
rename(t, filename, tempname)
|
||||
save(t, filename, defaultContent)
|
||||
remove(t, tempname)
|
||||
setTimestamp(t, filename, fi.ModTime(), fi.ModTime())
|
||||
},
|
||||
IgnoreInode: true,
|
||||
SameFile: true,
|
||||
ChangeIgnore: ChangeIgnoreCtime | ChangeIgnoreInode,
|
||||
SameFile: true,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -651,7 +686,7 @@ func TestFileChanged(t *testing.T) {
|
|||
fiBefore := lstat(t, filename)
|
||||
node := nodeFromFI(t, filename, fiBefore)
|
||||
|
||||
if fileChanged(fiBefore, node, false) {
|
||||
if fileChanged(fiBefore, node, 0) {
|
||||
t.Fatalf("unchanged file detected as changed")
|
||||
}
|
||||
|
||||
|
@ -661,12 +696,12 @@ func TestFileChanged(t *testing.T) {
|
|||
|
||||
if test.SameFile {
|
||||
// file should be detected as unchanged
|
||||
if fileChanged(fiAfter, node, test.IgnoreInode) {
|
||||
if fileChanged(fiAfter, node, test.ChangeIgnore) {
|
||||
t.Fatalf("unmodified file detected as changed")
|
||||
}
|
||||
} else {
|
||||
// file should be detected as changed
|
||||
if !fileChanged(fiAfter, node, test.IgnoreInode) && !test.SameFile {
|
||||
if !fileChanged(fiAfter, node, test.ChangeIgnore) && !test.SameFile {
|
||||
t.Fatalf("modified file detected as unchanged")
|
||||
}
|
||||
}
|
||||
|
@ -684,7 +719,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
|
|||
|
||||
t.Run("nil-node", func(t *testing.T) {
|
||||
fi := lstat(t, filename)
|
||||
if !fileChanged(fi, nil, false) {
|
||||
if !fileChanged(fi, nil, 0) {
|
||||
t.Fatal("nil node detected as unchanged")
|
||||
}
|
||||
})
|
||||
|
@ -693,7 +728,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
|
|||
fi := lstat(t, filename)
|
||||
node := nodeFromFI(t, filename, fi)
|
||||
node.Type = "symlink"
|
||||
if !fileChanged(fi, node, false) {
|
||||
if !fileChanged(fi, node, 0) {
|
||||
t.Fatal("node with changed type detected as unchanged")
|
||||
}
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue