Merge pull request #4006 from MichaelEischer/deviceID-only-for-hardlinks
archiver: only store deviceID for hardlinks
This commit is contained in:
commit
71b6284155
9 changed files with 113 additions and 8 deletions
16
changelog/unreleased/pull-4006
Normal file
16
changelog/unreleased/pull-4006
Normal file
|
@ -0,0 +1,16 @@
|
|||
Enhancement: (alpha) Store deviceID only for hardlinks
|
||||
|
||||
Set `RESTIC_FEATURES=device-id-for-hardlinks` to enable this alpha feature.
|
||||
The feature flag will be removed after repository format version 3 becomes
|
||||
available or be replaced with a different solution.
|
||||
|
||||
When creating backups from a filesystem snapshot, for example created using
|
||||
btrfs subvolumes, the deviceID of the filesystem changes compared to previous
|
||||
snapshots. This prevented restic from deduplicating the directory metadata of
|
||||
a snapshot.
|
||||
|
||||
When this alpha feature is enabled, then the deviceID is only stored for
|
||||
hardlinks. This significantly reduces the metadata duplication for most
|
||||
backups.
|
||||
|
||||
https://github.com/restic/restic/pull/4006
|
|
@ -4,4 +4,5 @@ If files on different devices had the same inode id, then the `stats` command
|
|||
did not correctly calculate the snapshot size. This has been fixed.
|
||||
|
||||
https://github.com/restic/restic/pull/4503
|
||||
https://github.com/restic/restic/pull/4006
|
||||
https://forum.restic.net/t/possible-bug-in-stats/6461/8
|
||||
|
|
|
@ -270,11 +270,14 @@ func statsWalkTree(repo restic.Loader, opts StatsOptions, stats *statsContainer,
|
|||
// will still be restored
|
||||
stats.TotalFileCount++
|
||||
|
||||
// if inodes are present, only count each inode once
|
||||
// (hard links do not increase restore size)
|
||||
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
|
||||
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
|
||||
if node.Links == 1 || node.Type == "dir" {
|
||||
stats.TotalSize += node.Size
|
||||
} else {
|
||||
// if hardlinks are present only count each deviceID+inode once
|
||||
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
|
||||
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
|
||||
stats.TotalSize += node.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/feature"
|
||||
"github.com/restic/restic/internal/fs"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
@ -188,6 +189,14 @@ func (arch *Archiver) nodeFromFileInfo(snPath, filename string, fi os.FileInfo)
|
|||
if !arch.WithAtime {
|
||||
node.AccessTime = node.ModTime
|
||||
}
|
||||
if feature.Flag.Enabled(feature.DeviceIDForHardlinks) {
|
||||
if node.Links == 1 || node.Type == "dir" {
|
||||
// the DeviceID is only necessary for hardlinked files
|
||||
// when using subvolumes or snapshots their deviceIDs tend to change which causes
|
||||
// restic to upload new tree blobs
|
||||
node.DeviceID = 0
|
||||
}
|
||||
}
|
||||
// overwrite name to match that within the snapshot
|
||||
node.Name = path.Base(snPath)
|
||||
if err != nil {
|
||||
|
|
|
@ -19,6 +19,7 @@ import (
|
|||
"github.com/restic/restic/internal/backend/mem"
|
||||
"github.com/restic/restic/internal/checker"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/feature"
|
||||
"github.com/restic/restic/internal/fs"
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
@ -2125,6 +2126,8 @@ const (
|
|||
)
|
||||
|
||||
func TestMetadataChanged(t *testing.T) {
|
||||
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()
|
||||
|
||||
files := TestDir{
|
||||
"testfile": TestFile{
|
||||
Content: "foo bar test file",
|
||||
|
@ -2153,6 +2156,7 @@ func TestMetadataChanged(t *testing.T) {
|
|||
sn, node2 := snapshot(t, repo, fs, nil, "testfile")
|
||||
|
||||
// set some values so we can then compare the nodes
|
||||
want.DeviceID = 0
|
||||
want.Content = node2.Content
|
||||
want.Path = ""
|
||||
if len(want.ExtendedAttributes) == 0 {
|
||||
|
|
|
@ -6,6 +6,12 @@ package archiver
|
|||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/internal/feature"
|
||||
"github.com/restic/restic/internal/fs"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
restictest "github.com/restic/restic/internal/test"
|
||||
)
|
||||
|
||||
type wrappedFileInfo struct {
|
||||
|
@ -39,3 +45,45 @@ func wrapFileInfo(fi os.FileInfo) os.FileInfo {
|
|||
|
||||
return res
|
||||
}
|
||||
|
||||
func statAndSnapshot(t *testing.T, repo restic.Repository, name string) (*restic.Node, *restic.Node) {
|
||||
fi := lstat(t, name)
|
||||
want, err := restic.NodeFromFileInfo(name, fi)
|
||||
restictest.OK(t, err)
|
||||
|
||||
_, node := snapshot(t, repo, fs.Local{}, nil, name)
|
||||
return want, node
|
||||
}
|
||||
|
||||
func TestHardlinkMetadata(t *testing.T) {
|
||||
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()
|
||||
|
||||
files := TestDir{
|
||||
"testfile": TestFile{
|
||||
Content: "foo bar test file",
|
||||
},
|
||||
"linktarget": TestFile{
|
||||
Content: "test file",
|
||||
},
|
||||
"testlink": TestHardlink{
|
||||
Target: "./linktarget",
|
||||
},
|
||||
"testdir": TestDir{},
|
||||
}
|
||||
|
||||
tempdir, repo := prepareTempdirRepoSrc(t, files)
|
||||
|
||||
back := restictest.Chdir(t, tempdir)
|
||||
defer back()
|
||||
|
||||
want, node := statAndSnapshot(t, repo, "testlink")
|
||||
restictest.Assert(t, node.DeviceID == want.DeviceID, "device id mismatch expected %v got %v", want.DeviceID, node.DeviceID)
|
||||
restictest.Assert(t, node.Links == want.Links, "link count mismatch expected %v got %v", want.Links, node.Links)
|
||||
restictest.Assert(t, node.Inode == want.Inode, "inode mismatch expected %v got %v", want.Inode, node.Inode)
|
||||
|
||||
_, node = statAndSnapshot(t, repo, "testfile")
|
||||
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testfile expected %v got %v", 0, node.DeviceID)
|
||||
|
||||
_, node = statAndSnapshot(t, repo, "testdir")
|
||||
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testdir expected %v got %v", 0, node.DeviceID)
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -63,11 +64,29 @@ func (s TestSymlink) String() string {
|
|||
return "<Symlink>"
|
||||
}
|
||||
|
||||
// TestHardlink describes a hardlink created for a test.
|
||||
type TestHardlink struct {
|
||||
Target string
|
||||
}
|
||||
|
||||
func (s TestHardlink) String() string {
|
||||
return "<Hardlink>"
|
||||
}
|
||||
|
||||
// TestCreateFiles creates a directory structure described by dir at target,
|
||||
// which must already exist. On Windows, symlinks aren't created.
|
||||
func TestCreateFiles(t testing.TB, target string, dir TestDir) {
|
||||
t.Helper()
|
||||
for name, item := range dir {
|
||||
|
||||
// ensure a stable order such that it can be guaranteed that a hardlink target already exists
|
||||
var names []string
|
||||
for name := range dir {
|
||||
names = append(names, name)
|
||||
}
|
||||
sort.Strings(names)
|
||||
|
||||
for _, name := range names {
|
||||
item := dir[name]
|
||||
targetPath := filepath.Join(target, name)
|
||||
|
||||
switch it := item.(type) {
|
||||
|
@ -81,6 +100,11 @@ func TestCreateFiles(t testing.TB, target string, dir TestDir) {
|
|||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
case TestHardlink:
|
||||
err := fs.Link(filepath.Join(target, filepath.FromSlash(it.Target)), targetPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
case TestDir:
|
||||
err := fs.Mkdir(targetPath, 0755)
|
||||
if err != nil {
|
||||
|
|
|
@ -5,13 +5,13 @@ var Flag = New()
|
|||
|
||||
// flag names are written in kebab-case
|
||||
const (
|
||||
ExampleFeature FlagName = "example-feature"
|
||||
DeprecateLegacyIndex FlagName = "deprecate-legacy-index"
|
||||
DeviceIDForHardlinks FlagName = "device-id-for-hardlinks"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Flag.SetFlags(map[FlagName]FlagDesc{
|
||||
ExampleFeature: {Type: Alpha, Description: "just for testing"},
|
||||
DeprecateLegacyIndex: {Type: Beta, Description: "disable support for index format used by restic 0.1.0. Use `restic repair index` to update the index if necessary."},
|
||||
DeviceIDForHardlinks: {Type: Alpha, Description: "store deviceID only for hardlinks to reduce metadata changes for example when using btrfs subvolumes. Will be removed in a future restic version after repository format 3 is available"},
|
||||
})
|
||||
}
|
||||
|
|
|
@ -82,7 +82,7 @@ type Node struct {
|
|||
User string `json:"user,omitempty"`
|
||||
Group string `json:"group,omitempty"`
|
||||
Inode uint64 `json:"inode,omitempty"`
|
||||
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
|
||||
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev, only stored for hardlinks
|
||||
Size uint64 `json:"size,omitempty"`
|
||||
Links uint64 `json:"links,omitempty"`
|
||||
LinkTarget string `json:"linktarget,omitempty"`
|
||||
|
|
Loading…
Reference in a new issue