Merge pull request #4006 from MichaelEischer/deviceID-only-for-hardlinks
archiver: only store deviceID for hardlinks
This commit is contained in:
commit
71b6284155
9 changed files with 113 additions and 8 deletions
16
changelog/unreleased/pull-4006
Normal file
16
changelog/unreleased/pull-4006
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
Enhancement: (alpha) Store deviceID only for hardlinks
|
||||||
|
|
||||||
|
Set `RESTIC_FEATURES=device-id-for-hardlinks` to enable this alpha feature.
|
||||||
|
The feature flag will be removed after repository format version 3 becomes
|
||||||
|
available or be replaced with a different solution.
|
||||||
|
|
||||||
|
When creating backups from a filesystem snapshot, for example created using
|
||||||
|
btrfs subvolumes, the deviceID of the filesystem changes compared to previous
|
||||||
|
snapshots. This prevented restic from deduplicating the directory metadata of
|
||||||
|
a snapshot.
|
||||||
|
|
||||||
|
When this alpha feature is enabled, then the deviceID is only stored for
|
||||||
|
hardlinks. This significantly reduces the metadata duplication for most
|
||||||
|
backups.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/pull/4006
|
|
@ -4,4 +4,5 @@ If files on different devices had the same inode id, then the `stats` command
|
||||||
did not correctly calculate the snapshot size. This has been fixed.
|
did not correctly calculate the snapshot size. This has been fixed.
|
||||||
|
|
||||||
https://github.com/restic/restic/pull/4503
|
https://github.com/restic/restic/pull/4503
|
||||||
|
https://github.com/restic/restic/pull/4006
|
||||||
https://forum.restic.net/t/possible-bug-in-stats/6461/8
|
https://forum.restic.net/t/possible-bug-in-stats/6461/8
|
||||||
|
|
|
@ -270,11 +270,14 @@ func statsWalkTree(repo restic.Loader, opts StatsOptions, stats *statsContainer,
|
||||||
// will still be restored
|
// will still be restored
|
||||||
stats.TotalFileCount++
|
stats.TotalFileCount++
|
||||||
|
|
||||||
// if inodes are present, only count each inode once
|
if node.Links == 1 || node.Type == "dir" {
|
||||||
// (hard links do not increase restore size)
|
|
||||||
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
|
|
||||||
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
|
|
||||||
stats.TotalSize += node.Size
|
stats.TotalSize += node.Size
|
||||||
|
} else {
|
||||||
|
// if hardlinks are present only count each deviceID+inode once
|
||||||
|
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
|
||||||
|
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
|
||||||
|
stats.TotalSize += node.Size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
|
|
||||||
"github.com/restic/restic/internal/debug"
|
"github.com/restic/restic/internal/debug"
|
||||||
"github.com/restic/restic/internal/errors"
|
"github.com/restic/restic/internal/errors"
|
||||||
|
"github.com/restic/restic/internal/feature"
|
||||||
"github.com/restic/restic/internal/fs"
|
"github.com/restic/restic/internal/fs"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
|
@ -188,6 +189,14 @@ func (arch *Archiver) nodeFromFileInfo(snPath, filename string, fi os.FileInfo)
|
||||||
if !arch.WithAtime {
|
if !arch.WithAtime {
|
||||||
node.AccessTime = node.ModTime
|
node.AccessTime = node.ModTime
|
||||||
}
|
}
|
||||||
|
if feature.Flag.Enabled(feature.DeviceIDForHardlinks) {
|
||||||
|
if node.Links == 1 || node.Type == "dir" {
|
||||||
|
// the DeviceID is only necessary for hardlinked files
|
||||||
|
// when using subvolumes or snapshots their deviceIDs tend to change which causes
|
||||||
|
// restic to upload new tree blobs
|
||||||
|
node.DeviceID = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
// overwrite name to match that within the snapshot
|
// overwrite name to match that within the snapshot
|
||||||
node.Name = path.Base(snPath)
|
node.Name = path.Base(snPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -19,6 +19,7 @@ import (
|
||||||
"github.com/restic/restic/internal/backend/mem"
|
"github.com/restic/restic/internal/backend/mem"
|
||||||
"github.com/restic/restic/internal/checker"
|
"github.com/restic/restic/internal/checker"
|
||||||
"github.com/restic/restic/internal/errors"
|
"github.com/restic/restic/internal/errors"
|
||||||
|
"github.com/restic/restic/internal/feature"
|
||||||
"github.com/restic/restic/internal/fs"
|
"github.com/restic/restic/internal/fs"
|
||||||
"github.com/restic/restic/internal/repository"
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
|
@ -2125,6 +2126,8 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMetadataChanged(t *testing.T) {
|
func TestMetadataChanged(t *testing.T) {
|
||||||
|
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()
|
||||||
|
|
||||||
files := TestDir{
|
files := TestDir{
|
||||||
"testfile": TestFile{
|
"testfile": TestFile{
|
||||||
Content: "foo bar test file",
|
Content: "foo bar test file",
|
||||||
|
@ -2153,6 +2156,7 @@ func TestMetadataChanged(t *testing.T) {
|
||||||
sn, node2 := snapshot(t, repo, fs, nil, "testfile")
|
sn, node2 := snapshot(t, repo, fs, nil, "testfile")
|
||||||
|
|
||||||
// set some values so we can then compare the nodes
|
// set some values so we can then compare the nodes
|
||||||
|
want.DeviceID = 0
|
||||||
want.Content = node2.Content
|
want.Content = node2.Content
|
||||||
want.Path = ""
|
want.Path = ""
|
||||||
if len(want.ExtendedAttributes) == 0 {
|
if len(want.ExtendedAttributes) == 0 {
|
||||||
|
|
|
@ -6,6 +6,12 @@ package archiver
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/restic/restic/internal/feature"
|
||||||
|
"github.com/restic/restic/internal/fs"
|
||||||
|
"github.com/restic/restic/internal/restic"
|
||||||
|
restictest "github.com/restic/restic/internal/test"
|
||||||
)
|
)
|
||||||
|
|
||||||
type wrappedFileInfo struct {
|
type wrappedFileInfo struct {
|
||||||
|
@ -39,3 +45,45 @@ func wrapFileInfo(fi os.FileInfo) os.FileInfo {
|
||||||
|
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func statAndSnapshot(t *testing.T, repo restic.Repository, name string) (*restic.Node, *restic.Node) {
|
||||||
|
fi := lstat(t, name)
|
||||||
|
want, err := restic.NodeFromFileInfo(name, fi)
|
||||||
|
restictest.OK(t, err)
|
||||||
|
|
||||||
|
_, node := snapshot(t, repo, fs.Local{}, nil, name)
|
||||||
|
return want, node
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHardlinkMetadata(t *testing.T) {
|
||||||
|
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()
|
||||||
|
|
||||||
|
files := TestDir{
|
||||||
|
"testfile": TestFile{
|
||||||
|
Content: "foo bar test file",
|
||||||
|
},
|
||||||
|
"linktarget": TestFile{
|
||||||
|
Content: "test file",
|
||||||
|
},
|
||||||
|
"testlink": TestHardlink{
|
||||||
|
Target: "./linktarget",
|
||||||
|
},
|
||||||
|
"testdir": TestDir{},
|
||||||
|
}
|
||||||
|
|
||||||
|
tempdir, repo := prepareTempdirRepoSrc(t, files)
|
||||||
|
|
||||||
|
back := restictest.Chdir(t, tempdir)
|
||||||
|
defer back()
|
||||||
|
|
||||||
|
want, node := statAndSnapshot(t, repo, "testlink")
|
||||||
|
restictest.Assert(t, node.DeviceID == want.DeviceID, "device id mismatch expected %v got %v", want.DeviceID, node.DeviceID)
|
||||||
|
restictest.Assert(t, node.Links == want.Links, "link count mismatch expected %v got %v", want.Links, node.Links)
|
||||||
|
restictest.Assert(t, node.Inode == want.Inode, "inode mismatch expected %v got %v", want.Inode, node.Inode)
|
||||||
|
|
||||||
|
_, node = statAndSnapshot(t, repo, "testfile")
|
||||||
|
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testfile expected %v got %v", 0, node.DeviceID)
|
||||||
|
|
||||||
|
_, node = statAndSnapshot(t, repo, "testdir")
|
||||||
|
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testdir expected %v got %v", 0, node.DeviceID)
|
||||||
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import (
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
@ -63,11 +64,29 @@ func (s TestSymlink) String() string {
|
||||||
return "<Symlink>"
|
return "<Symlink>"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestHardlink describes a hardlink created for a test.
|
||||||
|
type TestHardlink struct {
|
||||||
|
Target string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s TestHardlink) String() string {
|
||||||
|
return "<Hardlink>"
|
||||||
|
}
|
||||||
|
|
||||||
// TestCreateFiles creates a directory structure described by dir at target,
|
// TestCreateFiles creates a directory structure described by dir at target,
|
||||||
// which must already exist. On Windows, symlinks aren't created.
|
// which must already exist. On Windows, symlinks aren't created.
|
||||||
func TestCreateFiles(t testing.TB, target string, dir TestDir) {
|
func TestCreateFiles(t testing.TB, target string, dir TestDir) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
for name, item := range dir {
|
|
||||||
|
// ensure a stable order such that it can be guaranteed that a hardlink target already exists
|
||||||
|
var names []string
|
||||||
|
for name := range dir {
|
||||||
|
names = append(names, name)
|
||||||
|
}
|
||||||
|
sort.Strings(names)
|
||||||
|
|
||||||
|
for _, name := range names {
|
||||||
|
item := dir[name]
|
||||||
targetPath := filepath.Join(target, name)
|
targetPath := filepath.Join(target, name)
|
||||||
|
|
||||||
switch it := item.(type) {
|
switch it := item.(type) {
|
||||||
|
@ -81,6 +100,11 @@ func TestCreateFiles(t testing.TB, target string, dir TestDir) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
case TestHardlink:
|
||||||
|
err := fs.Link(filepath.Join(target, filepath.FromSlash(it.Target)), targetPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
case TestDir:
|
case TestDir:
|
||||||
err := fs.Mkdir(targetPath, 0755)
|
err := fs.Mkdir(targetPath, 0755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -5,13 +5,13 @@ var Flag = New()
|
||||||
|
|
||||||
// flag names are written in kebab-case
|
// flag names are written in kebab-case
|
||||||
const (
|
const (
|
||||||
ExampleFeature FlagName = "example-feature"
|
|
||||||
DeprecateLegacyIndex FlagName = "deprecate-legacy-index"
|
DeprecateLegacyIndex FlagName = "deprecate-legacy-index"
|
||||||
|
DeviceIDForHardlinks FlagName = "device-id-for-hardlinks"
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
Flag.SetFlags(map[FlagName]FlagDesc{
|
Flag.SetFlags(map[FlagName]FlagDesc{
|
||||||
ExampleFeature: {Type: Alpha, Description: "just for testing"},
|
|
||||||
DeprecateLegacyIndex: {Type: Beta, Description: "disable support for index format used by restic 0.1.0. Use `restic repair index` to update the index if necessary."},
|
DeprecateLegacyIndex: {Type: Beta, Description: "disable support for index format used by restic 0.1.0. Use `restic repair index` to update the index if necessary."},
|
||||||
|
DeviceIDForHardlinks: {Type: Alpha, Description: "store deviceID only for hardlinks to reduce metadata changes for example when using btrfs subvolumes. Will be removed in a future restic version after repository format 3 is available"},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,7 +82,7 @@ type Node struct {
|
||||||
User string `json:"user,omitempty"`
|
User string `json:"user,omitempty"`
|
||||||
Group string `json:"group,omitempty"`
|
Group string `json:"group,omitempty"`
|
||||||
Inode uint64 `json:"inode,omitempty"`
|
Inode uint64 `json:"inode,omitempty"`
|
||||||
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
|
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev, only stored for hardlinks
|
||||||
Size uint64 `json:"size,omitempty"`
|
Size uint64 `json:"size,omitempty"`
|
||||||
Links uint64 `json:"links,omitempty"`
|
Links uint64 `json:"links,omitempty"`
|
||||||
LinkTarget string `json:"linktarget,omitempty"`
|
LinkTarget string `json:"linktarget,omitempty"`
|
||||||
|
|
Loading…
Reference in a new issue