Merge pull request #4503 from MichaelEischer/fix-stats-with-hardlinks

stats: Fix hardlink tracking across multiple filesystems
This commit is contained in:
Michael Eischer 2023-10-27 23:53:03 +02:00 committed by GitHub
commit 104107886a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 30 additions and 22 deletions

View file

@ -0,0 +1,7 @@
Bugfix: Correct hardlink handling in `stats` command
If files on different devices had the same inode ID, then the `stats` command
did not correctly calculate the snapshot size, because hard links were tracked
by inode number alone. This has been fixed.
https://github.com/restic/restic/pull/4503
https://forum.restic.net/t/possible-bug-in-stats/6461/8

View file

@ -11,6 +11,7 @@ import (
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/restorer"
"github.com/restic/restic/internal/ui"
"github.com/restic/restic/internal/ui/table"
"github.com/restic/restic/internal/walker"
@ -201,8 +202,8 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
return restic.FindUsedBlobs(ctx, repo, restic.IDs{*snapshot.Tree}, stats.blobs, nil)
}
uniqueInodes := make(map[uint64]struct{})
err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, uniqueInodes))
hardLinkIndex := restorer.NewHardlinkIndex[struct{}]()
err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, hardLinkIndex))
if err != nil {
return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err)
}
@ -210,7 +211,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
return nil
}
func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, uniqueInodes map[uint64]struct{}) walker.WalkFunc {
func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, hardLinkIndex *restorer.HardlinkIndex[struct{}]) walker.WalkFunc {
return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) (bool, error) {
if nodeErr != nil {
return true, nodeErr
@ -269,8 +270,8 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai
// if inodes are present, only count each (inode, device) pair once
// (hard links do not increase restore size)
if _, ok := uniqueInodes[node.Inode]; !ok || node.Inode == 0 {
uniqueInodes[node.Inode] = struct{}{}
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
stats.TotalSize += node.Size
}

View file

@ -10,20 +10,20 @@ type HardlinkKey struct {
}
// HardlinkIndex contains a list of inodes, the devices these inodes are on, and a value associated with each of them.
type HardlinkIndex struct {
type HardlinkIndex[T any] struct {
m sync.Mutex
Index map[HardlinkKey]string
Index map[HardlinkKey]T
}
// NewHardlinkIndex creates a new index for hard links.
func NewHardlinkIndex() *HardlinkIndex {
return &HardlinkIndex{
Index: make(map[HardlinkKey]string),
func NewHardlinkIndex[T any]() *HardlinkIndex[T] {
return &HardlinkIndex[T]{
Index: make(map[HardlinkKey]T),
}
}
// Has checks whether the link already exists in the index.
func (idx *HardlinkIndex) Has(inode uint64, device uint64) bool {
func (idx *HardlinkIndex[T]) Has(inode uint64, device uint64) bool {
idx.m.Lock()
defer idx.m.Unlock()
_, ok := idx.Index[HardlinkKey{inode, device}]
@ -32,25 +32,25 @@ func (idx *HardlinkIndex) Has(inode uint64, device uint64) bool {
}
// Add adds a link to the index. An existing entry for the same inode and device is left unchanged.
func (idx *HardlinkIndex) Add(inode uint64, device uint64, name string) {
func (idx *HardlinkIndex[T]) Add(inode uint64, device uint64, value T) {
idx.m.Lock()
defer idx.m.Unlock()
_, ok := idx.Index[HardlinkKey{inode, device}]
if !ok {
idx.Index[HardlinkKey{inode, device}] = name
idx.Index[HardlinkKey{inode, device}] = value
}
}
// GetFilename obtains the filename from the index.
func (idx *HardlinkIndex) GetFilename(inode uint64, device uint64) string {
// Value obtains the value stored in the index for the given inode and device.
func (idx *HardlinkIndex[T]) Value(inode uint64, device uint64) T {
idx.m.Lock()
defer idx.m.Unlock()
return idx.Index[HardlinkKey{inode, device}]
}
// Remove removes a link from the index.
func (idx *HardlinkIndex) Remove(inode uint64, device uint64) {
func (idx *HardlinkIndex[T]) Remove(inode uint64, device uint64) {
idx.m.Lock()
defer idx.m.Unlock()
delete(idx.Index, HardlinkKey{inode, device})

View file

@ -10,15 +10,15 @@ import (
// TestHardLinks contains various tests for HardlinkIndex.
func TestHardLinks(t *testing.T) {
idx := restorer.NewHardlinkIndex()
idx := restorer.NewHardlinkIndex[string]()
idx.Add(1, 2, "inode1-file1-on-device2")
idx.Add(2, 3, "inode2-file2-on-device3")
sresult := idx.GetFilename(1, 2)
sresult := idx.Value(1, 2)
rtest.Equals(t, sresult, "inode1-file1-on-device2")
sresult = idx.GetFilename(2, 3)
sresult = idx.Value(2, 3)
rtest.Equals(t, sresult, "inode2-file2-on-device3")
bresult := idx.Has(1, 2)

View file

@ -230,7 +230,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
}
}
idx := NewHardlinkIndex()
idx := NewHardlinkIndex[string]()
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup,
res.repo.Connections(), res.sparse, res.progress)
filerestorer.Error = res.Error
@ -319,8 +319,8 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return res.restoreEmptyFileAt(node, target, location)
}
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
if idx.Has(node.Inode, node.DeviceID) && idx.Value(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.Value(node.Inode, node.DeviceID)), target, location)
}
return res.restoreNodeMetadataTo(node, target, location)