forked from TrueCloudLab/restic
Count unique files by blob sequence rather than tree ID
This commit is contained in:
parent
f7659bd8b0
commit
925b542eb0
1 changed files with 24 additions and 4 deletions
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
@ -51,7 +52,7 @@ func runStats(gopts GlobalOptions, args []string) error {
|
||||||
|
|
||||||
// create a container for the stats, and other state
|
// create a container for the stats, and other state
|
||||||
// needed while walking the trees
|
// needed while walking the trees
|
||||||
stats := &statsContainer{idSet: restic.NewIDSet()}
|
stats := &statsContainer{uniqueFiles: make(map[fileID]struct{}), idSet: make(restic.IDSet)}
|
||||||
|
|
||||||
// iterate every snapshot in the repo
|
// iterate every snapshot in the repo
|
||||||
err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error {
|
err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error {
|
||||||
|
@ -96,10 +97,18 @@ func walkTree(ctx context.Context, repo restic.Repository, treeID restic.ID, sta
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, node := range tree.Nodes {
|
for _, node := range tree.Nodes {
|
||||||
// update our stats to account for this node
|
// only count this file if we haven't visited it before
|
||||||
stats.TotalOriginalSize += node.Size
|
fid := makeFileID(node)
|
||||||
stats.TotalCount++
|
if _, ok := stats.uniqueFiles[fid]; !ok {
|
||||||
|
// mark the file as visited
|
||||||
|
stats.uniqueFiles[fid] = struct{}{}
|
||||||
|
|
||||||
|
// update our stats to account for this node
|
||||||
|
stats.TotalOriginalSize += node.Size
|
||||||
|
stats.TotalCount++
|
||||||
|
}
|
||||||
|
|
||||||
|
// visit subtrees (i.e. directory contents)
|
||||||
if node.Subtree != nil {
|
if node.Subtree != nil {
|
||||||
err = walkTree(ctx, repo, *node.Subtree, stats)
|
err = walkTree(ctx, repo, *node.Subtree, stats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -111,6 +120,14 @@ func walkTree(ctx context.Context, repo restic.Repository, treeID restic.ID, sta
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func makeFileID(node *restic.Node) fileID {
|
||||||
|
var bb []byte
|
||||||
|
for _, c := range node.Content {
|
||||||
|
bb = append(bb, []byte(c[:])...)
|
||||||
|
}
|
||||||
|
return sha256.Sum256(bb)
|
||||||
|
}
|
||||||
|
|
||||||
// statsContainer holds information during a walk of a repository
|
// statsContainer holds information during a walk of a repository
|
||||||
// to collect information about it, as well as state needed
|
// to collect information about it, as well as state needed
|
||||||
// for a successful and efficient walk.
|
// for a successful and efficient walk.
|
||||||
|
@ -118,4 +135,7 @@ type statsContainer struct {
|
||||||
TotalCount uint64 `json:"total_count"`
|
TotalCount uint64 `json:"total_count"`
|
||||||
TotalOriginalSize uint64 `json:"total_original_size"`
|
TotalOriginalSize uint64 `json:"total_original_size"`
|
||||||
idSet restic.IDSet
|
idSet restic.IDSet
|
||||||
|
uniqueFiles map[fileID]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fileID identifies a file by the SHA-256 digest of its content blob
// sequence; it is used as the key of statsContainer.uniqueFiles.
type fileID [sha256.Size]byte
|
||||||
|
|
Loading…
Reference in a new issue