forked from TrueCloudLab/restic
Merge pull request #2859 from buschjost/stats-filter-by-tag-and-path
Add filter by tag and path to stats command
This commit is contained in:
commit
55071ee367
4 changed files with 67 additions and 88 deletions
9
changelog/unreleased/issue-2858
Normal file
9
changelog/unreleased/issue-2858
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
Enhancement: Support filtering snapshots by tag and path in the stats command
|
||||||
|
|
||||||
|
We've added filtering snapshots by `--tag tagList` and by `--path path` to
|
||||||
|
the `stats` command. This includes filtering of only 'latest' snapshots or
|
||||||
|
all snapshots in a repository.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/2858
|
||||||
|
https://github.com/restic/restic/pull/2859
|
||||||
|
https://forum.restic.net/t/stats-for-a-host-and-filtered-snapshots/3020
|
|
@ -6,7 +6,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/restic/restic/internal/errors"
|
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
"github.com/restic/restic/internal/walker"
|
"github.com/restic/restic/internal/walker"
|
||||||
|
|
||||||
|
@ -15,18 +14,19 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var cmdStats = &cobra.Command{
|
var cmdStats = &cobra.Command{
|
||||||
Use: "stats [flags] [snapshot-ID]",
|
Use: "stats [flags] [snapshot ID] [...]",
|
||||||
Short: "Scan the repository and show basic statistics",
|
Short: "Scan the repository and show basic statistics",
|
||||||
Long: `
|
Long: `
|
||||||
The "stats" command walks one or all snapshots in a repository and
|
The "stats" command walks one or multiple snapshots in a repository
|
||||||
accumulates statistics about the data stored therein. It reports on
|
and accumulates statistics about the data stored therein. It reports
|
||||||
the number of unique files and their sizes, according to one of
|
on the number of unique files and their sizes, according to one of
|
||||||
the counting modes as given by the --mode flag.
|
the counting modes as given by the --mode flag.
|
||||||
|
|
||||||
If no snapshot is specified, all snapshots will be considered. Some
|
It operates on all snapshots matching the selection criteria or all
|
||||||
modes make more sense over just a single snapshot, while others
|
snapshots if nothing is specified. The special snapshot ID "latest"
|
||||||
are useful across all snapshots, depending on what you are trying
|
is also supported. Some modes make more sense over
|
||||||
to calculate.
|
just a single snapshot, while others are useful across all snapshots,
|
||||||
|
depending on what you are trying to calculate.
|
||||||
|
|
||||||
The modes are:
|
The modes are:
|
||||||
|
|
||||||
|
@ -50,11 +50,26 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StatsOptions collects all options for the stats command.
|
||||||
|
type StatsOptions struct {
|
||||||
|
// the mode of counting to perform (see consts for available modes)
|
||||||
|
countMode string
|
||||||
|
|
||||||
|
// filter snapshots by, if given by user
|
||||||
|
Hosts []string
|
||||||
|
Tags restic.TagLists
|
||||||
|
Paths []string
|
||||||
|
}
|
||||||
|
|
||||||
|
var statsOptions StatsOptions
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
cmdRoot.AddCommand(cmdStats)
|
cmdRoot.AddCommand(cmdStats)
|
||||||
f := cmdStats.Flags()
|
f := cmdStats.Flags()
|
||||||
f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data")
|
f.StringVar(&statsOptions.countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file or raw-data")
|
||||||
f.StringArrayVarP(&snapshotByHosts, "host", "H", nil, "filter latest snapshot by this hostname (can be specified multiple times)")
|
f.StringArrayVarP(&statsOptions.Hosts, "host", "H", nil, "only consider snapshots with the given `host` (can be specified multiple times)")
|
||||||
|
f.Var(&statsOptions.Tags, "tag", "only consider snapshots which include this `taglist` in the format `tag[,tag,...]` (can be specified multiple times)")
|
||||||
|
f.StringArrayVar(&statsOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` (can be specified multiple times)")
|
||||||
}
|
}
|
||||||
|
|
||||||
func runStats(gopts GlobalOptions, args []string) error {
|
func runStats(gopts GlobalOptions, args []string) error {
|
||||||
|
@ -93,48 +108,21 @@ func runStats(gopts GlobalOptions, args []string) error {
|
||||||
uniqueInodes: make(map[uint64]struct{}),
|
uniqueInodes: make(map[uint64]struct{}),
|
||||||
fileBlobs: make(map[string]restic.IDSet),
|
fileBlobs: make(map[string]restic.IDSet),
|
||||||
blobs: restic.NewBlobSet(),
|
blobs: restic.NewBlobSet(),
|
||||||
|
snapshotsCount: 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
if snapshotIDString != "" {
|
for sn := range FindFilteredSnapshots(ctx, repo, statsOptions.Hosts, statsOptions.Tags, statsOptions.Paths, args) {
|
||||||
// scan just a single snapshot
|
err = statsWalkSnapshot(ctx, sn, repo, stats)
|
||||||
|
|
||||||
var sID restic.ID
|
|
||||||
if snapshotIDString == "latest" {
|
|
||||||
sID, err = restic.FindLatestSnapshot(ctx, repo, []string{}, []restic.TagList{}, snapshotByHosts)
|
|
||||||
if err != nil {
|
|
||||||
return errors.Fatalf("latest snapshot for criteria not found: %v", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sID, err = restic.FindSnapshot(repo, snapshotIDString)
|
|
||||||
if err != nil {
|
|
||||||
return errors.Fatalf("error loading snapshot: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
snapshot, err := restic.LoadSnapshot(ctx, repo, sID)
|
|
||||||
if err != nil {
|
|
||||||
return errors.Fatalf("error loading snapshot from repo: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = statsWalkSnapshot(ctx, snapshot, repo, stats)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error walking snapshot: %v", err)
|
return fmt.Errorf("error walking snapshot: %v", err)
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// iterate every snapshot in the repo
|
|
||||||
err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error {
|
|
||||||
snapshot, err := restic.LoadSnapshot(ctx, repo, snapshotID)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("Error loading snapshot %s: %v", snapshotID.Str(), err)
|
|
||||||
}
|
|
||||||
return statsWalkSnapshot(ctx, snapshot, repo, stats)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if countMode == countModeRawData {
|
if statsOptions.countMode == countModeRawData {
|
||||||
// the blob handles have been collected, but not yet counted
|
// the blob handles have been collected, but not yet counted
|
||||||
for blobHandle := range stats.blobs {
|
for blobHandle := range stats.blobs {
|
||||||
blobSize, found := repo.LookupBlobSize(blobHandle.ID, blobHandle.Type)
|
blobSize, found := repo.LookupBlobSize(blobHandle.ID, blobHandle.Type)
|
||||||
|
@ -154,14 +142,8 @@ func runStats(gopts GlobalOptions, args []string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// inform the user what was scanned and how it was scanned
|
Printf("Stats in %s mode:\n", statsOptions.countMode)
|
||||||
snapshotsScanned := snapshotIDString
|
Printf("Snapshots processed: %d\n", stats.snapshotsCount)
|
||||||
if snapshotsScanned == "latest" {
|
|
||||||
snapshotsScanned = "the latest snapshot"
|
|
||||||
} else if snapshotsScanned == "" {
|
|
||||||
snapshotsScanned = "all snapshots"
|
|
||||||
}
|
|
||||||
Printf("Stats for %s in %s mode:\n", snapshotsScanned, countMode)
|
|
||||||
|
|
||||||
if stats.TotalBlobCount > 0 {
|
if stats.TotalBlobCount > 0 {
|
||||||
Printf(" Total Blob Count: %d\n", stats.TotalBlobCount)
|
Printf(" Total Blob Count: %d\n", stats.TotalBlobCount)
|
||||||
|
@ -179,7 +161,9 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
|
||||||
return fmt.Errorf("snapshot %s has nil tree", snapshot.ID().Str())
|
return fmt.Errorf("snapshot %s has nil tree", snapshot.ID().Str())
|
||||||
}
|
}
|
||||||
|
|
||||||
if countMode == countModeRawData {
|
stats.snapshotsCount++
|
||||||
|
|
||||||
|
if statsOptions.countMode == countModeRawData {
|
||||||
// count just the sizes of unique blobs; we don't need to walk the tree
|
// count just the sizes of unique blobs; we don't need to walk the tree
|
||||||
// ourselves in this case, since a nifty function does it for us
|
// ourselves in this case, since a nifty function does it for us
|
||||||
return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs)
|
return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs)
|
||||||
|
@ -189,6 +173,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err)
|
return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -201,19 +186,19 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile {
|
if statsOptions.countMode == countModeUniqueFilesByContents || statsOptions.countMode == countModeBlobsPerFile {
|
||||||
// only count this file if we haven't visited it before
|
// only count this file if we haven't visited it before
|
||||||
fid := makeFileIDByContents(node)
|
fid := makeFileIDByContents(node)
|
||||||
if _, ok := stats.uniqueFiles[fid]; !ok {
|
if _, ok := stats.uniqueFiles[fid]; !ok {
|
||||||
// mark the file as visited
|
// mark the file as visited
|
||||||
stats.uniqueFiles[fid] = struct{}{}
|
stats.uniqueFiles[fid] = struct{}{}
|
||||||
|
|
||||||
if countMode == countModeUniqueFilesByContents {
|
if statsOptions.countMode == countModeUniqueFilesByContents {
|
||||||
// simply count the size of each unique file (unique by contents only)
|
// simply count the size of each unique file (unique by contents only)
|
||||||
stats.TotalSize += node.Size
|
stats.TotalSize += node.Size
|
||||||
stats.TotalFileCount++
|
stats.TotalFileCount++
|
||||||
}
|
}
|
||||||
if countMode == countModeBlobsPerFile {
|
if statsOptions.countMode == countModeBlobsPerFile {
|
||||||
// count the size of each unique blob reference, which is
|
// count the size of each unique blob reference, which is
|
||||||
// by unique file (unique by contents and file path)
|
// by unique file (unique by contents and file path)
|
||||||
for _, blobID := range node.Content {
|
for _, blobID := range node.Content {
|
||||||
|
@ -243,7 +228,7 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if countMode == countModeRestoreSize {
|
if statsOptions.countMode == countModeRestoreSize {
|
||||||
// as this is a file in the snapshot, we can simply count its
|
// as this is a file in the snapshot, we can simply count its
|
||||||
// size without worrying about uniqueness, since duplicate files
|
// size without worrying about uniqueness, since duplicate files
|
||||||
// will still be restored
|
// will still be restored
|
||||||
|
@ -275,23 +260,13 @@ func makeFileIDByContents(node *restic.Node) fileID {
|
||||||
|
|
||||||
func verifyStatsInput(gopts GlobalOptions, args []string) error {
|
func verifyStatsInput(gopts GlobalOptions, args []string) error {
|
||||||
// require a recognized counting mode
|
// require a recognized counting mode
|
||||||
switch countMode {
|
switch statsOptions.countMode {
|
||||||
case countModeRestoreSize:
|
case countModeRestoreSize:
|
||||||
case countModeUniqueFilesByContents:
|
case countModeUniqueFilesByContents:
|
||||||
case countModeBlobsPerFile:
|
case countModeBlobsPerFile:
|
||||||
case countModeRawData:
|
case countModeRawData:
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", countMode)
|
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", statsOptions.countMode)
|
||||||
}
|
|
||||||
|
|
||||||
// ensure at most one snapshot was specified
|
|
||||||
if len(args) > 1 {
|
|
||||||
return fmt.Errorf("only one snapshot may be specified")
|
|
||||||
}
|
|
||||||
|
|
||||||
// if a snapshot was specified, mark it as the one to scan
|
|
||||||
if len(args) == 1 {
|
|
||||||
snapshotIDString = args[0]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -320,23 +295,14 @@ type statsContainer struct {
|
||||||
// blobs is used to count individual unique blobs,
|
// blobs is used to count individual unique blobs,
|
||||||
// independent of references to files
|
// independent of references to files
|
||||||
blobs restic.BlobSet
|
blobs restic.BlobSet
|
||||||
|
|
||||||
|
// holds count of all considered snapshots
|
||||||
|
snapshotsCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
// fileID is a 256-bit hash that distinguishes unique files.
|
// fileID is a 256-bit hash that distinguishes unique files.
|
||||||
type fileID [32]byte
|
type fileID [32]byte
|
||||||
|
|
||||||
var (
|
|
||||||
// the mode of counting to perform
|
|
||||||
countMode string
|
|
||||||
|
|
||||||
// the snapshot to scan, as given by the user
|
|
||||||
snapshotIDString string
|
|
||||||
|
|
||||||
// snapshotByHost is the host to filter latest
|
|
||||||
// snapshot by, if given by user
|
|
||||||
snapshotByHosts []string
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
countModeRestoreSize = "restore-size"
|
countModeRestoreSize = "restore-size"
|
||||||
countModeUniqueFilesByContents = "files-by-contents"
|
countModeUniqueFilesByContents = "files-by-contents"
|
||||||
|
|
|
@ -22,10 +22,10 @@ func FindFilteredSnapshots(ctx context.Context, repo *repository.Repository, hos
|
||||||
// Process all snapshot IDs given as arguments.
|
// Process all snapshot IDs given as arguments.
|
||||||
for _, s := range snapshotIDs {
|
for _, s := range snapshotIDs {
|
||||||
if s == "latest" {
|
if s == "latest" {
|
||||||
|
usedFilter = true
|
||||||
id, err = restic.FindLatestSnapshot(ctx, repo, paths, tags, hosts)
|
id, err = restic.FindLatestSnapshot(ctx, repo, paths, tags, hosts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Warnf("Ignoring %q, no snapshot matched given filter (Paths:%v Tags:%v Hosts:%v)\n", s, paths, tags, hosts)
|
Warnf("Ignoring %q, no snapshot matched given filter (Paths:%v Tags:%v Hosts:%v)\n", s, paths, tags, hosts)
|
||||||
usedFilter = true
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -306,6 +306,10 @@ host by using the ``--host`` flag:
|
||||||
There we see that it would take 482 GiB of disk space to restore the latest
|
There we see that it would take 482 GiB of disk space to restore the latest
|
||||||
snapshot from "myserver".
|
snapshot from "myserver".
|
||||||
|
|
||||||
|
In case you have multiple backups running from the same host, you can also use
|
||||||
|
``--tag`` and ``--path`` to be more specific about which snapshots you
|
||||||
|
are looking for.
|
||||||
|
|
||||||
But how much space does that snapshot take on disk? In other words, how much
|
But how much space does that snapshot take on disk? In other words, how much
|
||||||
has restic's deduplication helped? We can check:
|
has restic's deduplication helped? We can check:
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue