Merge pull request #2859 from buschjost/stats-filter-by-tag-and-path
Add filter by tag and path to stats command
This commit is contained in:
commit
55071ee367
4 changed files with 67 additions and 88 deletions
9
changelog/unreleased/issue-2858
Normal file
9
changelog/unreleased/issue-2858
Normal file
|
@ -0,0 +1,9 @@
|
|||
Enhancement: Support filtering snapshots by tag and path in the stats command
|
||||
|
||||
We've added filtering snapshots by `--tag tagList` and by `--path path` to
|
||||
the `stats` command. This includes filtering of only 'latest' snapshots or
|
||||
all snapshots in a repository.
|
||||
|
||||
https://github.com/restic/restic/issues/2858
|
||||
https://github.com/restic/restic/pull/2859
|
||||
https://forum.restic.net/t/stats-for-a-host-and-filtered-snapshots/3020
|
|
@ -6,7 +6,6 @@ import (
|
|||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"github.com/restic/restic/internal/walker"
|
||||
|
||||
|
@ -15,18 +14,19 @@ import (
|
|||
)
|
||||
|
||||
var cmdStats = &cobra.Command{
|
||||
Use: "stats [flags] [snapshot-ID]",
|
||||
Use: "stats [flags] [snapshot ID] [...]",
|
||||
Short: "Scan the repository and show basic statistics",
|
||||
Long: `
|
||||
The "stats" command walks one or all snapshots in a repository and
|
||||
accumulates statistics about the data stored therein. It reports on
|
||||
the number of unique files and their sizes, according to one of
|
||||
The "stats" command walks one or multiple snapshots in a repository
|
||||
and accumulates statistics about the data stored therein. It reports
|
||||
on the number of unique files and their sizes, according to one of
|
||||
the counting modes as given by the --mode flag.
|
||||
|
||||
If no snapshot is specified, all snapshots will be considered. Some
|
||||
modes make more sense over just a single snapshot, while others
|
||||
are useful across all snapshots, depending on what you are trying
|
||||
to calculate.
|
||||
It operates on all snapshots matching the selection criteria or all
|
||||
snapshots if nothing is specified. The special snapshot ID "latest"
|
||||
is also supported. Some modes make more sense over
|
||||
just a single snapshot, while others are useful across all snapshots,
|
||||
depending on what you are trying to calculate.
|
||||
|
||||
The modes are:
|
||||
|
||||
|
@ -50,11 +50,26 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
|||
},
|
||||
}
|
||||
|
||||
// StatsOptions collects all options for the stats command.
|
||||
type StatsOptions struct {
|
||||
// the mode of counting to perform (see consts for available modes)
|
||||
countMode string
|
||||
|
||||
// filter snapshots by, if given by user
|
||||
Hosts []string
|
||||
Tags restic.TagLists
|
||||
Paths []string
|
||||
}
|
||||
|
||||
var statsOptions StatsOptions
|
||||
|
||||
func init() {
|
||||
cmdRoot.AddCommand(cmdStats)
|
||||
f := cmdStats.Flags()
|
||||
f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data")
|
||||
f.StringArrayVarP(&snapshotByHosts, "host", "H", nil, "filter latest snapshot by this hostname (can be specified multiple times)")
|
||||
f.StringVar(&statsOptions.countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file or raw-data")
|
||||
f.StringArrayVarP(&statsOptions.Hosts, "host", "H", nil, "only consider snapshots with the given `host` (can be specified multiple times)")
|
||||
f.Var(&statsOptions.Tags, "tag", "only consider snapshots which include this `taglist` in the format `tag[,tag,...]` (can be specified multiple times)")
|
||||
f.StringArrayVar(&statsOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` (can be specified multiple times)")
|
||||
}
|
||||
|
||||
func runStats(gopts GlobalOptions, args []string) error {
|
||||
|
@ -89,52 +104,25 @@ func runStats(gopts GlobalOptions, args []string) error {
|
|||
|
||||
// create a container for the stats (and other needed state)
|
||||
stats := &statsContainer{
|
||||
uniqueFiles: make(map[fileID]struct{}),
|
||||
uniqueInodes: make(map[uint64]struct{}),
|
||||
fileBlobs: make(map[string]restic.IDSet),
|
||||
blobs: restic.NewBlobSet(),
|
||||
uniqueFiles: make(map[fileID]struct{}),
|
||||
uniqueInodes: make(map[uint64]struct{}),
|
||||
fileBlobs: make(map[string]restic.IDSet),
|
||||
blobs: restic.NewBlobSet(),
|
||||
snapshotsCount: 0,
|
||||
}
|
||||
|
||||
if snapshotIDString != "" {
|
||||
// scan just a single snapshot
|
||||
|
||||
var sID restic.ID
|
||||
if snapshotIDString == "latest" {
|
||||
sID, err = restic.FindLatestSnapshot(ctx, repo, []string{}, []restic.TagList{}, snapshotByHosts)
|
||||
if err != nil {
|
||||
return errors.Fatalf("latest snapshot for criteria not found: %v", err)
|
||||
}
|
||||
} else {
|
||||
sID, err = restic.FindSnapshot(repo, snapshotIDString)
|
||||
if err != nil {
|
||||
return errors.Fatalf("error loading snapshot: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
snapshot, err := restic.LoadSnapshot(ctx, repo, sID)
|
||||
if err != nil {
|
||||
return errors.Fatalf("error loading snapshot from repo: %v", err)
|
||||
}
|
||||
|
||||
err = statsWalkSnapshot(ctx, snapshot, repo, stats)
|
||||
for sn := range FindFilteredSnapshots(ctx, repo, statsOptions.Hosts, statsOptions.Tags, statsOptions.Paths, args) {
|
||||
err = statsWalkSnapshot(ctx, sn, repo, stats)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error walking snapshot: %v", err)
|
||||
}
|
||||
} else {
|
||||
// iterate every snapshot in the repo
|
||||
err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error {
|
||||
snapshot, err := restic.LoadSnapshot(ctx, repo, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error loading snapshot %s: %v", snapshotID.Str(), err)
|
||||
}
|
||||
return statsWalkSnapshot(ctx, snapshot, repo, stats)
|
||||
})
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if countMode == countModeRawData {
|
||||
if statsOptions.countMode == countModeRawData {
|
||||
// the blob handles have been collected, but not yet counted
|
||||
for blobHandle := range stats.blobs {
|
||||
blobSize, found := repo.LookupBlobSize(blobHandle.ID, blobHandle.Type)
|
||||
|
@ -154,22 +142,16 @@ func runStats(gopts GlobalOptions, args []string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// inform the user what was scanned and how it was scanned
|
||||
snapshotsScanned := snapshotIDString
|
||||
if snapshotsScanned == "latest" {
|
||||
snapshotsScanned = "the latest snapshot"
|
||||
} else if snapshotsScanned == "" {
|
||||
snapshotsScanned = "all snapshots"
|
||||
}
|
||||
Printf("Stats for %s in %s mode:\n", snapshotsScanned, countMode)
|
||||
Printf("Stats in %s mode:\n", statsOptions.countMode)
|
||||
Printf("Snapshots processed: %d\n", stats.snapshotsCount)
|
||||
|
||||
if stats.TotalBlobCount > 0 {
|
||||
Printf(" Total Blob Count: %d\n", stats.TotalBlobCount)
|
||||
Printf(" Total Blob Count: %d\n", stats.TotalBlobCount)
|
||||
}
|
||||
if stats.TotalFileCount > 0 {
|
||||
Printf(" Total File Count: %d\n", stats.TotalFileCount)
|
||||
Printf(" Total File Count: %d\n", stats.TotalFileCount)
|
||||
}
|
||||
Printf(" Total Size: %-5s\n", formatBytes(stats.TotalSize))
|
||||
Printf(" Total Size: %-5s\n", formatBytes(stats.TotalSize))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -179,7 +161,9 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
|
|||
return fmt.Errorf("snapshot %s has nil tree", snapshot.ID().Str())
|
||||
}
|
||||
|
||||
if countMode == countModeRawData {
|
||||
stats.snapshotsCount++
|
||||
|
||||
if statsOptions.countMode == countModeRawData {
|
||||
// count just the sizes of unique blobs; we don't need to walk the tree
|
||||
// ourselves in this case, since a nifty function does it for us
|
||||
return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs)
|
||||
|
@ -189,6 +173,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
|
|||
if err != nil {
|
||||
return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -201,19 +186,19 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
|||
return true, nil
|
||||
}
|
||||
|
||||
if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile {
|
||||
if statsOptions.countMode == countModeUniqueFilesByContents || statsOptions.countMode == countModeBlobsPerFile {
|
||||
// only count this file if we haven't visited it before
|
||||
fid := makeFileIDByContents(node)
|
||||
if _, ok := stats.uniqueFiles[fid]; !ok {
|
||||
// mark the file as visited
|
||||
stats.uniqueFiles[fid] = struct{}{}
|
||||
|
||||
if countMode == countModeUniqueFilesByContents {
|
||||
if statsOptions.countMode == countModeUniqueFilesByContents {
|
||||
// simply count the size of each unique file (unique by contents only)
|
||||
stats.TotalSize += node.Size
|
||||
stats.TotalFileCount++
|
||||
}
|
||||
if countMode == countModeBlobsPerFile {
|
||||
if statsOptions.countMode == countModeBlobsPerFile {
|
||||
// count the size of each unique blob reference, which is
|
||||
// by unique file (unique by contents and file path)
|
||||
for _, blobID := range node.Content {
|
||||
|
@ -243,7 +228,7 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
|||
}
|
||||
}
|
||||
|
||||
if countMode == countModeRestoreSize {
|
||||
if statsOptions.countMode == countModeRestoreSize {
|
||||
// as this is a file in the snapshot, we can simply count its
|
||||
// size without worrying about uniqueness, since duplicate files
|
||||
// will still be restored
|
||||
|
@ -275,23 +260,13 @@ func makeFileIDByContents(node *restic.Node) fileID {
|
|||
|
||||
func verifyStatsInput(gopts GlobalOptions, args []string) error {
|
||||
// require a recognized counting mode
|
||||
switch countMode {
|
||||
switch statsOptions.countMode {
|
||||
case countModeRestoreSize:
|
||||
case countModeUniqueFilesByContents:
|
||||
case countModeBlobsPerFile:
|
||||
case countModeRawData:
|
||||
default:
|
||||
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", countMode)
|
||||
}
|
||||
|
||||
// ensure at most one snapshot was specified
|
||||
if len(args) > 1 {
|
||||
return fmt.Errorf("only one snapshot may be specified")
|
||||
}
|
||||
|
||||
// if a snapshot was specified, mark it as the one to scan
|
||||
if len(args) == 1 {
|
||||
snapshotIDString = args[0]
|
||||
return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", statsOptions.countMode)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -320,23 +295,14 @@ type statsContainer struct {
|
|||
// blobs is used to count individual unique blobs,
|
||||
// independent of references to files
|
||||
blobs restic.BlobSet
|
||||
|
||||
// holds count of all considered snapshots
|
||||
snapshotsCount int
|
||||
}
|
||||
|
||||
// fileID is a 256-bit hash that distinguishes unique files.
|
||||
type fileID [32]byte
|
||||
|
||||
var (
|
||||
// the mode of counting to perform
|
||||
countMode string
|
||||
|
||||
// the snapshot to scan, as given by the user
|
||||
snapshotIDString string
|
||||
|
||||
// snapshotByHost is the host to filter latest
|
||||
// snapshot by, if given by user
|
||||
snapshotByHosts []string
|
||||
)
|
||||
|
||||
const (
|
||||
countModeRestoreSize = "restore-size"
|
||||
countModeUniqueFilesByContents = "files-by-contents"
|
||||
|
|
|
@ -22,10 +22,10 @@ func FindFilteredSnapshots(ctx context.Context, repo *repository.Repository, hos
|
|||
// Process all snapshot IDs given as arguments.
|
||||
for _, s := range snapshotIDs {
|
||||
if s == "latest" {
|
||||
usedFilter = true
|
||||
id, err = restic.FindLatestSnapshot(ctx, repo, paths, tags, hosts)
|
||||
if err != nil {
|
||||
Warnf("Ignoring %q, no snapshot matched given filter (Paths:%v Tags:%v Hosts:%v)\n", s, paths, tags, hosts)
|
||||
usedFilter = true
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -306,6 +306,10 @@ host by using the ``--host`` flag:
|
|||
There we see that it would take 482 GiB of disk space to restore the latest
|
||||
snapshot from "myserver".
|
||||
|
||||
In case you have multiple backups running from the same host so can also use
|
||||
``--tag`` and ``--path`` to be more specific about which snapshots you
|
||||
are looking for.
|
||||
|
||||
But how much space does that snapshot take on disk? In other words, how much
|
||||
has restic's deduplication helped? We can check:
|
||||
|
||||
|
|
Loading…
Reference in a new issue