stats: Add manual doc, improve -h doc
Also rename files-by-content to files-by-contents, once and for all
This commit is contained in:
parent
12c797700e
commit
f6b2731aa5
2 changed files with 98 additions and 9 deletions
|
@ -21,6 +21,25 @@ The "stats" command walks one or all snapshots in a repository and
|
|||
accumulates statistics about the data stored therein. It reports on
|
||||
the number of unique files and their sizes, according to one of
|
||||
the counting modes as given by the --mode flag.
|
||||
|
||||
If no snapshot is specified, all snapshots will be considered. Some
|
||||
modes make more sense over just a single snapshot, while others
|
||||
are useful across all snapshots, depending on what you are trying
|
||||
to calculate.
|
||||
|
||||
The modes are:
|
||||
|
||||
restore-size: (default) Counts the size of the restored files.
|
||||
|
||||
files-by-contents: Counts total size of files, where a file is
|
||||
considered unique if it has unique contents.
|
||||
|
||||
raw-data: Counts the size of blobs in the repository, regardless
|
||||
of how many files reference them.
|
||||
|
||||
blobs-per-file: A combination of files-by-contents and raw-data.
|
||||
|
||||
Refer to the online manual for more details about each mode.
|
||||
`,
|
||||
DisableAutoGenTag: true,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
|
@ -31,7 +50,7 @@ the counting modes as given by the --mode flag.
|
|||
func init() {
|
||||
cmdRoot.AddCommand(cmdStats)
|
||||
f := cmdStats.Flags()
|
||||
f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-content, blobs-per-file, or raw-data")
|
||||
f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data")
|
||||
f.StringVar(&snapshotByHost, "host", "", "filter latest snapshot by this hostname")
|
||||
}
|
||||
|
||||
|
@ -163,14 +182,14 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
|||
return true, nil
|
||||
}
|
||||
|
||||
if countMode == countModeUniqueFilesByContent || countMode == countModeBlobsPerFile {
|
||||
if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile {
|
||||
// only count this file if we haven't visited it before
|
||||
fid := makeFileIDByContents(node)
|
||||
if _, ok := stats.uniqueFiles[fid]; !ok {
|
||||
// mark the file as visited
|
||||
stats.uniqueFiles[fid] = struct{}{}
|
||||
|
||||
if countMode == countModeUniqueFilesByContent {
|
||||
if countMode == countModeUniqueFilesByContents {
|
||||
// simply count the size of each unique file (unique by contents only)
|
||||
stats.TotalSize += node.Size
|
||||
stats.TotalFileCount++
|
||||
|
@ -197,7 +216,6 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun
|
|||
// file (path) so we don't double-count it
|
||||
stats.TotalSize += uint64(blobSize)
|
||||
stats.fileBlobs[nodePath].Insert(blobID)
|
||||
|
||||
// this mode also counts total unique blob _references_ per file
|
||||
stats.TotalBlobCount++
|
||||
}
|
||||
|
@ -232,7 +250,7 @@ func verifyStatsInput(gopts GlobalOptions, args []string) error {
|
|||
// require a recognized counting mode
|
||||
switch countMode {
|
||||
case countModeRestoreSize:
|
||||
case countModeUniqueFilesByContent:
|
||||
case countModeUniqueFilesByContents:
|
||||
case countModeBlobsPerFile:
|
||||
case countModeRawData:
|
||||
default:
|
||||
|
@ -289,8 +307,8 @@ var (
|
|||
)
|
||||
|
||||
const (
|
||||
countModeRestoreSize = "restore-size"
|
||||
countModeUniqueFilesByContent = "files-by-content"
|
||||
countModeBlobsPerFile = "blobs-per-file"
|
||||
countModeRawData = "raw-data"
|
||||
countModeRestoreSize = "restore-size"
|
||||
countModeUniqueFilesByContents = "files-by-contents"
|
||||
countModeBlobsPerFile = "blobs-per-file"
|
||||
countModeRawData = "raw-data"
|
||||
)
|
||||
|
|
|
@ -36,6 +36,7 @@ Usage help is available:
|
|||
rebuild-index Build a new index file
|
||||
restore Extract the data from a snapshot
|
||||
snapshots List all snapshots
|
||||
stats Count up sizes and show information about repository data
|
||||
tag Modify tags on snapshots
|
||||
unlock Remove locks other processes created
|
||||
version Print version information
|
||||
|
@ -236,6 +237,76 @@ The following metadata is handled by restic:
|
|||
- Subtree
|
||||
- ExtendedAttributes
|
||||
|
||||
|
||||
Getting information about repository data
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Use the ``stats`` command to count up stats about the data in the repository.
|
||||
There are different counting modes available using the ``--mode`` flag,
|
||||
depending on what you want to calculate. The default is the restore size, or
|
||||
the size required to restore the files. The available modes are:
|
||||
|
||||
- ``restore-size`` (default) counts the size of the restored files.
|
||||
- ``files-by-contents`` counts the total size of unique files as given by their
|
||||
contents. This can be useful since a file is considered unique only if it has
|
||||
unique contents. Keep in mind that a small change to a large file (even when the
|
||||
file name/path hasn't changed) will cause the two versions to look like different files, thus
|
||||
essentially causing the whole size of the file to be counted twice.
|
||||
- ``raw-data`` counts the size of the blobs in the repository, regardless of how many
|
||||
files reference them. This tells you how much restic has reduced all your original
|
||||
data down to (either for a single snapshot or across all your backups), and compared
|
||||
to the size given by the ``restore-size`` mode, can tell you how much deduplication is
|
||||
helping you.
|
||||
- ``blobs-per-file`` is a mix of the ``files-by-contents`` and ``raw-data`` modes;
|
||||
it is useful for knowing how much value your backup is providing you in terms of unique
|
||||
data stored by file. Like files-by-contents, it is resilient to file renames/moves.
|
||||
Unlike files-by-contents, it does not balloon to high values when large files have
|
||||
small edits, as long as the file path stayed the same. Unlike raw-data, this mode
|
||||
DOES consider how many files point to each blob such that the more files a blob is
|
||||
referenced by, the more it counts toward the size.
|
||||
|
||||
For example, to calculate how much space would be
|
||||
required to restore the latest snapshot (from any host that made it):
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic stats latest
|
||||
password is correct
|
||||
Total File Count: 10538
|
||||
Total Size: 37.824 GiB
|
||||
|
||||
If multiple hosts are backing up to the repository, the latest snapshot may not
|
||||
be the one you want. You can specify the latest snapshot from only a specific
|
||||
host by using the ``--host`` flag:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic stats --host myserver latest
|
||||
password is correct
|
||||
Total File Count: 21766
|
||||
Total Size: 481.783 GiB
|
||||
|
||||
Here we see that it would take 482 GiB of disk space to restore the latest
|
||||
snapshot from "myserver".
|
||||
|
||||
But how much space does that snapshot take on disk? In other words, how much
|
||||
has restic's deduplication helped? We can check:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic stats --host myserver --mode raw-data latest
|
||||
password is correct
|
||||
Total Blob Count: 340847
|
||||
Total Size: 458.663 GiB
|
||||
|
||||
Comparing this size to the previous command, we see that restic has saved
|
||||
about 23 GiB of space with deduplication.
|
||||
|
||||
Which mode you use depends on your exact use case. Some modes are more useful
|
||||
across all snapshots, while others make more sense on just a single snapshot,
|
||||
depending on what you're trying to calculate.
|
||||
|
||||
|
||||
Scripting
|
||||
---------
|
||||
|
||||
|
|
Loading…
Reference in a new issue