Merge pull request #4081 from MichaelEischer/backup-group-by

Add support for `--group-by` to the backup command
This commit is contained in:
Michael Eischer 2023-02-11 11:38:13 +01:00 committed by GitHub
commit f137be42fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 159 additions and 33 deletions

View file

@ -0,0 +1,14 @@
Enhancement: Support `--group-by` for backup parent selection
The backup command by default selected the parent snapshot based on the hostname
and the backup targets. When the backup path list changed, the backup command
was unable to determine a suitable parent snapshot and had to read all
files again.
The new `--group-by` option for the backup command allows filtering snapshots
for the parent selection by `host`, `paths` and `tags`. It defaults to
`host,paths` which selects the latest snapshot with hostname and paths matching
those of the backup run. It should be used consistently with `forget --group-by`.
https://github.com/restic/restic/issues/3941
https://github.com/restic/restic/pull/4081

View file

@ -89,6 +89,7 @@ type BackupOptions struct {
excludePatternOptions
Parent string
GroupBy restic.SnapshotGroupByOptions
Force bool
ExcludeOtherFS bool
ExcludeIfPresent []string
@ -120,7 +121,9 @@ func init() {
cmdRoot.AddCommand(cmdBackup)
f := cmdBackup.Flags()
f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)")
f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: latest snapshot in the group determined by --group-by and not newer than the timestamp determined by --time)")
backupOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true}
f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')")
f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the target files/directories (overrides the "parent" flag)`)
initExcludePatternOptions(f, &backupOptions.excludePatternOptions)
@ -439,7 +442,21 @@ func findParentSnapshot(ctx context.Context, repo restic.Repository, opts Backup
if snName == "" {
snName = "latest"
}
sn, err := restic.FindFilteredSnapshot(ctx, repo.Backend(), repo, []string{opts.Host}, []restic.TagList{}, targets, &timeStampLimit, snName)
var hosts []string
var paths []string
var tags []restic.TagList
if opts.GroupBy.Host {
hosts = []string{opts.Host}
}
if opts.GroupBy.Path {
paths = targets
}
if opts.GroupBy.Tag {
tags = []restic.TagList{opts.Tags.Flatten()}
}
sn, err := restic.FindFilteredSnapshot(ctx, repo.Backend(), repo, hosts, tags, paths, &timeStampLimit, snName)
// Snapshot not found is ok if no explicit parent was set
if opts.Parent == "" && errors.Is(err, restic.ErrNoSnapshotFound) {
err = nil

View file

@ -56,7 +56,7 @@ type ForgetOptions struct {
Compact bool
// Grouping
GroupBy string
GroupBy restic.SnapshotGroupByOptions
DryRun bool
Prune bool
}
@ -90,8 +90,8 @@ func init() {
}
f.BoolVarP(&forgetOptions.Compact, "compact", "c", false, "use compact output format")
f.StringVarP(&forgetOptions.GroupBy, "group-by", "g", "host,paths", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')")
forgetOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true}
f.VarP(&forgetOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')")
f.BoolVarP(&forgetOptions.DryRun, "dry-run", "n", false, "do not delete anything, just print what would be done")
f.BoolVar(&forgetOptions.Prune, "prune", false, "automatically run the 'prune' command if snapshots have been removed")

View file

@ -36,7 +36,7 @@ type SnapshotOptions struct {
Compact bool
Last bool // This option should be removed in favour of Latest.
Latest int
GroupBy string
GroupBy restic.SnapshotGroupByOptions
}
var snapshotOptions SnapshotOptions
@ -54,7 +54,7 @@ func init() {
panic(err)
}
f.IntVar(&snapshotOptions.Latest, "latest", 0, "only show the last `n` snapshots for each host and path")
f.StringVarP(&snapshotOptions.GroupBy, "group-by", "g", "", "`group` snapshots by host, paths and/or tags, separated by comma")
f.VarP(&snapshotOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma")
}
func runSnapshots(ctx context.Context, opts SnapshotOptions, gopts GlobalOptions, args []string) error {

View file

@ -139,13 +139,24 @@ File change detection
*********************
When restic encounters a file that has already been backed up, whether in the
current backup or a previous one, it makes sure the file's contents are only
current backup or a previous one, it makes sure the file's content is only
stored once in the repository. To do so, it normally has to scan the entire
contents of every file. Because this can be very expensive, restic also uses a
content of the file. Because this can be very expensive, restic also uses a
change detection rule based on file metadata to determine whether a file is
likely unchanged since a previous backup. If it is, the file is not scanned
again.
The previous backup snapshot, called "parent" snaphot in restic terminology,
is determined as follows. By default restic groups snapshots by hostname and
backup paths, and then selects the latest snapshot in the group that matches
the current backup. You can change the selection criteria using the
``--group-by`` option, which defaults to ``host,paths``. To select the latest
snapshot with the same paths independent of the hostname, use ``paths``. Or,
to only consider the hostname and tags, use ``host,tags``. Alternatively, it
is possible to manually specify a specific parent snapshot using the
``--parent`` option. Finally, note that one would normally set the
``--group-by`` option for the ``forget`` command to the same value.
Change detection is only performed for regular files (not special files,
symlinks or directories) that have the exact same path as they did in a
previous backup of the same location. If a file or one of its containing

View file

@ -219,6 +219,8 @@ paths and tags. The policy is then applied to each group of snapshots individual
This is a safety feature to prevent accidental removal of unrelated backup sets. To
disable grouping and apply the policy to all snapshots regardless of their host,
paths and tags, use ``--group-by ''`` (that is, an empty value to ``--group-by``).
Note that one would normally set the ``--group-by`` option for the ``backup``
command to the same value.
Additionally, you can restrict the policy to only process snapshots which have a
particular hostname with the ``--host`` parameter, or tags with the ``--tag``

View file

@ -8,6 +8,57 @@ import (
"github.com/restic/restic/internal/errors"
)
type SnapshotGroupByOptions struct {
Tag bool
Host bool
Path bool
}
func splitSnapshotGroupBy(s string) (SnapshotGroupByOptions, error) {
var l SnapshotGroupByOptions
for _, option := range strings.Split(s, ",") {
switch option {
case "host", "hosts":
l.Host = true
case "path", "paths":
l.Path = true
case "tag", "tags":
l.Tag = true
case "":
default:
return SnapshotGroupByOptions{}, errors.Fatal("unknown grouping option: '" + option + "'")
}
}
return l, nil
}
func (l SnapshotGroupByOptions) String() string {
var parts []string
if l.Host {
parts = append(parts, "host")
}
if l.Path {
parts = append(parts, "paths")
}
if l.Tag {
parts = append(parts, "tags")
}
return strings.Join(parts, ",")
}
func (l *SnapshotGroupByOptions) Set(s string) error {
parts, err := splitSnapshotGroupBy(s)
if err != nil {
return err
}
*l = parts
return nil
}
func (l *SnapshotGroupByOptions) Type() string {
return "group"
}
// SnapshotGroupKey is the structure for identifying groups in a grouped
// snapshot list. This is used by GroupSnapshots()
type SnapshotGroupKey struct {
@ -18,43 +69,24 @@ type SnapshotGroupKey struct {
// GroupSnapshots takes a list of snapshots and a grouping criteria and creates
// a group list of snapshots.
func GroupSnapshots(snapshots Snapshots, options string) (map[string]Snapshots, bool, error) {
func GroupSnapshots(snapshots Snapshots, groupBy SnapshotGroupByOptions) (map[string]Snapshots, bool, error) {
// group by hostname and dirs
snapshotGroups := make(map[string]Snapshots)
var GroupByTag bool
var GroupByHost bool
var GroupByPath bool
GroupOptionList := strings.Split(options, ",")
for _, option := range GroupOptionList {
switch option {
case "host", "hosts":
GroupByHost = true
case "path", "paths":
GroupByPath = true
case "tag", "tags":
GroupByTag = true
case "":
default:
return nil, false, errors.Fatal("unknown grouping option: '" + option + "'")
}
}
for _, sn := range snapshots {
// Determining grouping-keys
var tags []string
var hostname string
var paths []string
if GroupByTag {
if groupBy.Tag {
tags = sn.Tags
sort.Strings(tags)
}
if GroupByHost {
if groupBy.Host {
hostname = sn.Hostname
}
if GroupByPath {
if groupBy.Path {
paths = sn.Paths
}
@ -70,5 +102,5 @@ func GroupSnapshots(snapshots Snapshots, options string) (map[string]Snapshots,
snapshotGroups[string(k)] = append(snapshotGroups[string(k)], sn)
}
return snapshotGroups, GroupByTag || GroupByHost || GroupByPath, nil
return snapshotGroups, groupBy.Tag || groupBy.Host || groupBy.Path, nil
}

View file

@ -0,0 +1,50 @@
package restic_test
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
func TestGroupByOptions(t *testing.T) {
for _, exp := range []struct {
from string
opts restic.SnapshotGroupByOptions
normalized string
}{
{
from: "",
opts: restic.SnapshotGroupByOptions{},
normalized: "",
},
{
from: "host,paths",
opts: restic.SnapshotGroupByOptions{Host: true, Path: true},
normalized: "host,paths",
},
{
from: "host,path,tag",
opts: restic.SnapshotGroupByOptions{Host: true, Path: true, Tag: true},
normalized: "host,paths,tags",
},
{
from: "hosts,paths,tags",
opts: restic.SnapshotGroupByOptions{Host: true, Path: true, Tag: true},
normalized: "host,paths,tags",
},
} {
var opts restic.SnapshotGroupByOptions
test.OK(t, opts.Set(exp.from))
if !cmp.Equal(opts, exp.opts) {
t.Errorf("unexpeted opts %s", cmp.Diff(opts, exp.opts))
}
test.Equals(t, opts.String(), exp.normalized)
}
var opts restic.SnapshotGroupByOptions
err := opts.Set("tags,invalid")
test.Assert(t, err != nil, "missing error on invalid tags")
test.Assert(t, !opts.Host && !opts.Path && !opts.Tag, "unexpected opts %s %s %s", opts.Host, opts.Path, opts.Tag)
}