From acb40d2b94d40a7d46b25f81a2190c96deef1dec Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 10 Dec 2022 15:29:20 +0100 Subject: [PATCH 1/3] Refactor group-by to parse options into a struct --- cmd/restic/cmd_forget.go | 6 +- cmd/restic/cmd_snapshots.go | 4 +- internal/restic/snapshot_group.go | 80 ++++++++++++++++++-------- internal/restic/snapshot_group_test.go | 50 ++++++++++++++++ 4 files changed, 111 insertions(+), 29 deletions(-) create mode 100644 internal/restic/snapshot_group_test.go diff --git a/cmd/restic/cmd_forget.go b/cmd/restic/cmd_forget.go index 472b22b79..cc4923936 100644 --- a/cmd/restic/cmd_forget.go +++ b/cmd/restic/cmd_forget.go @@ -56,7 +56,7 @@ type ForgetOptions struct { Compact bool // Grouping - GroupBy string + GroupBy restic.SnapshotGroupByOptions DryRun bool Prune bool } @@ -90,8 +90,8 @@ func init() { } f.BoolVarP(&forgetOptions.Compact, "compact", "c", false, "use compact output format") - - f.StringVarP(&forgetOptions.GroupBy, "group-by", "g", "host,paths", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") + forgetOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true} + f.VarP(&forgetOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&forgetOptions.DryRun, "dry-run", "n", false, "do not delete anything, just print what would be done") f.BoolVar(&forgetOptions.Prune, "prune", false, "automatically run the 'prune' command if snapshots have been removed") diff --git a/cmd/restic/cmd_snapshots.go b/cmd/restic/cmd_snapshots.go index 0bfa4d110..408018736 100644 --- a/cmd/restic/cmd_snapshots.go +++ b/cmd/restic/cmd_snapshots.go @@ -36,7 +36,7 @@ type SnapshotOptions struct { Compact bool Last bool // This option should be removed in favour of Latest. Latest int - GroupBy string + GroupBy restic.SnapshotGroupByOptions } var snapshotOptions SnapshotOptions @@ -54,7 +54,7 @@ func init() { panic(err) } f.IntVar(&snapshotOptions.Latest, "latest", 0, "only show the last `n` snapshots for each host and path") - f.StringVarP(&snapshotOptions.GroupBy, "group-by", "g", "", "`group` snapshots by host, paths and/or tags, separated by comma") + f.VarP(&snapshotOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma") } func runSnapshots(ctx context.Context, opts SnapshotOptions, gopts GlobalOptions, args []string) error { diff --git a/internal/restic/snapshot_group.go b/internal/restic/snapshot_group.go index c8b1a5faa..c3f3307f6 100644 --- a/internal/restic/snapshot_group.go +++ b/internal/restic/snapshot_group.go @@ -8,6 +8,57 @@ import ( "github.com/restic/restic/internal/errors" ) +type SnapshotGroupByOptions struct { + Tag bool + Host bool + Path bool +} + +func splitSnapshotGroupBy(s string) (SnapshotGroupByOptions, error) { + var l SnapshotGroupByOptions + for _, option := range strings.Split(s, ",") { + switch option { + case "host", "hosts": + l.Host = true + case "path", "paths": + l.Path = true + case "tag", "tags": + l.Tag = true + case "": + default: + return SnapshotGroupByOptions{}, errors.Fatal("unknown grouping option: '" + option + "'") + } + } + return l, nil +} + +func (l SnapshotGroupByOptions) String() string { + var parts []string + if l.Host { + parts = append(parts, "host") + } + if l.Path { + parts = append(parts, "paths") + } + if l.Tag { + parts = append(parts, "tags") + } + return strings.Join(parts, ",") +} + +func (l *SnapshotGroupByOptions) Set(s string) error { + parts, err := splitSnapshotGroupBy(s) + if err != nil { + return err + } + *l = parts + return nil +} + +func (l *SnapshotGroupByOptions) Type() string { + return "group" +} + // SnapshotGroupKey is the structure for identifying groups in a grouped // snapshot list. This is used by GroupSnapshots() type SnapshotGroupKey struct { @@ -18,43 +69,24 @@ type SnapshotGroupKey struct { // GroupSnapshots takes a list of snapshots and a grouping criteria and creates // a group list of snapshots. -func GroupSnapshots(snapshots Snapshots, options string) (map[string]Snapshots, bool, error) { +func GroupSnapshots(snapshots Snapshots, groupBy SnapshotGroupByOptions) (map[string]Snapshots, bool, error) { // group by hostname and dirs snapshotGroups := make(map[string]Snapshots) - var GroupByTag bool - var GroupByHost bool - var GroupByPath bool - GroupOptionList := strings.Split(options, ",") - - for _, option := range GroupOptionList { - switch option { - case "host", "hosts": - GroupByHost = true - case "path", "paths": - GroupByPath = true - case "tag", "tags": - GroupByTag = true - case "": - default: - return nil, false, errors.Fatal("unknown grouping option: '" + option + "'") - } - } - for _, sn := range snapshots { // Determining grouping-keys var tags []string var hostname string var paths []string - if GroupByTag { + if groupBy.Tag { tags = sn.Tags sort.Strings(tags) } - if GroupByHost { + if groupBy.Host { hostname = sn.Hostname } - if GroupByPath { + if groupBy.Path { paths = sn.Paths } @@ -70,5 +102,5 @@ func GroupSnapshots(snapshots Snapshots, options string) (map[string]Snapshots, snapshotGroups[string(k)] = append(snapshotGroups[string(k)], sn) } - return snapshotGroups, GroupByTag || GroupByHost || GroupByPath, nil + return snapshotGroups, groupBy.Tag || groupBy.Host || groupBy.Path, nil } diff --git a/internal/restic/snapshot_group_test.go b/internal/restic/snapshot_group_test.go new file mode 100644 index 000000000..78ac99ab1 --- /dev/null +++ b/internal/restic/snapshot_group_test.go @@ -0,0 +1,50 @@ +package restic_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" +) + +func TestGroupByOptions(t *testing.T) { + for _, exp := range []struct { + from string + opts restic.SnapshotGroupByOptions + normalized string + }{ + { + from: "", + opts: restic.SnapshotGroupByOptions{}, + normalized: "", + }, + { + from: "host,paths", + opts: restic.SnapshotGroupByOptions{Host: true, Path: true}, + normalized: "host,paths", + }, + { + from: "host,path,tag", + opts: restic.SnapshotGroupByOptions{Host: true, Path: true, Tag: true}, + normalized: "host,paths,tags", + }, + { + from: "hosts,paths,tags", + opts: restic.SnapshotGroupByOptions{Host: true, Path: true, Tag: true}, + normalized: "host,paths,tags", + }, + } { + var opts restic.SnapshotGroupByOptions + test.OK(t, opts.Set(exp.from)) + if !cmp.Equal(opts, exp.opts) { + t.Errorf("unexpeted opts %s", cmp.Diff(opts, exp.opts)) + } + test.Equals(t, opts.String(), exp.normalized) + } + + var opts restic.SnapshotGroupByOptions + err := opts.Set("tags,invalid") + test.Assert(t, err != nil, "missing error on invalid tags") + test.Assert(t, !opts.Host && !opts.Path && !opts.Tag, "unexpected opts %s %s %s", opts.Host, opts.Path, opts.Tag) +} From 2885db7902d8cecd10a1be0ead7dd23b5e6f2c3d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 10 Dec 2022 15:34:25 +0100 Subject: [PATCH 2/3] backup: add group-by option --- cmd/restic/cmd_backup.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index e59f503db..696a19b94 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -89,6 +89,7 @@ type BackupOptions struct { excludePatternOptions Parent string + GroupBy restic.SnapshotGroupByOptions Force bool ExcludeOtherFS bool ExcludeIfPresent []string @@ -121,6 +122,8 @@ func init() { f := cmdBackup.Flags() f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)") + backupOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true} + f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the target files/directories (overrides the "parent" flag)`) initExcludePatternOptions(f, &backupOptions.excludePatternOptions) @@ -439,7 +442,21 @@ func findParentSnapshot(ctx context.Context, repo restic.Repository, opts Backup if snName == "" { snName = "latest" } - sn, err := restic.FindFilteredSnapshot(ctx, repo.Backend(), repo, []string{opts.Host}, []restic.TagList{}, targets, &timeStampLimit, snName) + + var hosts []string + var paths []string + var tags []restic.TagList + if opts.GroupBy.Host { + hosts = []string{opts.Host} + } + if opts.GroupBy.Path { + paths = targets + } + if opts.GroupBy.Tag { + tags = []restic.TagList{opts.Tags.Flatten()} + } + + sn, err := restic.FindFilteredSnapshot(ctx, repo.Backend(), repo, hosts, tags, paths, &timeStampLimit, snName) // Snapshot not found is ok if no explicit parent was set if opts.Parent == "" && errors.Is(err, restic.ErrNoSnapshotFound) { err = nil From 0ce182f044d98c91f35605a9fa0a6d59e4b4a73f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 10 Dec 2022 16:02:29 +0100 Subject: [PATCH 3/3] document backup --group-by --- changelog/unreleased/issue-3941 | 14 ++++++++++++++ cmd/restic/cmd_backup.go | 2 +- doc/040_backup.rst | 15 +++++++++++++-- doc/060_forget.rst | 2 ++ 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 changelog/unreleased/issue-3941 diff --git a/changelog/unreleased/issue-3941 b/changelog/unreleased/issue-3941 new file mode 100644 index 000000000..011cd9eaa --- /dev/null +++ b/changelog/unreleased/issue-3941 @@ -0,0 +1,14 @@ +Enhancement: Support `--group-by` for backup parent selection + +The backup command by default selected the parent snapshot based on the hostname +and the backup targets. When the backup path list changed, the backup command +was unable to determine a suitable parent snapshot and had to read all +files again. + +The new `--group-by` option for the backup command allows filtering snapshots +for the parent selection by `host`, `paths` and `tags`. It defaults to +`host,paths` which selects the latest snapshot with hostname and paths matching +those of the backup run. It should be used consistently with `forget --group-by`. + +https://github.com/restic/restic/issues/3941 +https://github.com/restic/restic/pull/4081 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 696a19b94..7c58f95c4 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -121,7 +121,7 @@ func init() { cmdRoot.AddCommand(cmdBackup) f := cmdBackup.Flags() - f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)") + f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: latest snapshot in the group determined by --group-by and not newer than the timestamp determined by --time)") backupOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true} f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the target files/directories (overrides the "parent" flag)`) diff --git a/doc/040_backup.rst b/doc/040_backup.rst index b9996311d..3b1a56bd6 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -139,13 +139,24 @@ File change detection ********************* When restic encounters a file that has already been backed up, whether in the -current backup or a previous one, it makes sure the file's contents are only +current backup or a previous one, it makes sure the file's content is only stored once in the repository. To do so, it normally has to scan the entire -contents of every file. Because this can be very expensive, restic also uses a +content of the file. Because this can be very expensive, restic also uses a change detection rule based on file metadata to determine whether a file is likely unchanged since a previous backup. If it is, the file is not scanned again. +The previous backup snapshot, called "parent" snaphot in restic terminology, +is determined as follows. By default restic groups snapshots by hostname and +backup paths, and then selects the latest snapshot in the group that matches +the current backup. You can change the selection criteria using the +``--group-by`` option, which defaults to ``host,paths``. To select the latest +snapshot with the same paths independent of the hostname, use ``paths``. Or, +to only consider the hostname and tags, use ``host,tags``. Alternatively, it +is possible to manually specify a specific parent snapshot using the +``--parent`` option. Finally, note that one would normally set the +``--group-by`` option for the ``forget`` command to the same value. + Change detection is only performed for regular files (not special files, symlinks or directories) that have the exact same path as they did in a previous backup of the same location. If a file or one of its containing diff --git a/doc/060_forget.rst b/doc/060_forget.rst index a4205de75..b960ddb14 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -219,6 +219,8 @@ paths and tags. The policy is then applied to each group of snapshots individual This is a safety feature to prevent accidental removal of unrelated backup sets. To disable grouping and apply the policy to all snapshots regardless of their host, paths and tags, use ``--group-by ''`` (that is, an empty value to ``--group-by``). +Note that one would normally set the ``--group-by`` option for the ``backup`` +command to the same value. Additionally, you can restrict the policy to only process snapshots which have a particular hostname with the ``--host`` parameter, or tags with the ``--tag``