diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 4f83a9874..0ff4600b8 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -1,8 +1,10 @@ package main import ( + "math" "sort" "strconv" + "strings" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -39,9 +41,8 @@ Exit status is 0 if the command was successful, and non-zero if there was any er type PruneOptions struct { DryRun bool - MaxUnused string - MaxUnusedPercent float64 // set if MaxUnused is a percentage - MaxUnusedBytes uint64 // set if MaxUnused is an absolute number of bytes + MaxUnused string + maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused MaxRepackSize string MaxRepackBytes uint64 @@ -60,7 +61,7 @@ func init() { func addPruneOptions(c *cobra.Command) { f := c.Flags() - f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused space (allowed suffixes: k/K, m/M, g/G, t/T or value in %)") + f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')") f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)") f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable") } @@ -74,27 +75,46 @@ func verifyPruneOptions(opts *PruneOptions) error { opts.MaxRepackBytes = uint64(size) } - length := len(opts.MaxUnused) - if length == 0 { - return nil + maxUnused := strings.TrimSpace(opts.MaxUnused) + if maxUnused == "" { + return errors.Fatalf("invalid value for --max-unused: %q", opts.MaxUnused) } - var err error - if opts.MaxUnused[length-1] == '%' { - opts.MaxUnusedPercent, err = strconv.ParseFloat(opts.MaxUnused[:length-1], 64) - opts.MaxUnusedBytes = ^uint64(0) - } 
else { - var size int64 - size, err = parseSizeStr(opts.MaxUnused) - opts.MaxUnusedPercent = 100.0 - opts.MaxUnusedBytes = uint64(size) - } - if err != nil { - return err - } + // parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes + switch { + case maxUnused == "unlimited": + opts.maxUnusedBytes = func(used uint64) uint64 { + return math.MaxUint64 + } - if opts.MaxUnusedPercent < 0.0 || opts.MaxUnusedPercent > 100.0 { - return errors.Fatalf("--max-unused-percent should be between 0 and 100. Given value: %f", opts.MaxUnusedPercent) + case strings.HasSuffix(maxUnused, "%"): + maxUnused = strings.TrimSuffix(maxUnused, "%") + p, err := strconv.ParseFloat(maxUnused, 64) + if err != nil { + return errors.Fatalf("invalid percentage %q passed for --max-unused: %v", opts.MaxUnused, err) + } + + if p < 0 { + return errors.Fatal("percentage for --max-unused must be positive") + } + + if p >= 100 { + return errors.Fatal("percentage for --max-unused must be below 100%") + } + + opts.maxUnusedBytes = func(used uint64) uint64 { + return uint64(p / (100 - p) * float64(used)) + } + + default: + size, err := parseSizeStr(maxUnused) + if err != nil { + return errors.Fatalf("invalid number of bytes %q for --max-unused: %v", opts.MaxUnused, err) + } + + opts.maxUnusedBytes = func(used uint64) uint64 { + return uint64(size) + } } return nil @@ -344,13 +364,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB repackAllPacksWithDuplicates := true - maxUnusedSizeAfter := opts.MaxUnusedBytes - if opts.MaxUnusedPercent < 100.0 { - maxUnusedSizePercent := uint64(opts.MaxUnusedPercent / (100.0 - opts.MaxUnusedPercent) * float64(stats.size.used)) - if maxUnusedSizePercent < maxUnusedSizeAfter { - maxUnusedSizeAfter = maxUnusedSizePercent - } - } + // calculate limit for number of unused bytes in the repo after repacking + maxUnusedSizeAfter := opts.maxUnusedBytes(stats.size.used) // Sort repackCandidates such that packs with 
highest ratio unused/used space are picked first. // This is equivalent to sorting by unused / total space. diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 6aeeab634..789240e0c 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1387,25 +1387,25 @@ func TestCheckRestoreNoLock(t *testing.T) { func TestPrune(t *testing.T) { t.Run("0", func(t *testing.T) { - opts := PruneOptions{MaxUnusedPercent: 0.0} + opts := PruneOptions{MaxUnused: "0%"} checkOpts := CheckOptions{ReadData: true, CheckUnused: true} testPrune(t, opts, checkOpts) }) t.Run("50", func(t *testing.T) { - opts := PruneOptions{MaxUnusedPercent: 50.0} + opts := PruneOptions{MaxUnused: "50%"} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) - t.Run("100", func(t *testing.T) { - opts := PruneOptions{MaxUnusedPercent: 100.0} + t.Run("unlimited", func(t *testing.T) { + opts := PruneOptions{MaxUnused: "unlimited"} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) t.Run("CachableOnly", func(t *testing.T) { - opts := PruneOptions{RepackCachableOnly: true} + opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) @@ -1436,7 +1436,7 @@ func testPrune(t *testing.T, pruneOpts PruneOptions, checkOpts CheckOptions) { rtest.OK(t, runCheck(checkOpts, env.gopts, nil)) } -var pruneDefaultOptions = PruneOptions{MaxUnusedPercent: 1.5} +var pruneDefaultOptions = PruneOptions{MaxUnused: "5%"} func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet { r, err := OpenRepository(gopts) diff --git a/doc/060_forget.rst b/doc/060_forget.rst index 2df82af8b..08381f180 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -310,39 +310,54 @@ Customize pruning To understand the custom options, we first explain how the pruning process works: -- First all snapshots and directories within snapshots are scanned to determine - which 
data is still in use. -- Then for all pack files ``prune`` finds out if the file is fully used, partly - used or completely unused. -- Completely unused packs are marked for deletion. Fully used packs are kept. - A partially used pack is either kept or marked for repacking depending on user - options. - Note that for repacking, restic must download the file from the repository - storage and reupload the needed data in the repository. This can be very - time-consuming for remote repositories. -- After deciding what to do, ``prune`` will actually perform the repack, modify - the index according to the changes and delete the obsolete files. +1. All snapshots and directories within snapshots are scanned to determine + which data is still in use. +2. For all files in the repository, restic finds out if the file is fully + used, partly used or completely unused. +3. Completely unused files are marked for deletion. Fully used files are kept. + A partially used file is either kept or marked for repacking depending on user + options. + + Note that for repacking, restic must download the file from the repository + storage and re-upload the needed data in the repository. This can be very + time-consuming for remote repositories. +4. After deciding what to do, ``prune`` will actually perform the repack, modify + the index according to the changes and delete the obsolete files. The ``prune`` command accepts the following options: - ``--max-unused limit`` allow unused data up to the specified limit within the repository. - This allows restic to keep partly used packs instead of repacking them. - The limit can be specified as size, e.g. "200M" or in percentage with respect to the total - repository size, e.g. "0.5%". - ``prune`` tries to repack as little data as possible while still ensuring this + This allows restic to keep partly used files instead of repacking them. + + The limit can be specified in several ways: + + * As an absolute size (e.g. ``200M``). 
If you want to minimize the space + used by your repository, pass ``0`` to this option. + * As a size relative to the total repo size (e.g. ``10%``). This means that + after prune, at most ``10%`` of the total data stored in the repo may be + unused data. If the repo after prune has a size of 500MB, then at most + 50MB may be unused. + * If the string ``unlimited`` is passed, there is no limit for partly + unused files. This means that as long as some data is still used within + a file stored in the repo, restic will just leave it there. Use this if + you want to minimize the time and bandwidth used by the ``prune`` + operation. + + Restic tries to repack as little data as possible while still ensuring this limit for unused data. - If you want to minimize the space used by your repository, use a value of 0%. - If you want to minimize the time and bandwidth used by the ``prune`` command, use a - high value. A value of 100% will not require any pack file to be repacked. - The default value is 5%. -- ``--max-repack-size size`` if set limits the total size of packs to repack. - As ``prune`` first stores all repacked packs and deletes the obsolete packs at the end, - this option might be handy if you expect many packs to be repacked and fear to run low - on storage. -- ``--repack-cacheable-only`` if set to true only pack files which are cacheable are repacked. - Other pack files are not repacked, if this option is set. - This allows a very fast repacking using only cached data. It can, however, imply that the - unused data in your repository exceeds the value given by ``--max-unused-percent``. - The default value is false. + +- ``--max-repack-size size`` if set limits the total size of files to repack. + As ``prune`` first stores all repacked files and deletes the obsolete files at the end, + this option might be handy if you expect many files to be repacked and fear to run low + on storage. 
+ +- ``--repack-cacheable-only`` if set to true only files which contain + metadata and would be stored in the cache are repacked. Other pack files are + not repacked if this option is set. This allows a very fast repacking + using only cached data. It can, however, imply that the unused data in + your repository exceeds the value given by ``--max-unused``. + The default value is false. + - ``--dry-run`` only show what ``prune`` would do. + - ``--verbose`` increased verbosity shows additional statistics for ``prune``.