forked from TrueCloudLab/restic
Add --read-data-subset flag to check command
Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
parent
c99eabfb37
commit
07d080830e
4 changed files with 120 additions and 19 deletions
7
changelog/0.8.3/issue-1497
Normal file
7
changelog/0.8.3/issue-1497
Normal file
|
@ -0,0 +1,7 @@
|
|||
Enhancement: Add --read-data-subset flag to check command
|
||||
|
||||
This change introduces the ability to check the integrity of a subset of repository
|
||||
data packs. This can be used to spread integrity check of larger repositories
|
||||
over a period of time.
|
||||
|
||||
https://github.com/restic/restic/issues/1497
|
|
@ -3,6 +3,8 @@ package main
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
@ -26,11 +28,15 @@ repository and not use a local cache.
|
|||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runCheck(checkOptions, globalOptions, args)
|
||||
},
|
||||
PreRunE: func(cmd *cobra.Command, args []string) error {
|
||||
return checkFlags(checkOptions)
|
||||
},
|
||||
}
|
||||
|
||||
// CheckOptions bundles all options for the 'check' command.
|
||||
type CheckOptions struct {
|
||||
ReadData bool // --read-data: read all data blobs
|
||||
ReadDataSubset string // --read-data-subset: "n/t" selector for a subset of data packs; validated by checkFlags
|
||||
CheckUnused bool // --check-unused: find unused blobs
|
||||
WithCache bool // --with-cache: use the cache
|
||||
}
|
||||
|
@ -42,10 +48,45 @@ func init() {
|
|||
|
||||
f := cmdCheck.Flags()
|
||||
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
|
||||
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset of data packs")
|
||||
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
|
||||
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
|
||||
}
|
||||
|
||||
func checkFlags(opts CheckOptions) error {
|
||||
if opts.ReadData && opts.ReadDataSubset != "" {
|
||||
return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together")
|
||||
}
|
||||
if opts.ReadDataSubset != "" {
|
||||
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
||||
if err != nil || len(dataSubset) != 2 {
|
||||
return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2")
|
||||
}
|
||||
if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
|
||||
return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// stringToIntSlice parses param as a '/'-separated list of unsigned decimal
// integers and returns them in their original order. An empty param yields
// (nil, nil). If any element fails to parse, the conversion stops and the
// parse error is returned together with a nil slice.
func stringToIntSlice(param string) (split []uint, err error) {
	if len(param) == 0 {
		return nil, nil
	}

	fields := strings.Split(param, "/")
	values := make([]uint, 0, len(fields))
	for _, field := range fields {
		// bitSize 0 makes ParseUint range-check against the platform's uint.
		n, parseErr := strconv.ParseUint(field, 10, 0)
		if parseErr != nil {
			return nil, parseErr
		}
		values = append(values, uint(n))
	}

	return values, nil
}
|
||||
|
||||
func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
|
||||
if gopts.Quiet {
|
||||
return nil
|
||||
|
@ -158,13 +199,25 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||
}
|
||||
}
|
||||
|
||||
if opts.ReadData {
|
||||
Verbosef("read all data\n")
|
||||
doReadData := func(bucket, totalBuckets uint) {
|
||||
packs := restic.IDSet{}
|
||||
for pack := range chkr.GetPacks() {
|
||||
if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
|
||||
packs.Insert(pack)
|
||||
}
|
||||
}
|
||||
packCount := uint64(len(packs))
|
||||
|
||||
p := newReadProgress(gopts, restic.Stat{Blobs: chkr.CountPacks()})
|
||||
if packCount < chkr.CountPacks() {
|
||||
Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets))
|
||||
} else {
|
||||
Verbosef("read all data\n")
|
||||
}
|
||||
|
||||
p := newReadProgress(gopts, restic.Stat{Blobs: packCount})
|
||||
errChan := make(chan error)
|
||||
|
||||
go chkr.ReadData(gopts.ctx, p, errChan)
|
||||
go chkr.ReadPacks(gopts.ctx, packs, p, errChan)
|
||||
|
||||
for err := range errChan {
|
||||
errorsFound = true
|
||||
|
@ -172,6 +225,14 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case opts.ReadData:
|
||||
doReadData(1, 1)
|
||||
case opts.ReadDataSubset != "":
|
||||
dataSubset, _ := stringToIntSlice(opts.ReadDataSubset)
|
||||
doReadData(dataSubset[0], dataSubset[1])
|
||||
}
|
||||
|
||||
if errorsFound {
|
||||
return errors.Fatal("repository contains errors")
|
||||
}
|
||||
|
|
|
@ -87,3 +87,29 @@ yield the same error:
|
|||
Load indexes
|
||||
ciphertext verification failed
|
||||
|
||||
By default, the ``check`` command does not check that repository data files
|
||||
are unmodified. Use the ``--read-data`` parameter to check all repository
|
||||
data files:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic -r /tmp/backup check --read-data
|
||||
load indexes
|
||||
check all packs
|
||||
check snapshots, trees and blobs
|
||||
read all data
|
||||
|
||||
Use the ``--read-data-subset=n/t`` parameter to check a subset of repository data
|
||||
files. The parameter takes two values, ``n`` and ``t``. All repository data
|
||||
files are logically divided into ``t`` roughly equal groups and only files that
|
||||
belong to the group number ``n`` are checked. For example, the following
|
||||
commands check all repository data files over 5 separate invocations:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ restic -r /tmp/backup check --read-data-subset=1/5
|
||||
$ restic -r /tmp/backup check --read-data-subset=2/5
|
||||
$ restic -r /tmp/backup check --read-data-subset=3/5
|
||||
$ restic -r /tmp/backup check --read-data-subset=4/5
|
||||
$ restic -r /tmp/backup check --read-data-subset=5/5
|
||||
|
||||
|
|
|
@ -622,6 +622,11 @@ func (c *Checker) CountPacks() uint64 {
|
|||
return uint64(len(c.packs))
|
||||
}
|
||||
|
||||
// GetPacks returns the IDSet of packs held by the checker (c.packs).
// NOTE(review): the field itself is returned, not a copy; if restic.IDSet is
// a reference type, mutations by the caller affect the checker — confirm.
|
||||
func (c *Checker) GetPacks() restic.IDSet {
|
||||
return c.packs
|
||||
}
|
||||
|
||||
// checkPack reads a pack and checks the integrity of all blobs.
|
||||
func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
|
||||
debug.Log("checking pack %v", id)
|
||||
|
@ -697,6 +702,11 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
|
|||
|
||||
// ReadData loads all data from the repository and checks the integrity.
// It delegates to ReadPacks with the checker's complete pack set (c.packs);
// errors are reported on errChan, which ReadPacks closes when it returns.
|
||||
func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
|
||||
c.ReadPacks(ctx, c.packs, p, errChan)
|
||||
}
|
||||
|
||||
// ReadPacks loads data from specified packs and checks the integrity.
|
||||
func (c *Checker) ReadPacks(ctx context.Context, packs restic.IDSet, p *restic.Progress, errChan chan<- error) {
|
||||
defer close(errChan)
|
||||
|
||||
p.Start()
|
||||
|
@ -705,18 +715,6 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan
|
|||
g, ctx := errgroup.WithContext(ctx)
|
||||
ch := make(chan restic.ID)
|
||||
|
||||
// start producer for channel ch
|
||||
g.Go(func() error {
|
||||
defer close(ch)
|
||||
return c.repo.List(ctx, restic.DataFile, func(id restic.ID, size int64) error {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case ch <- id:
|
||||
}
|
||||
return nil
|
||||
})
|
||||
})
|
||||
|
||||
// run workers
|
||||
for i := 0; i < defaultParallelism; i++ {
|
||||
g.Go(func() error {
|
||||
|
@ -748,6 +746,15 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan
|
|||
})
|
||||
}
|
||||
|
||||
// push packs to ch
|
||||
for pack := range packs {
|
||||
select {
|
||||
case ch <- pack:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
}
|
||||
close(ch)
|
||||
|
||||
err := g.Wait()
|
||||
if err != nil {
|
||||
select {
|
||||
|
|
Loading…
Reference in a new issue