Merge pull request #1556 from ifedorenko/check-subset

Add --read-data-subset flag to check command
Alexander Neumann 2018-02-24 14:53:20 +01:00
commit dc1154c8ad
4 changed files with 120 additions and 19 deletions


@@ -0,0 +1,7 @@
+Enhancement: Add --read-data-subset flag to check command
+
+This change introduces the ability to check the integrity of a subset of the
+repository data packs. This can be used to spread the integrity check of
+larger repositories over a period of time.
+
+https://github.com/restic/restic/issues/1497


@@ -3,6 +3,8 @@ package main
 import (
     "fmt"
     "os"
+    "strconv"
+    "strings"
     "time"

     "github.com/spf13/cobra"
@@ -26,11 +28,15 @@ repository and not use a local cache.
     RunE: func(cmd *cobra.Command, args []string) error {
         return runCheck(checkOptions, globalOptions, args)
     },
+    PreRunE: func(cmd *cobra.Command, args []string) error {
+        return checkFlags(checkOptions)
+    },
 }

 // CheckOptions bundles all options for the 'check' command.
 type CheckOptions struct {
     ReadData       bool
+    ReadDataSubset string
     CheckUnused    bool
     WithCache      bool
 }
@@ -42,10 +48,45 @@ func init() {
     f := cmdCheck.Flags()
     f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
+    f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset of data packs")
     f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
     f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
 }

+func checkFlags(opts CheckOptions) error {
+    if opts.ReadData && opts.ReadDataSubset != "" {
+        return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together")
+    }
+    if opts.ReadDataSubset != "" {
+        dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
+        if err != nil || len(dataSubset) != 2 {
+            return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2")
+        }
+        if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
+            return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
+        }
+    }
+    return nil
+}
+
+// stringToIntSlice converts string to []uint, using '/' as element separator
+func stringToIntSlice(param string) (split []uint, err error) {
+    if param == "" {
+        return nil, nil
+    }
+
+    parts := strings.Split(param, "/")
+    result := make([]uint, len(parts))
+    for idx, part := range parts {
+        uintval, err := strconv.ParseUint(part, 10, 0)
+        if err != nil {
+            return nil, err
+        }
+        result[idx] = uint(uintval)
+    }
+    return result, nil
+}
+
 func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
     if gopts.Quiet {
         return nil
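
The new checkFlags and stringToIntSlice helpers above accept values such as "1/2" and reject "0/5", "3/2", or "1/2/3". A minimal table-driven test sketch, not part of this pull request, assuming it lives alongside the command source in the same main package:

package main

import "testing"

// TestCheckFlagsSubset exercises the --read-data-subset validation shown above.
func TestCheckFlagsSubset(t *testing.T) {
    for _, tc := range []struct {
        opts    CheckOptions
        wantErr bool
    }{
        {CheckOptions{ReadDataSubset: "1/2"}, false},                // first of two groups: valid
        {CheckOptions{ReadDataSubset: "3/2"}, true},                 // n > t is rejected
        {CheckOptions{ReadDataSubset: "0/5"}, true},                 // zero values are rejected
        {CheckOptions{ReadDataSubset: "1/2/3"}, true},               // exactly two values are required
        {CheckOptions{ReadData: true, ReadDataSubset: "1/2"}, true}, // cannot be combined with --read-data
    } {
        if err := checkFlags(tc.opts); (err != nil) != tc.wantErr {
            t.Errorf("checkFlags(%v): got err = %v, want error = %v", tc.opts, err, tc.wantErr)
        }
    }
}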
@@ -158,13 +199,25 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
         }
     }

-    if opts.ReadData {
-        Verbosef("read all data\n")
+    doReadData := func(bucket, totalBuckets uint) {
+        packs := restic.IDSet{}
+        for pack := range chkr.GetPacks() {
+            if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
+                packs.Insert(pack)
+            }
+        }

-        p := newReadProgress(gopts, restic.Stat{Blobs: chkr.CountPacks()})
+        packCount := uint64(len(packs))
+        if packCount < chkr.CountPacks() {
+            Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets))
+        } else {
+            Verbosef("read all data\n")
+        }
+
+        p := newReadProgress(gopts, restic.Stat{Blobs: packCount})
         errChan := make(chan error)

-        go chkr.ReadData(gopts.ctx, p, errChan)
+        go chkr.ReadPacks(gopts.ctx, packs, p, errChan)

         for err := range errChan {
             errorsFound = true
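
A note on the grouping rule in doReadData above: a pack is assigned to group n of t when the first byte of its ID, taken modulo t, equals n-1, so every pack lands in exactly one group and the t groups together cover the whole repository. A standalone sketch of that mapping, using hypothetical first bytes rather than restic code:

package main

import "fmt"

func main() {
    totalBuckets := uint(5)                      // the t in --read-data-subset=n/t
    firstBytes := []byte{0x00, 0x17, 0x9c, 0xfe} // hypothetical first bytes of pack IDs
    for _, b := range firstBytes {
        group := uint(b)%totalBuckets + 1 // same rule as above, groups numbered 1..t
        fmt.Printf("pack ID starting with 0x%02x -> group %d of %d\n", b, group, totalBuckets)
    }
}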
@@ -172,6 +225,14 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
         }
     }

+    switch {
+    case opts.ReadData:
+        doReadData(1, 1)
+    case opts.ReadDataSubset != "":
+        dataSubset, _ := stringToIntSlice(opts.ReadDataSubset)
+        doReadData(dataSubset[0], dataSubset[1])
+    }
+
     if errorsFound {
         return errors.Fatal("repository contains errors")
     }


@@ -87,3 +87,29 @@ yield the same error:
     Load indexes
     ciphertext verification failed

+By default, the ``check`` command does not check that the repository data
+files are unmodified. Use the ``--read-data`` parameter to check all
+repository data files:
+
+.. code-block:: console
+
+    $ restic -r /tmp/backup check --read-data
+    load indexes
+    check all packs
+    check snapshots, trees and blobs
+    read all data
+
+Use the ``--read-data-subset=n/t`` parameter to check a subset of the
+repository data files. The parameter takes two values, ``n`` and ``t``. All
+repository data files are logically divided into ``t`` roughly equal groups,
+and only the files that belong to group number ``n`` are checked. For example,
+the following commands check all repository data files over 5 separate
+invocations:
+
+.. code-block:: console
+
+    $ restic -r /tmp/backup check --read-data-subset=1/5
+    $ restic -r /tmp/backup check --read-data-subset=2/5
+    $ restic -r /tmp/backup check --read-data-subset=3/5
+    $ restic -r /tmp/backup check --read-data-subset=4/5
+    $ restic -r /tmp/backup check --read-data-subset=5/5
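
One way to spread these invocations over time, as the changelog entry suggests, is one scheduler entry per group. A sketch only, not taken from the restic documentation; the repository path and times are made up and password handling is omitted:

    # /etc/cron.d/restic-check (sketch): verify one fifth of the repository per weekday night
    0 3 * * 1   root   restic -r /srv/restic-repo check --read-data-subset=1/5
    0 3 * * 2   root   restic -r /srv/restic-repo check --read-data-subset=2/5
    0 3 * * 3   root   restic -r /srv/restic-repo check --read-data-subset=3/5
    0 3 * * 4   root   restic -r /srv/restic-repo check --read-data-subset=4/5
    0 3 * * 5   root   restic -r /srv/restic-repo check --read-data-subset=5/5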


@@ -622,6 +622,11 @@ func (c *Checker) CountPacks() uint64 {
     return uint64(len(c.packs))
 }

+// GetPacks returns IDSet of packs in the repository
+func (c *Checker) GetPacks() restic.IDSet {
+    return c.packs
+}
+
 // checkPack reads a pack and checks the integrity of all blobs.
 func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
     debug.Log("checking pack %v", id)
@@ -697,6 +702,11 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
 // ReadData loads all data from the repository and checks the integrity.
 func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
+    c.ReadPacks(ctx, c.packs, p, errChan)
+}
+
+// ReadPacks loads data from specified packs and checks the integrity.
+func (c *Checker) ReadPacks(ctx context.Context, packs restic.IDSet, p *restic.Progress, errChan chan<- error) {
     defer close(errChan)

     p.Start()
@@ -705,18 +715,6 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
     g, ctx := errgroup.WithContext(ctx)
     ch := make(chan restic.ID)

-    // start producer for channel ch
-    g.Go(func() error {
-        defer close(ch)
-        return c.repo.List(ctx, restic.DataFile, func(id restic.ID, size int64) error {
-            select {
-            case <-ctx.Done():
-            case ch <- id:
-            }
-            return nil
-        })
-    })
-
     // run workers
     for i := 0; i < defaultParallelism; i++ {
         g.Go(func() error {
@@ -748,6 +746,15 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
         })
     }

+    // push packs to ch
+    for pack := range packs {
+        select {
+        case ch <- pack:
+        case <-ctx.Done():
+        }
+    }
+    close(ch)
+
     err := g.Wait()
     if err != nil {
         select {
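
The last two hunks move channel feeding out of the former producer goroutine: ReadPacks now receives the pack set from its caller, pushes it onto ch itself, and closes the channel before waiting on the errgroup workers. A self-contained sketch of that pattern outside restic, with plain integers standing in for pack IDs:

package main

import (
    "context"
    "fmt"

    "golang.org/x/sync/errgroup"
)

func main() {
    const workers = 5 // restic uses defaultParallelism here
    g, ctx := errgroup.WithContext(context.Background())
    ch := make(chan int)

    // run workers that drain the channel (stand-in for checkPack)
    for i := 0; i < workers; i++ {
        g.Go(func() error {
            for item := range ch {
                fmt.Println("checked", item)
            }
            return nil
        })
    }

    // push work to ch, respecting cancellation, then close it so workers exit
    for item := 0; item < 20; item++ {
        select {
        case ch <- item:
        case <-ctx.Done():
        }
    }
    close(ch)

    if err := g.Wait(); err != nil {
        fmt.Println("error:", err)
    }
}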