Merge pull request #1556 from ifedorenko/check-subset
Add --read-data-subset flag to check command
This commit is contained in:
commit
dc1154c8ad
4 changed files with 120 additions and 19 deletions
7
changelog/0.8.3/issue-1497
Normal file
7
changelog/0.8.3/issue-1497
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
Enhancement: Add --read-data-subset flag to check command
|
||||||
|
|
||||||
|
This change introduces the ability to check the integrity of a subset of repository
|
||||||
|
data packs. This can be used to spread the integrity check of larger repositories
|
||||||
|
over a period of time.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/1497
|
|
@ -3,6 +3,8 @@ package main
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
|
@ -26,13 +28,17 @@ repository and not use a local cache.
|
||||||
RunE: func(cmd *cobra.Command, args []string) error {
|
RunE: func(cmd *cobra.Command, args []string) error {
|
||||||
return runCheck(checkOptions, globalOptions, args)
|
return runCheck(checkOptions, globalOptions, args)
|
||||||
},
|
},
|
||||||
|
PreRunE: func(cmd *cobra.Command, args []string) error {
|
||||||
|
return checkFlags(checkOptions)
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckOptions bundles all options for the 'check' command.
|
// CheckOptions bundles all options for the 'check' command.
|
||||||
type CheckOptions struct {
|
type CheckOptions struct {
|
||||||
ReadData bool
|
ReadData bool
|
||||||
CheckUnused bool
|
ReadDataSubset string
|
||||||
WithCache bool
|
CheckUnused bool
|
||||||
|
WithCache bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var checkOptions CheckOptions
|
var checkOptions CheckOptions
|
||||||
|
@ -42,10 +48,45 @@ func init() {
|
||||||
|
|
||||||
f := cmdCheck.Flags()
|
f := cmdCheck.Flags()
|
||||||
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
|
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
|
||||||
|
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset of data packs")
|
||||||
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
|
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
|
||||||
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
|
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func checkFlags(opts CheckOptions) error {
|
||||||
|
if opts.ReadData && opts.ReadDataSubset != "" {
|
||||||
|
return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together")
|
||||||
|
}
|
||||||
|
if opts.ReadDataSubset != "" {
|
||||||
|
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
||||||
|
if err != nil || len(dataSubset) != 2 {
|
||||||
|
return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2")
|
||||||
|
}
|
||||||
|
if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
|
||||||
|
return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringToIntSlice splits param on '/' and parses every element as a base-10
// unsigned integer, e.g. "1/5" yields []uint{1, 5}.
//
// An empty param yields a nil slice and a nil error. If any element is not a
// valid unsigned integer (including an empty element, as in "1/"), the
// strconv error is returned together with a nil slice.
func stringToIntSlice(param string) ([]uint, error) {
	if param == "" {
		return nil, nil
	}

	parts := strings.Split(param, "/")
	result := make([]uint, 0, len(parts))
	for _, part := range parts {
		// bitSize 0 makes ParseUint reject values that do not fit in a uint.
		val, err := strconv.ParseUint(part, 10, 0)
		if err != nil {
			return nil, err
		}
		result = append(result, uint(val))
	}
	return result, nil
}
|
||||||
|
|
||||||
func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
|
func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
|
||||||
if gopts.Quiet {
|
if gopts.Quiet {
|
||||||
return nil
|
return nil
|
||||||
|
@ -158,13 +199,25 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.ReadData {
|
doReadData := func(bucket, totalBuckets uint) {
|
||||||
Verbosef("read all data\n")
|
packs := restic.IDSet{}
|
||||||
|
for pack := range chkr.GetPacks() {
|
||||||
|
if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
|
||||||
|
packs.Insert(pack)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
packCount := uint64(len(packs))
|
||||||
|
|
||||||
p := newReadProgress(gopts, restic.Stat{Blobs: chkr.CountPacks()})
|
if packCount < chkr.CountPacks() {
|
||||||
|
Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets))
|
||||||
|
} else {
|
||||||
|
Verbosef("read all data\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
p := newReadProgress(gopts, restic.Stat{Blobs: packCount})
|
||||||
errChan := make(chan error)
|
errChan := make(chan error)
|
||||||
|
|
||||||
go chkr.ReadData(gopts.ctx, p, errChan)
|
go chkr.ReadPacks(gopts.ctx, packs, p, errChan)
|
||||||
|
|
||||||
for err := range errChan {
|
for err := range errChan {
|
||||||
errorsFound = true
|
errorsFound = true
|
||||||
|
@ -172,6 +225,14 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case opts.ReadData:
|
||||||
|
doReadData(1, 1)
|
||||||
|
case opts.ReadDataSubset != "":
|
||||||
|
dataSubset, _ := stringToIntSlice(opts.ReadDataSubset)
|
||||||
|
doReadData(dataSubset[0], dataSubset[1])
|
||||||
|
}
|
||||||
|
|
||||||
if errorsFound {
|
if errorsFound {
|
||||||
return errors.Fatal("repository contains errors")
|
return errors.Fatal("repository contains errors")
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,3 +87,29 @@ yield the same error:
|
||||||
Load indexes
|
Load indexes
|
||||||
ciphertext verification failed
|
ciphertext verification failed
|
||||||
|
|
||||||
|
By default, the ``check`` command does not check that repository data files
|
||||||
|
are unmodified. Use the ``--read-data`` parameter to check all repository
|
||||||
|
data files:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ restic -r /tmp/backup check --read-data
|
||||||
|
load indexes
|
||||||
|
check all packs
|
||||||
|
check snapshots, trees and blobs
|
||||||
|
read all data
|
||||||
|
|
||||||
|
Use the ``--read-data-subset=n/t`` parameter to check a subset of repository data
|
||||||
|
files. The parameter takes two values, ``n`` and ``t``. All repository data
|
||||||
|
files are logically divided into ``t`` roughly equal groups and only files that
|
||||||
|
belong to the group number ``n`` are checked. For example, the following
|
||||||
|
commands check all repository data files over 5 separate invocations:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ restic -r /tmp/backup check --read-data-subset=1/5
|
||||||
|
$ restic -r /tmp/backup check --read-data-subset=2/5
|
||||||
|
$ restic -r /tmp/backup check --read-data-subset=3/5
|
||||||
|
$ restic -r /tmp/backup check --read-data-subset=4/5
|
||||||
|
$ restic -r /tmp/backup check --read-data-subset=5/5
|
||||||
|
|
||||||
|
|
|
@ -622,6 +622,11 @@ func (c *Checker) CountPacks() uint64 {
|
||||||
return uint64(len(c.packs))
|
return uint64(len(c.packs))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetPacks returns IDSet of packs in the repository
|
||||||
|
func (c *Checker) GetPacks() restic.IDSet {
|
||||||
|
return c.packs
|
||||||
|
}
|
||||||
|
|
||||||
// checkPack reads a pack and checks the integrity of all blobs.
|
// checkPack reads a pack and checks the integrity of all blobs.
|
||||||
func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
|
func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
|
||||||
debug.Log("checking pack %v", id)
|
debug.Log("checking pack %v", id)
|
||||||
|
@ -697,6 +702,11 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
|
||||||
|
|
||||||
// ReadData loads all data from the repository and checks the integrity.
|
// ReadData loads all data from the repository and checks the integrity.
|
||||||
func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
|
func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
|
||||||
|
c.ReadPacks(ctx, c.packs, p, errChan)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadPacks loads data from specified packs and checks the integrity.
|
||||||
|
func (c *Checker) ReadPacks(ctx context.Context, packs restic.IDSet, p *restic.Progress, errChan chan<- error) {
|
||||||
defer close(errChan)
|
defer close(errChan)
|
||||||
|
|
||||||
p.Start()
|
p.Start()
|
||||||
|
@ -705,18 +715,6 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan
|
||||||
g, ctx := errgroup.WithContext(ctx)
|
g, ctx := errgroup.WithContext(ctx)
|
||||||
ch := make(chan restic.ID)
|
ch := make(chan restic.ID)
|
||||||
|
|
||||||
// start producer for channel ch
|
|
||||||
g.Go(func() error {
|
|
||||||
defer close(ch)
|
|
||||||
return c.repo.List(ctx, restic.DataFile, func(id restic.ID, size int64) error {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
case ch <- id:
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// run workers
|
// run workers
|
||||||
for i := 0; i < defaultParallelism; i++ {
|
for i := 0; i < defaultParallelism; i++ {
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
|
@ -748,6 +746,15 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// push packs to ch
|
||||||
|
for pack := range packs {
|
||||||
|
select {
|
||||||
|
case ch <- pack:
|
||||||
|
case <-ctx.Done():
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(ch)
|
||||||
|
|
||||||
err := g.Wait()
|
err := g.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
select {
|
select {
|
||||||
|
|
Loading…
Reference in a new issue