Merge pull request #1556 from ifedorenko/check-subset

Add --read-data-subset flag to check command
Alexander Neumann 2018-02-24 14:53:20 +01:00
commit dc1154c8ad
4 changed files with 120 additions and 19 deletions


@@ -0,0 +1,7 @@
+Enhancement: Add --read-data-subset flag to check command
+
+This change introduces the ability to check the integrity of a subset of the
+repository data packs. This can be used to spread the integrity check of
+larger repositories over a period of time.
+
+https://github.com/restic/restic/issues/1497


@@ -3,6 +3,8 @@ package main
 import (
     "fmt"
     "os"
+    "strconv"
+    "strings"
     "time"

     "github.com/spf13/cobra"
@@ -26,11 +28,15 @@ repository and not use a local cache.
     RunE: func(cmd *cobra.Command, args []string) error {
         return runCheck(checkOptions, globalOptions, args)
     },
+    PreRunE: func(cmd *cobra.Command, args []string) error {
+        return checkFlags(checkOptions)
+    },
 }

 // CheckOptions bundles all options for the 'check' command.
 type CheckOptions struct {
     ReadData       bool
+    ReadDataSubset string
     CheckUnused    bool
     WithCache      bool
 }
@@ -42,10 +48,45 @@ func init() {
     f := cmdCheck.Flags()
     f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
+    f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset of data packs")
     f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
     f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
 }

+func checkFlags(opts CheckOptions) error {
+    if opts.ReadData && opts.ReadDataSubset != "" {
+        return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together")
+    }
+    if opts.ReadDataSubset != "" {
+        dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
+        if err != nil || len(dataSubset) != 2 {
+            return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2")
+        }
+        if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
+            return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
+        }
+    }
+    return nil
+}
+
+// stringToIntSlice converts string to []uint, using '/' as element separator
+func stringToIntSlice(param string) (split []uint, err error) {
+    if param == "" {
+        return nil, nil
+    }
+
+    parts := strings.Split(param, "/")
+    result := make([]uint, len(parts))
+    for idx, part := range parts {
+        uintval, err := strconv.ParseUint(part, 10, 0)
+        if err != nil {
+            return nil, err
+        }
+        result[idx] = uint(uintval)
+    }
+    return result, nil
+}
+
 func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
     if gopts.Quiet {
         return nil
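
The new checkFlags and stringToIntSlice helpers above accept values such as "1/2" and reject "0/5", "3/2", or "1/2/3". A minimal table-driven test sketch, not part of this pull request, assuming it lives alongside the command source in the same main package:

package main

import "testing"

// TestCheckFlagsSubset exercises the --read-data-subset validation shown above.
func TestCheckFlagsSubset(t *testing.T) {
    for _, tc := range []struct {
        opts    CheckOptions
        wantErr bool
    }{
        {CheckOptions{ReadDataSubset: "1/2"}, false},                // first of two groups: valid
        {CheckOptions{ReadDataSubset: "3/2"}, true},                 // n > t is rejected
        {CheckOptions{ReadDataSubset: "0/5"}, true},                 // zero values are rejected
        {CheckOptions{ReadDataSubset: "1/2/3"}, true},               // exactly two values are required
        {CheckOptions{ReadData: true, ReadDataSubset: "1/2"}, true}, // cannot be combined with --read-data
    } {
        if err := checkFlags(tc.opts); (err != nil) != tc.wantErr {
            t.Errorf("checkFlags(%v): got err = %v, want error = %v", tc.opts, err, tc.wantErr)
        }
    }
}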
@@ -158,13 +199,25 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
         }
     }

-    if opts.ReadData {
-        Verbosef("read all data\n")
+    doReadData := func(bucket, totalBuckets uint) {
+        packs := restic.IDSet{}
+        for pack := range chkr.GetPacks() {
+            if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
+                packs.Insert(pack)
+            }
+        }

-        p := newReadProgress(gopts, restic.Stat{Blobs: chkr.CountPacks()})
+        packCount := uint64(len(packs))
+        if packCount < chkr.CountPacks() {
+            Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets))
+        } else {
+            Verbosef("read all data\n")
+        }
+
+        p := newReadProgress(gopts, restic.Stat{Blobs: packCount})
         errChan := make(chan error)

-        go chkr.ReadData(gopts.ctx, p, errChan)
+        go chkr.ReadPacks(gopts.ctx, packs, p, errChan)

         for err := range errChan {
             errorsFound = true
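
A note on the grouping rule in doReadData above: a pack is assigned to group n of t when the first byte of its ID, taken modulo t, equals n-1, so every pack lands in exactly one group and the t groups together cover the whole repository. A standalone sketch of that mapping, using hypothetical first bytes rather than restic code:

package main

import "fmt"

func main() {
    totalBuckets := uint(5)                      // the t in --read-data-subset=n/t
    firstBytes := []byte{0x00, 0x17, 0x9c, 0xfe} // hypothetical first bytes of pack IDs
    for _, b := range firstBytes {
        group := uint(b)%totalBuckets + 1 // same rule as above, groups numbered 1..t
        fmt.Printf("pack ID starting with 0x%02x -> group %d of %d\n", b, group, totalBuckets)
    }
}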
@@ -172,6 +225,14 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
         }
     }

+    switch {
+    case opts.ReadData:
+        doReadData(1, 1)
+    case opts.ReadDataSubset != "":
+        dataSubset, _ := stringToIntSlice(opts.ReadDataSubset)
+        doReadData(dataSubset[0], dataSubset[1])
+    }
+
     if errorsFound {
         return errors.Fatal("repository contains errors")
     }


@@ -87,3 +87,29 @@ yield the same error:
     Load indexes
     ciphertext verification failed

+By default, the ``check`` command does not check that the repository data
+files are unmodified. Use the ``--read-data`` parameter to check all
+repository data files:
+
+.. code-block:: console
+
+    $ restic -r /tmp/backup check --read-data
+    load indexes
+    check all packs
+    check snapshots, trees and blobs
+    read all data
+
+Use the ``--read-data-subset=n/t`` parameter to check a subset of the
+repository data files. The parameter takes two values, ``n`` and ``t``. All
+repository data files are logically divided into ``t`` roughly equal groups,
+and only the files that belong to group number ``n`` are checked. For example,
+the following commands check all repository data files over 5 separate
+invocations:
+
+.. code-block:: console
+
+    $ restic -r /tmp/backup check --read-data-subset=1/5
+    $ restic -r /tmp/backup check --read-data-subset=2/5
+    $ restic -r /tmp/backup check --read-data-subset=3/5
+    $ restic -r /tmp/backup check --read-data-subset=4/5
+    $ restic -r /tmp/backup check --read-data-subset=5/5
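
One way to spread these invocations over time, as the changelog entry suggests, is one scheduler entry per group. A sketch only, not taken from the restic documentation; the repository path and times are made up and password handling is omitted:

    # /etc/cron.d/restic-check (sketch): verify one fifth of the repository per weekday night
    0 3 * * 1   root   restic -r /srv/restic-repo check --read-data-subset=1/5
    0 3 * * 2   root   restic -r /srv/restic-repo check --read-data-subset=2/5
    0 3 * * 3   root   restic -r /srv/restic-repo check --read-data-subset=3/5
    0 3 * * 4   root   restic -r /srv/restic-repo check --read-data-subset=4/5
    0 3 * * 5   root   restic -r /srv/restic-repo check --read-data-subset=5/5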


@@ -622,6 +622,11 @@ func (c *Checker) CountPacks() uint64 {
     return uint64(len(c.packs))
 }

+// GetPacks returns IDSet of packs in the repository
+func (c *Checker) GetPacks() restic.IDSet {
+    return c.packs
+}
+
 // checkPack reads a pack and checks the integrity of all blobs.
 func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
     debug.Log("checking pack %v", id)
@@ -697,6 +702,11 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
 // ReadData loads all data from the repository and checks the integrity.
 func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
+    c.ReadPacks(ctx, c.packs, p, errChan)
+}
+
+// ReadPacks loads data from specified packs and checks the integrity.
+func (c *Checker) ReadPacks(ctx context.Context, packs restic.IDSet, p *restic.Progress, errChan chan<- error) {
     defer close(errChan)

     p.Start()
@@ -705,18 +715,6 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
     g, ctx := errgroup.WithContext(ctx)
     ch := make(chan restic.ID)

-    // start producer for channel ch
-    g.Go(func() error {
-        defer close(ch)
-        return c.repo.List(ctx, restic.DataFile, func(id restic.ID, size int64) error {
-            select {
-            case <-ctx.Done():
-            case ch <- id:
-            }
-            return nil
-        })
-    })
-
     // run workers
     for i := 0; i < defaultParallelism; i++ {
         g.Go(func() error {
@@ -748,6 +746,15 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
         })
     }

+    // push packs to ch
+    for pack := range packs {
+        select {
+        case ch <- pack:
+        case <-ctx.Done():
+        }
+    }
+    close(ch)
+
     err := g.Wait()
     if err != nil {
         select {
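
The last two hunks move channel feeding out of the former producer goroutine: ReadPacks now receives the pack set from its caller, pushes it onto ch itself, and closes the channel before waiting on the errgroup workers. A self-contained sketch of that pattern outside restic, with plain integers standing in for pack IDs:

package main

import (
    "context"
    "fmt"

    "golang.org/x/sync/errgroup"
)

func main() {
    const workers = 5 // restic uses defaultParallelism here
    g, ctx := errgroup.WithContext(context.Background())
    ch := make(chan int)

    // run workers that drain the channel (stand-in for checkPack)
    for i := 0; i < workers; i++ {
        g.Go(func() error {
            for item := range ch {
                fmt.Println("checked", item)
            }
            return nil
        })
    }

    // push work to ch, respecting cancellation, then close it so workers exit
    for item := 0; item < 20; item++ {
        select {
        case ch <- item:
        case <-ctx.Done():
        }
    }
    close(ch)

    if err := g.Wait(); err != nil {
        fmt.Println("error:", err)
    }
}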