restic/cmd/restic/cmd_check.go
2022-04-09 12:27:25 +02:00

405 lines
11 KiB
Go

package main
import (
"io/ioutil"
"math/rand"
"strconv"
"strings"
"sync"
"time"
"github.com/spf13/cobra"
"github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
)
var cmdCheck = &cobra.Command{
Use: "check [flags]",
Short: "Check the repository for errors",
Long: `
The "check" command tests the repository for errors and reports any errors it
finds. It can also be used to read all data and therefore simulate a restore.
By default, the "check" command will always load all data directly from the
repository and not use a local cache.
EXIT STATUS
===========
Exit status is 0 if the command was successful, and non-zero if there was any error.
`,
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
return runCheck(checkOptions, globalOptions, args)
},
PreRunE: func(cmd *cobra.Command, args []string) error {
return checkFlags(checkOptions)
},
}
// CheckOptions bundles all options for the 'check' command.
type CheckOptions struct {
ReadData bool
ReadDataSubset string
CheckUnused bool
WithCache bool
}
var checkOptions CheckOptions
func init() {
cmdRoot.AddCommand(cmdCheck)
f := cmdCheck.Flags()
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read a `subset` of data packs, specified as 'n/t' for specific part, or either 'x%' or 'x.y%' or a size in bytes with suffixes k/K, m/M, g/G, t/T for a random subset")
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
}
func checkFlags(opts CheckOptions) error {
if opts.ReadData && opts.ReadDataSubset != "" {
return errors.Fatal("check flags --read-data and --read-data-subset cannot be used together")
}
if opts.ReadDataSubset != "" {
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
argumentError := errors.Fatal("check flag --read-data-subset has invalid value, please see documentation")
if err == nil {
if len(dataSubset) != 2 {
return argumentError
}
if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
return errors.Fatal("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
}
if dataSubset[1] > totalBucketsMax {
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
}
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err != nil {
return argumentError
}
if percentage <= 0.0 || percentage > 100.0 {
return errors.Fatal(
"check flag --read-data-subset=x% x must be above 0.0% and at most 100.0%")
}
} else {
fileSize, err := parseSizeStr(opts.ReadDataSubset)
if err != nil {
return argumentError
}
if fileSize <= 0.0 {
return errors.Fatal(
"check flag --read-data-subset=n n must be above 0")
}
}
}
return nil
}
// See doReadData in runCheck below for why this is 256.
const totalBucketsMax = 256
// stringToIntSlice converts string to []uint, using '/' as element separator
func stringToIntSlice(param string) (split []uint, err error) {
if param == "" {
return nil, nil
}
parts := strings.Split(param, "/")
result := make([]uint, len(parts))
for idx, part := range parts {
uintval, err := strconv.ParseUint(part, 10, 0)
if err != nil {
return nil, err
}
result[idx] = uint(uintval)
}
return result, nil
}
// ParsePercentage parses a percentage string of the form "X%" where X is a float constant,
// and returns the value of that constant. It does not check the range of the value.
func parsePercentage(s string) (float64, error) {
if !strings.HasSuffix(s, "%") {
return 0, errors.Errorf(`parsePercentage: %q does not end in "%%"`, s)
}
s = s[:len(s)-1]
p, err := strconv.ParseFloat(s, 64)
if err != nil {
return 0, errors.Errorf("parsePercentage: %v", err)
}
return p, nil
}
// prepareCheckCache configures a special cache directory for check.
//
// * if --with-cache is specified, the default cache is used
// * if the user explicitly requested --no-cache, we don't use any cache
// * if the user provides --cache-dir, we use a cache in a temporary sub-directory of the specified directory and the sub-directory is deleted after the check
// * by default, we use a cache in a temporary directory that is deleted after the check
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
cleanup = func() {}
if opts.WithCache {
// use the default cache, no setup needed
return cleanup
}
if gopts.NoCache {
// don't use any cache, no setup needed
return cleanup
}
cachedir := gopts.CacheDir
if cachedir == "" {
cachedir = cache.EnvDir()
}
// use a cache in a temporary directory
tempdir, err := ioutil.TempDir(cachedir, "restic-check-cache-")
if err != nil {
// if an error occurs, don't use any cache
Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err)
gopts.NoCache = true
return cleanup
}
gopts.CacheDir = tempdir
Verbosef("using temporary cache in %v\n", tempdir)
cleanup = func() {
err := fs.RemoveAll(tempdir)
if err != nil {
Warnf("error removing temporary cache directory: %v\n", err)
}
}
return cleanup
}
func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
if len(args) != 0 {
return errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
}
cleanup := prepareCheckCache(opts, &gopts)
AddCleanupHandler(func() error {
cleanup()
return nil
})
repo, err := OpenRepository(gopts)
if err != nil {
return err
}
if !gopts.NoLock {
Verbosef("create exclusive lock for repository\n")
lock, err := lockRepoExclusive(gopts.ctx, repo)
defer unlockRepo(lock)
if err != nil {
return err
}
}
chkr := checker.New(repo, opts.CheckUnused)
err = chkr.LoadSnapshots(gopts.ctx)
if err != nil {
return err
}
Verbosef("load indexes\n")
hints, errs := chkr.LoadIndex(gopts.ctx)
dupFound := false
for _, hint := range hints {
Printf("%v\n", hint)
if _, ok := hint.(checker.ErrDuplicatePacks); ok {
dupFound = true
}
}
if dupFound {
Printf("This is non-critical, you can run `restic rebuild-index' to correct this\n")
}
if len(errs) > 0 {
for _, err := range errs {
Warnf("error: %v\n", err)
}
return errors.Fatal("LoadIndex returned errors")
}
errorsFound := false
orphanedPacks := 0
errChan := make(chan error)
Verbosef("check all packs\n")
go chkr.Packs(gopts.ctx, errChan)
for err := range errChan {
if checker.IsOrphanedPack(err) {
orphanedPacks++
Verbosef("%v\n", err)
continue
}
errorsFound = true
Warnf("%v\n", err)
}
if orphanedPacks > 0 {
Verbosef("%d additional files were found in the repo, which likely contain duplicate data.\nYou can run `restic prune` to correct this.\n", orphanedPacks)
}
Verbosef("check snapshots, trees and blobs\n")
errChan = make(chan error)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
bar := newProgressMax(!gopts.Quiet, 0, "snapshots")
defer bar.Done()
chkr.Structure(gopts.ctx, bar, errChan)
}()
for err := range errChan {
errorsFound = true
if e, ok := err.(checker.TreeError); ok {
Warnf("error for tree %v:\n", e.ID.Str())
for _, treeErr := range e.Errors {
Warnf(" %v\n", treeErr)
}
} else {
Warnf("error: %v\n", err)
}
}
// Wait for the progress bar to be complete before printing more below.
// Must happen after `errChan` is read from in the above loop to avoid
// deadlocking in the case of errors.
wg.Wait()
if opts.CheckUnused {
for _, id := range chkr.UnusedBlobs(gopts.ctx) {
Verbosef("unused blob %v\n", id)
errorsFound = true
}
}
doReadData := func(packs map[restic.ID]int64) {
packCount := uint64(len(packs))
p := newProgressMax(!gopts.Quiet, packCount, "packs")
errChan := make(chan error)
go chkr.ReadPacks(gopts.ctx, packs, p, errChan)
for err := range errChan {
errorsFound = true
Warnf("%v\n", err)
}
p.Done()
}
switch {
case opts.ReadData:
Verbosef("read all data\n")
doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
case opts.ReadDataSubset != "":
var packs map[restic.ID]int64
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
if err == nil {
bucket := dataSubset[0]
totalBuckets := dataSubset[1]
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
}
} else {
repoSize := int64(0)
allPacks := chkr.GetPacks()
for _, size := range allPacks {
repoSize += size
}
if repoSize == 0 {
return errors.Fatal("Cannot read from a repository having size 0")
}
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
if subsetSize > repoSize {
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
Verbosef("read %d bytes of data packs\n", subsetSize)
}
if packs == nil {
return errors.Fatal("internal error: failed to select packs to check")
}
doReadData(packs)
}
if errorsFound {
return errors.Fatal("repository contains errors")
}
Verbosef("no errors were found\n")
return nil
}
// selectPacksByBucket selects subsets of packs by ranges of buckets.
func selectPacksByBucket(allPacks map[restic.ID]int64, bucket, totalBuckets uint) map[restic.ID]int64 {
packs := make(map[restic.ID]int64)
for pack, size := range allPacks {
// If we ever check more than the first byte
// of pack, update totalBucketsMax.
if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
packs[pack] = size
}
}
return packs
}
// selectRandomPacksByPercentage selects the given percentage of packs which are randomly choosen.
func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage float64) map[restic.ID]int64 {
packCount := len(allPacks)
packsToCheck := int(float64(packCount) * (percentage / 100.0))
if packCount > 0 && packsToCheck < 1 {
packsToCheck = 1
}
timeNs := time.Now().UnixNano()
r := rand.New(rand.NewSource(timeNs))
idx := r.Perm(packCount)
var keys []restic.ID
for k := range allPacks {
keys = append(keys, k)
}
packs := make(map[restic.ID]int64)
for i := 0; i < packsToCheck; i++ {
id := keys[idx[i]]
packs[id] = allPacks[id]
}
return packs
}
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
return packs
}