forked from TrueCloudLab/restic
118d599d0a
The old name still works, but is deprecated.
430 lines
12 KiB
Go
430 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"math/rand"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
|
|
"github.com/restic/restic/internal/cache"
|
|
"github.com/restic/restic/internal/checker"
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/fs"
|
|
"github.com/restic/restic/internal/restic"
|
|
)
|
|
|
|
var cmdCheck = &cobra.Command{
|
|
Use: "check [flags]",
|
|
Short: "Check the repository for errors",
|
|
Long: `
|
|
The "check" command tests the repository for errors and reports any errors it
|
|
finds. It can also be used to read all data and therefore simulate a restore.
|
|
|
|
By default, the "check" command will always load all data directly from the
|
|
repository and not use a local cache.
|
|
|
|
EXIT STATUS
|
|
===========
|
|
|
|
Exit status is 0 if the command was successful, and non-zero if there was any error.
|
|
`,
|
|
DisableAutoGenTag: true,
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
return runCheck(cmd.Context(), checkOptions, globalOptions, args)
|
|
},
|
|
PreRunE: func(cmd *cobra.Command, args []string) error {
|
|
return checkFlags(checkOptions)
|
|
},
|
|
}
|
|
|
|
// CheckOptions bundles all options for the 'check' command.
type CheckOptions struct {
	// ReadData requests that the data of all packs is read (--read-data).
	ReadData bool
	// ReadDataSubset is the raw --read-data-subset value: "n/t", a percentage
	// such as "x%" or "x.y%", or a size in bytes with an optional suffix.
	ReadDataSubset string
	// CheckUnused is handed to checker.New and makes runCheck report unused
	// blobs as errors. The deprecated --check-unused flag does not set it.
	CheckUnused bool
	// WithCache keeps the regular cache configuration instead of switching
	// to a temporary cache directory (--with-cache).
	WithCache bool
}
|
|
|
|
// checkOptions receives the parsed flag values of the check command; init
// binds its fields to the flags and cmdCheck passes it to checkFlags/runCheck.
var checkOptions CheckOptions
|
|
|
|
func init() {
|
|
cmdRoot.AddCommand(cmdCheck)
|
|
|
|
f := cmdCheck.Flags()
|
|
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
|
|
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read a `subset` of data packs, specified as 'n/t' for specific part, or either 'x%' or 'x.y%' or a size in bytes with suffixes k/K, m/M, g/G, t/T for a random subset")
|
|
var ignored bool
|
|
f.BoolVar(&ignored, "check-unused", false, "find unused blobs")
|
|
err := f.MarkDeprecated("check-unused", "`--check-unused` is deprecated and will be ignored")
|
|
if err != nil {
|
|
// MarkDeprecated only returns an error when the flag is not found
|
|
panic(err)
|
|
}
|
|
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
|
|
}
|
|
|
|
func checkFlags(opts CheckOptions) error {
|
|
if opts.ReadData && opts.ReadDataSubset != "" {
|
|
return errors.Fatal("check flags --read-data and --read-data-subset cannot be used together")
|
|
}
|
|
if opts.ReadDataSubset != "" {
|
|
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
|
argumentError := errors.Fatal("check flag --read-data-subset has invalid value, please see documentation")
|
|
if err == nil {
|
|
if len(dataSubset) != 2 {
|
|
return argumentError
|
|
}
|
|
if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
|
|
return errors.Fatal("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
|
|
}
|
|
if dataSubset[1] > totalBucketsMax {
|
|
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
|
|
}
|
|
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
|
percentage, err := parsePercentage(opts.ReadDataSubset)
|
|
if err != nil {
|
|
return argumentError
|
|
}
|
|
|
|
if percentage <= 0.0 || percentage > 100.0 {
|
|
return errors.Fatal(
|
|
"check flag --read-data-subset=x% x must be above 0.0% and at most 100.0%")
|
|
}
|
|
|
|
} else {
|
|
fileSize, err := parseSizeStr(opts.ReadDataSubset)
|
|
if err != nil {
|
|
return argumentError
|
|
}
|
|
if fileSize <= 0.0 {
|
|
return errors.Fatal(
|
|
"check flag --read-data-subset=n n must be above 0")
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// totalBucketsMax is the largest t that checkFlags accepts for
// --read-data-subset=n/t. selectPacksByBucket assigns a pack to a bucket using
// only the first byte of its ID, so there are at most 256 distinct values.
|
|
const totalBucketsMax = 256
|
|
|
|
// stringToIntSlice splits param at every '/' and parses each element as a
// base-10 unsigned integer.
//
// An empty param yields (nil, nil). If any element cannot be parsed (this
// includes empty elements, as in "1/", and negative numbers) the strconv
// error is returned together with a nil slice.
func stringToIntSlice(param string) ([]uint, error) {
	if param == "" {
		return nil, nil
	}

	parts := strings.Split(param, "/")
	values := make([]uint, 0, len(parts))
	for _, part := range parts {
		// bitSize 0 makes ParseUint reject values that do not fit in a uint.
		v, err := strconv.ParseUint(part, 10, 0)
		if err != nil {
			return nil, err
		}
		values = append(values, uint(v))
	}
	return values, nil
}
|
|
|
|
// ParsePercentage parses a percentage string of the form "X%" where X is a float constant,
|
|
// and returns the value of that constant. It does not check the range of the value.
|
|
func parsePercentage(s string) (float64, error) {
|
|
if !strings.HasSuffix(s, "%") {
|
|
return 0, errors.Errorf(`parsePercentage: %q does not end in "%%"`, s)
|
|
}
|
|
s = s[:len(s)-1]
|
|
|
|
p, err := strconv.ParseFloat(s, 64)
|
|
if err != nil {
|
|
return 0, errors.Errorf("parsePercentage: %v", err)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// prepareCheckCache configures a special cache directory for check.
|
|
//
|
|
// - if --with-cache is specified, the default cache is used
|
|
// - if the user explicitly requested --no-cache, we don't use any cache
|
|
// - if the user provides --cache-dir, we use a cache in a temporary sub-directory of the specified directory and the sub-directory is deleted after the check
|
|
// - by default, we use a cache in a temporary directory that is deleted after the check
|
|
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
|
|
cleanup = func() {}
|
|
if opts.WithCache {
|
|
// use the default cache, no setup needed
|
|
return cleanup
|
|
}
|
|
|
|
if gopts.NoCache {
|
|
// don't use any cache, no setup needed
|
|
return cleanup
|
|
}
|
|
|
|
cachedir := gopts.CacheDir
|
|
if cachedir == "" {
|
|
cachedir = cache.EnvDir()
|
|
}
|
|
|
|
// use a cache in a temporary directory
|
|
tempdir, err := os.MkdirTemp(cachedir, "restic-check-cache-")
|
|
if err != nil {
|
|
// if an error occurs, don't use any cache
|
|
Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err)
|
|
gopts.NoCache = true
|
|
return cleanup
|
|
}
|
|
|
|
gopts.CacheDir = tempdir
|
|
Verbosef("using temporary cache in %v\n", tempdir)
|
|
|
|
cleanup = func() {
|
|
err := fs.RemoveAll(tempdir)
|
|
if err != nil {
|
|
Warnf("error removing temporary cache directory: %v\n", err)
|
|
}
|
|
}
|
|
|
|
return cleanup
|
|
}
|
|
|
|
func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|
if len(args) != 0 {
|
|
return errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
|
|
}
|
|
|
|
cleanup := prepareCheckCache(opts, &gopts)
|
|
AddCleanupHandler(func(code int) (int, error) {
|
|
cleanup()
|
|
return code, nil
|
|
})
|
|
|
|
repo, err := OpenRepository(ctx, gopts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !gopts.NoLock {
|
|
Verbosef("create exclusive lock for repository\n")
|
|
var lock *restic.Lock
|
|
lock, ctx, err = lockRepoExclusive(ctx, repo, gopts.RetryLock, gopts.JSON)
|
|
defer unlockRepo(lock)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
chkr := checker.New(repo, opts.CheckUnused)
|
|
err = chkr.LoadSnapshots(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
Verbosef("load indexes\n")
|
|
hints, errs := chkr.LoadIndex(ctx)
|
|
|
|
errorsFound := false
|
|
suggestIndexRebuild := false
|
|
mixedFound := false
|
|
for _, hint := range hints {
|
|
switch hint.(type) {
|
|
case *checker.ErrDuplicatePacks, *checker.ErrOldIndexFormat:
|
|
Printf("%v\n", hint)
|
|
suggestIndexRebuild = true
|
|
case *checker.ErrMixedPack:
|
|
Printf("%v\n", hint)
|
|
mixedFound = true
|
|
default:
|
|
Warnf("error: %v\n", hint)
|
|
errorsFound = true
|
|
}
|
|
}
|
|
|
|
if suggestIndexRebuild {
|
|
Printf("Duplicate packs/old indexes are non-critical, you can run `restic repair index' to correct this.\n")
|
|
}
|
|
if mixedFound {
|
|
Printf("Mixed packs with tree and data blobs are non-critical, you can run `restic prune` to correct this.\n")
|
|
}
|
|
|
|
if len(errs) > 0 {
|
|
for _, err := range errs {
|
|
Warnf("error: %v\n", err)
|
|
}
|
|
return errors.Fatal("LoadIndex returned errors")
|
|
}
|
|
|
|
orphanedPacks := 0
|
|
errChan := make(chan error)
|
|
|
|
Verbosef("check all packs\n")
|
|
go chkr.Packs(ctx, errChan)
|
|
|
|
for err := range errChan {
|
|
if checker.IsOrphanedPack(err) {
|
|
orphanedPacks++
|
|
Verbosef("%v\n", err)
|
|
} else if err == checker.ErrLegacyLayout {
|
|
Verbosef("repository still uses the S3 legacy layout\nPlease run `restic migrate s3legacy` to correct this.\n")
|
|
} else {
|
|
errorsFound = true
|
|
Warnf("%v\n", err)
|
|
}
|
|
}
|
|
|
|
if orphanedPacks > 0 {
|
|
Verbosef("%d additional files were found in the repo, which likely contain duplicate data.\nThis is non-critical, you can run `restic prune` to correct this.\n", orphanedPacks)
|
|
}
|
|
|
|
Verbosef("check snapshots, trees and blobs\n")
|
|
errChan = make(chan error)
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
bar := newProgressMax(!gopts.Quiet, 0, "snapshots")
|
|
defer bar.Done()
|
|
chkr.Structure(ctx, bar, errChan)
|
|
}()
|
|
|
|
for err := range errChan {
|
|
errorsFound = true
|
|
if e, ok := err.(*checker.TreeError); ok {
|
|
var clean string
|
|
if stdoutCanUpdateStatus() {
|
|
clean = clearLine(0)
|
|
}
|
|
Warnf(clean+"error for tree %v:\n", e.ID.Str())
|
|
for _, treeErr := range e.Errors {
|
|
Warnf(" %v\n", treeErr)
|
|
}
|
|
} else {
|
|
Warnf("error: %v\n", err)
|
|
}
|
|
}
|
|
|
|
// Wait for the progress bar to be complete before printing more below.
|
|
// Must happen after `errChan` is read from in the above loop to avoid
|
|
// deadlocking in the case of errors.
|
|
wg.Wait()
|
|
|
|
if opts.CheckUnused {
|
|
for _, id := range chkr.UnusedBlobs(ctx) {
|
|
Verbosef("unused blob %v\n", id)
|
|
errorsFound = true
|
|
}
|
|
}
|
|
|
|
doReadData := func(packs map[restic.ID]int64) {
|
|
packCount := uint64(len(packs))
|
|
|
|
p := newProgressMax(!gopts.Quiet, packCount, "packs")
|
|
errChan := make(chan error)
|
|
|
|
go chkr.ReadPacks(ctx, packs, p, errChan)
|
|
|
|
for err := range errChan {
|
|
errorsFound = true
|
|
Warnf("%v\n", err)
|
|
}
|
|
p.Done()
|
|
}
|
|
|
|
switch {
|
|
case opts.ReadData:
|
|
Verbosef("read all data\n")
|
|
doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
|
|
case opts.ReadDataSubset != "":
|
|
var packs map[restic.ID]int64
|
|
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
|
if err == nil {
|
|
bucket := dataSubset[0]
|
|
totalBuckets := dataSubset[1]
|
|
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
|
|
packCount := uint64(len(packs))
|
|
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
|
|
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
|
percentage, err := parsePercentage(opts.ReadDataSubset)
|
|
if err == nil {
|
|
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
|
|
Verbosef("read %.1f%% of data packs\n", percentage)
|
|
}
|
|
} else {
|
|
repoSize := int64(0)
|
|
allPacks := chkr.GetPacks()
|
|
for _, size := range allPacks {
|
|
repoSize += size
|
|
}
|
|
if repoSize == 0 {
|
|
return errors.Fatal("Cannot read from a repository having size 0")
|
|
}
|
|
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
|
|
if subsetSize > repoSize {
|
|
subsetSize = repoSize
|
|
}
|
|
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
|
|
Verbosef("read %d bytes of data packs\n", subsetSize)
|
|
}
|
|
if packs == nil {
|
|
return errors.Fatal("internal error: failed to select packs to check")
|
|
}
|
|
doReadData(packs)
|
|
}
|
|
|
|
if errorsFound {
|
|
return errors.Fatal("repository contains errors")
|
|
}
|
|
|
|
Verbosef("no errors were found\n")
|
|
|
|
return nil
|
|
}
|
|
|
|
// selectPacksByBucket selects subsets of packs by ranges of buckets.
|
|
func selectPacksByBucket(allPacks map[restic.ID]int64, bucket, totalBuckets uint) map[restic.ID]int64 {
|
|
packs := make(map[restic.ID]int64)
|
|
for pack, size := range allPacks {
|
|
// If we ever check more than the first byte
|
|
// of pack, update totalBucketsMax.
|
|
if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
|
|
packs[pack] = size
|
|
}
|
|
}
|
|
return packs
|
|
}
|
|
|
|
// selectRandomPacksByPercentage selects the given percentage of packs which are randomly choosen.
|
|
func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage float64) map[restic.ID]int64 {
|
|
packCount := len(allPacks)
|
|
packsToCheck := int(float64(packCount) * (percentage / 100.0))
|
|
if packCount > 0 && packsToCheck < 1 {
|
|
packsToCheck = 1
|
|
}
|
|
timeNs := time.Now().UnixNano()
|
|
r := rand.New(rand.NewSource(timeNs))
|
|
idx := r.Perm(packCount)
|
|
|
|
var keys []restic.ID
|
|
for k := range allPacks {
|
|
keys = append(keys, k)
|
|
}
|
|
|
|
packs := make(map[restic.ID]int64)
|
|
|
|
for i := 0; i < packsToCheck; i++ {
|
|
id := keys[idx[i]]
|
|
packs[id] = allPacks[id]
|
|
}
|
|
return packs
|
|
}
|
|
|
|
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
|
|
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
|
|
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
|
|
return packs
|
|
}
|