forked from TrueCloudLab/restic
ddf0b8cd0b
If a data blob and a tree blob with the same ID (= same content) exist, then the checker did not report a data or tree blob as unused when the blob of the other type was still in use.
310 lines
7.9 KiB
Go
310 lines
7.9 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
|
|
"github.com/restic/restic/internal/checker"
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/fs"
|
|
"github.com/restic/restic/internal/restic"
|
|
)
|
|
|
|
var cmdCheck = &cobra.Command{
|
|
Use: "check [flags]",
|
|
Short: "Check the repository for errors",
|
|
Long: `
|
|
The "check" command tests the repository for errors and reports any errors it
|
|
finds. It can also be used to read all data and therefore simulate a restore.
|
|
|
|
By default, the "check" command will always load all data directly from the
|
|
repository and not use a local cache.
|
|
|
|
EXIT STATUS
|
|
===========
|
|
|
|
Exit status is 0 if the command was successful, and non-zero if there was any error.
|
|
`,
|
|
DisableAutoGenTag: true,
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
return runCheck(checkOptions, globalOptions, args)
|
|
},
|
|
PreRunE: func(cmd *cobra.Command, args []string) error {
|
|
return checkFlags(checkOptions)
|
|
},
|
|
}
|
|
|
|
// CheckOptions bundles all options for the 'check' command.
|
|
type CheckOptions struct {
|
|
ReadData bool
|
|
ReadDataSubset string
|
|
CheckUnused bool
|
|
WithCache bool
|
|
}
|
|
|
|
var checkOptions CheckOptions
|
|
|
|
func init() {
|
|
cmdRoot.AddCommand(cmdCheck)
|
|
|
|
f := cmdCheck.Flags()
|
|
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
|
|
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset n of m data packs (format: `n/m`)")
|
|
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
|
|
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
|
|
}
|
|
|
|
func checkFlags(opts CheckOptions) error {
|
|
if opts.ReadData && opts.ReadDataSubset != "" {
|
|
return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together")
|
|
}
|
|
if opts.ReadDataSubset != "" {
|
|
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
|
if err != nil || len(dataSubset) != 2 {
|
|
return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2")
|
|
}
|
|
if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] {
|
|
return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2")
|
|
}
|
|
if dataSubset[1] > totalBucketsMax {
|
|
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// See doReadData in runCheck below for why this is 256.
|
|
const totalBucketsMax = 256
|
|
|
|
// stringToIntSlice converts string to []uint, using '/' as element separator
|
|
func stringToIntSlice(param string) (split []uint, err error) {
|
|
if param == "" {
|
|
return nil, nil
|
|
}
|
|
parts := strings.Split(param, "/")
|
|
result := make([]uint, len(parts))
|
|
for idx, part := range parts {
|
|
uintval, err := strconv.ParseUint(part, 10, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
result[idx] = uint(uintval)
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
|
|
if gopts.Quiet {
|
|
return nil
|
|
}
|
|
|
|
readProgress := restic.NewProgress()
|
|
|
|
readProgress.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) {
|
|
status := fmt.Sprintf("[%s] %s %d / %d items",
|
|
formatDuration(d),
|
|
formatPercent(s.Blobs, todo.Blobs),
|
|
s.Blobs, todo.Blobs)
|
|
|
|
if w := stdoutTerminalWidth(); w > 0 {
|
|
if len(status) > w {
|
|
max := w - len(status) - 4
|
|
status = status[:max] + "... "
|
|
}
|
|
}
|
|
|
|
PrintProgress("%s", status)
|
|
}
|
|
|
|
readProgress.OnDone = func(s restic.Stat, d time.Duration, ticker bool) {
|
|
fmt.Printf("\nduration: %s\n", formatDuration(d))
|
|
}
|
|
|
|
return readProgress
|
|
}
|
|
|
|
// prepareCheckCache configures a special cache directory for check.
|
|
//
|
|
// * if --with-cache is specified, the default cache is used
|
|
// * if the user explicitly requested --no-cache, we don't use any cache
|
|
// * if the user provides --cache-dir, we use a cache in a temporary sub-directory of the specified directory and the sub-directory is deleted after the check
|
|
// * by default, we use a cache in a temporary directory that is deleted after the check
|
|
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
|
|
cleanup = func() {}
|
|
if opts.WithCache {
|
|
// use the default cache, no setup needed
|
|
return cleanup
|
|
}
|
|
|
|
if gopts.NoCache {
|
|
// don't use any cache, no setup needed
|
|
return cleanup
|
|
}
|
|
|
|
cachedir := gopts.CacheDir
|
|
|
|
// use a cache in a temporary directory
|
|
tempdir, err := ioutil.TempDir(cachedir, "restic-check-cache-")
|
|
if err != nil {
|
|
// if an error occurs, don't use any cache
|
|
Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err)
|
|
gopts.NoCache = true
|
|
return cleanup
|
|
}
|
|
|
|
gopts.CacheDir = tempdir
|
|
Verbosef("using temporary cache in %v\n", tempdir)
|
|
|
|
cleanup = func() {
|
|
err := fs.RemoveAll(tempdir)
|
|
if err != nil {
|
|
Warnf("error removing temporary cache directory: %v\n", err)
|
|
}
|
|
}
|
|
|
|
return cleanup
|
|
}
|
|
|
|
func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|
if len(args) != 0 {
|
|
return errors.Fatal("check has no arguments")
|
|
}
|
|
|
|
cleanup := prepareCheckCache(opts, &gopts)
|
|
AddCleanupHandler(func() error {
|
|
cleanup()
|
|
return nil
|
|
})
|
|
|
|
repo, err := OpenRepository(gopts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !gopts.NoLock {
|
|
Verbosef("create exclusive lock for repository\n")
|
|
lock, err := lockRepoExclusive(repo)
|
|
defer unlockRepo(lock)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
chkr := checker.New(repo)
|
|
|
|
Verbosef("load indexes\n")
|
|
hints, errs := chkr.LoadIndex(gopts.ctx)
|
|
|
|
dupFound := false
|
|
for _, hint := range hints {
|
|
Printf("%v\n", hint)
|
|
if _, ok := hint.(checker.ErrDuplicatePacks); ok {
|
|
dupFound = true
|
|
}
|
|
}
|
|
|
|
if dupFound {
|
|
Printf("This is non-critical, you can run `restic rebuild-index' to correct this\n")
|
|
}
|
|
|
|
if len(errs) > 0 {
|
|
for _, err := range errs {
|
|
Warnf("error: %v\n", err)
|
|
}
|
|
return errors.Fatal("LoadIndex returned errors")
|
|
}
|
|
|
|
errorsFound := false
|
|
orphanedPacks := 0
|
|
errChan := make(chan error)
|
|
|
|
Verbosef("check all packs\n")
|
|
go chkr.Packs(gopts.ctx, errChan)
|
|
|
|
for err := range errChan {
|
|
if checker.IsOrphanedPack(err) {
|
|
orphanedPacks++
|
|
Verbosef("%v\n", err)
|
|
continue
|
|
}
|
|
errorsFound = true
|
|
Warnf("%v\n", err)
|
|
}
|
|
|
|
if orphanedPacks > 0 {
|
|
Verbosef("%d additional files were found in the repo, which likely contain duplicate data.\nYou can run `restic prune` to correct this.\n", orphanedPacks)
|
|
}
|
|
|
|
Verbosef("check snapshots, trees and blobs\n")
|
|
errChan = make(chan error)
|
|
go chkr.Structure(gopts.ctx, errChan)
|
|
|
|
for err := range errChan {
|
|
errorsFound = true
|
|
if e, ok := err.(checker.TreeError); ok {
|
|
Warnf("error for tree %v:\n", e.ID.Str())
|
|
for _, treeErr := range e.Errors {
|
|
Warnf(" %v\n", treeErr)
|
|
}
|
|
} else {
|
|
Warnf("error: %v\n", err)
|
|
}
|
|
}
|
|
|
|
if opts.CheckUnused {
|
|
for _, id := range chkr.UnusedBlobs() {
|
|
Verbosef("unused blob %v\n", id)
|
|
errorsFound = true
|
|
}
|
|
}
|
|
|
|
doReadData := func(bucket, totalBuckets uint) {
|
|
packs := restic.IDSet{}
|
|
for pack := range chkr.GetPacks() {
|
|
// If we ever check more than the first byte
|
|
// of pack, update totalBucketsMax.
|
|
if (uint(pack[0]) % totalBuckets) == (bucket - 1) {
|
|
packs.Insert(pack)
|
|
}
|
|
}
|
|
packCount := uint64(len(packs))
|
|
|
|
if packCount < chkr.CountPacks() {
|
|
Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets))
|
|
} else {
|
|
Verbosef("read all data\n")
|
|
}
|
|
|
|
p := newReadProgress(gopts, restic.Stat{Blobs: packCount})
|
|
errChan := make(chan error)
|
|
|
|
go chkr.ReadPacks(gopts.ctx, packs, p, errChan)
|
|
|
|
for err := range errChan {
|
|
errorsFound = true
|
|
Warnf("%v\n", err)
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case opts.ReadData:
|
|
doReadData(1, 1)
|
|
case opts.ReadDataSubset != "":
|
|
dataSubset, _ := stringToIntSlice(opts.ReadDataSubset)
|
|
doReadData(dataSubset[0], dataSubset[1])
|
|
}
|
|
|
|
if errorsFound {
|
|
return errors.Fatal("repository contains errors")
|
|
}
|
|
|
|
Verbosef("no errors were found\n")
|
|
|
|
return nil
|
|
}
|