// restic/cmd/restic/cmd_find.go
// 2024-08-30 12:45:20 +02:00
//
// 650 lines
// 16 KiB
// Go

package main
import (
"context"
"encoding/json"
"sort"
"strings"
"time"
"github.com/spf13/cobra"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/filter"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/walker"
)
// cmdFind is the cobra command definition for "restic find". Its RunE handler
// forwards the command context, the package-level findOptions and
// globalOptions, and the positional arguments to runFind.
var cmdFind = &cobra.Command{
Use: "find [flags] PATTERN...",
Short: "Find a file, a directory or restic IDs",
Long: `
The "find" command searches for files or directories in snapshots stored in the
repo.
It can also be used to search for restic blobs or trees for troubleshooting.`,
Example: `restic find config.json
restic find --json "*.yml" "*.json"
restic find --json --blob 420f620f b46ebe8a ddd38656
restic find --show-pack-id --blob 420f620f
restic find --tree 577c2bc9 f81f2e22 a62827a9
restic find --pack 025c1d06
EXIT STATUS
===========
Exit status is 0 if the command was successful.
Exit status is 1 if there was any error.
Exit status is 10 if the repository does not exist.
Exit status is 11 if the repository is already locked.
Exit status is 12 if the password is incorrect.
`,
GroupID: cmdGroupDefault,
DisableAutoGenTag: true,
// Delegate the actual work to runFind; its error becomes the command's error.
RunE: func(cmd *cobra.Command, args []string) error {
return runFind(cmd.Context(), findOptions, globalOptions, args)
},
}
// FindOptions collects every command-line option understood by the find
// command.
type FindOptions struct {
	// Oldest is the lower bound for a node's modification time; it is only
	// parsed and applied when non-empty.
	Oldest string
	// Newest is the upper bound for a node's modification time; it is only
	// parsed and applied when non-empty.
	Newest string
	// Snapshots lists the snapshot IDs given via --snapshot.
	Snapshots []string

	// BlobID marks the patterns as blob IDs.
	BlobID bool
	// TreeID marks the patterns as tree IDs.
	TreeID bool
	// PackID marks the patterns as pack IDs.
	PackID bool
	// ShowPackID additionally reports the pack each blob or tree lives in.
	ShowPackID bool

	// CaseInsensitive lower-cases patterns and node paths before matching.
	CaseInsensitive bool
	// ListLong switches plain-text output to the long listing format.
	ListLong bool
	// HumanReadable prints sizes in human readable form.
	HumanReadable bool

	restic.SnapshotFilter
}
// findOptions receives the flag values registered for cmdFind in init and is
// handed to runFind by cmdFind's RunE handler.
var findOptions FindOptions
// init attaches cmdFind to the root command and binds every find flag to the
// matching field of findOptions.
func init() {
	cmdRoot.AddCommand(cmdFind)

	flags := cmdFind.Flags()
	opts := &findOptions

	// modification time window
	flags.StringVarP(&opts.Oldest, "oldest", "O", "", "oldest modification date/time")
	flags.StringVarP(&opts.Newest, "newest", "N", "", "newest modification date/time")

	// snapshot selection
	flags.StringArrayVarP(&opts.Snapshots, "snapshot", "s", nil, "snapshot `id` to search in (can be given multiple times)")

	// interpretation of the patterns as IDs
	flags.BoolVar(&opts.BlobID, "blob", false, "pattern is a blob-ID")
	flags.BoolVar(&opts.TreeID, "tree", false, "pattern is a tree-ID")
	flags.BoolVar(&opts.PackID, "pack", false, "pattern is a pack-ID")
	flags.BoolVar(&opts.ShowPackID, "show-pack-id", false, "display the pack-ID the blobs belong to (with --blob or --tree)")

	// matching and output format
	flags.BoolVarP(&opts.CaseInsensitive, "ignore-case", "i", false, "ignore case for pattern")
	flags.BoolVarP(&opts.ListLong, "long", "l", false, "use a long listing format showing size and mode")
	flags.BoolVar(&opts.HumanReadable, "human-readable", false, "print sizes in human readable format")

	initMultiSnapshotFilter(flags, &opts.SnapshotFilter, true)
}
// findPattern describes what findInSnapshot looks for: a set of path patterns
// plus an optional modification time window.
type findPattern struct {
	// oldest is the lower modification time bound; the zero value disables it.
	oldest time.Time
	// newest is the upper modification time bound; the zero value disables it.
	newest time.Time
	// pattern holds the path patterns; a node matches if any of them matches.
	pattern []string
	// ignoreCase makes the search lower-case node paths before matching.
	ignoreCase bool
}
// timeFormats lists the time layouts accepted by parseTime. parseTime tries
// them in the order given here and uses the first one that parses.
var timeFormats = []string{
// year-month-day layouts
"2006-01-02",
"2006-01-02 15:04",
"2006-01-02 15:04:05",
"2006-01-02 15:04:05 -0700",
"2006-01-02 15:04:05 MST",
// day.month.year layouts
"02.01.2006",
"02.01.2006 15:04",
"02.01.2006 15:04:05",
"02.01.2006 15:04:05 -0700",
"02.01.2006 15:04:05 MST",
"Mon Jan 2 15:04:05 -0700 MST 2006",
}
// parseTime interprets str using each layout in timeFormats in turn, parsing
// in the local time zone, and returns the first successful result. If no
// layout fits, it returns the zero time and a fatal error quoting str.
func parseTime(str string) (time.Time, error) {
	for i := range timeFormats {
		parsed, err := time.ParseInLocation(timeFormats[i], str, time.Local)
		if err != nil {
			// this layout does not fit, try the next one
			continue
		}
		return parsed, nil
	}

	return time.Time{}, errors.Fatalf("unable to parse time: %q", str)
}
// statefulOutput prints search results either as plain text or as JSON and
// keeps the state needed to emit well-formed output across multiple calls.
type statefulOutput struct {
	ListLong      bool // passed to formatNode for plain-text pattern matches
	HumanReadable bool // passed to formatNode for plain-text pattern matches
	JSON          bool // selects the JSON printers instead of the plain-text ones

	// inuse is set once the opening "[" of the JSON array has been printed.
	inuse bool
	// newsn is the snapshot currently being searched; oldsn is the snapshot
	// the most recent pattern match was printed for.
	newsn, oldsn *restic.Snapshot
	// hits counts the JSON entries printed so far: per snapshot group for
	// pattern matches, in total for object matches.
	hits int
}
// PrintPatternJSON prints one matching node as a JSON object. Matches are
// grouped per snapshot: whenever the current snapshot differs from the one
// last printed, the previous group (if any) is closed with its hit count and
// snapshot ID and a new {"matches":[ group is opened. The enclosing array is
// opened on first use; Finish closes whatever is still open.
func (s *statefulOutput) PrintPatternJSON(path string, node *restic.Node) {
	type findNode restic.Node

	entry := struct {
		// Add these attributes
		Path        string `json:"path,omitempty"`
		Permissions string `json:"permissions,omitempty"`

		*findNode

		// Make the following attributes disappear
		Name               byte `json:"name,omitempty"`
		ExtendedAttributes byte `json:"extended_attributes,omitempty"`
		GenericAttributes  byte `json:"generic_attributes,omitempty"`
		Device             byte `json:"device,omitempty"`
		Content            byte `json:"content,omitempty"`
		Subtree            byte `json:"subtree,omitempty"`
	}{
		Path:        path,
		Permissions: node.Mode.String(),
		findNode:    (*findNode)(node),
	}

	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		Warnf("Marshall failed: %v\n", marshalErr)
		return
	}

	// open the outer array before the very first entry
	if !s.inuse {
		Printf("[")
		s.inuse = true
	}

	// switch to a new per-snapshot group when the snapshot changed
	if s.oldsn != s.newsn {
		if s.oldsn != nil {
			Printf("],\"hits\":%d,\"snapshot\":%q},", s.hits, s.oldsn.ID())
		}
		Printf(`{"matches":[`)
		s.oldsn, s.hits = s.newsn, 0
	}

	// separate entries within one group
	if s.hits > 0 {
		Printf(",")
	}
	Print(string(encoded))
	s.hits++
}
// PrintPatternNormal prints one matching node as a plain-text line. When the
// current snapshot differs from the one last printed, a header naming the
// snapshot is emitted first via Verbosef, preceded by an empty line for every
// snapshot but the first.
func (s *statefulOutput) PrintPatternNormal(path string, node *restic.Node) {
	if s.oldsn != s.newsn {
		if s.oldsn != nil {
			Verbosef("\n")
		}
		s.oldsn = s.newsn

		current := s.oldsn
		Verbosef("Found matching entries in snapshot %s from %s\n", current.ID().Str(), current.Time.Local().Format(TimeFormat))
	}

	line := formatNode(path, node, s.ListLong, s.HumanReadable)
	Println(line)
}
// PrintPattern reports a node that matched a path pattern, using the JSON
// printer when s.JSON is set and the plain-text printer otherwise.
func (s *statefulOutput) PrintPattern(path string, node *restic.Node) {
	if !s.JSON {
		s.PrintPatternNormal(path, node)
		return
	}
	s.PrintPatternJSON(path, node)
}
// PrintObjectJSON prints one found object (kind and id) as an element of a
// flat JSON array, together with the path it was found at, the parent tree
// and the ID and time of the snapshot sn. The array is opened on first use;
// Finish closes it.
func (s *statefulOutput) PrintObjectJSON(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
	record := struct {
		// Add these attributes
		ObjectType string    `json:"object_type"`
		ID         string    `json:"id"`
		Path       string    `json:"path"`
		ParentTree string    `json:"parent_tree,omitempty"`
		SnapshotID string    `json:"snapshot"`
		Time       time.Time `json:"time,omitempty"`
	}{
		ObjectType: kind,
		ID:         id,
		Path:       nodepath,
		ParentTree: treeID,
		SnapshotID: sn.ID().String(),
		Time:       sn.Time,
	}

	encoded, marshalErr := json.Marshal(record)
	if marshalErr != nil {
		Warnf("Marshall failed: %v\n", marshalErr)
		return
	}

	// open the array before the very first element
	if !s.inuse {
		Printf("[")
		s.inuse = true
	}
	// separate this element from the previous one
	if s.hits > 0 {
		Printf(",")
	}
	Print(string(encoded))
	s.hits++
}
// PrintObjectNormal prints one found object as plain text: the kind and id,
// then the containing file and parent tree for blobs or just the path for
// anything else, and finally the snapshot the object was found in.
func (s *statefulOutput) PrintObjectNormal(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
	Printf("Found %s %s\n", kind, id)

	switch kind {
	case "blob":
		Printf(" ... in file %s\n", nodepath)
		Printf(" (tree %s)\n", treeID)
	default:
		Printf(" ... path %s\n", nodepath)
	}

	snapshotTime := sn.Time.Local().Format(TimeFormat)
	Printf(" ... in snapshot %s (%s)\n", sn.ID().Str(), snapshotTime)
}
// PrintObject reports a found blob or tree, using the JSON printer when
// s.JSON is set and the plain-text printer otherwise.
func (s *statefulOutput) PrintObject(kind, id, nodepath, treeID string, sn *restic.Snapshot) {
	if !s.JSON {
		s.PrintObjectNormal(kind, id, nodepath, treeID, sn)
		return
	}
	s.PrintObjectJSON(kind, id, nodepath, treeID, sn)
}
// Finish terminates the JSON output: it closes the last per-snapshot group
// opened by PrintPatternJSON (if any) and then the enclosing array, or prints
// an empty array when nothing was printed. It does nothing in plain-text mode.
func (s *statefulOutput) Finish() {
	if !s.JSON {
		return
	}

	// close the group of the last snapshot that had pattern matches
	if s.oldsn != nil {
		Printf("],\"hits\":%d,\"snapshot\":%q}", s.hits, s.oldsn.ID())
	}

	if !s.inuse {
		Printf("[]\n")
		return
	}
	Printf("]\n")
}
// Finder carries the state of one find run: the repository to search, what to
// look for and where the results are printed.
type Finder struct {
	repo restic.Repository
	// pat holds the path patterns and time window used by findInSnapshot.
	pat findPattern
	// out prints the results and tracks the output state.
	out statefulOutput
	// blobIDs and treeIDs are the sets of IDs findIDs looks for, in short or
	// long form; a nil map means that kind of ID is not being searched.
	blobIDs, treeIDs map[string]struct{}
	// itemsFound counts the tree hits reported by findIDs.
	itemsFound int
}
// findInSnapshot walks the tree of snapshot sn and prints every node whose
// path matches one of the patterns in f.pat and whose modification time lies
// within the configured bounds. Directories that neither match nor can
// contain a match are skipped entirely. Trees that fail to load are reported
// and skipped. It returns an error if the snapshot has no tree, if a pattern
// is rejected by the filter package, or if the walk itself fails.
func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error {
	debug.Log("searching in snapshot %s\n for entries within [%s %s]", sn.ID(), f.pat.oldest, f.pat.newest)

	if sn.Tree == nil {
		return errors.Errorf("snapshot %v has no tree", sn.ID().Str())
	}

	f.out.newsn = sn

	visit := func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error {
		if err != nil {
			debug.Log("Error loading tree %v: %v", parentTreeID, err)

			Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID())

			return walker.ErrSkipNode
		}

		if node == nil {
			return nil
		}

		// patterns were lower-cased up front when ignoring case, so the path
		// has to be lower-cased as well
		candidate := nodepath
		if f.pat.ignoreCase {
			candidate = strings.ToLower(nodepath)
		}

		matched := false
		for _, pattern := range f.pat.pattern {
			ok, matchErr := filter.Match(pattern, candidate)
			if matchErr != nil {
				return matchErr
			}
			if ok {
				matched = true
				break
			}
		}

		// onMiss is what gets returned when this node is not reported: nil to
		// keep walking, or ErrSkipNode for a directory none of whose children
		// can match any pattern.
		var onMiss error
		if node.Type == "dir" {
			descend := false
			for _, pattern := range f.pat.pattern {
				ok, matchErr := filter.ChildMatch(pattern, candidate)
				if matchErr != nil {
					return matchErr
				}
				if ok {
					descend = true
					break
				}
			}
			if !descend {
				onMiss = walker.ErrSkipNode
			}
		}

		switch {
		case !matched:
			return onMiss
		case !f.pat.oldest.IsZero() && node.ModTime.Before(f.pat.oldest):
			debug.Log(" ModTime is older than %s\n", f.pat.oldest)
			return onMiss
		case !f.pat.newest.IsZero() && node.ModTime.After(f.pat.newest):
			debug.Log(" ModTime is newer than %s\n", f.pat.newest)
			return onMiss
		}

		debug.Log(" found match\n")
		f.out.PrintPattern(nodepath, node)
		return nil
	}

	return walker.Walk(ctx, f.repo, *sn.Tree, walker.WalkVisitor{ProcessNode: visit})
}
// findIDs walks the tree of snapshot sn and reports every directory whose
// subtree ID is in f.treeIDs and every file that references a blob in
// f.blobIDs. IDs may be stored in short or long form; a blob matched by its
// short form is replaced by the long form in f.blobIDs.
//
// When only trees are searched, the walk stops as soon as every requested
// tree has been seen in this snapshot; this is signalled by returning an
// error with the message "OK", which the caller is expected to treat as
// success. Trees that fail to load are reported and skipped. An error is also
// returned if the snapshot has no tree, if ctx is cancelled, or if the walk
// fails.
func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error {
	debug.Log("searching IDs in snapshot %s", sn.ID())

	if sn.Tree == nil {
		return errors.Errorf("snapshot %v has no tree", sn.ID().Str())
	}

	f.out.newsn = sn

	// matchedTrees records which entries of f.treeIDs were seen in this
	// snapshot. Early termination is based on the number of distinct entries
	// rather than on the number of hits: the same tree can show up at several
	// paths and in several snapshots, and counting those repeats would stop
	// the walk before the remaining requested trees are reported.
	matchedTrees := make(map[string]struct{}, len(f.treeIDs))

	return walker.Walk(ctx, f.repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error {
		if err != nil {
			debug.Log("Error loading tree %v: %v", parentTreeID, err)

			Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID())

			return walker.ErrSkipNode
		}

		if node == nil {
			return nil
		}

		if node.Type == "dir" && f.treeIDs != nil {
			treeID := node.Subtree

			// try the short form first, then the long form
			matchedKey := treeID.Str()
			_, found := f.treeIDs[matchedKey]
			if !found {
				matchedKey = treeID.String()
				_, found = f.treeIDs[matchedKey]
			}

			if found {
				f.out.PrintObject("tree", treeID.String(), nodepath, "", sn)
				f.itemsFound++
				matchedTrees[matchedKey] = struct{}{}
				// Terminate if we have found all trees (and we are not
				// looking for blobs)
				if len(matchedTrees) >= len(f.treeIDs) && f.blobIDs == nil {
					// Return an error to terminate the Walk
					return errors.New("OK")
				}
			}
		}

		if node.Type == "file" && f.blobIDs != nil {
			for _, id := range node.Content {
				if ctx.Err() != nil {
					return ctx.Err()
				}

				idStr := id.String()
				if _, ok := f.blobIDs[idStr]; !ok {
					// Look for short ID form
					if _, ok := f.blobIDs[id.Str()]; !ok {
						continue
					}
					// Replace the short ID with the long one
					f.blobIDs[idStr] = struct{}{}
					delete(f.blobIDs, id.Str())
				}
				f.out.PrintObject("blob", idStr, nodepath, parentTreeID.String(), sn)
			}
		}

		return nil
	}})
}
// errAllPacksFound is returned by the List callback in packsToBlobs to stop
// the listing as soon as every requested pack has been located.
var errAllPacksFound = errors.New("all packs found")

// packsToBlobs converts the list of pack IDs to a list of blob IDs that
// belong to those packs.
//
// The pack IDs in packs may be given in short or long form. The blobs of
// every matching pack file are added to f.blobIDs, which is created if
// necessary. Packs that are not found while listing the pack files are looked
// up in the index via indexPacksToBlobs. A fatal error naming the remaining
// IDs (sorted) is returned if some packs cannot be resolved either way;
// errors from listing or reading packs are returned unchanged.
func (f *Finder) packsToBlobs(ctx context.Context, packs []string) error {
	packIDs := make(map[string]struct{})
	for _, p := range packs {
		packIDs[p] = struct{}{}
	}
	if f.blobIDs == nil {
		f.blobIDs = make(map[string]struct{})
	}

	debug.Log("Looking for packs...")
	err := f.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
		idStr := id.String()
		if _, ok := packIDs[idStr]; !ok {
			// Look for short ID form
			if _, ok := packIDs[id.Str()]; !ok {
				return nil
			}
			delete(packIDs, id.Str())
		} else {
			// forget found id
			delete(packIDs, idStr)
		}
		debug.Log("Found pack %s", idStr)
		blobs, _, err := f.repo.ListPack(ctx, id, size)
		if err != nil {
			return err
		}
		for _, b := range blobs {
			f.blobIDs[b.ID.String()] = struct{}{}
		}
		// Stop searching when all packs have been found
		if len(packIDs) == 0 {
			return errAllPacksFound
		}
		return nil
	})

	// Use errors.Is so that the sentinel is still recognized should List
	// wrap the error returned by the callback.
	allFound := errors.Is(err, errAllPacksFound)
	if err != nil && !allFound {
		return err
	}

	if !allFound {
		// try to resolve unknown pack ids from the index
		packIDs, err = f.indexPacksToBlobs(ctx, packIDs)
		if err != nil {
			return err
		}
	}

	if len(packIDs) > 0 {
		list := make([]string, 0, len(packIDs))
		for h := range packIDs {
			list = append(list, h)
		}

		sort.Strings(list)
		return errors.Fatalf("unable to find pack(s): %v", list)
	}

	debug.Log("%d blobs found", len(f.blobIDs))
	return nil
}
// indexPacksToBlobs resolves the pack IDs in packIDs (short or long form)
// using the blobs reported by f.repo.ListBlobs. Every blob that belongs to
// one of the packs is added to f.blobIDs, which must already be initialized.
// If any pack was resolved this way, a warning listing those packs in sorted
// order is printed. The returned map is packIDs with all resolved packs
// removed; the passed map is modified in place. On a ListBlobs error the
// returned map is nil.
func (f *Finder) indexPacksToBlobs(ctx context.Context, packIDs map[string]struct{}) (map[string]struct{}, error) {
	wctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// remember which packs were found in the index
	indexPackIDs := make(map[string]struct{})
	err := f.repo.ListBlobs(wctx, func(pb restic.PackedBlob) {
		idStr := pb.PackID.String()
		// keep the entry in packIDs for now: the callback is invoked once per
		// blob, so further blobs of the same pack still have to match
		matchingID := false
		if _, ok := packIDs[idStr]; ok {
			matchingID = true
		} else {
			if _, ok := packIDs[pb.PackID.Str()]; ok {
				// expand id
				delete(packIDs, pb.PackID.Str())
				packIDs[idStr] = struct{}{}
				matchingID = true
			}
		}
		if matchingID {
			f.blobIDs[pb.ID.String()] = struct{}{}
			indexPackIDs[idStr] = struct{}{}
		}
	})
	if err != nil {
		return nil, err
	}

	for id := range indexPackIDs {
		delete(packIDs, id)
	}

	if len(indexPackIDs) > 0 {
		list := make([]string, 0, len(indexPackIDs))
		for h := range indexPackIDs {
			list = append(list, h)
		}
		// map iteration order is random; sort for a stable warning message,
		// like packsToBlobs does for its error message
		sort.Strings(list)
		Warnf("some pack files are missing from the repository, getting their blobs from the repository index: %v\n\n", list)
	}
	return packIDs, nil
}
// findObjectPack prints which pack the object with the given ID and blob type
// is stored in, based on f.repo.LookupBlob. Only the first entry whose ID
// equals the parsed ID is reported. A note is printed instead when id cannot
// be parsed or the lookup returns nothing.
func (f *Finder) findObjectPack(id string, t restic.BlobType) {
	parsedID, parseErr := restic.ParseID(id)
	if parseErr != nil {
		Printf("Note: cannot find pack for object '%s', unable to parse ID: %v\n", id, parseErr)
		return
	}

	candidates := f.repo.LookupBlob(t, parsedID)
	if len(candidates) == 0 {
		Printf("Object %s not found in the index\n", parsedID.Str())
		return
	}

	for _, pb := range candidates {
		if !pb.ID.Equal(parsedID) {
			continue
		}
		Printf("Object belongs to pack %s\n ... Pack %s: %s\n", pb.PackID, pb.PackID.Str(), pb.String())
		return
	}
}
// findObjectsPacks reports the pack of every ID in f.blobIDs (looked up as
// data blobs) and then of every ID in f.treeIDs (looked up as tree blobs).
func (f *Finder) findObjectsPacks() {
	lookups := []struct {
		ids  map[string]struct{}
		kind restic.BlobType
	}{
		{ids: f.blobIDs, kind: restic.DataBlob},
		{ids: f.treeIDs, kind: restic.TreeBlob},
	}

	for _, lookup := range lookups {
		for id := range lookup.ids {
			f.findObjectPack(id, lookup.kind)
		}
	}
}
// runFind implements the find command. It validates the arguments and
// options, opens the repository with a read lock, loads the index and then
// searches the selected snapshots in chronological order: for blob, tree or
// pack IDs when one of the ID options is set, for path patterns otherwise.
// With ShowPackID it finally reports the packs of the searched blobs/trees.
//
// Note that with CaseInsensitive the entries of args are lower-cased in
// place.
func runFind(ctx context.Context, opts FindOptions, gopts GlobalOptions, args []string) error {
	if len(args) == 0 {
		return errors.Fatal("wrong number of arguments")
	}

	var err error
	pat := findPattern{pattern: args}
	if opts.CaseInsensitive {
		for i, p := range pat.pattern {
			pat.pattern[i] = strings.ToLower(p)
		}
		pat.ignoreCase = true
	}

	if opts.Oldest != "" {
		pat.oldest, err = parseTime(opts.Oldest)
		if err != nil {
			return err
		}
	}

	if opts.Newest != "" {
		pat.newest, err = parseTime(opts.Newest)
		if err != nil {
			return err
		}
	}

	// Check at most only one kind of IDs is provided: currently we
	// can't mix types
	idKinds := 0
	for _, selected := range []bool{opts.BlobID, opts.TreeID, opts.PackID} {
		if selected {
			idKinds++
		}
	}
	if idKinds > 1 {
		return errors.Fatal("cannot have several ID types")
	}

	ctx, repo, unlock, err := openWithReadLock(ctx, gopts, gopts.NoLock)
	if err != nil {
		return err
	}
	defer unlock()

	snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile)
	if err != nil {
		return err
	}

	bar := newIndexProgress(gopts.Quiet, gopts.JSON)
	if err = repo.LoadIndex(ctx, bar); err != nil {
		return err
	}

	f := &Finder{
		repo: repo,
		pat:  pat,
		out: statefulOutput{
			ListLong:      opts.ListLong,
			HumanReadable: opts.HumanReadable,
			JSON:          gopts.JSON,
		},
	}

	// toSet turns the patterns into a set of IDs to search for
	toSet := func(ids []string) map[string]struct{} {
		set := make(map[string]struct{})
		for _, id := range ids {
			set[id] = struct{}{}
		}
		return set
	}

	// the ID options are mutually exclusive (checked above)
	switch {
	case opts.BlobID:
		f.blobIDs = toSet(f.pat.pattern)
	case opts.TreeID:
		f.treeIDs = toSet(f.pat.pattern)
	case opts.PackID:
		if err := f.packsToBlobs(ctx, f.pat.pattern); err != nil {
			return err
		}
	}

	var filteredSnapshots []*restic.Snapshot
	for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, opts.Snapshots) {
		filteredSnapshots = append(filteredSnapshots, sn)
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	// search the snapshots from oldest to newest
	sort.Slice(filteredSnapshots, func(i, j int) bool {
		return filteredSnapshots[i].Time.Before(filteredSnapshots[j].Time)
	})

	searchByID := f.blobIDs != nil || f.treeIDs != nil
	for _, sn := range filteredSnapshots {
		if searchByID {
			// findIDs signals early termination with an "OK" error
			err = f.findIDs(ctx, sn)
			if err != nil && err.Error() != "OK" {
				return err
			}
			continue
		}
		if err = f.findInSnapshot(ctx, sn); err != nil {
			return err
		}
	}
	f.out.Finish()

	if opts.ShowPackID && searchByID {
		f.findObjectsPacks()
	}

	return nil
}