restic/internal/archiver/exclude.go
Michael Eischer 48dbefc37e fs / archiver: convert to handle based interface
The actual implementation still relies on file paths, but with the
abstraction layer in place, an FS implementation can ensure atomic file
accesses in the future.
2024-11-16 12:56:23 +01:00

332 lines
9.3 KiB
Go

package archiver
import (
"bytes"
"fmt"
"io"
"os"
"strings"
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/fs"
)
// RejectByNameFunc is a function that takes a filename of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectByNameFunc func(path string) bool
// RejectFunc is a function that takes a filename and os.FileInfo of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool
func CombineRejectByNames(funcs []RejectByNameFunc) SelectByNameFunc {
return func(item string) bool {
for _, reject := range funcs {
if reject(item) {
return false
}
}
return true
}
}
func CombineRejects(funcs []RejectFunc) SelectFunc {
return func(item string, fi os.FileInfo, fs fs.FS) bool {
for _, reject := range funcs {
if reject(item, fi, fs) {
return false
}
}
return true
}
}
type rejectionCache struct {
m map[string]bool
mtx sync.Mutex
}
func newRejectionCache() *rejectionCache {
return &rejectionCache{m: make(map[string]bool)}
}
// Lock locks the mutex in rc.
func (rc *rejectionCache) Lock() {
rc.mtx.Lock()
}
// Unlock unlocks the mutex in rc.
func (rc *rejectionCache) Unlock() {
rc.mtx.Unlock()
}
// Get returns the last stored value for dir and a second boolean that
// indicates whether that value was actually written to the cache. It is the
// callers responsibility to call rc.Lock and rc.Unlock before using this
// method, otherwise data races may occur.
func (rc *rejectionCache) Get(dir string) (bool, bool) {
v, ok := rc.m[dir]
return v, ok
}
// Store stores a new value for dir. It is the callers responsibility to call
// rc.Lock and rc.Unlock before using this method, otherwise data races may
// occur.
func (rc *rejectionCache) Store(dir string, rejected bool) {
rc.m[dir] = rejected
}
// RejectIfPresent returns a RejectByNameFunc which itself returns whether a path
// should be excluded. The RejectByNameFunc considers a file to be excluded when
// it resides in a directory with an exclusion file, that is specified by
// excludeFileSpec in the form "filename[:content]". The returned error is
// non-nil if the filename component of excludeFileSpec is empty. If rc is
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
// of a directory based on previous visits.
func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) {
if excludeFileSpec == "" {
return nil, errors.New("name for exclusion tagfile is empty")
}
colon := strings.Index(excludeFileSpec, ":")
if colon == 0 {
return nil, fmt.Errorf("no name for exclusion tagfile provided")
}
tf, tc := "", ""
if colon > 0 {
tf = excludeFileSpec[:colon]
tc = excludeFileSpec[colon+1:]
} else {
tf = excludeFileSpec
}
debug.Log("using %q as exclusion tagfile", tf)
rc := newRejectionCache()
return func(filename string, _ os.FileInfo, fs fs.FS) bool {
return isExcludedByFile(filename, tf, tc, rc, fs, warnf)
}, nil
}
// isExcludedByFile interprets filename as a path and returns true if that file
// is in an excluded directory. A directory is identified as excluded if it contains a
// tagfile which bears the name specified in tagFilename and starts with
// header. If rc is non-nil, it is used to expedite the evaluation of a
// directory based on previous visits.
func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool {
if tagFilename == "" {
return false
}
if fs.Base(filename) == tagFilename {
return false // do not exclude the tagfile itself
}
rc.Lock()
defer rc.Unlock()
dir := fs.Dir(filename)
rejected, visited := rc.Get(dir)
if visited {
return rejected
}
rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf)
rc.Store(dir, rejected)
return rejected
}
func isDirExcludedByFile(dir, tagFilename, header string, fsInst fs.FS, warnf func(msg string, args ...interface{})) bool {
tf := fsInst.Join(dir, tagFilename)
_, err := fsInst.Lstat(tf)
if errors.Is(err, os.ErrNotExist) {
return false
}
if err != nil {
warnf("could not access exclusion tagfile: %v", err)
return false
}
// when no signature is given, the mere presence of tf is enough reason
// to exclude filename
if len(header) == 0 {
return true
}
// From this stage, errors mean tagFilename exists but it is malformed.
// Warnings will be generated so that the user is informed that the
// indented ignore-action is not performed.
f, err := fsInst.OpenFile(tf, fs.O_RDONLY, false)
if err != nil {
warnf("could not open exclusion tagfile: %v", err)
return false
}
defer func() {
_ = f.Close()
}()
buf := make([]byte, len(header))
_, err = io.ReadFull(f, buf)
// EOF is handled with a dedicated message, otherwise the warning were too cryptic
if err == io.EOF {
warnf("invalid (too short) signature in exclusion tagfile %q\n", tf)
return false
}
if err != nil {
warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err)
return false
}
if !bytes.Equal(buf, []byte(header)) {
warnf("invalid signature in exclusion tagfile %q\n", tf)
return false
}
return true
}
// deviceMap is used to track allowed source devices for backup. This is used to
// check for crossing mount points during backup (for --one-file-system). It
// maps the name of a source path to its device ID.
type deviceMap map[string]uint64
// newDeviceMap creates a new device map from the list of source paths.
func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) {
deviceMap := make(map[string]uint64)
for _, item := range allowedSourcePaths {
item, err := fs.Abs(fs.Clean(item))
if err != nil {
return nil, err
}
fi, err := fs.Lstat(item)
if err != nil {
return nil, err
}
id, err := fs.DeviceID(fi)
if err != nil {
return nil, err
}
deviceMap[item] = id
}
if len(deviceMap) == 0 {
return nil, errors.New("zero allowed devices")
}
return deviceMap, nil
}
// IsAllowed returns true if the path is located on an allowed device.
func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) {
for dir := item; ; dir = fs.Dir(dir) {
debug.Log("item %v, test dir %v", item, dir)
// find a parent directory that is on an allowed device (otherwise
// we would not traverse the directory at all)
allowedID, ok := m[dir]
if !ok {
if dir == fs.Dir(dir) {
// arrived at root, no allowed device found. this should not happen.
break
}
continue
}
// if the item has a different device ID than the parent directory,
// we crossed a file system boundary
if allowedID != deviceID {
debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID)
return false, nil
}
// item is on allowed device, accept it
debug.Log("item %v allowed", item)
return true, nil
}
return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m)
}
// RejectByDevice returns a RejectFunc that rejects files which are on a
// different file systems than the files/dirs in samples.
func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) {
deviceMap, err := newDeviceMap(samples, filesystem)
if err != nil {
return nil, err
}
debug.Log("allowed devices: %v\n", deviceMap)
return func(item string, fi os.FileInfo, fs fs.FS) bool {
id, err := fs.DeviceID(fi)
if err != nil {
// This should never happen because gatherDevices() would have
// errored out earlier. If it still does that's a reason to panic.
panic(err)
}
allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs)
if err != nil {
// this should not happen
panic(fmt.Sprintf("error checking device ID of %v: %v", item, err))
}
if allowed {
// accept item
return false
}
// reject everything except directories
if !fi.IsDir() {
return true
}
// special case: make sure we keep mountpoints (directories which
// contain a mounted file system). Test this by checking if the parent
// directory would be included.
parentDir := fs.Dir(fs.Clean(item))
parentFI, err := fs.Lstat(parentDir)
if err != nil {
debug.Log("item %v: error running lstat() on parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentDeviceID, err := fs.DeviceID(parentFI)
if err != nil {
debug.Log("item %v: getting device ID of parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs)
if err != nil {
debug.Log("item %v: error checking parent directory: %v", item, err)
// if in doubt, reject
return true
}
if parentAllowed {
// we found a mount point, so accept the directory
return false
}
// reject everything else
return true
}, nil
}
func RejectBySize(maxSize int64) (RejectFunc, error) {
return func(item string, fi os.FileInfo, _ fs.FS) bool {
// directory will be ignored
if fi.IsDir() {
return false
}
filesize := fi.Size()
if filesize > maxSize {
debug.Log("file %s is oversize: %d", item, filesize)
return true
}
return false
}, nil
}