forked from TrueCloudLab/restic
318 lines
9 KiB
Go
318 lines
9 KiB
Go
package archiver
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/fs"
|
|
)
|
|
|
|
// RejectByNameFunc decides, from its path alone, whether a file that would
// otherwise be included in the backup should be left out. It returns true to
// reject (exclude) the file and false to keep it.
type RejectByNameFunc func(path string) bool
|
|
|
|
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
|
// file that would be included in the backup. The function returns true if it
|
|
// should be excluded (rejected) from the backup.
|
|
type RejectFunc func(path string, fi *fs.ExtendedFileInfo, fs fs.FS) bool
|
|
|
|
func CombineRejectByNames(funcs []RejectByNameFunc) SelectByNameFunc {
|
|
return func(item string) bool {
|
|
for _, reject := range funcs {
|
|
if reject(item) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
func CombineRejects(funcs []RejectFunc) SelectFunc {
|
|
return func(item string, fi *fs.ExtendedFileInfo, fs fs.FS) bool {
|
|
for _, reject := range funcs {
|
|
if reject(item, fi, fs) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// rejectionCache remembers, per directory, the rejection verdict that was
// last stored for it. The map is not synchronized by Get and Store
// themselves: callers must bracket those calls with Lock and Unlock.
type rejectionCache struct {
	m   map[string]bool // directory -> stored verdict
	mtx sync.Mutex      // taken via Lock/Unlock around accesses to m
}

// newRejectionCache returns an empty rejectionCache that is ready for use.
func newRejectionCache() *rejectionCache {
	cache := new(rejectionCache)
	cache.m = map[string]bool{}
	return cache
}

// Lock acquires the mutex of rc.
func (rc *rejectionCache) Lock() { rc.mtx.Lock() }

// Unlock releases the mutex of rc.
func (rc *rejectionCache) Unlock() { rc.mtx.Unlock() }

// Get looks up dir in the cache. The first result is the value last stored
// for dir, the second reports whether any value has been stored for it at
// all. The caller must hold the lock (rc.Lock / rc.Unlock) while calling
// this method, otherwise data races may occur.
func (rc *rejectionCache) Get(dir string) (bool, bool) {
	rejected, known := rc.m[dir]
	return rejected, known
}

// Store records rejected as the verdict for dir, replacing any earlier value.
// The caller must hold the lock (rc.Lock / rc.Unlock) while calling this
// method, otherwise data races may occur.
func (rc *rejectionCache) Store(dir string, rejected bool) {
	rc.m[dir] = rejected
}
|
|
|
|
// RejectIfPresent returns a RejectByNameFunc which itself returns whether a path
|
|
// should be excluded. The RejectByNameFunc considers a file to be excluded when
|
|
// it resides in a directory with an exclusion file, that is specified by
|
|
// excludeFileSpec in the form "filename[:content]". The returned error is
|
|
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
|
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
|
|
// of a directory based on previous visits.
|
|
func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) {
|
|
if excludeFileSpec == "" {
|
|
return nil, errors.New("name for exclusion tagfile is empty")
|
|
}
|
|
colon := strings.Index(excludeFileSpec, ":")
|
|
if colon == 0 {
|
|
return nil, fmt.Errorf("no name for exclusion tagfile provided")
|
|
}
|
|
tf, tc := "", ""
|
|
if colon > 0 {
|
|
tf = excludeFileSpec[:colon]
|
|
tc = excludeFileSpec[colon+1:]
|
|
} else {
|
|
tf = excludeFileSpec
|
|
}
|
|
debug.Log("using %q as exclusion tagfile", tf)
|
|
rc := newRejectionCache()
|
|
return func(filename string, _ *fs.ExtendedFileInfo, fs fs.FS) bool {
|
|
return isExcludedByFile(filename, tf, tc, rc, fs, warnf)
|
|
}, nil
|
|
}
|
|
|
|
// isExcludedByFile interprets filename as a path and returns true if that file
|
|
// is in an excluded directory. A directory is identified as excluded if it contains a
|
|
// tagfile which bears the name specified in tagFilename and starts with
|
|
// header. If rc is non-nil, it is used to expedite the evaluation of a
|
|
// directory based on previous visits.
|
|
func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool {
|
|
if tagFilename == "" {
|
|
return false
|
|
}
|
|
|
|
if fs.Base(filename) == tagFilename {
|
|
return false // do not exclude the tagfile itself
|
|
}
|
|
rc.Lock()
|
|
defer rc.Unlock()
|
|
|
|
dir := fs.Dir(filename)
|
|
rejected, visited := rc.Get(dir)
|
|
if visited {
|
|
return rejected
|
|
}
|
|
rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf)
|
|
rc.Store(dir, rejected)
|
|
return rejected
|
|
}
|
|
|
|
func isDirExcludedByFile(dir, tagFilename, header string, fsInst fs.FS, warnf func(msg string, args ...interface{})) bool {
|
|
tf := fsInst.Join(dir, tagFilename)
|
|
_, err := fsInst.Lstat(tf)
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
return false
|
|
}
|
|
if err != nil {
|
|
warnf("could not access exclusion tagfile: %v", err)
|
|
return false
|
|
}
|
|
// when no signature is given, the mere presence of tf is enough reason
|
|
// to exclude filename
|
|
if len(header) == 0 {
|
|
return true
|
|
}
|
|
// From this stage, errors mean tagFilename exists but it is malformed.
|
|
// Warnings will be generated so that the user is informed that the
|
|
// indented ignore-action is not performed.
|
|
f, err := fsInst.OpenFile(tf, fs.O_RDONLY, false)
|
|
if err != nil {
|
|
warnf("could not open exclusion tagfile: %v", err)
|
|
return false
|
|
}
|
|
defer func() {
|
|
_ = f.Close()
|
|
}()
|
|
buf := make([]byte, len(header))
|
|
_, err = io.ReadFull(f, buf)
|
|
// EOF is handled with a dedicated message, otherwise the warning were too cryptic
|
|
if err == io.EOF {
|
|
warnf("invalid (too short) signature in exclusion tagfile %q\n", tf)
|
|
return false
|
|
}
|
|
if err != nil {
|
|
warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err)
|
|
return false
|
|
}
|
|
if !bytes.Equal(buf, []byte(header)) {
|
|
warnf("invalid signature in exclusion tagfile %q\n", tf)
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// deviceMap records the source devices a backup is allowed to read from. It
// maps the name of each source path to the device ID of that path and is
// consulted to detect when a backup would cross a mount point (for
// --one-file-system).
type deviceMap map[string]uint64
|
|
|
|
// newDeviceMap creates a new device map from the list of source paths.
|
|
func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) {
|
|
if runtime.GOOS == "windows" {
|
|
return nil, errors.New("Device IDs are not supported on Windows")
|
|
}
|
|
|
|
deviceMap := make(map[string]uint64)
|
|
|
|
for _, item := range allowedSourcePaths {
|
|
item, err := fs.Abs(fs.Clean(item))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fi, err := fs.Lstat(item)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
deviceMap[item] = fi.DeviceID
|
|
}
|
|
|
|
if len(deviceMap) == 0 {
|
|
return nil, errors.New("zero allowed devices")
|
|
}
|
|
|
|
return deviceMap, nil
|
|
}
|
|
|
|
// IsAllowed returns true if the path is located on an allowed device.
|
|
func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) {
|
|
for dir := item; ; dir = fs.Dir(dir) {
|
|
debug.Log("item %v, test dir %v", item, dir)
|
|
|
|
// find a parent directory that is on an allowed device (otherwise
|
|
// we would not traverse the directory at all)
|
|
allowedID, ok := m[dir]
|
|
if !ok {
|
|
if dir == fs.Dir(dir) {
|
|
// arrived at root, no allowed device found. this should not happen.
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
|
|
// if the item has a different device ID than the parent directory,
|
|
// we crossed a file system boundary
|
|
if allowedID != deviceID {
|
|
debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID)
|
|
return false, nil
|
|
}
|
|
|
|
// item is on allowed device, accept it
|
|
debug.Log("item %v allowed", item)
|
|
return true, nil
|
|
}
|
|
|
|
return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m)
|
|
}
|
|
|
|
// RejectByDevice returns a RejectFunc that rejects files which are on a
|
|
// different file systems than the files/dirs in samples.
|
|
func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) {
|
|
deviceMap, err := newDeviceMap(samples, filesystem)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
debug.Log("allowed devices: %v\n", deviceMap)
|
|
|
|
return func(item string, fi *fs.ExtendedFileInfo, fs fs.FS) bool {
|
|
allowed, err := deviceMap.IsAllowed(fs.Clean(item), fi.DeviceID, fs)
|
|
if err != nil {
|
|
// this should not happen
|
|
panic(fmt.Sprintf("error checking device ID of %v: %v", item, err))
|
|
}
|
|
|
|
if allowed {
|
|
// accept item
|
|
return false
|
|
}
|
|
|
|
// reject everything except directories
|
|
if !fi.IsDir() {
|
|
return true
|
|
}
|
|
|
|
// special case: make sure we keep mountpoints (directories which
|
|
// contain a mounted file system). Test this by checking if the parent
|
|
// directory would be included.
|
|
parentDir := fs.Dir(fs.Clean(item))
|
|
|
|
parentFI, err := fs.Lstat(parentDir)
|
|
if err != nil {
|
|
debug.Log("item %v: error running lstat() on parent directory: %v", item, err)
|
|
// if in doubt, reject
|
|
return true
|
|
}
|
|
|
|
parentAllowed, err := deviceMap.IsAllowed(parentDir, parentFI.DeviceID, fs)
|
|
if err != nil {
|
|
debug.Log("item %v: error checking parent directory: %v", item, err)
|
|
// if in doubt, reject
|
|
return true
|
|
}
|
|
|
|
if parentAllowed {
|
|
// we found a mount point, so accept the directory
|
|
return false
|
|
}
|
|
|
|
// reject everything else
|
|
return true
|
|
}, nil
|
|
}
|
|
|
|
func RejectBySize(maxSize int64) (RejectFunc, error) {
|
|
return func(item string, fi *fs.ExtendedFileInfo, _ fs.FS) bool {
|
|
// directory will be ignored
|
|
if fi.IsDir() {
|
|
return false
|
|
}
|
|
|
|
filesize := fi.Size
|
|
if filesize > maxSize {
|
|
debug.Log("file %s is oversize: %d", item, filesize)
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}, nil
|
|
}
|