forked from TrueCloudLab/restic
48dbefc37e
The actual implementation still relies on file paths, but with the abstraction layer in place, an FS implementation can ensure atomic file accesses in the future.
332 lines
9.3 KiB
Go
332 lines
9.3 KiB
Go
package archiver
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
"github.com/restic/restic/internal/errors"
|
|
"github.com/restic/restic/internal/fs"
|
|
)
|
|
|
|
// RejectByNameFunc decides, from the path alone, whether an item that would
// otherwise become part of the backup has to be left out. It returns true
// when the item should be excluded (rejected).
type RejectByNameFunc func(path string) bool
|
|
|
|
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
|
// file that would be included in the backup. The function returns true if it
|
|
// should be excluded (rejected) from the backup.
|
|
type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool
|
|
|
|
func CombineRejectByNames(funcs []RejectByNameFunc) SelectByNameFunc {
|
|
return func(item string) bool {
|
|
for _, reject := range funcs {
|
|
if reject(item) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
func CombineRejects(funcs []RejectFunc) SelectFunc {
|
|
return func(item string, fi os.FileInfo, fs fs.FS) bool {
|
|
for _, reject := range funcs {
|
|
if reject(item, fi, fs) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// rejectionCache remembers a boolean verdict per directory name. The map is
// not synchronized by Get and Store themselves; callers bracket those calls
// with Lock and Unlock.
type rejectionCache struct {
	mtx sync.Mutex
	m   map[string]bool
}

// newRejectionCache returns an empty, ready-to-use rejectionCache.
func newRejectionCache() *rejectionCache {
	rc := new(rejectionCache)
	rc.m = map[string]bool{}
	return rc
}

// Lock acquires the mutex that guards rc.
func (rc *rejectionCache) Lock() {
	rc.mtx.Lock()
}

// Unlock releases the mutex that guards rc.
func (rc *rejectionCache) Unlock() {
	rc.mtx.Unlock()
}

// Get looks up dir and returns the value most recently stored for it. The
// second result tells whether a value for dir was ever stored; when it is
// false the first result is false as well. The caller has to hold the lock
// (rc.Lock / rc.Unlock) around this call, otherwise data races may occur.
func (rc *rejectionCache) Get(dir string) (rejected, found bool) {
	rejected, found = rc.m[dir]
	return rejected, found
}

// Store records rejected as the value for dir, replacing any earlier value.
// The caller has to hold the lock (rc.Lock / rc.Unlock) around this call,
// otherwise data races may occur.
func (rc *rejectionCache) Store(dir string, rejected bool) {
	rc.m[dir] = rejected
}
|
|
|
|
// RejectIfPresent returns a RejectByNameFunc which itself returns whether a path
|
|
// should be excluded. The RejectByNameFunc considers a file to be excluded when
|
|
// it resides in a directory with an exclusion file, that is specified by
|
|
// excludeFileSpec in the form "filename[:content]". The returned error is
|
|
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
|
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
|
|
// of a directory based on previous visits.
|
|
func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) {
|
|
if excludeFileSpec == "" {
|
|
return nil, errors.New("name for exclusion tagfile is empty")
|
|
}
|
|
colon := strings.Index(excludeFileSpec, ":")
|
|
if colon == 0 {
|
|
return nil, fmt.Errorf("no name for exclusion tagfile provided")
|
|
}
|
|
tf, tc := "", ""
|
|
if colon > 0 {
|
|
tf = excludeFileSpec[:colon]
|
|
tc = excludeFileSpec[colon+1:]
|
|
} else {
|
|
tf = excludeFileSpec
|
|
}
|
|
debug.Log("using %q as exclusion tagfile", tf)
|
|
rc := newRejectionCache()
|
|
return func(filename string, _ os.FileInfo, fs fs.FS) bool {
|
|
return isExcludedByFile(filename, tf, tc, rc, fs, warnf)
|
|
}, nil
|
|
}
|
|
|
|
// isExcludedByFile interprets filename as a path and returns true if that file
|
|
// is in an excluded directory. A directory is identified as excluded if it contains a
|
|
// tagfile which bears the name specified in tagFilename and starts with
|
|
// header. If rc is non-nil, it is used to expedite the evaluation of a
|
|
// directory based on previous visits.
|
|
func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool {
|
|
if tagFilename == "" {
|
|
return false
|
|
}
|
|
|
|
if fs.Base(filename) == tagFilename {
|
|
return false // do not exclude the tagfile itself
|
|
}
|
|
rc.Lock()
|
|
defer rc.Unlock()
|
|
|
|
dir := fs.Dir(filename)
|
|
rejected, visited := rc.Get(dir)
|
|
if visited {
|
|
return rejected
|
|
}
|
|
rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf)
|
|
rc.Store(dir, rejected)
|
|
return rejected
|
|
}
|
|
|
|
func isDirExcludedByFile(dir, tagFilename, header string, fsInst fs.FS, warnf func(msg string, args ...interface{})) bool {
|
|
tf := fsInst.Join(dir, tagFilename)
|
|
_, err := fsInst.Lstat(tf)
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
return false
|
|
}
|
|
if err != nil {
|
|
warnf("could not access exclusion tagfile: %v", err)
|
|
return false
|
|
}
|
|
// when no signature is given, the mere presence of tf is enough reason
|
|
// to exclude filename
|
|
if len(header) == 0 {
|
|
return true
|
|
}
|
|
// From this stage, errors mean tagFilename exists but it is malformed.
|
|
// Warnings will be generated so that the user is informed that the
|
|
// indented ignore-action is not performed.
|
|
f, err := fsInst.OpenFile(tf, fs.O_RDONLY, false)
|
|
if err != nil {
|
|
warnf("could not open exclusion tagfile: %v", err)
|
|
return false
|
|
}
|
|
defer func() {
|
|
_ = f.Close()
|
|
}()
|
|
buf := make([]byte, len(header))
|
|
_, err = io.ReadFull(f, buf)
|
|
// EOF is handled with a dedicated message, otherwise the warning were too cryptic
|
|
if err == io.EOF {
|
|
warnf("invalid (too short) signature in exclusion tagfile %q\n", tf)
|
|
return false
|
|
}
|
|
if err != nil {
|
|
warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err)
|
|
return false
|
|
}
|
|
if !bytes.Equal(buf, []byte(header)) {
|
|
warnf("invalid signature in exclusion tagfile %q\n", tf)
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// deviceMap records the devices that a backup is allowed to read from. It is
// consulted to detect that a mount point was crossed during backup (for
// --one-file-system). Keys are the names of the source paths, values are the
// device IDs these paths reside on.
type deviceMap map[string]uint64
|
|
|
|
// newDeviceMap creates a new device map from the list of source paths.
|
|
func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) {
|
|
deviceMap := make(map[string]uint64)
|
|
|
|
for _, item := range allowedSourcePaths {
|
|
item, err := fs.Abs(fs.Clean(item))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fi, err := fs.Lstat(item)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
id, err := fs.DeviceID(fi)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
deviceMap[item] = id
|
|
}
|
|
|
|
if len(deviceMap) == 0 {
|
|
return nil, errors.New("zero allowed devices")
|
|
}
|
|
|
|
return deviceMap, nil
|
|
}
|
|
|
|
// IsAllowed returns true if the path is located on an allowed device.
|
|
func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) {
|
|
for dir := item; ; dir = fs.Dir(dir) {
|
|
debug.Log("item %v, test dir %v", item, dir)
|
|
|
|
// find a parent directory that is on an allowed device (otherwise
|
|
// we would not traverse the directory at all)
|
|
allowedID, ok := m[dir]
|
|
if !ok {
|
|
if dir == fs.Dir(dir) {
|
|
// arrived at root, no allowed device found. this should not happen.
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
|
|
// if the item has a different device ID than the parent directory,
|
|
// we crossed a file system boundary
|
|
if allowedID != deviceID {
|
|
debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID)
|
|
return false, nil
|
|
}
|
|
|
|
// item is on allowed device, accept it
|
|
debug.Log("item %v allowed", item)
|
|
return true, nil
|
|
}
|
|
|
|
return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m)
|
|
}
|
|
|
|
// RejectByDevice returns a RejectFunc that rejects files which are on a
|
|
// different file systems than the files/dirs in samples.
|
|
func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) {
|
|
deviceMap, err := newDeviceMap(samples, filesystem)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
debug.Log("allowed devices: %v\n", deviceMap)
|
|
|
|
return func(item string, fi os.FileInfo, fs fs.FS) bool {
|
|
id, err := fs.DeviceID(fi)
|
|
if err != nil {
|
|
// This should never happen because gatherDevices() would have
|
|
// errored out earlier. If it still does that's a reason to panic.
|
|
panic(err)
|
|
}
|
|
|
|
allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs)
|
|
if err != nil {
|
|
// this should not happen
|
|
panic(fmt.Sprintf("error checking device ID of %v: %v", item, err))
|
|
}
|
|
|
|
if allowed {
|
|
// accept item
|
|
return false
|
|
}
|
|
|
|
// reject everything except directories
|
|
if !fi.IsDir() {
|
|
return true
|
|
}
|
|
|
|
// special case: make sure we keep mountpoints (directories which
|
|
// contain a mounted file system). Test this by checking if the parent
|
|
// directory would be included.
|
|
parentDir := fs.Dir(fs.Clean(item))
|
|
|
|
parentFI, err := fs.Lstat(parentDir)
|
|
if err != nil {
|
|
debug.Log("item %v: error running lstat() on parent directory: %v", item, err)
|
|
// if in doubt, reject
|
|
return true
|
|
}
|
|
|
|
parentDeviceID, err := fs.DeviceID(parentFI)
|
|
if err != nil {
|
|
debug.Log("item %v: getting device ID of parent directory: %v", item, err)
|
|
// if in doubt, reject
|
|
return true
|
|
}
|
|
|
|
parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs)
|
|
if err != nil {
|
|
debug.Log("item %v: error checking parent directory: %v", item, err)
|
|
// if in doubt, reject
|
|
return true
|
|
}
|
|
|
|
if parentAllowed {
|
|
// we found a mount point, so accept the directory
|
|
return false
|
|
}
|
|
|
|
// reject everything else
|
|
return true
|
|
}, nil
|
|
}
|
|
|
|
func RejectBySize(maxSize int64) (RejectFunc, error) {
|
|
return func(item string, fi os.FileInfo, _ fs.FS) bool {
|
|
// directory will be ignored
|
|
if fi.IsDir() {
|
|
return false
|
|
}
|
|
|
|
filesize := fi.Size()
|
|
if filesize > maxSize {
|
|
debug.Log("file %s is oversize: %d", item, filesize)
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}, nil
|
|
}
|