rclone/fs/filter.go
Nick Craig-Wood 753b0717be Refactor the List and ListDir interface
Gives more accurate error propagation, control of depth of recursion
and short circuit recursion where possible.

Most of the the heavy lifting is done in the "fs" package, making file
system implementations a bit simpler.

This commit contains some code originally by Klaus Post.

Fixes #316
2016-05-06 16:52:34 +01:00

379 lines
9.5 KiB
Go

// Control the filtering of files
package fs
import (
"bufio"
"fmt"
"os"
"path"
"regexp"
"strconv"
"strings"
"time"
"github.com/spf13/pflag"
)
// Global
var (
// Flags
deleteExcluded = pflag.BoolP("delete-excluded", "", false, "Delete files on dest excluded from sync")
filterRule = pflag.StringP("filter", "f", "", "Add a file-filtering rule")
filterFrom = pflag.StringP("filter-from", "", "", "Read filtering patterns from a file")
excludeRule = pflag.StringP("exclude", "", "", "Exclude files matching pattern")
excludeFrom = pflag.StringP("exclude-from", "", "", "Read exclude patterns from file")
includeRule = pflag.StringP("include", "", "", "Include files matching pattern")
includeFrom = pflag.StringP("include-from", "", "", "Read include patterns from file")
filesFrom = pflag.StringP("files-from", "", "", "Read list of source-file names from file")
minAge = pflag.StringP("min-age", "", "", "Don't transfer any file younger than this in s or suffix ms|s|m|h|d|w|M|y")
maxAge = pflag.StringP("max-age", "", "", "Don't transfer any file older than this in s or suffix ms|s|m|h|d|w|M|y")
minSize SizeSuffix
maxSize SizeSuffix
dumpFilters = pflag.BoolP("dump-filters", "", false, "Dump the filters to the output")
//cvsExclude = pflag.BoolP("cvs-exclude", "C", false, "Exclude files in the same way CVS does")
)
func init() {
pflag.VarP(&minSize, "min-size", "", "Don't transfer any file smaller than this in k or suffix k|M|G")
pflag.VarP(&maxSize, "max-size", "", "Don't transfer any file larger than this in k or suffix k|M|G")
}
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// filesMap describes the map of files to transfer
type filesMap map[string]struct{}
// Filter describes any filtering in operation
type Filter struct {
DeleteExcluded bool
MinSize int64
MaxSize int64
ModTimeFrom time.Time
ModTimeTo time.Time
rules []rule
files filesMap // files if filesFrom
dirs filesMap // dirs from filesFrom
}
// We use time conventions
var ageSuffixes = []struct {
Suffix string
Multiplier time.Duration
}{
{Suffix: "ms", Multiplier: time.Millisecond},
{Suffix: "s", Multiplier: time.Second},
{Suffix: "m", Multiplier: time.Minute},
{Suffix: "h", Multiplier: time.Hour},
{Suffix: "d", Multiplier: time.Hour * 24},
{Suffix: "w", Multiplier: time.Hour * 24 * 7},
{Suffix: "M", Multiplier: time.Hour * 24 * 30},
{Suffix: "y", Multiplier: time.Hour * 24 * 365},
// Default to second
{Suffix: "", Multiplier: time.Second},
}
// ParseDuration parses a duration string. Accept ms|s|m|h|d|w|M|y suffixes. Defaults to second if not provided
func ParseDuration(age string) (time.Duration, error) {
var period float64
for _, ageSuffix := range ageSuffixes {
if strings.HasSuffix(age, ageSuffix.Suffix) {
numberString := age[:len(age)-len(ageSuffix.Suffix)]
var err error
period, err = strconv.ParseFloat(numberString, 64)
if err != nil {
return time.Duration(0), err
}
period *= float64(ageSuffix.Multiplier)
break
}
}
return time.Duration(period), nil
}
// NewFilter parses the command line options and creates a Filter object
func NewFilter() (f *Filter, err error) {
f = &Filter{
DeleteExcluded: *deleteExcluded,
MinSize: int64(minSize),
MaxSize: int64(maxSize),
}
addImplicitExclude := false
if *includeRule != "" {
err = f.Add(true, *includeRule)
if err != nil {
return nil, err
}
addImplicitExclude = true
}
if *includeFrom != "" {
err := forEachLine(*includeFrom, func(line string) error {
return f.Add(true, line)
})
if err != nil {
return nil, err
}
addImplicitExclude = true
}
if *excludeRule != "" {
err = f.Add(false, *excludeRule)
if err != nil {
return nil, err
}
}
if *excludeFrom != "" {
err := forEachLine(*excludeFrom, func(line string) error {
return f.Add(false, line)
})
if err != nil {
return nil, err
}
}
if *filterRule != "" {
err = f.AddRule(*filterRule)
if err != nil {
return nil, err
}
}
if *filterFrom != "" {
err := forEachLine(*filterFrom, f.AddRule)
if err != nil {
return nil, err
}
}
if *filesFrom != "" {
err := forEachLine(*filesFrom, func(line string) error {
return f.AddFile(line)
})
if err != nil {
return nil, err
}
}
if addImplicitExclude {
err = f.Add(false, "*")
if err != nil {
return nil, err
}
}
if *minAge != "" {
duration, err := ParseDuration(*minAge)
if err != nil {
return nil, err
}
f.ModTimeTo = time.Now().Add(-duration)
Debug(nil, "--min-age %v to %v", duration, f.ModTimeTo)
}
if *maxAge != "" {
duration, err := ParseDuration(*maxAge)
if err != nil {
return nil, err
}
f.ModTimeFrom = time.Now().Add(-duration)
if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) {
return nil, fmt.Errorf("Argument --min-age can't be larger than --max-age")
}
Debug(nil, "--max-age %v to %v", duration, f.ModTimeFrom)
}
if *dumpFilters {
fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters())
fmt.Println("--- end filters ---")
}
return f, nil
}
// Add adds a filter rule with include or exclude status indicated
func (f *Filter) Add(Include bool, glob string) error {
re, err := globToRegexp(glob)
if err != nil {
return err
}
rule := rule{
Include: Include,
Regexp: re,
}
f.rules = append(f.rules, rule)
return nil
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func (f *Filter) AddRule(rule string) error {
switch {
case rule == "!":
f.Clear()
return nil
case strings.HasPrefix(rule, "- "):
return f.Add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return f.Add(true, rule[2:])
}
return fmt.Errorf("Malformed rule %q", rule)
}
// AddFile adds a single file to the files from list
func (f *Filter) AddFile(file string) error {
if f.files == nil {
f.files = make(filesMap)
f.dirs = make(filesMap)
}
file = strings.Trim(file, "/")
f.files[file] = struct{}{}
// Put all the parent directories into f.dirs
for {
file = path.Dir(file)
if file == "." {
break
}
if _, found := f.dirs[file]; found {
break
}
f.dirs[file] = struct{}{}
}
return nil
}
// Clear clears all the filter rules
func (f *Filter) Clear() {
f.rules = nil
}
// InActive returns false if any filters are active
func (f *Filter) InActive() bool {
return (f.files == nil &&
f.ModTimeFrom.IsZero() &&
f.ModTimeTo.IsZero() &&
f.MinSize == 0 &&
f.MaxSize == 0 &&
len(f.rules) == 0)
}
// includeRemote returns whether this remote passes the filter rules.
func (f *Filter) includeRemote(remote string) bool {
for _, rule := range f.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
}
// IncludeDirectory returns whether this directory should be included
// in the sync or not.
func (f *Filter) IncludeDirectory(remote string) bool {
remote = strings.Trim(remote, "/")
// filesFrom takes precedence
if f.files != nil {
_, include := f.dirs[remote]
return include
}
return f.includeRemote(remote + "/")
}
// Include returns whether this object should be included into the
// sync or not
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
// filesFrom takes precedence
if f.files != nil {
_, include := f.files[remote]
return include
}
if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
return false
}
if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
return false
}
if f.MinSize != 0 && size < f.MinSize {
return false
}
if f.MaxSize != 0 && size > f.MaxSize {
return false
}
return f.includeRemote(remote)
}
// IncludeObject returns whether this object should be included into
// the sync or not. This is a convenience function to avoid calling
// o.ModTime(), which is an expensive operation.
func (f *Filter) IncludeObject(o Object) bool {
var modTime time.Time
if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
modTime = o.ModTime()
} else {
modTime = time.Unix(0, 0)
}
return f.Include(o.Remote(), o.Size(), modTime)
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';'
func forEachLine(path string, fn func(string) error) (err error) {
in, err := os.Open(path)
if err != nil {
return err
}
defer CheckClose(in, &err)
scanner := bufio.NewScanner(in)
for scanner.Scan() {
line := scanner.Text()
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
}
// DumpFilters dumps the filters in textual form, 1 per line
func (f *Filter) DumpFilters() string {
rules := []string{}
if !f.ModTimeFrom.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
}
if !f.ModTimeTo.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
}
for _, rule := range f.rules {
rules = append(rules, rule.String())
}
return strings.Join(rules, "\n")
}