filter: factor rules into its own file

This commit is contained in:
Nick Craig-Wood 2022-10-08 17:56:04 +01:00
parent 7be9855a70
commit 4a31961c4f
2 changed files with 265 additions and 178 deletions

View file

@ -2,14 +2,11 @@
package filter package filter
import ( import (
"bufio"
"context" "context"
"errors" "errors"
"fmt" "fmt"
"log" "log"
"os"
"path" "path"
"regexp"
"strings" "strings"
"time" "time"
@ -22,73 +19,11 @@ import (
// This is accessed through GetConfig and AddConfig // This is accessed through GetConfig and AddConfig
var globalConfig = mustNewFilter(nil) var globalConfig = mustNewFilter(nil)
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// rules is a slice of rules
type rules struct {
rules []rule
existing map[string]struct{}
}
// add adds a rule if it doesn't exist already
func (rs *rules) add(Include bool, re *regexp.Regexp) {
if rs.existing == nil {
rs.existing = make(map[string]struct{})
}
newRule := rule{
Include: Include,
Regexp: re,
}
newRuleString := newRule.String()
if _, ok := rs.existing[newRuleString]; ok {
return // rule already exists
}
rs.rules = append(rs.rules, newRule)
rs.existing[newRuleString] = struct{}{}
}
// clear clears all the rules
func (rs *rules) clear() {
rs.rules = nil
rs.existing = nil
}
// len returns the number of rules
func (rs *rules) len() int {
return len(rs.rules)
}
// FilesMap describes the map of files to transfer
type FilesMap map[string]struct{}
// Opt configures the filter // Opt configures the filter
type Opt struct { type Opt struct {
DeleteExcluded bool DeleteExcluded bool
FilterRule []string RulesOpt // embedded so we don't change the JSON API
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
ExcludeFile []string ExcludeFile []string
IncludeRule []string
IncludeFrom []string
FilesFrom []string FilesFrom []string
FilesFromRaw []string FilesFromRaw []string
MinAge fs.Duration MinAge fs.Duration
@ -106,6 +41,9 @@ var DefaultOpt = Opt{
MaxSize: fs.SizeSuffix(-1), MaxSize: fs.SizeSuffix(-1),
} }
// FilesMap describes the map of files to transfer
type FilesMap map[string]struct{}
// Filter describes any filtering in operation // Filter describes any filtering in operation
type Filter struct { type Filter struct {
Opt Opt Opt Opt
@ -142,57 +80,9 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom) fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
} }
addImplicitExclude := false err = parseRules(&f.Opt.RulesOpt, f.Add, f.Clear)
foundExcludeRule := false if err != nil {
return nil, err
for _, rule := range f.Opt.IncludeRule {
err = f.Add(true, rule)
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.IncludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(true, line)
})
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.ExcludeRule {
err = f.Add(false, rule)
if err != nil {
return nil, err
}
foundExcludeRule = true
}
for _, rule := range f.Opt.ExcludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(false, line)
})
if err != nil {
return nil, err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range f.Opt.FilterRule {
err = f.AddRule(rule)
if err != nil {
return nil, err
}
}
for _, rule := range f.Opt.FilterFrom {
err := forEachLine(rule, false, f.AddRule)
if err != nil {
return nil, err
}
} }
inActive := f.InActive() inActive := f.InActive()
@ -225,12 +115,6 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
} }
} }
if addImplicitExclude {
err = f.Add(false, "/**")
if err != nil {
return nil, err
}
}
if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 { if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 {
fmt.Println("--- start filters ---") fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters()) fmt.Println(f.DumpFilters())
@ -309,16 +193,7 @@ func (f *Filter) Add(Include bool, glob string) error {
// //
// Line comments may be introduced with '#' or ';' // Line comments may be introduced with '#' or ';'
func (f *Filter) AddRule(rule string) error { func (f *Filter) AddRule(rule string) error {
switch { return addRule(rule, f.Add, f.Clear)
case rule == "!":
f.Clear()
return nil
case strings.HasPrefix(rule, "- "):
return f.Add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return f.Add(true, rule[2:])
}
return fmt.Errorf("malformed rule %q", rule)
} }
// initAddFile creates f.files and f.dirs // initAddFile creates f.files and f.dirs
@ -380,12 +255,7 @@ func (f *Filter) IncludeRemote(remote string) bool {
_, include := f.files[remote] _, include := f.files[remote]
return include return include
} }
for _, rule := range f.fileRules.rules { return f.fileRules.include(remote)
if rule.Match(remote) {
return rule.Include
}
}
return true
} }
// ListContainsExcludeFile checks if exclude file is present in the list. // ListContainsExcludeFile checks if exclude file is present in the list.
@ -428,13 +298,7 @@ func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (b
return include, nil return include, nil
} }
remote += "/" remote += "/"
for _, rule := range f.dirRules.rules { return f.dirRules.include(remote), nil
if rule.Match(remote) {
return rule.Include, nil
}
}
return true, nil
} }
} }
@ -490,41 +354,9 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
} else { } else {
modTime = time.Unix(0, 0) modTime = time.Unix(0, 0)
} }
return f.Include(o.Remote(), o.Size(), modTime) return f.Include(o.Remote(), o.Size(), modTime)
} }
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';' if raw is false
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
var scanner *bufio.Scanner
if path == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
in, err := os.Open(path)
if err != nil {
return err
}
scanner = bufio.NewScanner(in)
defer fs.CheckClose(in, &err)
}
for scanner.Scan() {
line := scanner.Text()
if !raw {
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
}
// DumpFilters dumps the filters in textual form, 1 per line // DumpFilters dumps the filters in textual form, 1 per line
func (f *Filter) DumpFilters() string { func (f *Filter) DumpFilters() string {
rules := []string{} rules := []string{}

255
fs/filter/rules.go Normal file
View file

@ -0,0 +1,255 @@
package filter
import (
"bufio"
"fmt"
"os"
"regexp"
"strings"
"github.com/rclone/rclone/fs"
)
// RulesOpt is configuration for a rule set
type RulesOpt struct {
FilterRule []string
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
IncludeRule []string
IncludeFrom []string
}
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// rules is a slice of rules
type rules struct {
rules []rule
existing map[string]struct{}
}
type addFn func(Include bool, glob string) error
// add adds a rule if it doesn't exist already
func (rs *rules) add(Include bool, re *regexp.Regexp) {
if rs.existing == nil {
rs.existing = make(map[string]struct{})
}
newRule := rule{
Include: Include,
Regexp: re,
}
newRuleString := newRule.String()
if _, ok := rs.existing[newRuleString]; ok {
return // rule already exists
}
rs.rules = append(rs.rules, newRule)
rs.existing[newRuleString] = struct{}{}
}
// Add adds a filter rule with include or exclude status indicated
func (rs *rules) Add(Include bool, glob string) error {
re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */)
if err != nil {
return err
}
rs.add(Include, re)
return nil
}
type clearFn func()
// clear clears all the rules
func (rs *rules) clear() {
rs.rules = nil
rs.existing = nil
}
// len returns the number of rules
func (rs *rules) len() int {
return len(rs.rules)
}
// include returns whether this remote passes the filter rules.
func (rs *rules) include(remote string) bool {
for _, rule := range rs.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
}
// include returns whether this collection of strings remote passes
// the filter rules.
//
// the first rule is evaluated on all the remotes and if it matches
// then the result is returned. If not the next rule is tested and so
// on.
func (rs *rules) includeMany(remotes []string) bool {
for _, rule := range rs.rules {
for _, remote := range remotes {
if rule.Match(remote) {
return rule.Include
}
}
}
return true
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';' if raw is false
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
var scanner *bufio.Scanner
if path == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
in, err := os.Open(path)
if err != nil {
return err
}
scanner = bufio.NewScanner(in)
defer fs.CheckClose(in, &err)
}
for scanner.Scan() {
line := scanner.Text()
if !raw {
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// # Comment
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func addRule(rule string, add addFn, clear clearFn) error {
switch {
case rule == "!":
clear()
return nil
case strings.HasPrefix(rule, "- "):
return add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return add(true, rule[2:])
}
return fmt.Errorf("malformed rule %q", rule)
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// # Comment
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func (rs *rules) AddRule(rule string) error {
return addRule(rule, rs.Add, rs.clear)
}
// Parse the rules passed in and add them to the function
func parseRules(opt *RulesOpt, add addFn, clear clearFn) (err error) {
addImplicitExclude := false
foundExcludeRule := false
for _, rule := range opt.IncludeRule {
err = add(true, rule)
if err != nil {
return err
}
addImplicitExclude = true
}
for _, rule := range opt.IncludeFrom {
err := forEachLine(rule, false, func(line string) error {
return add(true, line)
})
if err != nil {
return err
}
addImplicitExclude = true
}
for _, rule := range opt.ExcludeRule {
err = add(false, rule)
if err != nil {
return err
}
foundExcludeRule = true
}
for _, rule := range opt.ExcludeFrom {
err := forEachLine(rule, false, func(line string) error {
return add(false, line)
})
if err != nil {
return err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range opt.FilterRule {
err = addRule(rule, add, clear)
if err != nil {
return err
}
}
for _, rule := range opt.FilterFrom {
err := forEachLine(rule, false, func(rule string) error {
return addRule(rule, add, clear)
})
if err != nil {
return err
}
}
if addImplicitExclude {
err = add(false, "/**")
if err != nil {
return err
}
}
return nil
}