filter: factor rules into its own file

This commit is contained in:
Nick Craig-Wood 2022-10-08 17:56:04 +01:00
parent 7be9855a70
commit 4a31961c4f
2 changed files with 265 additions and 178 deletions

View file

@ -2,14 +2,11 @@
package filter
import (
"bufio"
"context"
"errors"
"fmt"
"log"
"os"
"path"
"regexp"
"strings"
"time"
@ -22,73 +19,11 @@ import (
// This is accessed through GetConfig and AddConfig
var globalConfig = mustNewFilter(nil)
// rule is one filter rule: a compiled regular expression together with
// the verdict to report for paths that it matches.
type rule struct {
// Include is the verdict for a matching path: true means include
// it, false means exclude it.
Include bool
// Regexp is the compiled pattern that Match tests paths against.
Regexp *regexp.Regexp
}
// Match reports whether the rule's regular expression matches path.
func (r *rule) Match(path string) bool {
	re := r.Regexp
	return re.MatchString(path)
}
// String renders the rule as a sign, a space and the regular
// expression: the sign is "+" for an include rule and "-" for an
// exclude rule.
func (r *rule) String() string {
	var sign string
	switch r.Include {
	case true:
		sign = "+"
	default:
		sign = "-"
	}
	pattern := r.Regexp.String()
	return fmt.Sprintf("%s %s", sign, pattern)
}
// rules is an ordered list of rules plus a set used to reject
// duplicates.
type rules struct {
// rules holds the rules in the order they were added.
rules []rule
// existing is keyed by the String() form of every rule already
// added so that add can skip duplicates. It is created lazily by
// add and reset to nil by clear.
existing map[string]struct{}
}
// add appends the rule made from Include and re unless an identical
// rule has already been added.
//
// Rules are identified by their String() form, so two rules count as
// the same when both the include flag and the regexp text agree.
func (rs *rules) add(Include bool, re *regexp.Regexp) {
	candidate := rule{Include: Include, Regexp: re}
	key := candidate.String()
	// Looking up a key in a nil map is safe and simply reports "absent".
	if _, seen := rs.existing[key]; seen {
		return // rule already exists
	}
	if rs.existing == nil {
		rs.existing = make(map[string]struct{})
	}
	rs.existing[key] = struct{}{}
	rs.rules = append(rs.rules, candidate)
}
// clear drops every rule together with the duplicate-tracking set,
// leaving rs empty.
func (rs *rules) clear() {
	rs.existing = nil
	rs.rules = nil
}
// len reports how many rules are currently held.
func (rs *rules) len() int {
	count := len(rs.rules)
	return count
}
// FilesMap describes the map of files to transfer
//
// It is used as a set: only the string keys carry information, the
// values are empty structs.
type FilesMap map[string]struct{}
// Opt configures the filter
type Opt struct {
DeleteExcluded bool
FilterRule []string
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
RulesOpt // embedded so we don't change the JSON API
ExcludeFile []string
IncludeRule []string
IncludeFrom []string
FilesFrom []string
FilesFromRaw []string
MinAge fs.Duration
@ -106,6 +41,9 @@ var DefaultOpt = Opt{
MaxSize: fs.SizeSuffix(-1),
}
// FilesMap describes the map of files to transfer
//
// It is used as a set: only the string keys carry information, the
// values are empty structs.
type FilesMap map[string]struct{}
// Filter describes any filtering in operation
type Filter struct {
Opt Opt
@ -142,58 +80,10 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
}
addImplicitExclude := false
foundExcludeRule := false
for _, rule := range f.Opt.IncludeRule {
err = f.Add(true, rule)
err = parseRules(&f.Opt.RulesOpt, f.Add, f.Clear)
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.IncludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(true, line)
})
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.ExcludeRule {
err = f.Add(false, rule)
if err != nil {
return nil, err
}
foundExcludeRule = true
}
for _, rule := range f.Opt.ExcludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(false, line)
})
if err != nil {
return nil, err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range f.Opt.FilterRule {
err = f.AddRule(rule)
if err != nil {
return nil, err
}
}
for _, rule := range f.Opt.FilterFrom {
err := forEachLine(rule, false, f.AddRule)
if err != nil {
return nil, err
}
}
inActive := f.InActive()
@ -225,12 +115,6 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
}
}
if addImplicitExclude {
err = f.Add(false, "/**")
if err != nil {
return nil, err
}
}
if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 {
fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters())
@ -309,16 +193,7 @@ func (f *Filter) Add(Include bool, glob string) error {
//
// Line comments may be introduced with '#' or ';'
func (f *Filter) AddRule(rule string) error {
	// Parsing of the "+ glob", "- glob" and "!" forms lives in addRule.
	// This method only binds that helper to this filter's Add and Clear,
	// so there is a single copy of the parsing logic and no unreachable
	// statement after it.
	return addRule(rule, f.Add, f.Clear)
}
// initAddFile creates f.files and f.dirs
@ -380,12 +255,7 @@ func (f *Filter) IncludeRemote(remote string) bool {
_, include := f.files[remote]
return include
}
for _, rule := range f.fileRules.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
return f.fileRules.include(remote)
}
// ListContainsExcludeFile checks if exclude file is present in the list.
@ -428,13 +298,7 @@ func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (b
return include, nil
}
remote += "/"
for _, rule := range f.dirRules.rules {
if rule.Match(remote) {
return rule.Include, nil
}
}
return true, nil
return f.dirRules.include(remote), nil
}
}
@ -490,41 +354,9 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
} else {
modTime = time.Unix(0, 0)
}
return f.Include(o.Remote(), o.Size(), modTime)
}
// forEachLine calls fn on every line in the file pointed to by path
//
// If path is "-" the lines are read from standard input instead.
//
// It ignores empty lines and lines starting with '#' or ';' if raw is
// false, and in that case each line has leading and trailing white
// space removed before it is passed to fn. If raw is true every line
// is passed to fn unmodified.
//
// Reading stops at the first error returned by fn and that error is
// returned. Otherwise any error from opening or scanning the input is
// returned.
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
	var scanner *bufio.Scanner
	if path == "-" {
		scanner = bufio.NewScanner(os.Stdin)
	} else {
		// The open error gets its own name on purpose: declaring err
		// with := in this block would shadow the named result, and the
		// deferred fs.CheckClose would then receive a pointer to that
		// local instead of to the value the function returns.
		in, openErr := os.Open(path)
		if openErr != nil {
			return openErr
		}
		scanner = bufio.NewScanner(in)
		// NOTE(review): fs.CheckClose is expected to close in and
		// record a close failure through the pointer - confirm against
		// its definition.
		defer fs.CheckClose(in, &err)
	}
	for scanner.Scan() {
		line := scanner.Text()
		if !raw {
			line = strings.TrimSpace(line)
			if len(line) == 0 || line[0] == '#' || line[0] == ';' {
				continue
			}
		}
		if fnErr := fn(line); fnErr != nil {
			return fnErr
		}
	}
	return scanner.Err()
}
// DumpFilters dumps the filters in textual form, 1 per line
func (f *Filter) DumpFilters() string {
rules := []string{}

255
fs/filter/rules.go Normal file
View file

@ -0,0 +1,255 @@
package filter
import (
"bufio"
"fmt"
"os"
"regexp"
"strings"
"github.com/rclone/rclone/fs"
)
// RulesOpt is configuration for a rule set
//
// The fields are consumed by parseRules.
type RulesOpt struct {
// FilterRule holds rules in "+ glob", "- glob" or "!" form.
FilterRule []string
// FilterFrom holds paths of files to read such rules from, one per
// line.
FilterFrom []string
// ExcludeRule holds globs to add as exclude rules.
ExcludeRule []string
// ExcludeFrom holds paths of files to read exclude globs from, one
// per line.
ExcludeFrom []string
// IncludeRule holds globs to add as include rules.
IncludeRule []string
// IncludeFrom holds paths of files to read include globs from, one
// per line.
IncludeFrom []string
}
// rule is one filter rule: a compiled regular expression together with
// the verdict to report for paths that it matches.
type rule struct {
// Include is the verdict for a matching path: true means include
// it, false means exclude it.
Include bool
// Regexp is the compiled pattern that Match tests paths against.
Regexp *regexp.Regexp
}
// Match reports whether the rule's regular expression matches path.
func (r *rule) Match(path string) bool {
	re := r.Regexp
	return re.MatchString(path)
}
// String renders the rule as a sign, a space and the regular
// expression: the sign is "+" for an include rule and "-" for an
// exclude rule.
func (r *rule) String() string {
	var sign string
	switch r.Include {
	case true:
		sign = "+"
	default:
		sign = "-"
	}
	pattern := r.Regexp.String()
	return fmt.Sprintf("%s %s", sign, pattern)
}
// rules is an ordered list of rules plus a set used to reject
// duplicates.
type rules struct {
// rules holds the rules in the order they were added; include and
// includeMany try them in this order.
rules []rule
// existing is keyed by the String() form of every rule already
// added so that add can skip duplicates. It is created lazily by
// add and reset to nil by clear.
existing map[string]struct{}
}
// addFn is the signature of a callback that adds one rule: Include
// selects an include (true) or exclude (false) rule and glob is the
// pattern it applies to.
type addFn func(Include bool, glob string) error
// add appends the rule made from Include and re unless an identical
// rule has already been added.
//
// Rules are identified by their String() form, so two rules count as
// the same when both the include flag and the regexp text agree.
func (rs *rules) add(Include bool, re *regexp.Regexp) {
	candidate := rule{Include: Include, Regexp: re}
	key := candidate.String()
	// Looking up a key in a nil map is safe and simply reports "absent".
	if _, seen := rs.existing[key]; seen {
		return // rule already exists
	}
	if rs.existing == nil {
		rs.existing = make(map[string]struct{})
	}
	rs.existing[key] = struct{}{}
	rs.rules = append(rs.rules, candidate)
}
// Add converts glob to a regular expression and stores it as an
// include rule when Include is true or an exclude rule when it is
// false.
//
// If GlobToRegexp reports an error it is returned and no rule is
// added. The glob is always converted with the second argument to
// GlobToRegexp set to false.
func (rs *rules) Add(Include bool, glob string) error {
	re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */)
	if err == nil {
		rs.add(Include, re)
	}
	return err
}
// clearFn is the signature of a callback that discards all the rules
// added so far; addRule calls it for the "!" rule.
type clearFn func()
// clear drops every rule together with the duplicate-tracking set,
// leaving rs empty.
func (rs *rules) clear() {
	rs.existing = nil
	rs.rules = nil
}
// len reports how many rules are currently held.
func (rs *rules) len() int {
	count := len(rs.rules)
	return count
}
// include reports whether remote passes the filter rules.
//
// The rules are tried in order and the first one that matches decides
// the result. A remote that no rule matches is included.
func (rs *rules) include(remote string) bool {
	for i := range rs.rules {
		if r := &rs.rules[i]; r.Match(remote) {
			return r.Include
		}
	}
	return true
}
// includeMany reports whether a collection of remotes passes the
// filter rules.
//
// The rules are the outer loop: the first rule is tried against every
// remote and, if it matches any of them, its verdict is returned. If
// not, the next rule is tried and so on. When no rule matches any
// remote the result is true.
func (rs *rules) includeMany(remotes []string) bool {
	for i := range rs.rules {
		r := &rs.rules[i]
		for _, candidate := range remotes {
			if r.Match(candidate) {
				return r.Include
			}
		}
	}
	return true
}
// forEachLine calls fn on every line in the file pointed to by path
//
// If path is "-" the lines are read from standard input instead.
//
// It ignores empty lines and lines starting with '#' or ';' if raw is
// false, and in that case each line has leading and trailing white
// space removed before it is passed to fn. If raw is true every line
// is passed to fn unmodified.
//
// Reading stops at the first error returned by fn and that error is
// returned. Otherwise any error from opening or scanning the input is
// returned.
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
	var scanner *bufio.Scanner
	if path == "-" {
		scanner = bufio.NewScanner(os.Stdin)
	} else {
		// The open error gets its own name on purpose: declaring err
		// with := in this block would shadow the named result, and the
		// deferred fs.CheckClose would then receive a pointer to that
		// local instead of to the value the function returns.
		in, openErr := os.Open(path)
		if openErr != nil {
			return openErr
		}
		scanner = bufio.NewScanner(in)
		// NOTE(review): fs.CheckClose is expected to close in and
		// record a close failure through the pointer - confirm against
		// its definition.
		defer fs.CheckClose(in, &err)
	}
	for scanner.Scan() {
		line := scanner.Text()
		if !raw {
			line = strings.TrimSpace(line)
			if len(line) == 0 || line[0] == '#' || line[0] == ';' {
				continue
			}
		}
		if fnErr := fn(line); fnErr != nil {
			return fnErr
		}
	}
	return scanner.Err()
}
// addRule parses a single filter rule and dispatches it to add or
// clear.
//
// The accepted forms are
//
//	+ glob
//	- glob
//	!
//
// "+ glob" calls add(true, glob), "- glob" calls add(false, glob) and
// "!" calls clear to reset the rule list. The sign must be followed by
// exactly one space; everything after it is passed on as the glob.
// Anything else is reported as a malformed rule.
//
// Comment lines are not recognised here: a rule starting with '#' or
// ';' is malformed. forEachLine drops such lines before they get this
// far when rules are read from a file.
func addRule(rule string, add addFn, clear clearFn) error {
	if rule == "!" {
		clear()
		return nil
	}
	if strings.HasPrefix(rule, "- ") {
		return add(false, rule[2:])
	}
	if strings.HasPrefix(rule, "+ ") {
		return add(true, rule[2:])
	}
	return fmt.Errorf("malformed rule %q", rule)
}
// AddRule adds a filter rule to rs with include/exclude indicated by
// the prefix
//
// The accepted forms are
//
//	+ glob
//	- glob
//	!
//
// '+' includes the glob, '-' excludes it and '!' resets the rule list.
// The parsing itself is done by addRule; this method supplies rs.Add
// and rs.clear as its callbacks.
func (rs *rules) AddRule(rule string) error {
	add, reset := rs.Add, rs.clear
	return addRule(rule, add, reset)
}
// parseRules feeds every rule described by opt to the add and clear
// callbacks.
//
// The sources are processed in this fixed order:
//
//  1. opt.IncludeRule, each added with add(true, ...)
//  2. every line of the files in opt.IncludeFrom, likewise
//  3. opt.ExcludeRule, each added with add(false, ...)
//  4. every line of the files in opt.ExcludeFrom, likewise
//  5. opt.FilterRule, each parsed by addRule
//  6. every line of the files in opt.FilterFrom, parsed by addRule
//
// If any include source contributed, a final add(false, "/**") is
// issued after everything else. If both include and exclude sources
// were present a message recommending --filter is logged.
//
// The first error from add, addRule or forEachLine stops processing
// and is returned.
func parseRules(opt *RulesOpt, add addFn, clear clearFn) (err error) {
	haveInclude, haveExclude := false, false

	for _, glob := range opt.IncludeRule {
		if err = add(true, glob); err != nil {
			return err
		}
		haveInclude = true
	}
	for _, file := range opt.IncludeFrom {
		includeLine := func(glob string) error { return add(true, glob) }
		if err = forEachLine(file, false, includeLine); err != nil {
			return err
		}
		haveInclude = true
	}

	for _, glob := range opt.ExcludeRule {
		if err = add(false, glob); err != nil {
			return err
		}
		haveExclude = true
	}
	for _, file := range opt.ExcludeFrom {
		excludeLine := func(glob string) error { return add(false, glob) }
		if err = forEachLine(file, false, excludeLine); err != nil {
			return err
		}
		haveExclude = true
	}

	if haveInclude && haveExclude {
		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
	}

	for _, line := range opt.FilterRule {
		if err = addRule(line, add, clear); err != nil {
			return err
		}
	}
	for _, file := range opt.FilterFrom {
		filterLine := func(line string) error { return addRule(line, add, clear) }
		if err = forEachLine(file, false, filterLine); err != nil {
			return err
		}
	}

	if !haveInclude {
		return nil
	}
	// Include rules were given, so finish with the implicit exclude.
	return add(false, "/**")
}