filter: Added --files-from-raw flag

--files-from parses input files by ignoring comments starting with # and ;
and stripping whitespace from start and end of strings.

This adds the --files-from-raw flag, which reads every line of the file
verbatim: comment characters are not treated specially and whitespace is
not stripped. --files-from keeps its existing behaviour, so backwards
compatibility is maintained.

Fixes #3762
This commit is contained in:
Ankur Gupta 2020-04-03 15:06:24 +05:30 committed by GitHub
parent 3911a49256
commit 08c2cb784f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 23 deletions

View file

@ -132,13 +132,13 @@ Eg
"this file contains a comma, in the file name.txt",6 "this file contains a comma, in the file name.txt",6
Note that the --absolute parameter is useful for making lists of files Note that the --absolute parameter is useful for making lists of files
to pass to an rclone copy with the --files-from flag. to pass to an rclone copy with the --files-from-raw flag.
For example to find all the files modified within one day and copy For example to find all the files modified within one day and copy
those only (without traversing the whole directory structure): those only (without traversing the whole directory structure):
rclone lsf --absolute --files-only --max-age 1d /path/to/local > new_files rclone lsf --absolute --files-only --max-age 1d /path/to/local > new_files
rclone copy --files-from new_files /path/to/local remote:path rclone copy --files-from-raw new_files /path/to/local remote:path
` + lshelp.Help, ` + lshelp.Help,
Run: func(command *cobra.Command, args []string) { Run: func(command *cobra.Command, args []string) {

View file

@ -1542,6 +1542,7 @@ For the filtering options
* `--include` * `--include`
* `--include-from` * `--include-from`
* `--files-from` * `--files-from`
* `--files-from-raw`
* `--min-size` * `--min-size`
* `--max-size` * `--max-size`
* `--min-age` * `--min-age`

View file

@ -17,8 +17,9 @@ Each path as it passes through rclone is matched against the include
and exclude rules like `--include`, `--exclude`, `--include-from`, and exclude rules like `--include`, `--exclude`, `--include-from`,
`--exclude-from`, `--filter`, or `--filter-from`. The simplest way to `--exclude-from`, `--filter`, or `--filter-from`. The simplest way to
try them out is using the `ls` command, or `--dry-run` together with try them out is using the `ls` command, or `--dry-run` together with
`-v`. `--filter-from`, `--exclude-from`, `--include-from`, `--files-from` `-v`. `--filter-from`, `--exclude-from`, `--include-from`, `--files-from`,
understand `-` as a file name to mean read from standard input. `--files-from-raw` understand `-` as a file name to mean read from standard
input.
## Patterns ## ## Patterns ##
@ -179,6 +180,7 @@ type.
* `--exclude-from` * `--exclude-from`
* `--filter` * `--filter`
* `--filter-from` * `--filter-from`
* `--files-from-raw`
**Important** You should not use `--include*` together with `--exclude*`. **Important** You should not use `--include*` together with `--exclude*`.
It may produce different results than you expected. In that case try to use: `--filter*`. It may produce different results than you expected. In that case try to use: `--filter*`.
@ -306,9 +308,9 @@ This reads a list of file names from the file passed in and **only**
these files are transferred. The **filtering rules are ignored** these files are transferred. The **filtering rules are ignored**
completely if you use this option. completely if you use this option.
`--files-from` expects a list of files as its input. [rclone lsf](/commands/rclone_lsf/) `--files-from` expects a list of files as its input. Leading / trailing
has a compatible format that can be used to export file lists from whitespace is stripped from the input lines and lines starting with `#`
remotes. and `;` are ignored.
Rclone will traverse the file system if you use `--files-from`, Rclone will traverse the file system if you use `--files-from`,
effectively using the files in `--files-from` as a set of filters. effectively using the files in `--files-from` as a set of filters.
@ -324,7 +326,8 @@ are read in the order that they are placed on the command line.
Paths within the `--files-from` file will be interpreted as starting Paths within the `--files-from` file will be interpreted as starting
with the root specified in the command. Leading `/` characters are with the root specified in the command. Leading `/` characters are
ignored. ignored. See [--files-from-raw](#files-from-raw-read-list-of-source-file-names-without-any-processing)
if you need the input to be processed in a raw manner.
For example, suppose you had `files-from.txt` with this content: For example, suppose you had `files-from.txt` with this content:
@ -384,6 +387,13 @@ In this case there will be an extra `home` directory on the remote:
/home/user1/dir/file → remote:backup/home/user1/dir/file /home/user1/dir/file → remote:backup/home/user1/dir/file
/home/user2/stuff → remote:backup/home/user2/stuff /home/user2/stuff → remote:backup/home/user2/stuff
### `--files-from-raw` - Read list of source-file names without any processing ###
This option is the same as `--files-from` with the only difference being that the input
is read in a raw manner. This means that lines with leading/trailing whitespace and
lines starting with `;` or `#` are read without any processing. [rclone lsf](/commands/rclone_lsf/)
has a compatible format that can be used to export file lists from remotes, which
can then be used as an input to `--files-from-raw`.
### `--min-size` - Don't transfer any file smaller than this ### ### `--min-size` - Don't transfer any file smaller than this ###
This option controls the minimum size file which will be transferred. This option controls the minimum size file which will be transferred.

View file

@ -88,6 +88,7 @@ type Opt struct {
IncludeRule []string IncludeRule []string
IncludeFrom []string IncludeFrom []string
FilesFrom []string FilesFrom []string
FilesFromRaw []string
MinAge fs.Duration MinAge fs.Duration
MaxAge fs.Duration MaxAge fs.Duration
MinSize fs.SizeSuffix MinSize fs.SizeSuffix
@ -150,7 +151,7 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
addImplicitExclude = true addImplicitExclude = true
} }
for _, rule := range f.Opt.IncludeFrom { for _, rule := range f.Opt.IncludeFrom {
err := forEachLine(rule, func(line string) error { err := forEachLine(rule, false, func(line string) error {
return f.Add(true, line) return f.Add(true, line)
}) })
if err != nil { if err != nil {
@ -166,7 +167,7 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
foundExcludeRule = true foundExcludeRule = true
} }
for _, rule := range f.Opt.ExcludeFrom { for _, rule := range f.Opt.ExcludeFrom {
err := forEachLine(rule, func(line string) error { err := forEachLine(rule, false, func(line string) error {
return f.Add(false, line) return f.Add(false, line)
}) })
if err != nil { if err != nil {
@ -186,25 +187,42 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
} }
} }
for _, rule := range f.Opt.FilterFrom { for _, rule := range f.Opt.FilterFrom {
err := forEachLine(rule, f.AddRule) err := forEachLine(rule, false, f.AddRule)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
inActive := f.InActive() inActive := f.InActive()
for _, rule := range f.Opt.FilesFrom { for _, rule := range f.Opt.FilesFrom {
if !inActive { if !inActive {
return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone") return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone or with --files-from-raw")
} }
f.initAddFile() // init to show --files-from set even if no files within f.initAddFile() // init to show --files-from set even if no files within
err := forEachLine(rule, func(line string) error { err := forEachLine(rule, false, func(line string) error {
return f.AddFile(line) return f.AddFile(line)
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
for _, rule := range f.Opt.FilesFromRaw {
// --files-from-raw can be used with --files-from, hence we do
// not need to get the value of f.InActive again
if !inActive {
return nil, fmt.Errorf("The usage of --files-from-raw overrides all other filters, it should be used alone or with --files-from")
}
f.initAddFile() // init to show --files-from set even if no files within
err := forEachLine(rule, true, func(line string) error {
return f.AddFile(line)
})
if err != nil {
return nil, err
}
}
if addImplicitExclude { if addImplicitExclude {
err = f.Add(false, "/**") err = f.Add(false, "/**")
if err != nil { if err != nil {
@ -463,8 +481,8 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
// forEachLine calls fn on every line in the file pointed to by path // forEachLine calls fn on every line in the file pointed to by path
// //
// It ignores empty lines and lines starting with '#' or ';' // It ignores empty lines and lines starting with '#' or ';' if raw is false
func forEachLine(path string, fn func(string) error) (err error) { func forEachLine(path string, raw bool, fn func(string) error) (err error) {
var scanner *bufio.Scanner var scanner *bufio.Scanner
if path == "-" { if path == "-" {
scanner = bufio.NewScanner(os.Stdin) scanner = bufio.NewScanner(os.Stdin)
@ -478,9 +496,11 @@ func forEachLine(path string, fn func(string) error) (err error) {
} }
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
line = strings.TrimSpace(line) if !raw {
if len(line) == 0 || line[0] == '#' || line[0] == ';' { line = strings.TrimSpace(line)
continue if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
} }
err := fn(line) err := fn(line)
if err != nil { if err != nil {

View file

@ -65,6 +65,29 @@ func TestNewFilterForbiddenMixOfFilesFromAndFilterRule(t *testing.T) {
require.Contains(t, err.Error(), "The usage of --files-from overrides all other filters") require.Contains(t, err.Error(), "The usage of --files-from overrides all other filters")
} }
func TestNewFilterForbiddenMixOfFilesFromRawAndFilterRule(t *testing.T) {
Opt := DefaultOpt
// Set up the input
Opt.FilterRule = []string{"- filter1", "- filter1b"}
Opt.FilesFromRaw = []string{testFile(t, "#comment\nfiles1\nfiles2\n")}
rm := func(p string) {
err := os.Remove(p)
if err != nil {
t.Logf("error removing %q: %v", p, err)
}
}
// Reset the input
defer func() {
rm(Opt.FilesFromRaw[0])
}()
_, err := NewFilter(&Opt)
require.Error(t, err)
require.Contains(t, err.Error(), "The usage of --files-from-raw overrides all other filters")
}
func TestNewFilterWithFilesFromAlone(t *testing.T) { func TestNewFilterWithFilesFromAlone(t *testing.T) {
Opt := DefaultOpt Opt := DefaultOpt
@ -93,6 +116,34 @@ func TestNewFilterWithFilesFromAlone(t *testing.T) {
} }
} }
func TestNewFilterWithFilesFromRaw(t *testing.T) {
Opt := DefaultOpt
// Set up the input
Opt.FilesFromRaw = []string{testFile(t, "#comment\nfiles1\nfiles2\n")}
rm := func(p string) {
err := os.Remove(p)
if err != nil {
t.Logf("error removing %q: %v", p, err)
}
}
// Reset the input
defer func() {
rm(Opt.FilesFromRaw[0])
}()
f, err := NewFilter(&Opt)
require.NoError(t, err)
assert.Len(t, f.files, 3)
for _, name := range []string{"#comment", "files1", "files2"} {
_, ok := f.files[name]
if !ok {
t.Errorf("Didn't find file %q in f.files", name)
}
}
}
func TestNewFilterFullExceptFilesFromOpt(t *testing.T) { func TestNewFilterFullExceptFilesFromOpt(t *testing.T) {
Opt := DefaultOpt Opt := DefaultOpt
@ -517,7 +568,7 @@ func TestFilterAddDirRuleOrFileRule(t *testing.T) {
} }
} }
func testFilterForEachLine(t *testing.T, useStdin bool) { func testFilterForEachLine(t *testing.T, useStdin, raw bool) {
file := testFile(t, `; comment file := testFile(t, `; comment
one one
# another comment # another comment
@ -546,20 +597,33 @@ five
}() }()
fileName = "-" fileName = "-"
} }
err := forEachLine(fileName, func(s string) error { err := forEachLine(fileName, raw, func(s string) error {
lines = append(lines, s) lines = append(lines, s)
return nil return nil
}) })
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, "one,two,three,four,five,six", strings.Join(lines, ",")) if raw {
assert.Equal(t, "; comment,one,# another comment,,,two, # indented comment,three ,four ,five, six ",
strings.Join(lines, ","))
} else {
assert.Equal(t, "one,two,three,four,five,six", strings.Join(lines, ","))
}
} }
func TestFilterForEachLine(t *testing.T) { func TestFilterForEachLine(t *testing.T) {
testFilterForEachLine(t, false) testFilterForEachLine(t, false, false)
} }
func TestFilterForEachLineStdin(t *testing.T) { func TestFilterForEachLineStdin(t *testing.T) {
testFilterForEachLine(t, true) testFilterForEachLine(t, true, false)
}
// TestFilterForEachLineWithRaw runs the shared testFilterForEachLine helper
// with useStdin=false and raw=true.
func TestFilterForEachLineWithRaw(t *testing.T) {
testFilterForEachLine(t, false, true)
}
func TestFilterForEachLineStdinWithRaw(t *testing.T) {
testFilterForEachLine(t, true, true)
} }
func TestFilterMatchesFromDocs(t *testing.T) { func TestFilterMatchesFromDocs(t *testing.T) {

View file

@ -31,6 +31,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern") flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y") flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")
flags.FVarP(flagSet, &Opt.MaxAge, "max-age", "", "Only transfer files younger than this in s or suffix ms|s|m|h|d|w|M|y") flags.FVarP(flagSet, &Opt.MaxAge, "max-age", "", "Only transfer files younger than this in s or suffix ms|s|m|h|d|w|M|y")
flags.FVarP(flagSet, &Opt.MinSize, "min-size", "", "Only transfer files bigger than this in k or suffix b|k|M|G") flags.FVarP(flagSet, &Opt.MinSize, "min-size", "", "Only transfer files bigger than this in k or suffix b|k|M|G")