From 08c2cb784fcbe75254708e6217b2f8fb4285f9be Mon Sep 17 00:00:00 2001 From: Ankur Gupta <7876747+ankur0493@users.noreply.github.com> Date: Fri, 3 Apr 2020 15:06:24 +0530 Subject: [PATCH] filter: Added --files-from-raw flag --files-from parses input files by ignoring comments starting with # and ; and stripping whitespace from start and end of strings. The --files-from-raw flag was added that reads every line from the file ignoring comment characters and not stripping whitespace while maintaining backwards compatibility. Fixes #3762 --- cmd/lsf/lsf.go | 4 +- docs/content/docs.md | 1 + docs/content/filtering.md | 22 ++++++--- fs/filter/filter.go | 40 +++++++++++---- fs/filter/filter_test.go | 74 ++++++++++++++++++++++++++-- fs/filter/filterflags/filterflags.go | 1 + 6 files changed, 119 insertions(+), 23 deletions(-) diff --git a/cmd/lsf/lsf.go b/cmd/lsf/lsf.go index f82972e6b..9a019776d 100644 --- a/cmd/lsf/lsf.go +++ b/cmd/lsf/lsf.go @@ -132,13 +132,13 @@ Eg "this file contains a comma, in the file name.txt",6 Note that the --absolute parameter is useful for making lists of files -to pass to an rclone copy with the --files-from flag. +to pass to an rclone copy with the --files-from-raw flag. 
For example to find all the files modified within one day and copy those only (without traversing the whole directory structure): rclone lsf --absolute --files-only --max-age 1d /path/to/local > new_files - rclone copy --files-from new_files /path/to/local remote:path + rclone copy --files-from-raw new_files /path/to/local remote:path ` + lshelp.Help, Run: func(command *cobra.Command, args []string) { diff --git a/docs/content/docs.md b/docs/content/docs.md index 274ba9a0b..b0027aae6 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -1542,6 +1542,7 @@ For the filtering options * `--include` * `--include-from` * `--files-from` + * `--files-from-raw` * `--min-size` * `--max-size` * `--min-age` diff --git a/docs/content/filtering.md b/docs/content/filtering.md index 96bcbfa89..b69e494fe 100644 --- a/docs/content/filtering.md +++ b/docs/content/filtering.md @@ -17,8 +17,9 @@ Each path as it passes through rclone is matched against the include and exclude rules like `--include`, `--exclude`, `--include-from`, `--exclude-from`, `--filter`, or `--filter-from`. The simplest way to try them out is using the `ls` command, or `--dry-run` together with -`-v`. `--filter-from`, `--exclude-from`, `--include-from`, `--files-from` -understand `-` as a file name to mean read from standard input. +`-v`. `--filter-from`, `--exclude-from`, `--include-from`, `--files-from`, +`--files-from-raw` understand `-` as a file name to mean read from standard +input. ## Patterns ## @@ -179,6 +180,7 @@ type. * `--exclude-from` * `--filter` * `--filter-from` + * `--files-from-raw` **Important** You should not use `--include*` together with `--exclude*`. It may produce different results than you expected. In that case try to use: `--filter*`. @@ -306,9 +308,9 @@ This reads a list of file names from the file passed in and **only** these files are transferred. The **filtering rules are ignored** completely if you use this option. -`--files-from` expects a list of files as it's input. 
[rclone lsf](/commands/rclone_lsf/) -has a compatible format that can be used to export file lists from -remotes. +`--files-from` expects a list of files as its input. Leading / trailing +whitespace is stripped from the input lines and lines starting with `#` +and `;` are ignored. Rclone will traverse the file system if you use `--files-from`, effectively using the files in `--files-from` as a set of filters. @@ -324,7 +326,8 @@ are read in the order that they are placed on the command line. Paths within the `--files-from` file will be interpreted as starting with the root specified in the command. Leading `/` characters are -ignored. +ignored. See [--files-from-raw](#files-from-raw-read-list-of-source-file-names-without-any-processing) +if you need the input to be processed in a raw manner. For example, suppose you had `files-from.txt` with this content: @@ -384,6 +387,13 @@ In this case there will be an extra `home` directory on the remote: /home/user1/dir/file → remote:backup/home/user1/dir/file /home/user2/stuff → remote:backup/home/user2/stuff +### `--files-from-raw` - Read list of source-file names without any processing ### +This option is the same as `--files-from` with the only difference being that the input +is read in a raw manner. This means that lines with leading/trailing whitespace and +lines starting with `;` or `#` are read without any processing. [rclone lsf](/commands/rclone_lsf/) +has a compatible format that can be used to export file lists from remotes, which +can then be used as an input to `--files-from-raw`. + ### `--min-size` - Don't transfer any file smaller than this ### This option controls the minimum size file which will be transferred. 
diff --git a/fs/filter/filter.go b/fs/filter/filter.go index aa52ab010..c083f2349 100644 --- a/fs/filter/filter.go +++ b/fs/filter/filter.go @@ -88,6 +88,7 @@ type Opt struct { IncludeRule []string IncludeFrom []string FilesFrom []string + FilesFromRaw []string MinAge fs.Duration MaxAge fs.Duration MinSize fs.SizeSuffix @@ -150,7 +151,7 @@ func NewFilter(opt *Opt) (f *Filter, err error) { addImplicitExclude = true } for _, rule := range f.Opt.IncludeFrom { - err := forEachLine(rule, func(line string) error { + err := forEachLine(rule, false, func(line string) error { return f.Add(true, line) }) if err != nil { @@ -166,7 +167,7 @@ func NewFilter(opt *Opt) (f *Filter, err error) { foundExcludeRule = true } for _, rule := range f.Opt.ExcludeFrom { - err := forEachLine(rule, func(line string) error { + err := forEachLine(rule, false, func(line string) error { return f.Add(false, line) }) if err != nil { @@ -186,25 +187,42 @@ func NewFilter(opt *Opt) (f *Filter, err error) { } } for _, rule := range f.Opt.FilterFrom { - err := forEachLine(rule, f.AddRule) + err := forEachLine(rule, false, f.AddRule) if err != nil { return nil, err } } inActive := f.InActive() + for _, rule := range f.Opt.FilesFrom { if !inActive { - return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone") + return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone or with --files-from-raw") } f.initAddFile() // init to show --files-from set even if no files within - err := forEachLine(rule, func(line string) error { + err := forEachLine(rule, false, func(line string) error { return f.AddFile(line) }) if err != nil { return nil, err } } + + for _, rule := range f.Opt.FilesFromRaw { + // --files-from-raw can be used with --files-from, hence we do + // not need to get the value of f.InActive again + if !inActive { + return nil, fmt.Errorf("The usage of --files-from-raw overrides all other filters, it should be used 
alone or with --files-from") + } + f.initAddFile() // init to show --files-from set even if no files within + err := forEachLine(rule, true, func(line string) error { + return f.AddFile(line) + }) + if err != nil { + return nil, err + } + } + if addImplicitExclude { err = f.Add(false, "/**") if err != nil { @@ -463,8 +481,8 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { // forEachLine calls fn on every line in the file pointed to by path // -// It ignores empty lines and lines starting with '#' or ';' -func forEachLine(path string, fn func(string) error) (err error) { +// It ignores empty lines and lines starting with '#' or ';' if raw is false +func forEachLine(path string, raw bool, fn func(string) error) (err error) { var scanner *bufio.Scanner if path == "-" { scanner = bufio.NewScanner(os.Stdin) @@ -478,9 +496,11 @@ func forEachLine(path string, fn func(string) error) (err error) { } for scanner.Scan() { line := scanner.Text() - line = strings.TrimSpace(line) - if len(line) == 0 || line[0] == '#' || line[0] == ';' { - continue + if !raw { + line = strings.TrimSpace(line) + if len(line) == 0 || line[0] == '#' || line[0] == ';' { + continue + } } err := fn(line) if err != nil { diff --git a/fs/filter/filter_test.go b/fs/filter/filter_test.go index 0d1b4e90f..399d4616e 100644 --- a/fs/filter/filter_test.go +++ b/fs/filter/filter_test.go @@ -65,6 +65,29 @@ func TestNewFilterForbiddenMixOfFilesFromAndFilterRule(t *testing.T) { require.Contains(t, err.Error(), "The usage of --files-from overrides all other filters") } +func TestNewFilterForbiddenMixOfFilesFromRawAndFilterRule(t *testing.T) { + Opt := DefaultOpt + + // Set up the input + Opt.FilterRule = []string{"- filter1", "- filter1b"} + Opt.FilesFromRaw = []string{testFile(t, "#comment\nfiles1\nfiles2\n")} + + rm := func(p string) { + err := os.Remove(p) + if err != nil { + t.Logf("error removing %q: %v", p, err) + } + } + // Reset the input + defer func() { + rm(Opt.FilesFromRaw[0]) 
+ }() + + _, err := NewFilter(&Opt) + require.Error(t, err) + require.Contains(t, err.Error(), "The usage of --files-from-raw overrides all other filters") +} + func TestNewFilterWithFilesFromAlone(t *testing.T) { Opt := DefaultOpt @@ -93,6 +116,34 @@ func TestNewFilterWithFilesFromAlone(t *testing.T) { } } +func TestNewFilterWithFilesFromRaw(t *testing.T) { + Opt := DefaultOpt + + // Set up the input + Opt.FilesFromRaw = []string{testFile(t, "#comment\nfiles1\nfiles2\n")} + + rm := func(p string) { + err := os.Remove(p) + if err != nil { + t.Logf("error removing %q: %v", p, err) + } + } + // Reset the input + defer func() { + rm(Opt.FilesFromRaw[0]) + }() + + f, err := NewFilter(&Opt) + require.NoError(t, err) + assert.Len(t, f.files, 3) + for _, name := range []string{"#comment", "files1", "files2"} { + _, ok := f.files[name] + if !ok { + t.Errorf("Didn't find file %q in f.files", name) + } + } +} + func TestNewFilterFullExceptFilesFromOpt(t *testing.T) { Opt := DefaultOpt @@ -517,7 +568,7 @@ func TestFilterAddDirRuleOrFileRule(t *testing.T) { } } -func testFilterForEachLine(t *testing.T, useStdin bool) { +func testFilterForEachLine(t *testing.T, useStdin, raw bool) { file := testFile(t, `; comment one # another comment @@ -546,20 +597,33 @@ five }() fileName = "-" } - err := forEachLine(fileName, func(s string) error { + err := forEachLine(fileName, raw, func(s string) error { lines = append(lines, s) return nil }) require.NoError(t, err) - assert.Equal(t, "one,two,three,four,five,six", strings.Join(lines, ",")) + if raw { + assert.Equal(t, "; comment,one,# another comment,,,two, # indented comment,three ,four ,five, six ", + strings.Join(lines, ",")) + } else { + assert.Equal(t, "one,two,three,four,five,six", strings.Join(lines, ",")) + } } func TestFilterForEachLine(t *testing.T) { - testFilterForEachLine(t, false) + testFilterForEachLine(t, false, false) } func TestFilterForEachLineStdin(t *testing.T) { - testFilterForEachLine(t, true) + 
testFilterForEachLine(t, true, false) +} + +func TestFilterForEachLineWithRaw(t *testing.T) { + testFilterForEachLine(t, false, true) +} + +func TestFilterForEachLineStdinWithRaw(t *testing.T) { + testFilterForEachLine(t, true, true) } func TestFilterMatchesFromDocs(t *testing.T) { diff --git a/fs/filter/filterflags/filterflags.go b/fs/filter/filterflags/filterflags.go index 603218b04..e2e48ca27 100644 --- a/fs/filter/filterflags/filterflags.go +++ b/fs/filter/filterflags/filterflags.go @@ -31,6 +31,7 @@ func AddFlags(flagSet *pflag.FlagSet) { flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern") flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)") + flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)") flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y") flags.FVarP(flagSet, &Opt.MaxAge, "max-age", "", "Only transfer files younger than this in s or suffix ms|s|m|h|d|w|M|y") flags.FVarP(flagSet, &Opt.MinSize, "min-size", "", "Only transfer files bigger than this in k or suffix b|k|M|G")