diff --git a/docs/content/docs.md b/docs/content/docs.md index eab4b5b29..535ed2486 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -2355,6 +2355,12 @@ For the filtering options * `--min-age` * `--max-age` * `--dump filters` + * `--metadata-include` + * `--metadata-include-from` + * `--metadata-exclude` + * `--metadata-exclude-from` + * `--metadata-filter` + * `--metadata-filter-from` See the [filtering section](/filtering/). diff --git a/docs/content/filtering.md b/docs/content/filtering.md index e315dfffc..bfff3eab9 100644 --- a/docs/content/filtering.md +++ b/docs/content/filtering.md @@ -32,7 +32,7 @@ you expect. Instead use a `--filter...` flag. ## Patterns for matching path/file names -### Pattern syntax +### Pattern syntax {#patterns} Here is a formal definition of the pattern syntax, [examples](#examples) are below. @@ -194,7 +194,7 @@ them into regular expressions. | Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` | | | | `/file.jpg` | `/dir/file.jpg` | -## How filter rules are applied to files +## How filter rules are applied to files {#how-filter-rules-work} Rclone path/file name filters are made up of one or more of the following flags: @@ -757,6 +757,43 @@ E.g. for the following directory structure: The command `rclone ls --exclude-if-present .ignore dir1` does not list `dir3`, `file3` or `.ignore`. +## Metadata filters {#metadata} + +The metadata filters work in a very similar way to the normal file +name filters, except they match [metadata](/docs/#metadata) on the +object. + +The metadata should be specified as `key=value` patterns. These may be +wildcarded using the normal [filter patterns](#patterns) or [regular +expressions](#regexp). + +For example, if you wished to list only local files with a mode of +`100664` you could do that with: + + rclone lsf -M --files-only --metadata-include "mode=100664" . 
+ +Or if you wished to show files with an `atime`, `mtime` or `btime` on a given date: + + rclone lsf -M --files-only --metadata-include "[abm]time=2022-12-16*" . + +Like file filtering, metadata filtering applies only to files, not to +directories. + +The filters can be applied using these flags. + +- `--metadata-include` - Include metadatas matching pattern +- `--metadata-include-from` - Read metadata include patterns from file (use - to read from stdin) +- `--metadata-exclude` - Exclude metadatas matching pattern +- `--metadata-exclude-from` - Read metadata exclude patterns from file (use - to read from stdin) +- `--metadata-filter` - Add a metadata filtering rule +- `--metadata-filter-from` - Read metadata filtering patterns from a file (use - to read from stdin) + +Each flag can be repeated. See the section on [how filter rules are +applied](#how-filter-rules-work) for more details - these flags work +in an identical way to the file name filtering flags, but take +metadata patterns instead of file name patterns. + + ## Common pitfalls The most frequent filter support issues on diff --git a/docs/content/internetarchive.md b/docs/content/internetarchive.md index f0a90278c..e02690849 100644 --- a/docs/content/internetarchive.md +++ b/docs/content/internetarchive.md @@ -66,6 +66,22 @@ It can be triggered when you did a server-side copy. Reading metadata will also provide custom (non-standard nor reserved) ones. +## Filtering auto-generated files + +The Internet Archive automatically creates metadata files after +upload. These can cause problems when doing an `rclone sync` as rclone +will try, and fail, to delete them. These metadata files are not +changeable, as they are created by the Internet Archive automatically. + +These auto-created files can be excluded from the sync using [metadata +filtering](/filtering/#metadata). + + rclone sync ... 
--metadata-exclude "source=metadata" --metadata-exclude "format=Metadata" + +This excludes from the sync any files which have the +`source=metadata` or `format=Metadata` flags, which are added to +Internet Archive auto-created files. + ## Configuration Here is an example of making an internetarchive configuration. diff --git a/fs/filter/filter.go b/fs/filter/filter.go index 047eb4bbb..156dbd4a7 100644 --- a/fs/filter/filter.go +++ b/fs/filter/filter.go @@ -26,6 +26,7 @@ type Opt struct { ExcludeFile []string FilesFrom []string FilesFromRaw []string + MetaRules RulesOpt MinAge fs.Duration MaxAge fs.Duration MinSize fs.SizeSuffix @@ -51,6 +52,7 @@ type Filter struct { ModTimeTo time.Time fileRules rules dirRules rules + metaRules rules files FilesMap // files if filesFrom dirs FilesMap // dirs from filesFrom } @@ -85,6 +87,11 @@ func NewFilter(opt *Opt) (f *Filter, err error) { return nil, err } + err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear) + if err != nil { + return nil, err + } + inActive := f.InActive() for _, rule := range f.Opt.FilesFrom { @@ -234,6 +241,7 @@ func (f *Filter) Files() FilesMap { func (f *Filter) Clear() { f.fileRules.clear() f.dirRules.clear() + f.metaRules.clear() } // InActive returns false if any filters are active @@ -245,6 +253,7 @@ func (f *Filter) InActive() bool { f.Opt.MaxSize < 0 && f.fileRules.len() == 0 && f.dirRules.len() == 0 && + f.metaRules.len() == 0 && len(f.Opt.ExcludeFile) == 0) } @@ -322,7 +331,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo // Include returns whether this object should be included into the // sync or not -func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { +func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool { // filesFrom takes precedence if f.files != nil { _, include := f.files[remote] @@ -340,6 +349,20 @@ func (f *Filter) Include(remote string, size int64, modTime 
time.Time) bool { if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { return false } + if f.metaRules.len() > 0 { + metadatas := make([]string, 0, len(metadata)+1) + for key, value := range metadata { + metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value)) + } + if len(metadata) == 0 { + // If there is no metadata, add a null one + // otherwise the default action isn't taken + metadatas = append(metadatas, "\x00=\x00") + } + if !f.metaRules.includeMany(metadatas) { + return false + } + } return f.IncludeRemote(remote) } @@ -354,7 +377,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { } else { modTime = time.Unix(0, 0) } - return f.Include(o.Remote(), o.Size(), modTime) + var metadata fs.Metadata + if f.metaRules.len() > 0 { + var err error + metadata, err = fs.GetMetadata(ctx, o) + if err != nil { + fs.Errorf(o, "Failed to read metadata: %v", err) + metadata = nil + } + + } + return f.Include(o.Remote(), o.Size(), modTime, metadata) } // DumpFilters dumps the filters in textual form, 1 per line @@ -374,6 +407,12 @@ func (f *Filter) DumpFilters() string { for _, dirRule := range f.dirRules.rules { rules = append(rules, dirRule.String()) } + if f.metaRules.len() > 0 { + rules = append(rules, "--- Metadata filter rules ---") + for _, metaRule := range f.metaRules.rules { + rules = append(rules, metaRule.String()) + } + } return strings.Join(rules, "\n") } diff --git a/fs/filter/filter_test.go b/fs/filter/filter_test.go index 6feab8ecc..ca24ecf27 100644 --- a/fs/filter/filter_test.go +++ b/fs/filter/filter_test.go @@ -23,6 +23,7 @@ func TestNewFilterDefault(t *testing.T) { assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize) assert.Len(t, f.fileRules.rules, 0) assert.Len(t, f.dirRules.rules, 0) + assert.Len(t, f.metaRules.rules, 0) assert.Nil(t, f.files) assert.True(t, f.InActive()) } @@ -207,7 +208,7 @@ type includeTest struct { func testInclude(t *testing.T, f *Filter, tests []includeTest) { for _, test := range tests { - got := 
f.Include(test.in, test.size, time.Unix(test.modTime, 0)) + got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil) assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0))) } } @@ -527,6 +528,56 @@ func TestNewFilterMatchesRegexp(t *testing.T) { assert.False(t, f.InActive()) } +type includeTestMetadata struct { + in string + metadata fs.Metadata + want bool +} + +func testIncludeMetadata(t *testing.T, f *Filter, tests []includeTestMetadata) { + for _, test := range tests { + got := f.Include(test.in, 0, time.Time{}, test.metadata) + assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, metadata=%+v", test.in, test.metadata)) + } +} + +func TestNewFilterMetadataInclude(t *testing.T) { + f, err := NewFilter(nil) + require.NoError(t, err) + add := func(s string) { + err := f.metaRules.AddRule(s) + require.NoError(t, err) + } + add(`+ t*=t*`) + add(`- *`) + testIncludeMetadata(t, f, []includeTestMetadata{ + {"nil", nil, false}, + {"empty", fs.Metadata{}, false}, + {"ok1", fs.Metadata{"thing": "thang"}, true}, + {"ok2", fs.Metadata{"thing1": "thang1"}, true}, + {"missing", fs.Metadata{"Thing1": "Thang1"}, false}, + }) + assert.False(t, f.InActive()) +} + +func TestNewFilterMetadataExclude(t *testing.T) { + f, err := NewFilter(nil) + require.NoError(t, err) + add := func(s string) { + err := f.metaRules.AddRule(s) + require.NoError(t, err) + } + add(`- thing=thang`) + add(`+ *`) + testIncludeMetadata(t, f, []includeTestMetadata{ + {"nil", nil, true}, + {"empty", fs.Metadata{}, true}, + {"ok1", fs.Metadata{"thing": "thang"}, false}, + {"missing1", fs.Metadata{"thing1": "thang1"}, true}, + }) + assert.False(t, f.InActive()) +} + func TestFilterAddDirRuleOrFileRule(t *testing.T) { for _, test := range []struct { included bool @@ -713,7 +764,7 @@ func TestFilterMatchesFromDocs(t *testing.T) { require.NoError(t, err) err = f.Add(false, "*") require.NoError(t, err) - included := f.Include(test.file, 
0, time.Unix(0, 0)) + included := f.Include(test.file, 0, time.Unix(0, 0), nil) if included != test.included { t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included) } diff --git a/fs/filter/filterflags/filterflags.go b/fs/filter/filterflags/filterflags.go index 3963e0c04..5bb17b8a3 100644 --- a/fs/filter/filterflags/filterflags.go +++ b/fs/filter/filterflags/filterflags.go @@ -3,6 +3,7 @@ package filterflags import ( "context" + "fmt" "github.com/rclone/rclone/fs/config/flags" "github.com/rclone/rclone/fs/filter" @@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) { return nil } +// AddRuleFlags add a set of rules flags with prefix +func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) { + shortFilter := "" + if prefix == "" { + shortFilter = "f" + } + flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what)) + flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what)) + flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what)) + flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what)) + flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what)) + flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what)) +} + // AddFlags adds the non filing system specific flags to the command func AddFlags(flagSet *pflag.FlagSet) { rc.AddOptionReload("filter", &Opt, Reload) flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from 
sync") - flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule") - flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)") - flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern") - flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)") + AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "") + AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-") flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present") - flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern") - flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)") flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y") diff --git a/fs/operations/operations.go b/fs/operations/operations.go index 351066332..fe4a27eb4 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -1469,7 +1469,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error { dir := toDelete[i] // If a filter matches the directory then that // directory is a candidate for deletion - if !fi.Include(dir+"/", 0, time.Now()) { + if !fi.IncludeRemote(dir + "/") { continue } err = TryRmdir(ctx, f, dir)