Add support to filter files based on their age

This commit is contained in:
Adriano Aurélio Meirelles 2015-12-17 09:52:38 -02:00
parent 4f50b26af0
commit 3cbd57d9ad
3 changed files with 174 additions and 33 deletions

View file

@ -7,7 +7,9 @@ import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/spf13/pflag"
)
@ -23,6 +25,8 @@ var (
includeRule = pflag.StringP("include", "", "", "Include files matching pattern")
includeFrom = pflag.StringP("include-from", "", "", "Read include patterns from file")
filesFrom = pflag.StringP("files-from", "", "", "Read list of source-file names from file")
minAge = pflag.StringP("min-age", "", "", "Don't transfer any file younger than this in s or suffix ms|s|m|h|d|w|M|y")
maxAge = pflag.StringP("max-age", "", "", "Don't transfer any file older than this in s or suffix ms|s|m|h|d|w|M|y")
minSize SizeSuffix
maxSize SizeSuffix
dumpFilters = pflag.BoolP("dump-filters", "", false, "Dump the filters to the output")
@ -62,10 +66,50 @@ type Filter struct {
DeleteExcluded bool
MinSize int64
MaxSize int64
ModTimeFrom time.Time
ModTimeTo time.Time
rules []rule
files filesMap
}
// ageSuffixes maps each accepted age suffix to the duration of one unit.
//
// Order matters: ParseDuration uses the first entry whose suffix matches
// the end of the input, so "ms" has to come before "s", and the empty
// suffix (a bare number, read as seconds) has to stay last because it
// matches every string. Months and years are fixed approximations of 30
// and 365 days.
var ageSuffixes = []struct {
	Suffix     string
	Multiplier time.Duration
}{
	{Suffix: "ms", Multiplier: time.Millisecond},
	{Suffix: "s", Multiplier: time.Second},
	{Suffix: "m", Multiplier: time.Minute},
	{Suffix: "h", Multiplier: time.Hour},
	{Suffix: "d", Multiplier: time.Hour * 24},
	{Suffix: "w", Multiplier: time.Hour * 24 * 7},
	{Suffix: "M", Multiplier: time.Hour * 24 * 30},
	{Suffix: "y", Multiplier: time.Hour * 24 * 365},
	// Default to second
	{Suffix: "", Multiplier: time.Second},
}

// ParseDuration parses a duration string made of a decimal number
// followed by an optional ms|s|m|h|d|w|M|y suffix. The unit defaults to
// seconds when no suffix is given.
//
// The number may be fractional or negative. Any fraction of a
// nanosecond in the result is truncated.
//
// An error is returned when the numeric part cannot be parsed, or when
// the resulting duration is not a finite value that fits in a
// time.Duration (for example "NaN", "Inf" or "1e30y").
func ParseDuration(age string) (time.Duration, error) {
	// A time.Duration is an int64 count of nanoseconds, so the valid
	// range is [-2^63, 2^63).
	const limit = 1 << 63

	for _, ageSuffix := range ageSuffixes {
		if !strings.HasSuffix(age, ageSuffix.Suffix) {
			continue
		}
		number, err := strconv.ParseFloat(strings.TrimSuffix(age, ageSuffix.Suffix), 64)
		if err != nil {
			return 0, err
		}
		period := number * float64(ageSuffix.Multiplier)
		// Converting NaN or an out-of-range float to an integer gives an
		// implementation-dependent result, so reject those explicitly.
		// The condition is written in the positive so that NaN, for
		// which every comparison is false, is rejected too.
		if !(period >= -limit && period < limit) {
			return 0, fmt.Errorf("duration %q is out of range", age)
		}
		return time.Duration(period), nil
	}
	// Only reachable if the catch-all empty suffix is removed from
	// ageSuffixes.
	return 0, fmt.Errorf("duration %q has an unknown suffix", age)
}
// NewFilter parses the command line options and creates a Filter object
func NewFilter() (f *Filter, err error) {
f = &Filter{
@ -73,6 +117,7 @@ func NewFilter() (f *Filter, err error) {
MinSize: int64(minSize),
MaxSize: int64(maxSize),
}
if *includeRule != "" {
err = f.Add(true, *includeRule)
if err != nil {
@ -131,6 +176,23 @@ func NewFilter() (f *Filter, err error) {
return nil, err
}
}
if *minAge != "" {
duration, err := ParseDuration(*minAge)
if err != nil {
return nil, err
}
f.ModTimeTo = time.Now().Add(-duration)
}
if *maxAge != "" {
duration, err := ParseDuration(*maxAge)
if err != nil {
return nil, err
}
f.ModTimeFrom = time.Now().Add(-duration)
if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) {
return nil, fmt.Errorf("Argument --min-age can't be larger than --max-age")
}
}
if *dumpFilters {
fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters())
@ -194,12 +256,18 @@ func (f *Filter) Clear() {
// Include returns whether this object should be included into the
// sync or not
func (f *Filter) Include(remote string, size int64) bool {
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
// filesFrom takes precedence
if f.files != nil {
_, include := f.files[remote]
return include
}
// Age window: each bound is enforced only when it has been set, so a
// zero ModTimeFrom or ModTimeTo leaves that side of the window open.
if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
	return false
}
if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
	return false
}
if f.MinSize != 0 && size < f.MinSize {
return false
}
@ -214,6 +282,21 @@ func (f *Filter) Include(remote string, size int64) bool {
return true
}
// IncludeObject returns whether this object should be included into
// the sync or not.
//
// It is a convenience wrapper around Include that avoids calling
// o.ModTime(), which is an expensive operation, unless an age bound
// (ModTimeFrom or ModTimeTo) is set on the filter.
func (f *Filter) IncludeObject(o Object) bool {
	// Placeholder handed to Include when no age bound is configured.
	modTime := time.Unix(0, 0)
	if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
		modTime = o.ModTime()
	}
	return f.Include(o.Remote(), o.Size(), modTime)
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';'
@ -241,6 +324,12 @@ func forEachLine(path string, fn func(string) error) (err error) {
// DumpFilters dumps the filters in textual form, 1 per line
func (f *Filter) DumpFilters() string {
rules := []string{}
if !f.ModTimeFrom.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
}
if !f.ModTimeTo.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
}
for _, rule := range f.rules {
rules = append(rules, rule.String())
}

View file

@ -5,8 +5,43 @@ import (
"os"
"strings"
"testing"
"time"
)
// TestAgeSuffix runs ParseDuration over every supported suffix, a bare
// number, fractional and negative values, and inputs that must be
// rejected.
func TestAgeSuffix(t *testing.T) {
	const (
		hour = float64(time.Hour)
		day  = hour * 24
	)
	cases := []struct {
		in   string
		want float64
		err  bool
	}{
		{in: "0", want: 0, err: false},
		{in: "", want: 0, err: true},
		{in: "1ms", want: float64(time.Millisecond), err: false},
		{in: "1s", want: float64(time.Second), err: false},
		{in: "1m", want: float64(time.Minute), err: false},
		{in: "1h", want: hour, err: false},
		{in: "1d", want: day, err: false},
		{in: "1w", want: day * 7, err: false},
		{in: "1M", want: day * 30, err: false},
		{in: "1y", want: day * 365, err: false},
		{in: "1.5y", want: day * 365 * 1.5, err: false},
		{in: "-1s", want: -float64(time.Second), err: false},
		{in: "1.s", want: float64(time.Second), err: false},
		{in: "1x", want: 0, err: true},
	}
	for i, tc := range cases {
		duration, err := ParseDuration(tc.in)
		if failed := err != nil; failed != tc.err {
			t.Errorf("%d: Expecting error %v but got error %v", i, tc.err, err)
			continue
		}
		if got := float64(duration); tc.want != got {
			t.Errorf("%d: Want %v got %v", i, tc.want, got)
		}
	}
}
func TestNewFilterDefault(t *testing.T) {
f, err := NewFilter()
if err != nil {
@ -137,16 +172,17 @@ func TestNewFilterFull(t *testing.T) {
}
type includeTest struct {
in string
size int64
want bool
in string
size int64
modTime int64
want bool
}
func testInclude(t *testing.T, f *Filter, tests []includeTest) {
for _, test := range tests {
got := f.Include(test.in, test.size)
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0))
if test.want != got {
t.Errorf("%q,%d: want %v got %v", test.in, test.size, test.want, got)
t.Errorf("%q,%d,%d: want %v got %v", test.in, test.size, test.modTime, test.want, got)
}
}
}
@ -165,10 +201,10 @@ func TestNewFilterIncludeFiles(t *testing.T) {
t.Error(err)
}
testInclude(t, f, []includeTest{
{"file1.jpg", 0, true},
{"file2.jpg", 1, true},
{"potato/file2.jpg", 2, false},
{"file3.jpg", 3, false},
{"file1.jpg", 0, 0, true},
{"file2.jpg", 1, 0, true},
{"potato/file2.jpg", 2, 0, false},
{"file3.jpg", 3, 0, false},
})
}
@ -179,9 +215,9 @@ func TestNewFilterMinSize(t *testing.T) {
}
f.MinSize = 100
testInclude(t, f, []includeTest{
{"file1.jpg", 100, true},
{"file2.jpg", 101, true},
{"potato/file2.jpg", 99, false},
{"file1.jpg", 100, 0, true},
{"file2.jpg", 101, 0, true},
{"potato/file2.jpg", 99, 0, false},
})
}
@ -192,9 +228,25 @@ func TestNewFilterMaxSize(t *testing.T) {
}
f.MaxSize = 100
testInclude(t, f, []includeTest{
{"file1.jpg", 100, true},
{"file2.jpg", 101, false},
{"potato/file2.jpg", 99, true},
{"file1.jpg", 100, 0, true},
{"file2.jpg", 101, 0, false},
{"potato/file2.jpg", 99, 0, true},
})
}
// TestNewFilterModFile checks the modification-time window: times on
// either bound or between the bounds are included, and times one
// second outside either bound are excluded.
func TestNewFilterModFile(t *testing.T) {
	const (
		from = 1447346230
		to   = 1447432630
	)
	f, err := NewFilter()
	if err != nil {
		t.Fatal(err)
	}
	f.ModTimeFrom, f.ModTimeTo = time.Unix(from, 0), time.Unix(to, 0)
	cases := []includeTest{
		{"file1.jpg", 100, from, true},
		{"file2.jpg", 101, 1447389430, true},
		{"file3.jpg", 102, to, true},
		{"potato/file1.jpg", 98, from - 1, false},
		{"potato/file2.jpg", 99, to + 1, false},
	}
	testInclude(t, f, cases)
}
@ -221,19 +273,19 @@ func TestNewFilterMatches(t *testing.T) {
add("+ /sausage3**")
add("- *")
testInclude(t, f, []includeTest{
{"cleared", 100, false},
{"file1.jpg", 100, false},
{"file2.png", 100, true},
{"afile2.png", 100, false},
{"file3.jpg", 101, true},
{"file4.png", 101, false},
{"potato", 101, false},
{"sausage1", 101, true},
{"sausage1/potato", 101, false},
{"sausage2potato", 101, true},
{"sausage2/potato", 101, false},
{"sausage3/potato", 101, true},
{"unicorn", 99, false},
{"cleared", 100, 0, false},
{"file1.jpg", 100, 0, false},
{"file2.png", 100, 0, true},
{"afile2.png", 100, 0, false},
{"file3.jpg", 101, 0, true},
{"file4.png", 101, 0, false},
{"potato", 101, 0, false},
{"sausage1", 101, 0, true},
{"sausage1/potato", 101, 0, false},
{"sausage2potato", 101, 0, true},
{"sausage2/potato", 101, 0, false},
{"sausage3/potato", 101, 0, true},
{"unicorn", 99, 0, false},
})
}
@ -316,7 +368,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
included := f.Include(test.file, 0)
included := f.Include(test.file, 0, time.Unix(0, 0))
if included != test.included {
t.Logf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
}

View file

@ -387,7 +387,7 @@ func readFilesMap(fs Fs, obeyInclude bool) map[string]Object {
remote := o.Remote()
if _, ok := files[remote]; !ok {
// Make sure we don't delete excluded files if not required
if !obeyInclude || Config.Filter.DeleteExcluded || Config.Filter.Include(remote, o.Size()) {
if !obeyInclude || Config.Filter.DeleteExcluded || Config.Filter.IncludeObject(o) {
files[remote] = o
} else {
Debug(o, "Excluded from sync (and deletion)")
@ -450,10 +450,10 @@ func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool) error {
go func() {
for src := range fsrc.List() {
remote := src.Remote()
if !Config.Filter.Include(remote, src.Size()) {
if !Config.Filter.IncludeObject(src) {
Debug(src, "Excluding from sync")
} else {
remote := src.Remote()
if dst, dstFound := delFiles[remote]; dstFound {
delete(delFiles, remote)
toBeChecked <- ObjectPair{src, dst}