dedupe: implement keep smallest too

This helps when deduplicating Google Docs and their exported versions if
they accidentally get uploaded to the source again.

See: https://forum.rclone.org/t/my-stupidity-or-a-bug/13861
This commit is contained in:
Nick Craig-Wood 2020-01-16 13:47:15 +00:00
parent 1bd9f522e0
commit 81002747c5
3 changed files with 43 additions and 20 deletions

View file

@ -94,6 +94,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" +
* ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one. * ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
* ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one. * ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
* ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one. * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
* ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one.
* ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different. * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
For example to rename all the identically named photos in your Google Photos directory, do For example to rename all the identically named photos in your Google Photos directory, do

View file

@ -125,14 +125,6 @@ func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string
} }
} }
// objectsSortedByModTime implements sort.Interface over a slice of
// objects, ordering them by modification time with the oldest first.
type objectsSortedByModTime []fs.Object

// Len reports how many objects are in the slice.
func (objs objectsSortedByModTime) Len() int {
	return len(objs)
}

// Swap exchanges the objects at positions i and j.
func (objs objectsSortedByModTime) Swap(i, j int) {
	objs[i], objs[j] = objs[j], objs[i]
}

// Less reports whether the object at i was modified before the one at j.
func (objs objectsSortedByModTime) Less(i, j int) bool {
	ctx := context.TODO()
	ti, tj := objs[i].ModTime(ctx), objs[j].ModTime(ctx)
	return ti.Before(tj)
}
// DeduplicateMode is how the dedupe command chooses what to do // DeduplicateMode is how the dedupe command chooses what to do
type DeduplicateMode int type DeduplicateMode int
@ -145,6 +137,7 @@ const (
DeduplicateOldest // choose the oldest object DeduplicateOldest // choose the oldest object
DeduplicateRename // rename the objects DeduplicateRename // rename the objects
DeduplicateLargest // choose the largest object DeduplicateLargest // choose the largest object
DeduplicateSmallest // choose the smallest object
) )
func (x DeduplicateMode) String() string { func (x DeduplicateMode) String() string {
@ -163,6 +156,8 @@ func (x DeduplicateMode) String() string {
return "rename" return "rename"
case DeduplicateLargest: case DeduplicateLargest:
return "largest" return "largest"
case DeduplicateSmallest:
return "smallest"
} }
return "unknown" return "unknown"
} }
@ -184,6 +179,8 @@ func (x *DeduplicateMode) Set(s string) error {
*x = DeduplicateRename *x = DeduplicateRename
case "largest": case "largest":
*x = DeduplicateLargest *x = DeduplicateLargest
case "smallest":
*x = DeduplicateSmallest
default: default:
return errors.Errorf("Unknown mode for dedupe %q.", s) return errors.Errorf("Unknown mode for dedupe %q.", s)
} }
@ -248,6 +245,20 @@ func dedupeMergeDuplicateDirs(ctx context.Context, f fs.Fs, duplicateDirs [][]fs
return nil return nil
} }
// sortOldestFirst sorts objs in place by modification time, oldest
// first.
//
// The sort is stable, so objects with equal modification times keep
// the relative order they had on entry. This makes the element that
// ends up first (or last) repeatable for a given input order.
func sortOldestFirst(objs []fs.Object) {
	sort.SliceStable(objs, func(i, j int) bool {
		return objs[i].ModTime(context.TODO()).Before(objs[j].ModTime(context.TODO()))
	})
}
// sortSmallestFirst sorts objs in place by the value returned from
// Size(), smallest first.
//
// The sort is stable, so objects reporting the same size keep the
// relative order they had on entry. This makes the element that ends
// up first (or last) repeatable for a given input order.
func sortSmallestFirst(objs []fs.Object) {
	sort.SliceStable(objs, func(i, j int) bool {
		return objs[i].Size() < objs[j].Size()
	})
}
// Deduplicate interactively finds duplicate files and offers to // Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with // delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names. // Google Drive which can have duplicate file names.
@ -296,24 +307,19 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode) error {
case DeduplicateFirst: case DeduplicateFirst:
dedupeDeleteAllButOne(ctx, 0, remote, objs) dedupeDeleteAllButOne(ctx, 0, remote, objs)
case DeduplicateNewest: case DeduplicateNewest:
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first sortOldestFirst(objs)
dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs) dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs)
case DeduplicateOldest: case DeduplicateOldest:
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first sortOldestFirst(objs)
dedupeDeleteAllButOne(ctx, 0, remote, objs) dedupeDeleteAllButOne(ctx, 0, remote, objs)
case DeduplicateRename: case DeduplicateRename:
dedupeRename(ctx, f, remote, objs) dedupeRename(ctx, f, remote, objs)
case DeduplicateLargest: case DeduplicateLargest:
largest, largestIndex := int64(-1), -1 sortSmallestFirst(objs)
for i, obj := range objs { dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs)
size := obj.Size() case DeduplicateSmallest:
if size > largest { sortSmallestFirst(objs)
largest, largestIndex = size, i dedupeDeleteAllButOne(ctx, 0, remote, objs)
}
}
if largestIndex > -1 {
dedupeDeleteAllButOne(ctx, largestIndex, remote, objs)
}
case DeduplicateSkip: case DeduplicateSkip:
// skip // skip
default: default:

View file

@ -152,6 +152,22 @@ func TestDeduplicateLargest(t *testing.T) {
fstest.CheckItems(t, r.Fremote, file3) fstest.CheckItems(t, r.Fremote, file3)
} }
// TestDeduplicateSmallest checks that DeduplicateSmallest leaves only the
// smallest of three same-named objects on the remote.
func TestDeduplicateSmallest(t *testing.T) {
	ctx := context.Background()
	r := fstest.NewRun(t)
	defer r.Finalise()
	skipIfCantDedupe(t, r.Fremote)

	// Three objects named "one" whose contents increase in length.
	smallest := r.WriteUncheckedObject(ctx, "one", "This is one", t1)
	middle := r.WriteUncheckedObject(ctx, "one", "This is one too", t2)
	largest := r.WriteUncheckedObject(ctx, "one", "This is another one", t3)
	r.CheckWithDuplicates(t, smallest, middle, largest)

	err := operations.Deduplicate(ctx, r.Fremote, operations.DeduplicateSmallest)
	require.NoError(t, err)

	// Only the shortest object should remain.
	fstest.CheckItems(t, r.Fremote, smallest)
}
func TestDeduplicateRename(t *testing.T) { func TestDeduplicateRename(t *testing.T) {
r := fstest.NewRun(t) r := fstest.NewRun(t)
defer r.Finalise() defer r.Finalise()