forked from TrueCloudLab/rclone
dedupe: implement keep smallest too
This helps when deduplicating Google Docs and their exported versions if the exports accidentally get uploaded to the source again. See: https://forum.rclone.org/t/my-stupidity-or-a-bug/13861
This commit is contained in:
parent
1bd9f522e0
commit
81002747c5
3 changed files with 43 additions and 20 deletions
|
@ -94,6 +94,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" +
|
|||
* ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
|
||||
* ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
|
||||
* ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
|
||||
* ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one.
|
||||
* ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
|
||||
|
||||
For example to rename all the identically named photos in your Google Photos directory, do
|
||||
|
|
|
@ -125,14 +125,6 @@ func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string
|
|||
}
|
||||
}
|
||||
|
||||
// objectsSortedByModTime adapts a slice of fs.Object to sort.Interface
// (Len, Swap, Less) so that sort.Sort orders the objects by ascending
// modification time, i.e. oldest first.
type objectsSortedByModTime []fs.Object
|
||||
|
||||
// Len returns the number of objects in the slice.
func (objs objectsSortedByModTime) Len() int { return len(objs) }
|
||||
// Swap exchanges the objects at positions i and j.
func (objs objectsSortedByModTime) Swap(i, j int) { objs[i], objs[j] = objs[j], objs[i] }
|
||||
// Less reports whether object i was modified strictly before object j.
// No context is available through sort.Interface, so context.TODO() is
// passed to ModTime.
func (objs objectsSortedByModTime) Less(i, j int) bool {
|
||||
return objs[i].ModTime(context.TODO()).Before(objs[j].ModTime(context.TODO()))
|
||||
}
|
||||
|
||||
// DeduplicateMode is how the dedupe command chooses what to do
|
||||
type DeduplicateMode int
|
||||
|
||||
|
@ -145,6 +137,7 @@ const (
|
|||
DeduplicateOldest // choose the oldest object
|
||||
DeduplicateRename // rename the objects
|
||||
DeduplicateLargest // choose the largest object
|
||||
DeduplicateSmallest // choose the smallest object
|
||||
)
|
||||
|
||||
func (x DeduplicateMode) String() string {
|
||||
|
@ -163,6 +156,8 @@ func (x DeduplicateMode) String() string {
|
|||
return "rename"
|
||||
case DeduplicateLargest:
|
||||
return "largest"
|
||||
case DeduplicateSmallest:
|
||||
return "smallest"
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
@ -184,6 +179,8 @@ func (x *DeduplicateMode) Set(s string) error {
|
|||
*x = DeduplicateRename
|
||||
case "largest":
|
||||
*x = DeduplicateLargest
|
||||
case "smallest":
|
||||
*x = DeduplicateSmallest
|
||||
default:
|
||||
return errors.Errorf("Unknown mode for dedupe %q.", s)
|
||||
}
|
||||
|
@ -248,6 +245,20 @@ func dedupeMergeDuplicateDirs(ctx context.Context, f fs.Fs, duplicateDirs [][]fs
|
|||
return nil
|
||||
}
|
||||
|
||||
// sortOldestFirst sorts objs in place by modification time, oldest first.
//
// The function takes no context, so context.TODO() is passed to ModTime.
// sort.Slice is not a stable sort, so objects with equal modification
// times may end up in any relative order.
|
||||
func sortOldestFirst(objs []fs.Object) {
|
||||
sort.Slice(objs, func(i, j int) bool {
|
||||
return objs[i].ModTime(context.TODO()).Before(objs[j].ModTime(context.TODO()))
|
||||
})
|
||||
}
|
||||
|
||||
// sortSmallestFirst sorts objs in place by Size(), smallest first.
//
// sort.Slice is not a stable sort, so objects of equal size may end up in
// any relative order.
|
||||
func sortSmallestFirst(objs []fs.Object) {
|
||||
sort.Slice(objs, func(i, j int) bool {
|
||||
return objs[i].Size() < objs[j].Size()
|
||||
})
|
||||
}
|
||||
|
||||
// Deduplicate interactively finds duplicate files and offers to
|
||||
// delete all but one or rename them to be different. Only useful with
|
||||
// Google Drive which can have duplicate file names.
|
||||
|
@ -296,24 +307,19 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode) error {
|
|||
case DeduplicateFirst:
|
||||
dedupeDeleteAllButOne(ctx, 0, remote, objs)
|
||||
case DeduplicateNewest:
|
||||
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first
|
||||
sortOldestFirst(objs)
|
||||
dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs)
|
||||
case DeduplicateOldest:
|
||||
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first
|
||||
sortOldestFirst(objs)
|
||||
dedupeDeleteAllButOne(ctx, 0, remote, objs)
|
||||
case DeduplicateRename:
|
||||
dedupeRename(ctx, f, remote, objs)
|
||||
case DeduplicateLargest:
|
||||
largest, largestIndex := int64(-1), -1
|
||||
for i, obj := range objs {
|
||||
size := obj.Size()
|
||||
if size > largest {
|
||||
largest, largestIndex = size, i
|
||||
}
|
||||
}
|
||||
if largestIndex > -1 {
|
||||
dedupeDeleteAllButOne(ctx, largestIndex, remote, objs)
|
||||
}
|
||||
sortSmallestFirst(objs)
|
||||
dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs)
|
||||
case DeduplicateSmallest:
|
||||
sortSmallestFirst(objs)
|
||||
dedupeDeleteAllButOne(ctx, 0, remote, objs)
|
||||
case DeduplicateSkip:
|
||||
// skip
|
||||
default:
|
||||
|
|
|
@ -152,6 +152,22 @@ func TestDeduplicateLargest(t *testing.T) {
|
|||
fstest.CheckItems(t, r.Fremote, file3)
|
||||
}
|
||||
|
||||
// TestDeduplicateSmallest writes three objects with the same name "one" but
// different contents, runs Deduplicate in DeduplicateSmallest mode, and
// checks that only the object with the shortest content remains.
func TestDeduplicateSmallest(t *testing.T) {
|
||||
r := fstest.NewRun(t)
|
||||
defer r.Finalise()
|
||||
skipIfCantDedupe(t, r.Fremote)
|
||||
|
||||
// file1 has the shortest content (11 bytes vs 15 and 19), so it is the
// one expected to survive.
file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
|
||||
file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2)
|
||||
file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3)
|
||||
r.CheckWithDuplicates(t, file1, file2, file3)
|
||||
|
||||
err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateSmallest)
|
||||
require.NoError(t, err)
|
||||
|
||||
fstest.CheckItems(t, r.Fremote, file1)
|
||||
}
|
||||
|
||||
func TestDeduplicateRename(t *testing.T) {
|
||||
r := fstest.NewRun(t)
|
||||
defer r.Finalise()
|
||||
|
|
Loading…
Reference in a new issue