dedupe: add --dedupe-mode list to just list dupes, changing nothing

This commit is contained in:
Nick Craig-Wood 2020-12-02 15:25:55 +00:00
parent 507f861c67
commit 86014cebd7
2 changed files with 27 additions and 8 deletions

View file

@@ -123,6 +123,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" +
* ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one. * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
* ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one. * ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one.
* ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different. * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
* ` + "`" + `--dedupe-mode list` + "`" + ` - lists duplicate dirs and files only and changes nothing.
For example to rename all the identically named photos in your Google Photos directory, do For example to rename all the identically named photos in your Google Photos directory, do

View file

@@ -138,9 +138,9 @@ func dedupeDeleteIdentical(ctx context.Context, ht hash.Type, remote string, obj
return remainingObjs return remainingObjs
} }
// dedupeInteractive interactively dedupes the slice of objects // dedupeList lists the duplicates and does nothing
func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string, objs []fs.Object, byHash bool) { func dedupeList(ctx context.Context, f fs.Fs, ht hash.Type, remote string, objs []fs.Object, byHash bool) {
fmt.Printf("%s: %d duplicates remain\n", remote, len(objs)) fmt.Printf("%s: %d duplicates\n", remote, len(objs))
for i, o := range objs { for i, o := range objs {
hashValue := "" hashValue := ""
if ht != hash.None { if ht != hash.None {
@@ -156,6 +156,11 @@ func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string
fmt.Printf(" %d: %12d bytes, %s, %v %32s\n", i+1, o.Size(), o.ModTime(ctx).Local().Format("2006-01-02 15:04:05.000000000"), ht, hashValue) fmt.Printf(" %d: %12d bytes, %s, %v %32s\n", i+1, o.Size(), o.ModTime(ctx).Local().Format("2006-01-02 15:04:05.000000000"), ht, hashValue)
} }
} }
}
// dedupeInteractive interactively dedupes the slice of objects
func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string, objs []fs.Object, byHash bool) {
dedupeList(ctx, f, ht, remote, objs, byHash)
commands := []string{"sSkip and do nothing", "kKeep just one (choose which in next step)"} commands := []string{"sSkip and do nothing", "kKeep just one (choose which in next step)"}
if !byHash { if !byHash {
commands = append(commands, "rRename all to be different (by changing file.jpg to file-1.jpg)") commands = append(commands, "rRename all to be different (by changing file.jpg to file-1.jpg)")
@@ -183,6 +188,7 @@ const (
DeduplicateRename // rename the objects DeduplicateRename // rename the objects
DeduplicateLargest // choose the largest object DeduplicateLargest // choose the largest object
DeduplicateSmallest // choose the smallest object DeduplicateSmallest // choose the smallest object
DeduplicateList // list duplicates only
) )
func (x DeduplicateMode) String() string { func (x DeduplicateMode) String() string {
@@ -203,6 +209,8 @@ func (x DeduplicateMode) String() string {
return "largest" return "largest"
case DeduplicateSmallest: case DeduplicateSmallest:
return "smallest" return "smallest"
case DeduplicateList:
return "list"
} }
return "unknown" return "unknown"
} }
@@ -226,6 +234,8 @@ func (x *DeduplicateMode) Set(s string) error {
*x = DeduplicateLargest *x = DeduplicateLargest
case "smallest": case "smallest":
*x = DeduplicateSmallest *x = DeduplicateSmallest
case "list":
*x = DeduplicateList
default: default:
return errors.Errorf("Unknown mode for dedupe %q.", s) return errors.Errorf("Unknown mode for dedupe %q.", s)
} }
@@ -326,10 +336,16 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode, byHash bool
return err return err
} }
if len(duplicateDirs) != 0 { if len(duplicateDirs) != 0 {
if mode != DeduplicateList {
err = dedupeMergeDuplicateDirs(ctx, f, duplicateDirs) err = dedupeMergeDuplicateDirs(ctx, f, duplicateDirs)
if err != nil { if err != nil {
return err return err
} }
} else {
for _, dir := range duplicateDirs {
fmt.Printf("%s: %d duplicates of this directory\n", dir[0].Remote(), len(dir))
}
}
} }
} }
@@ -361,7 +377,7 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode, byHash bool
for remote, objs := range files { for remote, objs := range files {
if len(objs) > 1 { if len(objs) > 1 {
fs.Logf(remote, "Found %d files with duplicate %s", len(objs), what) fs.Logf(remote, "Found %d files with duplicate %s", len(objs), what)
if !byHash { if !byHash && mode != DeduplicateList {
objs = dedupeDeleteIdentical(ctx, ht, remote, objs) objs = dedupeDeleteIdentical(ctx, ht, remote, objs)
if len(objs) <= 1 { if len(objs) <= 1 {
fs.Logf(remote, "All duplicates removed") fs.Logf(remote, "All duplicates removed")
@@ -388,7 +404,9 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode, byHash bool
sortSmallestFirst(objs) sortSmallestFirst(objs)
dedupeDeleteAllButOne(ctx, 0, remote, objs) dedupeDeleteAllButOne(ctx, 0, remote, objs)
case DeduplicateSkip: case DeduplicateSkip:
fs.Logf(remote, "Skipping %d files with duplicate names %s", len(objs), what) fs.Logf(remote, "Skipping %d files with duplicate %s", len(objs), what)
case DeduplicateList:
dedupeList(ctx, f, ht, remote, objs, byHash)
default: default:
//skip //skip
} }