dedupe: implement merging of duplicate directories - fixes #1243

This commit is contained in:
Nick Craig-Wood 2017-08-02 21:34:22 +01:00
parent db1995e63a
commit bfe812ea6b
2 changed files with 74 additions and 0 deletions

View file

@ -25,6 +25,10 @@ By default ` + "`" + `dedupe` + "`" + ` interactively finds duplicate files and
delete all but one or rename them to be different. Only useful with
Google Drive which can have duplicate file names.
In the first pass it will merge directories with the same name. It
will do this iteratively until all the identical directories have been
merged.
The ` + "`" + `dedupe` + "`" + ` command will delete all but one of any identical (same
md5sum) files it finds without confirmation. This means that for most
duplicated files the ` + "`" + `dedupe` + "`" + ` command will not be interactive. You

View file

@ -1351,11 +1351,81 @@ func (x *DeduplicateMode) Type() string {
// Check it satisfies the interface
var _ pflag.Value = (*DeduplicateMode)(nil)
// dedupeFindDuplicateDirs walks f and collects every group of
// directories that share the same Remote() within a single listing.
//
// Each returned group holds at least two directories. Groups are
// gathered by ranging over a map, so their relative order is not fixed.
// Any error reported by the walk is returned wrapped.
func dedupeFindDuplicateDirs(f Fs) ([][]Directory, error) {
	found := [][]Directory{}
	// collect is called by Walk for each listing; it buckets the
	// directories of that listing by remote name.
	collect := func(dirPath string, entries DirEntries, err error) error {
		if err != nil {
			return err
		}
		byRemote := map[string][]Directory{}
		entries.ForDir(func(dir Directory) {
			byRemote[dir.Remote()] = append(byRemote[dir.Remote()], dir)
		})
		for _, group := range byRemote {
			if len(group) <= 1 {
				// a name seen only once is not a duplicate
				continue
			}
			found = append(found, group)
		}
		return nil
	}
	if err := Walk(f, "", true, Config.MaxDepth, collect); err != nil {
		return nil, errors.Wrap(err, "find duplicate dirs")
	}
	return found, nil
}
// dedupeMergeDuplicateDirs merges each group of duplicate directories
// in duplicateDirs using the MergeDirs feature of f.
//
// It returns an error if f provides no MergeDirs or no DirCacheFlush
// feature, or if merging any group fails. When Config.DryRun is set
// nothing is merged and the skipped merge is only logged. The
// directory cache is flushed once after all groups have been handled.
func dedupeMergeDuplicateDirs(f Fs, duplicateDirs [][]Directory) error {
	merge := f.Features().MergeDirs
	if merge == nil {
		return errors.Errorf("%v: can't merge directories", f)
	}
	flush := f.Features().DirCacheFlush
	if flush == nil {
		return errors.Errorf("%v: can't flush dir cache", f)
	}
	for _, group := range duplicateDirs {
		if Config.DryRun {
			Infof(group[0], "NOT Merging contents of duplicate directories as --dry-run")
			continue
		}
		Infof(group[0], "Merging contents of duplicate directories")
		if err := merge(group); err != nil {
			return errors.Wrap(err, "merge duplicate dirs")
		}
	}
	// NOTE(review): presumably flushed so later listings see the merged
	// layout rather than stale cached entries - confirm
	flush()
	return nil
}
// Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names.
func Deduplicate(f Fs, mode DeduplicateMode) error {
Infof(f, "Looking for duplicates using %v mode.", mode)
// Find duplicate directories first and fix them - repeat
// until all fixed
for {
duplicateDirs, err := dedupeFindDuplicateDirs(f)
if err != nil {
return err
}
if len(duplicateDirs) == 0 {
break
}
err = dedupeMergeDuplicateDirs(f, duplicateDirs)
if err != nil {
return err
}
if Config.DryRun {
break
}
}
// Now find duplicate files
files := map[string][]Object{}
err := Walk(f, "", true, Config.MaxDepth, func(dirPath string, entries DirEntries, err error) error {
if err != nil {