forked from TrueCloudLab/rclone
dedupe: implement merging of duplicate directories - fixes #1243
This commit is contained in:
parent
db1995e63a
commit
bfe812ea6b
2 changed files with 74 additions and 0 deletions
|
@ -25,6 +25,10 @@ By default ` + "`" + `dedupe` + "`" + ` interactively finds duplicate files and
|
|||
delete all but one or rename them to be different. Only useful with
|
||||
Google Drive which can have duplicate file names.
|
||||
|
||||
In the first pass it will merge directories with the same name. It
|
||||
will do this iteratively until all the identical directories have been
|
||||
merged.
|
||||
|
||||
The ` + "`" + `dedupe` + "`" + ` command will delete all but one of any identical (same
|
||||
md5sum) files it finds without confirmation. This means that for most
|
||||
duplicated files the ` + "`" + `dedupe` + "`" + ` command will not be interactive. You
|
||||
|
|
|
@ -1351,11 +1351,81 @@ func (x *DeduplicateMode) Type() string {
|
|||
// Check it satisfies the interface
|
||||
var _ pflag.Value = (*DeduplicateMode)(nil)
|
||||
|
||||
// dedupeFindDuplicateDirs scans f for duplicate directories
|
||||
func dedupeFindDuplicateDirs(f Fs) ([][]Directory, error) {
|
||||
duplicateDirs := [][]Directory{}
|
||||
err := Walk(f, "", true, Config.MaxDepth, func(dirPath string, entries DirEntries, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirs := map[string][]Directory{}
|
||||
entries.ForDir(func(d Directory) {
|
||||
dirs[d.Remote()] = append(dirs[d.Remote()], d)
|
||||
})
|
||||
for _, ds := range dirs {
|
||||
if len(ds) > 1 {
|
||||
duplicateDirs = append(duplicateDirs, ds)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "find duplicate dirs")
|
||||
}
|
||||
return duplicateDirs, nil
|
||||
}
|
||||
|
||||
// dedupeMergeDuplicateDirs merges all the duplicate directories found
|
||||
func dedupeMergeDuplicateDirs(f Fs, duplicateDirs [][]Directory) error {
|
||||
mergeDirs := f.Features().MergeDirs
|
||||
if mergeDirs == nil {
|
||||
return errors.Errorf("%v: can't merge directories", f)
|
||||
}
|
||||
dirCacheFlush := f.Features().DirCacheFlush
|
||||
if dirCacheFlush == nil {
|
||||
return errors.Errorf("%v: can't flush dir cache", f)
|
||||
}
|
||||
for _, dirs := range duplicateDirs {
|
||||
if !Config.DryRun {
|
||||
Infof(dirs[0], "Merging contents of duplicate directories")
|
||||
err := mergeDirs(dirs)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "merge duplicate dirs")
|
||||
}
|
||||
} else {
|
||||
Infof(dirs[0], "NOT Merging contents of duplicate directories as --dry-run")
|
||||
}
|
||||
}
|
||||
dirCacheFlush()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Deduplicate interactively finds duplicate files and offers to
|
||||
// delete all but one or rename them to be different. Only useful with
|
||||
// Google Drive which can have duplicate file names.
|
||||
func Deduplicate(f Fs, mode DeduplicateMode) error {
|
||||
Infof(f, "Looking for duplicates using %v mode.", mode)
|
||||
|
||||
// Find duplicate directories first and fix them - repeat
|
||||
// until all fixed
|
||||
for {
|
||||
duplicateDirs, err := dedupeFindDuplicateDirs(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(duplicateDirs) == 0 {
|
||||
break
|
||||
}
|
||||
err = dedupeMergeDuplicateDirs(f, duplicateDirs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if Config.DryRun {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Now find duplicate files
|
||||
files := map[string][]Object{}
|
||||
err := Walk(f, "", true, Config.MaxDepth, func(dirPath string, entries DirEntries, err error) error {
|
||||
if err != nil {
|
||||
|
|
Loading…
Reference in a new issue