dedupe: Stop dedupe deleting files with identical IDs #4013

Before this change, if there were two files with the same name and the
same ID in the same directory, dedupe would delete one of them. However,
since these are actually the same file (with the same ID), both files
would be deleted, leading to data loss.

This should never actually happen; however, it did happen as part of a
bug introduced in rclone, which was fixed by

dfc7215bf9 drive: fix duplicate items when using --drive-shared-with-me #4018

This change checks to see if any of the duplicates have the same ID
and, if they do, it refuses to delete them.
This commit is contained in:
Nick Craig-Wood 2020-03-11 10:21:23 +00:00
parent 9a5178be7a
commit 5fa6a28f70

View file

@ -77,6 +77,31 @@ func dedupeDeleteAllButOne(ctx context.Context, keep int, remote string, objs []
// dedupeDeleteIdentical deletes all but one of identical (by hash) copies
func dedupeDeleteIdentical(ctx context.Context, ht hash.Type, remote string, objs []fs.Object) (remainingObjs []fs.Object) {
// Make map of IDs
IDs := make(map[string]int, len(objs))
for _, o := range objs {
if do, ok := o.(fs.IDer); ok {
if ID := do.ID(); ID != "" {
IDs[ID]++
}
}
}
// Remove duplicate IDs
newObjs := objs[:0]
for _, o := range objs {
if do, ok := o.(fs.IDer); ok {
if ID := do.ID(); ID != "" {
if IDs[ID] <= 1 {
newObjs = append(newObjs, o)
} else {
fs.Logf(o, "Ignoring as it appears %d times in the listing and deleting would lead to data loss", IDs[ID])
}
}
}
}
objs = newObjs
// See how many of these duplicates are identical
byHash := make(map[string][]fs.Object, len(objs))
for _, o := range objs {