forked from TrueCloudLab/rclone
dedupe: Stop dedupe deleting files with identical IDs #4013
Before this change, if there were two files with the same name and the
same ID in the same directory, dedupe would delete one of them. However,
since these are actually the same file (with the same ID), both
files would be deleted, leading to data loss.
This should never actually happen; however, it did happen as part of a
bug introduced in rclone, which was fixed by
dfc7215bf9
drive: fix duplicate items when using --drive-shared-with-me #4018
This change checks to see if any of the duplicates have the same ID
and, if they do, it refuses to delete them.
This commit is contained in:
parent
9a5178be7a
commit
5fa6a28f70
1 changed files with 25 additions and 0 deletions
|
@ -77,6 +77,31 @@ func dedupeDeleteAllButOne(ctx context.Context, keep int, remote string, objs []
|
|||
|
||||
// dedupeDeleteIdentical deletes all but one of identical (by hash) copies
|
||||
func dedupeDeleteIdentical(ctx context.Context, ht hash.Type, remote string, objs []fs.Object) (remainingObjs []fs.Object) {
|
||||
// Make map of IDs
|
||||
IDs := make(map[string]int, len(objs))
|
||||
for _, o := range objs {
|
||||
if do, ok := o.(fs.IDer); ok {
|
||||
if ID := do.ID(); ID != "" {
|
||||
IDs[ID]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove duplicate IDs
|
||||
newObjs := objs[:0]
|
||||
for _, o := range objs {
|
||||
if do, ok := o.(fs.IDer); ok {
|
||||
if ID := do.ID(); ID != "" {
|
||||
if IDs[ID] <= 1 {
|
||||
newObjs = append(newObjs, o)
|
||||
} else {
|
||||
fs.Logf(o, "Ignoring as it appears %d times in the listing and deleting would lead to data loss", IDs[ID])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
objs = newObjs
|
||||
|
||||
// See how many of these duplicates are identical
|
||||
byHash := make(map[string][]fs.Object, len(objs))
|
||||
for _, o := range objs {
|
||||
|
|
Loading…
Reference in a new issue