From de2d967abd75b0cae73e3a2de79fdbc0e954e223 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 3 Jan 2017 23:03:20 +0000 Subject: [PATCH] Stop --track-renames hashing matching files - fixes #984 Also only hash files of the correct size. This speeds it up a lot. --- fs/sync.go | 191 ++++++++++++++++++++++------------------------------- 1 file changed, 78 insertions(+), 113 deletions(-) diff --git a/fs/sync.go b/fs/sync.go index 756428c00..fc93d913f 100644 --- a/fs/sync.go +++ b/fs/sync.go @@ -283,41 +283,6 @@ func (s *syncCopyMove) pairChecker(in ObjectPairChan, out ObjectPairChan, wg *sy } } -// tryRename renames a src object when doing track renames if -// possible, it returns true if the object was renamed. -func (s *syncCopyMove) tryRename(src Object) bool { - Stats.Checking(src.Remote()) - defer Stats.DoneChecking(src.Remote()) - - hash, err := s.renameHash(src) - if err != nil { - Debug(src, "Failed to read hash: %v", err) - return false - } - if hash == "" { - return false - } - - dst := s.popRenameMap(hash) - if dst == nil { - return false - } - - err = MoveFile(s.fdst, s.fdst, src.Remote(), dst.Remote()) - if err != nil { - Debug(src, "Failed to rename to %q: %v", dst.Remote(), err) - return false - } - - // remove file from dstFiles if present - s.dstFilesMu.Lock() - delete(s.dstFiles, dst.Remote()) - s.dstFilesMu.Unlock() - - Debug(src, "Renamed from %q", dst.Remote()) - return true -} - // pairRenamer reads Objects~s on in and attempts to rename them, // otherwise it sends them out if they need transferring. func (s *syncCopyMove) pairRenamer(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) { @@ -455,58 +420,24 @@ func (s *syncCopyMove) deleteFiles(checkSrcMap bool) error { // renameHash makes a string with the size and the hash for rename detection // // it may return an empty string in which case no hash could be made -func (s *syncCopyMove) renameHash(obj Object) (hash string, err error) { +func (s *syncCopyMove) renameHash(obj Object) (hash string) { + var err error hash, err = obj.Hash(s.commonHash) if err != nil { - return hash, err + Debug(obj, "Hash failed: %v", err) + return "" } if hash == "" { - return hash, nil + return "" } - return fmt.Sprintf("%d,%s", obj.Size(), hash), nil + return fmt.Sprintf("%d,%s", obj.Size(), hash) } -// makeRenameMap builds a map of the destination files by hash -func (s *syncCopyMove) makeRenameMap() error { - Debug(s.fdst, "Making map for --track-renames") - - s.renameMap = make(map[string][]Object) - in := make(chan Object, Config.Checkers) - go s.pumpMapToChan(s.dstFiles, in) - - var wg sync.WaitGroup - wg.Add(Config.Transfers) - for i := 0; i < Config.Transfers; i++ { - go func() { - defer wg.Done() - for { - if s.aborting() { - return - } - select { - case obj, ok := <-in: - if !ok { - return - } - Stats.Checking(obj.Remote()) - hash, err := s.renameHash(obj) - Stats.DoneChecking(obj.Remote()) - if err != nil { - s.processError(err) - } else if hash != "" { - s.renameMapMu.Lock() - s.renameMap[hash] = append(s.renameMap[hash], obj) - s.renameMapMu.Unlock() - } - case <-s.abort: - return - } - } - }() - } - wg.Wait() - Debug(s.fdst, "Finished making map for --track-renames") - return s.currentError() +// pushRenameMap adds the object with hash to the rename map +func (s *syncCopyMove) pushRenameMap(hash string, obj Object) { + s.renameMapMu.Lock() + s.renameMap[hash] = append(s.renameMap[hash], obj) + s.renameMapMu.Unlock() } // popRenameMap finds the object with hash and pop the first match from @@ -526,30 +457,74 @@ func (s *syncCopyMove) popRenameMap(hash string) (dst Object) { return dst } -// delRenameMap removes obj from renameMap -func (s *syncCopyMove) delRenameMap(obj Object) { - hash, err := s.renameHash(obj) - if err != nil { - return +// makeRenameMap builds a map of the destination files by hash that +// match sizes in the slice of objects in renameCheck +func (s *syncCopyMove) makeRenameMap(renameCheck []Object) { + Debug(s.fdst, "Making map for --track-renames") + + // first make a map of possible sizes we need to check + possibleSizes := map[int64]struct{}{} + for _, obj := range renameCheck { + possibleSizes[obj.Size()] = struct{}{} } - if hash == "" { - return - } - s.renameMapMu.Lock() - dsts := s.renameMap[hash] - for i, dst := range dsts { - if obj.Remote() == dst.Remote() { - // remove obj from list if found - dsts = append(dsts[:i], dsts[i+1:]...) - if len(dsts) > 0 { - s.renameMap[hash] = dsts - } else { - delete(s.renameMap, hash) + + // pump all the dstFiles into in + in := make(chan Object, Config.Checkers) + go s.pumpMapToChan(s.dstFiles, in) + + // now make a map of size,hash for all dstFiles + s.renameMap = make(map[string][]Object) + var wg sync.WaitGroup + wg.Add(Config.Transfers) + for i := 0; i < Config.Transfers; i++ { + go func() { + defer wg.Done() + for obj := range in { + // only create hash for dst Object if its size could match + if _, found := possibleSizes[obj.Size()]; found { + Stats.Checking(obj.Remote()) + hash := s.renameHash(obj) + if hash != "" { + s.pushRenameMap(hash, obj) + } + Stats.DoneChecking(obj.Remote()) + } } - break - } + }() } - s.renameMapMu.Unlock() + wg.Wait() + Debug(s.fdst, "Finished making map for --track-renames") +} + +// tryRename renames a src object when doing track renames if +// possible, it returns true if the object was renamed. +func (s *syncCopyMove) tryRename(src Object) bool { + Stats.Checking(src.Remote()) + defer Stats.DoneChecking(src.Remote()) + + hash := s.renameHash(src) + if hash == "" { + return false + } + + dst := s.popRenameMap(hash) + if dst == nil { + return false + } + + err := MoveFile(s.fdst, s.fdst, src.Remote(), dst.Remote()) + if err != nil { + Debug(src, "Failed to rename to %q: %v", dst.Remote(), err) + return false + } + + // remove file from dstFiles if present + s.dstFilesMu.Lock() + delete(s.dstFiles, dst.Remote()) + s.dstFilesMu.Unlock() + + Debug(src, "Renamed from %q", dst.Remote()) + return true } // Syncs fsrc into fdst @@ -596,14 +571,6 @@ func (s *syncCopyMove) run() error { } } - // Build the map of destination files by hash if required - // Have dstFiles complete at this point - if s.trackRenames { - if err = s.makeRenameMap(); err != nil { - return err - } - } - // Delete files first if required if s.deleteBefore { err = s.deleteFiles(true) @@ -648,10 +615,6 @@ func (s *syncCopyMove) run() error { delete(s.dstFiles, remote) } s.dstFilesMu.Unlock() - if ok && s.trackRenames { - // remove file from rename tracking also - s.delRenameMap(dst) - } } if dst != nil { s.toBeChecked <- ObjectPair{src, dst} @@ -665,6 +628,8 @@ func (s *syncCopyMove) run() error { } if s.trackRenames { + // Build the map of the remaining dstFiles by hash + s.makeRenameMap(renameCheck) // Attempt renames for all the files which don't have a matching dst for _, src := range renameCheck { s.toBeRenamed <- ObjectPair{src, nil}