Stop --track-renames hashing matching files - fixes #984

Also only hash files of the correct size.

This speeds it up a lot.
This commit is contained in:
Nick Craig-Wood 2017-01-03 23:03:20 +00:00
parent 6611d92e21
commit de2d967abd

View file

@ -283,41 +283,6 @@ func (s *syncCopyMove) pairChecker(in ObjectPairChan, out ObjectPairChan, wg *sy
} }
} }
// tryRename is the --track-renames fast path for a single source
// object. It builds the rename key for src with renameHash, pops a
// destination object recorded under that key from the rename map and
// asks MoveFile to move it, within s.fdst, to src.Remote().
//
// It returns true only when the move succeeded. A hash error, an
// empty key, no candidate in the rename map, or a failed move all
// return false. The hash error and the move error are reported with
// Debug; the other two cases are silent.
func (s *syncCopyMove) tryRename(src Object) bool {
	Stats.Checking(src.Remote())
	defer Stats.DoneChecking(src.Remote())

	key, hashErr := s.renameHash(src)
	switch {
	case hashErr != nil:
		Debug(src, "Failed to read hash: %v", hashErr)
		return false
	case key == "":
		// no usable hash for this object, so it can't be matched
		return false
	}

	candidate := s.popRenameMap(key)
	if candidate == nil {
		return false
	}

	// NOTE: the candidate has already been popped; it is not put back
	// into the rename map if the move below fails.
	if moveErr := MoveFile(s.fdst, s.fdst, src.Remote(), candidate.Remote()); moveErr != nil {
		Debug(src, "Failed to rename to %q: %v", candidate.Remote(), moveErr)
		return false
	}

	// the old name is gone from the destination now, so drop it from
	// dstFiles if it is there
	s.dstFilesMu.Lock()
	delete(s.dstFiles, candidate.Remote())
	s.dstFilesMu.Unlock()

	Debug(src, "Renamed from %q", candidate.Remote())
	return true
}
// pairRenamer reads Objects on in and attempts to rename them, // pairRenamer reads Objects on in and attempts to rename them,
// otherwise it sends them out if they need transferring. // otherwise it sends them out if they need transferring.
func (s *syncCopyMove) pairRenamer(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) { func (s *syncCopyMove) pairRenamer(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) {
@ -455,58 +420,24 @@ func (s *syncCopyMove) deleteFiles(checkSrcMap bool) error {
// renameHash makes a string with the size and the hash for rename detection // renameHash makes a string with the size and the hash for rename detection
// //
// it may return an empty string in which case no hash could be made // it may return an empty string in which case no hash could be made
func (s *syncCopyMove) renameHash(obj Object) (hash string, err error) { func (s *syncCopyMove) renameHash(obj Object) (hash string) {
var err error
hash, err = obj.Hash(s.commonHash) hash, err = obj.Hash(s.commonHash)
if err != nil { if err != nil {
return hash, err Debug(obj, "Hash failed: %v", err)
return ""
} }
if hash == "" { if hash == "" {
return hash, nil return ""
} }
return fmt.Sprintf("%d,%s", obj.Size(), hash), nil return fmt.Sprintf("%d,%s", obj.Size(), hash)
} }
// makeRenameMap builds a map of the destination files by hash // pushRenameMap adds the object with hash to the rename map
func (s *syncCopyMove) makeRenameMap() error { func (s *syncCopyMove) pushRenameMap(hash string, obj Object) {
Debug(s.fdst, "Making map for --track-renames") s.renameMapMu.Lock()
s.renameMap[hash] = append(s.renameMap[hash], obj)
s.renameMap = make(map[string][]Object) s.renameMapMu.Unlock()
in := make(chan Object, Config.Checkers)
go s.pumpMapToChan(s.dstFiles, in)
var wg sync.WaitGroup
wg.Add(Config.Transfers)
for i := 0; i < Config.Transfers; i++ {
go func() {
defer wg.Done()
for {
if s.aborting() {
return
}
select {
case obj, ok := <-in:
if !ok {
return
}
Stats.Checking(obj.Remote())
hash, err := s.renameHash(obj)
Stats.DoneChecking(obj.Remote())
if err != nil {
s.processError(err)
} else if hash != "" {
s.renameMapMu.Lock()
s.renameMap[hash] = append(s.renameMap[hash], obj)
s.renameMapMu.Unlock()
}
case <-s.abort:
return
}
}
}()
}
wg.Wait()
Debug(s.fdst, "Finished making map for --track-renames")
return s.currentError()
} }
// popRenameMap finds the object with hash and pops the first match from // popRenameMap finds the object with hash and pops the first match from
@ -526,30 +457,74 @@ func (s *syncCopyMove) popRenameMap(hash string) (dst Object) {
return dst return dst
} }
// delRenameMap removes obj from renameMap // makeRenameMap builds a map of the destination files by hash that
func (s *syncCopyMove) delRenameMap(obj Object) { // match sizes in the slice of objects in renameCheck
hash, err := s.renameHash(obj) func (s *syncCopyMove) makeRenameMap(renameCheck []Object) {
if err != nil { Debug(s.fdst, "Making map for --track-renames")
return
// first make a map of possible sizes we need to check
possibleSizes := map[int64]struct{}{}
for _, obj := range renameCheck {
possibleSizes[obj.Size()] = struct{}{}
} }
if hash == "" {
return // pump all the dstFiles into in
} in := make(chan Object, Config.Checkers)
s.renameMapMu.Lock() go s.pumpMapToChan(s.dstFiles, in)
dsts := s.renameMap[hash]
for i, dst := range dsts { // now make a map of size,hash for all dstFiles
if obj.Remote() == dst.Remote() { s.renameMap = make(map[string][]Object)
// remove obj from list if found var wg sync.WaitGroup
dsts = append(dsts[:i], dsts[i+1:]...) wg.Add(Config.Transfers)
if len(dsts) > 0 { for i := 0; i < Config.Transfers; i++ {
s.renameMap[hash] = dsts go func() {
} else { defer wg.Done()
delete(s.renameMap, hash) for obj := range in {
// only create hash for dst Object if its size could match
if _, found := possibleSizes[obj.Size()]; found {
Stats.Checking(obj.Remote())
hash := s.renameHash(obj)
if hash != "" {
s.pushRenameMap(hash, obj)
}
Stats.DoneChecking(obj.Remote())
}
} }
break }()
}
} }
s.renameMapMu.Unlock() wg.Wait()
Debug(s.fdst, "Finished making map for --track-renames")
}
// tryRename renames a src object when doing track renames if
// possible, it returns true if the object was renamed.
func (s *syncCopyMove) tryRename(src Object) bool {
Stats.Checking(src.Remote())
defer Stats.DoneChecking(src.Remote())
hash := s.renameHash(src)
if hash == "" {
return false
}
dst := s.popRenameMap(hash)
if dst == nil {
return false
}
err := MoveFile(s.fdst, s.fdst, src.Remote(), dst.Remote())
if err != nil {
Debug(src, "Failed to rename to %q: %v", dst.Remote(), err)
return false
}
// remove file from dstFiles if present
s.dstFilesMu.Lock()
delete(s.dstFiles, dst.Remote())
s.dstFilesMu.Unlock()
Debug(src, "Renamed from %q", dst.Remote())
return true
} }
// Syncs fsrc into fdst // Syncs fsrc into fdst
@ -596,14 +571,6 @@ func (s *syncCopyMove) run() error {
} }
} }
// Build the map of destination files by hash if required
// Have dstFiles complete at this point
if s.trackRenames {
if err = s.makeRenameMap(); err != nil {
return err
}
}
// Delete files first if required // Delete files first if required
if s.deleteBefore { if s.deleteBefore {
err = s.deleteFiles(true) err = s.deleteFiles(true)
@ -648,10 +615,6 @@ func (s *syncCopyMove) run() error {
delete(s.dstFiles, remote) delete(s.dstFiles, remote)
} }
s.dstFilesMu.Unlock() s.dstFilesMu.Unlock()
if ok && s.trackRenames {
// remove file from rename tracking also
s.delRenameMap(dst)
}
} }
if dst != nil { if dst != nil {
s.toBeChecked <- ObjectPair{src, dst} s.toBeChecked <- ObjectPair{src, dst}
@ -665,6 +628,8 @@ func (s *syncCopyMove) run() error {
} }
if s.trackRenames { if s.trackRenames {
// Build the map of the remaining dstFiles by hash
s.makeRenameMap(renameCheck)
// Attempt renames for all the files which don't have a matching dst // Attempt renames for all the files which don't have a matching dst
for _, src := range renameCheck { for _, src := range renameCheck {
s.toBeRenamed <- ObjectPair{src, nil} s.toBeRenamed <- ObjectPair{src, nil}