Stop --track-renames from hashing files that already match - fixes #984
Also only hash files of the correct size. This speeds it up a lot.
This commit is contained in:
parent
6611d92e21
commit
de2d967abd
1 changed file with 78 additions and 113 deletions
191
fs/sync.go
191
fs/sync.go
|
@ -283,41 +283,6 @@ func (s *syncCopyMove) pairChecker(in ObjectPairChan, out ObjectPairChan, wg *sy
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// tryRename renames a src object when doing track renames if
|
|
||||||
// possible, it returns true if the object was renamed.
|
|
||||||
func (s *syncCopyMove) tryRename(src Object) bool {
|
|
||||||
Stats.Checking(src.Remote())
|
|
||||||
defer Stats.DoneChecking(src.Remote())
|
|
||||||
|
|
||||||
hash, err := s.renameHash(src)
|
|
||||||
if err != nil {
|
|
||||||
Debug(src, "Failed to read hash: %v", err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if hash == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
dst := s.popRenameMap(hash)
|
|
||||||
if dst == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
err = MoveFile(s.fdst, s.fdst, src.Remote(), dst.Remote())
|
|
||||||
if err != nil {
|
|
||||||
Debug(src, "Failed to rename to %q: %v", dst.Remote(), err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove file from dstFiles if present
|
|
||||||
s.dstFilesMu.Lock()
|
|
||||||
delete(s.dstFiles, dst.Remote())
|
|
||||||
s.dstFilesMu.Unlock()
|
|
||||||
|
|
||||||
Debug(src, "Renamed from %q", dst.Remote())
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// pairRenamer reads Objects on in and attempts to rename them,
|
// pairRenamer reads Objects on in and attempts to rename them,
|
||||||
// otherwise it sends them out if they need transferring.
|
// otherwise it sends them out if they need transferring.
|
||||||
func (s *syncCopyMove) pairRenamer(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) {
|
func (s *syncCopyMove) pairRenamer(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) {
|
||||||
|
@ -455,58 +420,24 @@ func (s *syncCopyMove) deleteFiles(checkSrcMap bool) error {
|
||||||
// renameHash makes a string with the size and the hash for rename detection
|
// renameHash makes a string with the size and the hash for rename detection
|
||||||
//
|
//
|
||||||
// it may return an empty string in which case no hash could be made
|
// it may return an empty string in which case no hash could be made
|
||||||
func (s *syncCopyMove) renameHash(obj Object) (hash string, err error) {
|
func (s *syncCopyMove) renameHash(obj Object) (hash string) {
|
||||||
|
var err error
|
||||||
hash, err = obj.Hash(s.commonHash)
|
hash, err = obj.Hash(s.commonHash)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return hash, err
|
Debug(obj, "Hash failed: %v", err)
|
||||||
|
return ""
|
||||||
}
|
}
|
||||||
if hash == "" {
|
if hash == "" {
|
||||||
return hash, nil
|
return ""
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%d,%s", obj.Size(), hash), nil
|
return fmt.Sprintf("%d,%s", obj.Size(), hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeRenameMap builds a map of the destination files by hash
|
// pushRenameMap adds the object with hash to the rename map
|
||||||
func (s *syncCopyMove) makeRenameMap() error {
|
func (s *syncCopyMove) pushRenameMap(hash string, obj Object) {
|
||||||
Debug(s.fdst, "Making map for --track-renames")
|
s.renameMapMu.Lock()
|
||||||
|
s.renameMap[hash] = append(s.renameMap[hash], obj)
|
||||||
s.renameMap = make(map[string][]Object)
|
s.renameMapMu.Unlock()
|
||||||
in := make(chan Object, Config.Checkers)
|
|
||||||
go s.pumpMapToChan(s.dstFiles, in)
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(Config.Transfers)
|
|
||||||
for i := 0; i < Config.Transfers; i++ {
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
for {
|
|
||||||
if s.aborting() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case obj, ok := <-in:
|
|
||||||
if !ok {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
Stats.Checking(obj.Remote())
|
|
||||||
hash, err := s.renameHash(obj)
|
|
||||||
Stats.DoneChecking(obj.Remote())
|
|
||||||
if err != nil {
|
|
||||||
s.processError(err)
|
|
||||||
} else if hash != "" {
|
|
||||||
s.renameMapMu.Lock()
|
|
||||||
s.renameMap[hash] = append(s.renameMap[hash], obj)
|
|
||||||
s.renameMapMu.Unlock()
|
|
||||||
}
|
|
||||||
case <-s.abort:
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
Debug(s.fdst, "Finished making map for --track-renames")
|
|
||||||
return s.currentError()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// popRenameMap finds the object with hash and pops the first match from
|
// popRenameMap finds the object with hash and pops the first match from
|
||||||
|
@ -526,30 +457,74 @@ func (s *syncCopyMove) popRenameMap(hash string) (dst Object) {
|
||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
// delRenameMap removes obj from renameMap
|
// makeRenameMap builds a map of the destination files by hash that
|
||||||
func (s *syncCopyMove) delRenameMap(obj Object) {
|
// match sizes in the slice of objects in renameCheck
|
||||||
hash, err := s.renameHash(obj)
|
func (s *syncCopyMove) makeRenameMap(renameCheck []Object) {
|
||||||
if err != nil {
|
Debug(s.fdst, "Making map for --track-renames")
|
||||||
return
|
|
||||||
|
// first make a map of possible sizes we need to check
|
||||||
|
possibleSizes := map[int64]struct{}{}
|
||||||
|
for _, obj := range renameCheck {
|
||||||
|
possibleSizes[obj.Size()] = struct{}{}
|
||||||
}
|
}
|
||||||
if hash == "" {
|
|
||||||
return
|
// pump all the dstFiles into in
|
||||||
}
|
in := make(chan Object, Config.Checkers)
|
||||||
s.renameMapMu.Lock()
|
go s.pumpMapToChan(s.dstFiles, in)
|
||||||
dsts := s.renameMap[hash]
|
|
||||||
for i, dst := range dsts {
|
// now make a map of size,hash for all dstFiles
|
||||||
if obj.Remote() == dst.Remote() {
|
s.renameMap = make(map[string][]Object)
|
||||||
// remove obj from list if found
|
var wg sync.WaitGroup
|
||||||
dsts = append(dsts[:i], dsts[i+1:]...)
|
wg.Add(Config.Transfers)
|
||||||
if len(dsts) > 0 {
|
for i := 0; i < Config.Transfers; i++ {
|
||||||
s.renameMap[hash] = dsts
|
go func() {
|
||||||
} else {
|
defer wg.Done()
|
||||||
delete(s.renameMap, hash)
|
for obj := range in {
|
||||||
|
// only create hash for dst Object if its size could match
|
||||||
|
if _, found := possibleSizes[obj.Size()]; found {
|
||||||
|
Stats.Checking(obj.Remote())
|
||||||
|
hash := s.renameHash(obj)
|
||||||
|
if hash != "" {
|
||||||
|
s.pushRenameMap(hash, obj)
|
||||||
|
}
|
||||||
|
Stats.DoneChecking(obj.Remote())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break
|
}()
|
||||||
}
|
|
||||||
}
|
}
|
||||||
s.renameMapMu.Unlock()
|
wg.Wait()
|
||||||
|
Debug(s.fdst, "Finished making map for --track-renames")
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryRename renames a src object when doing track renames if
|
||||||
|
// possible, it returns true if the object was renamed.
|
||||||
|
func (s *syncCopyMove) tryRename(src Object) bool {
|
||||||
|
Stats.Checking(src.Remote())
|
||||||
|
defer Stats.DoneChecking(src.Remote())
|
||||||
|
|
||||||
|
hash := s.renameHash(src)
|
||||||
|
if hash == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
dst := s.popRenameMap(hash)
|
||||||
|
if dst == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
err := MoveFile(s.fdst, s.fdst, src.Remote(), dst.Remote())
|
||||||
|
if err != nil {
|
||||||
|
Debug(src, "Failed to rename to %q: %v", dst.Remote(), err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove file from dstFiles if present
|
||||||
|
s.dstFilesMu.Lock()
|
||||||
|
delete(s.dstFiles, dst.Remote())
|
||||||
|
s.dstFilesMu.Unlock()
|
||||||
|
|
||||||
|
Debug(src, "Renamed from %q", dst.Remote())
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Syncs fsrc into fdst
|
// Syncs fsrc into fdst
|
||||||
|
@ -596,14 +571,6 @@ func (s *syncCopyMove) run() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the map of destination files by hash if required
|
|
||||||
// Have dstFiles complete at this point
|
|
||||||
if s.trackRenames {
|
|
||||||
if err = s.makeRenameMap(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete files first if required
|
// Delete files first if required
|
||||||
if s.deleteBefore {
|
if s.deleteBefore {
|
||||||
err = s.deleteFiles(true)
|
err = s.deleteFiles(true)
|
||||||
|
@ -648,10 +615,6 @@ func (s *syncCopyMove) run() error {
|
||||||
delete(s.dstFiles, remote)
|
delete(s.dstFiles, remote)
|
||||||
}
|
}
|
||||||
s.dstFilesMu.Unlock()
|
s.dstFilesMu.Unlock()
|
||||||
if ok && s.trackRenames {
|
|
||||||
// remove file from rename tracking also
|
|
||||||
s.delRenameMap(dst)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if dst != nil {
|
if dst != nil {
|
||||||
s.toBeChecked <- ObjectPair{src, dst}
|
s.toBeChecked <- ObjectPair{src, dst}
|
||||||
|
@ -665,6 +628,8 @@ func (s *syncCopyMove) run() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.trackRenames {
|
if s.trackRenames {
|
||||||
|
// Build the map of the remaining dstFiles by hash
|
||||||
|
s.makeRenameMap(renameCheck)
|
||||||
// Attempt renames for all the files which don't have a matching dst
|
// Attempt renames for all the files which don't have a matching dst
|
||||||
for _, src := range renameCheck {
|
for _, src := range renameCheck {
|
||||||
s.toBeRenamed <- ObjectPair{src, nil}
|
s.toBeRenamed <- ObjectPair{src, nil}
|
||||||
|
|
Loading…
Reference in a new issue