2014-03-28 17:56:04 +00:00
// Generic operations on filesystems and objects
package fs
import (
"fmt"
2014-08-01 16:58:39 +00:00
"io"
2015-03-01 12:38:31 +00:00
"mime"
"path"
2016-01-23 20:16:47 +00:00
"strings"
2014-03-28 17:56:04 +00:00
"sync"
2015-10-02 18:48:48 +00:00
"sync/atomic"
2015-06-03 14:08:27 +00:00
"time"
2016-01-23 20:16:47 +00:00
"golang.org/x/text/unicode/norm"
2014-03-28 17:56:04 +00:00
)
2015-09-22 17:47:16 +00:00
// CalculateModifyWindow works out modify window for Fses passed in -
// sets Config.ModifyWindow
2014-03-28 17:56:04 +00:00
//
// This is the largest modify window of all the fses in use, and the
// user configured value
func CalculateModifyWindow ( fs ... Fs ) {
for _ , f := range fs {
if f != nil {
precision := f . Precision ( )
if precision > Config . ModifyWindow {
Config . ModifyWindow = precision
}
2015-08-20 19:48:58 +00:00
if precision == ModTimeNotSupported {
Debug ( f , "Modify window not supported" )
return
}
2014-03-28 17:56:04 +00:00
}
}
2015-08-20 19:48:58 +00:00
Debug ( fs [ 0 ] , "Modify window is %s" , Config . ModifyWindow )
2014-03-28 17:56:04 +00:00
}
2016-01-11 12:39:33 +00:00
// HashEquals reports whether src and dst should be treated as the
// same hash.  An empty string means "unknown", so if either side is
// empty the result is true; otherwise the strings must match exactly.
func HashEquals(src, dst string) bool {
	return src == "" || dst == "" || src == dst
}
2016-01-11 12:39:33 +00:00
// CheckHashes checks the two files to see if they have common
// known hash types and compares them
2014-03-28 17:56:04 +00:00
//
2016-01-24 18:06:57 +00:00
// Returns
2015-08-20 19:48:58 +00:00
//
2016-01-24 18:06:57 +00:00
// equal - which is equality of the hashes
//
// hash - the HashType. This is HashNone if either of the hashes were
// unset or a compatible hash couldn't be found.
//
// err - may return an error which will already have been logged
2014-03-28 17:56:04 +00:00
//
2015-08-20 19:48:58 +00:00
// If an error is returned it will return equal as false
2016-01-24 18:06:57 +00:00
func CheckHashes ( src , dst Object ) ( equal bool , hash HashType , err error ) {
2016-01-11 12:39:33 +00:00
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 18:06:57 +00:00
// Debug(nil, "Shared hashes: %v", common)
2016-01-11 12:39:33 +00:00
if common . Count ( ) == 0 {
2016-01-24 18:06:57 +00:00
return true , HashNone , nil
2016-01-11 12:39:33 +00:00
}
2016-01-24 18:06:57 +00:00
hash = common . GetOne ( )
srcHash , err := src . Hash ( hash )
2014-03-28 17:56:04 +00:00
if err != nil {
Stats . Error ( )
2016-01-11 12:39:33 +00:00
ErrorLog ( src , "Failed to calculate src hash: %s" , err )
2016-01-24 18:06:57 +00:00
return false , hash , err
2015-08-20 19:48:58 +00:00
}
2016-01-11 12:39:33 +00:00
if srcHash == "" {
2016-01-24 18:06:57 +00:00
return true , HashNone , nil
2014-03-28 17:56:04 +00:00
}
2016-01-24 18:06:57 +00:00
dstHash , err := dst . Hash ( hash )
2014-03-28 17:56:04 +00:00
if err != nil {
Stats . Error ( )
2016-01-11 12:39:33 +00:00
ErrorLog ( dst , "Failed to calculate dst hash: %s" , err )
2016-01-24 18:06:57 +00:00
return false , hash , err
2015-08-20 19:48:58 +00:00
}
2016-01-11 12:39:33 +00:00
if dstHash == "" {
2016-01-24 18:06:57 +00:00
return true , HashNone , nil
2014-03-28 17:56:04 +00:00
}
2016-01-24 18:06:57 +00:00
return srcHash == dstHash , hash , nil
2014-03-28 17:56:04 +00:00
}
2015-09-22 17:47:16 +00:00
// Equal checks to see if the src and dst objects are equal by looking at
2016-01-11 12:39:33 +00:00
// size, mtime and hash
2014-03-28 17:56:04 +00:00
//
// If the src and dst size are different then it is considered to be
2015-06-06 07:38:45 +00:00
// not equal. If --size-only is in effect then this is the only check
// that is done.
2014-03-28 17:56:04 +00:00
//
// If the size is the same and the mtime is the same then it is
2015-06-06 07:38:45 +00:00
// considered to be equal. This check is skipped if using --checksum.
2014-03-28 17:56:04 +00:00
//
2015-06-06 07:38:45 +00:00
// If the size is the same and mtime is different, unreadable or
2016-01-11 12:39:33 +00:00
// --checksum is set and the hash is the same then the file is
2015-06-06 07:38:45 +00:00
// considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
2014-03-28 17:56:04 +00:00
//
// Otherwise the file is considered to be not equal including if there
// were errors reading info.
func Equal ( src , dst Object ) bool {
if src . Size ( ) != dst . Size ( ) {
Debug ( src , "Sizes differ" )
return false
}
2015-06-06 07:38:45 +00:00
if Config . SizeOnly {
Debug ( src , "Sizes identical" )
return true
}
2014-03-28 17:56:04 +00:00
2015-06-03 14:08:27 +00:00
var srcModTime time . Time
if ! Config . CheckSum {
2015-08-20 19:48:58 +00:00
if Config . ModifyWindow == ModTimeNotSupported {
Debug ( src , "Sizes identical" )
return true
}
2015-06-03 14:08:27 +00:00
// Size the same so check the mtime
srcModTime = src . ModTime ( )
dstModTime := dst . ModTime ( )
dt := dstModTime . Sub ( srcModTime )
ModifyWindow := Config . ModifyWindow
if dt >= ModifyWindow || dt <= - ModifyWindow {
Debug ( src , "Modification times differ by %s: %v, %v" , dt , srcModTime , dstModTime )
} else {
Debug ( src , "Size and modification time the same (differ by %s, within tolerance %s)" , dt , ModifyWindow )
return true
}
2014-03-28 17:56:04 +00:00
}
// mtime is unreadable or different but size is the same so
2016-01-11 12:39:33 +00:00
// check the hash
2016-01-24 18:06:57 +00:00
same , hash , _ := CheckHashes ( src , dst )
2014-03-28 17:56:04 +00:00
if ! same {
2016-01-11 12:39:33 +00:00
Debug ( src , "Hash differ" )
2014-03-28 17:56:04 +00:00
return false
}
2015-06-03 14:08:27 +00:00
if ! Config . CheckSum {
2016-01-11 12:39:33 +00:00
// Size and hash the same but mtime different so update the
2015-06-03 14:08:27 +00:00
// mtime of the dst object here
dst . SetModTime ( srcModTime )
}
2014-03-28 17:56:04 +00:00
2016-01-24 18:06:57 +00:00
if hash == HashNone {
2015-08-20 19:48:58 +00:00
Debug ( src , "Size of src and dst objects identical" )
} else {
2016-01-24 18:06:57 +00:00
Debug ( src , "Size and %v of src and dst objects identical" , hash )
2015-08-20 19:48:58 +00:00
}
2014-03-28 17:56:04 +00:00
return true
}
2015-09-22 17:47:16 +00:00
// MimeType returns a guess at the mime type from the extension
2015-03-01 12:38:31 +00:00
func MimeType ( o Object ) string {
mimeType := mime . TypeByExtension ( path . Ext ( o . Remote ( ) ) )
if mimeType == "" {
mimeType = "application/octet-stream"
}
return mimeType
}
2014-07-15 18:27:05 +00:00
// Used to remove a failed copy
2015-03-14 17:54:41 +00:00
//
// Returns whether the file was succesfully removed or not
func removeFailedCopy ( dst Object ) bool {
if dst == nil {
return false
}
Debug ( dst , "Removing failed copy" )
removeErr := dst . Remove ( )
if removeErr != nil {
Debug ( dst , "Failed to remove failed copy: %s" , removeErr )
return false
2014-07-15 18:27:05 +00:00
}
2015-03-14 17:54:41 +00:00
return true
2014-07-15 18:27:05 +00:00
}
2014-04-18 16:04:21 +00:00
// Copy src object to dst or f if nil
//
// If dst is nil then the object must not exist already. If you do
// call Copy() with dst nil on a pre-existing file then some filing
// systems (eg Drive) may duplicate the file.
func Copy ( f Fs , dst , src Object ) {
2015-02-02 17:29:08 +00:00
const maxTries = 10
tries := 0
doUpdate := dst != nil
2015-02-14 18:48:08 +00:00
var err , inErr error
2015-02-02 17:29:08 +00:00
tryAgain :
2015-02-14 18:48:08 +00:00
// Try server side copy first - if has optional interface and
// is same underlying remote
actionTaken := "Copied (server side copy)"
if fCopy , ok := f . ( Copier ) ; ok && src . Fs ( ) . Name ( ) == f . Name ( ) {
var newDst Object
newDst , err = fCopy . Copy ( src , src . Remote ( ) )
if err == nil {
dst = newDst
}
} else {
err = ErrorCantCopy
2014-03-28 17:56:04 +00:00
}
2015-02-14 18:48:08 +00:00
// If can't server side copy, do it manually
if err == ErrorCantCopy {
var in0 io . ReadCloser
in0 , err = src . Open ( )
if err != nil {
Stats . Error ( )
ErrorLog ( src , "Failed to open: %s" , err )
return
}
2015-10-06 14:35:22 +00:00
// On big files add a buffer
if src . Size ( ) > 10 << 20 {
in0 , _ = newAsyncReader ( in0 , 4 , 4 << 20 )
}
2015-09-15 14:46:06 +00:00
in := NewAccount ( in0 , src ) // account the transfer
2014-03-28 17:56:04 +00:00
2015-02-14 18:48:08 +00:00
if doUpdate {
actionTaken = "Copied (updated existing)"
err = dst . Update ( in , src . ModTime ( ) , src . Size ( ) )
} else {
actionTaken = "Copied (new)"
dst , err = f . Put ( in , src . Remote ( ) , src . ModTime ( ) , src . Size ( ) )
}
inErr = in . Close ( )
2014-04-18 16:04:21 +00:00
}
2015-02-02 17:29:08 +00:00
// Retry if err returned a retry error
if r , ok := err . ( Retry ) ; ok && r . Retry ( ) && tries < maxTries {
tries ++
Log ( src , "Received error: %v - retrying %d/%d" , err , tries , maxTries )
2015-03-14 17:54:41 +00:00
if removeFailedCopy ( dst ) {
// If we removed dst, then nil it out and note we are not updating
dst = nil
doUpdate = false
}
2015-02-02 17:29:08 +00:00
goto tryAgain
}
2014-03-28 17:56:04 +00:00
if err == nil {
err = inErr
}
if err != nil {
Stats . Error ( )
2015-08-08 19:10:31 +00:00
ErrorLog ( src , "Failed to copy: %s" , err )
2014-07-15 18:27:05 +00:00
removeFailedCopy ( dst )
2014-03-28 17:56:04 +00:00
return
}
2014-07-15 18:27:05 +00:00
2014-07-19 11:38:58 +00:00
// Verify sizes are the same after transfer
if src . Size ( ) != dst . Size ( ) {
Stats . Error ( )
err = fmt . Errorf ( "Corrupted on transfer: sizes differ %d vs %d" , src . Size ( ) , dst . Size ( ) )
2015-08-08 19:10:31 +00:00
ErrorLog ( dst , "%s" , err )
2014-07-19 11:38:58 +00:00
removeFailedCopy ( dst )
return
}
2016-01-11 12:39:33 +00:00
// Verify hashes are the same after transfer - ignoring blank hashes
// TODO(klauspost): This could be extended, so we always create a hash type matching
// the destination, and calculate it while sending.
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 18:06:57 +00:00
// Debug(src, "common hashes: %v", common)
2016-01-11 12:39:33 +00:00
if ! Config . SizeOnly && common . Count ( ) > 0 {
// Get common hash type
hashType := common . GetOne ( )
srcSum , err := src . Hash ( hashType )
if err != nil {
2014-07-15 18:27:05 +00:00
Stats . Error ( )
2016-01-11 12:39:33 +00:00
ErrorLog ( src , "Failed to read src hash: %s" , err )
} else if srcSum != "" {
dstSum , err := dst . Hash ( hashType )
if err != nil {
2015-06-09 12:18:40 +00:00
Stats . Error ( )
2016-01-11 12:39:33 +00:00
ErrorLog ( dst , "Failed to read hash: %s" , err )
} else if ! HashEquals ( srcSum , dstSum ) {
2015-06-09 12:18:40 +00:00
Stats . Error ( )
2016-01-11 12:39:33 +00:00
err = fmt . Errorf ( "Corrupted on transfer: %v hash differ %q vs %q" , hashType , srcSum , dstSum )
2015-08-08 19:10:31 +00:00
ErrorLog ( dst , "%s" , err )
2015-06-09 12:18:40 +00:00
removeFailedCopy ( dst )
return
}
2014-07-15 18:27:05 +00:00
}
}
2014-04-18 16:46:57 +00:00
Debug ( src , actionTaken )
2014-03-28 17:56:04 +00:00
}
// Check to see if src needs to be copied to dst and if so puts it in out
2014-04-18 15:34:59 +00:00
func checkOne ( pair ObjectPair , out ObjectPairChan ) {
src , dst := pair . src , pair . dst
2014-03-28 17:56:04 +00:00
if dst == nil {
2014-04-18 15:34:59 +00:00
Debug ( src , "Couldn't find file - need to transfer" )
out <- pair
2014-03-28 17:56:04 +00:00
return
}
// Check to see if can store this
if ! src . Storable ( ) {
return
}
2016-01-05 10:35:36 +00:00
// If we should ignore existing files, don't transfer
if Config . IgnoreExisting {
Debug ( src , "Destination exists, skipping" )
return
}
2014-03-28 17:56:04 +00:00
// Check to see if changed or not
if Equal ( src , dst ) {
Debug ( src , "Unchanged skipping" )
return
}
2014-04-18 15:34:59 +00:00
out <- pair
2014-03-28 17:56:04 +00:00
}
2015-09-22 17:47:16 +00:00
// PairChecker reads Objects~s on in send to out if they need transferring.
2014-03-28 17:56:04 +00:00
//
2016-01-11 12:39:33 +00:00
// FIXME potentially doing lots of hashes at once
2014-04-18 15:34:59 +00:00
func PairChecker ( in ObjectPairChan , out ObjectPairChan , wg * sync . WaitGroup ) {
2014-03-28 17:56:04 +00:00
defer wg . Done ( )
for pair := range in {
src := pair . src
Stats . Checking ( src )
2014-04-18 15:34:59 +00:00
checkOne ( pair , out )
2014-03-28 17:56:04 +00:00
Stats . DoneChecking ( src )
}
}
2015-09-22 17:47:16 +00:00
// PairCopier reads Objects on in and copies them.
2015-02-14 18:48:08 +00:00
func PairCopier ( in ObjectPairChan , fdst Fs , wg * sync . WaitGroup ) {
2014-03-28 17:56:04 +00:00
defer wg . Done ( )
2014-04-18 15:34:59 +00:00
for pair := range in {
src := pair . src
2014-03-28 17:56:04 +00:00
Stats . Transferring ( src )
2014-06-26 14:33:06 +00:00
if Config . DryRun {
2016-01-31 15:53:09 +00:00
Log ( src , "Not copying as --dry-run" )
2014-06-26 14:33:06 +00:00
} else {
Copy ( fdst , pair . dst , src )
}
2014-03-28 17:56:04 +00:00
Stats . DoneTransferring ( src )
}
}
2015-09-22 17:47:16 +00:00
// PairMover reads Objects on in and moves them if possible, or copies
// them if not
2015-08-24 20:42:23 +00:00
func PairMover ( in ObjectPairChan , fdst Fs , wg * sync . WaitGroup ) {
defer wg . Done ( )
// See if we have Move available
fdstMover , haveMover := fdst . ( Mover )
for pair := range in {
src := pair . src
dst := pair . dst
Stats . Transferring ( src )
if Config . DryRun {
2016-01-31 15:53:09 +00:00
Log ( src , "Not moving as --dry-run" )
2015-08-24 20:42:23 +00:00
} else if haveMover {
// Delete destination if it exists
if pair . dst != nil {
err := dst . Remove ( )
if err != nil {
Stats . Error ( )
2015-09-22 06:31:12 +00:00
ErrorLog ( dst , "Couldn't delete: %v" , err )
2015-08-24 20:42:23 +00:00
}
}
2015-09-22 06:31:12 +00:00
_ , err := fdstMover . Move ( src , src . Remote ( ) )
if err != nil {
Stats . Error ( )
ErrorLog ( dst , "Couldn't move: %v" , err )
} else {
Debug ( src , "Moved" )
}
2015-08-24 20:42:23 +00:00
} else {
Copy ( fdst , pair . dst , src )
}
Stats . DoneTransferring ( src )
}
}
2015-09-22 17:47:16 +00:00
// DeleteFiles removes all the files passed in the channel
func DeleteFiles ( toBeDeleted ObjectsChan ) {
2014-03-28 17:56:04 +00:00
var wg sync . WaitGroup
wg . Add ( Config . Transfers )
for i := 0 ; i < Config . Transfers ; i ++ {
go func ( ) {
defer wg . Done ( )
2015-09-22 17:47:16 +00:00
for dst := range toBeDeleted {
2014-03-28 17:56:04 +00:00
if Config . DryRun {
2016-01-31 15:53:09 +00:00
Log ( dst , "Not deleting as --dry-run" )
2014-03-28 17:56:04 +00:00
} else {
Stats . Checking ( dst )
err := dst . Remove ( )
Stats . DoneChecking ( dst )
if err != nil {
Stats . Error ( )
2015-08-08 19:10:31 +00:00
ErrorLog ( dst , "Couldn't delete: %s" , err )
2014-03-28 17:56:04 +00:00
} else {
Debug ( dst , "Deleted" )
}
}
}
} ( )
}
2014-07-22 22:03:14 +00:00
Log ( nil , "Waiting for deletions to finish" )
2014-03-28 17:56:04 +00:00
wg . Wait ( )
}
2016-01-12 13:33:03 +00:00
// Read a map of Object.Remote to Object for the given Fs.
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
func readFilesMap ( fs Fs , includeAll bool ) map [ string ] Object {
2015-03-14 17:11:24 +00:00
files := make ( map [ string ] Object )
2016-01-23 20:16:47 +00:00
normalised := make ( map [ string ] struct { } )
2015-03-14 17:11:24 +00:00
for o := range fs . List ( ) {
remote := o . Remote ( )
2016-01-23 20:16:47 +00:00
normalisedRemote := strings . ToLower ( norm . NFC . String ( remote ) )
2015-03-14 17:11:24 +00:00
if _ , ok := files [ remote ] ; ! ok {
2015-11-12 11:46:04 +00:00
// Make sure we don't delete excluded files if not required
2016-01-12 13:33:03 +00:00
if includeAll || Config . Filter . IncludeObject ( o ) {
2015-11-12 11:46:04 +00:00
files [ remote ] = o
2016-01-23 20:16:47 +00:00
if _ , ok := normalised [ normalisedRemote ] ; ok {
Log ( o , "Warning: File found with same name but different case on %v" , o . Fs ( ) )
}
2015-11-12 11:46:04 +00:00
} else {
Debug ( o , "Excluded from sync (and deletion)" )
}
2015-03-14 17:11:24 +00:00
} else {
Log ( o , "Duplicate file detected" )
}
2016-01-23 20:16:47 +00:00
normalised [ normalisedRemote ] = struct { } { }
2015-03-14 17:11:24 +00:00
}
return files
}
2015-09-22 17:47:16 +00:00
// Same returns true if fdst and fsrc point to the same underlying Fs
func Same ( fdst , fsrc Fs ) bool {
2015-09-01 19:50:28 +00:00
return fdst . Name ( ) == fsrc . Name ( ) && fdst . Root ( ) == fsrc . Root ( )
}
2014-03-28 17:56:04 +00:00
// Syncs fsrc into fdst
2014-04-18 15:34:59 +00:00
//
// If Delete is true then it deletes any files in fdst that aren't in fsrc
2015-08-24 20:42:23 +00:00
//
// If DoMove is true then files will be moved instead of copied
func syncCopyMove ( fdst , fsrc Fs , Delete bool , DoMove bool ) error {
2015-09-22 17:47:16 +00:00
if Same ( fdst , fsrc ) {
2015-09-01 19:50:28 +00:00
ErrorLog ( fdst , "Nothing to do as source and destination are the same" )
return nil
}
2014-03-28 17:56:04 +00:00
err := fdst . Mkdir ( )
if err != nil {
Stats . Error ( )
return err
}
Log ( fdst , "Building file list" )
2016-01-12 13:33:03 +00:00
// Read the files of both source and destination
var listWg sync . WaitGroup
listWg . Add ( 2 )
var dstFiles map [ string ] Object
var srcFiles map [ string ] Object
var srcObjects = make ( ObjectsChan , Config . Transfers )
2016-01-23 18:26:01 +00:00
// Read dst files including excluded files if DeleteExcluded is set
2016-01-12 13:33:03 +00:00
go func ( ) {
dstFiles = readFilesMap ( fdst , Config . Filter . DeleteExcluded )
listWg . Done ( )
} ( )
2016-01-23 18:26:01 +00:00
// Read src file not including excluded files
2016-01-12 13:33:03 +00:00
go func ( ) {
srcFiles = readFilesMap ( fsrc , false )
listWg . Done ( )
for _ , v := range srcFiles {
srcObjects <- v
}
close ( srcObjects )
} ( )
startDeletion := make ( chan struct { } , 0 )
// Delete files if asked
var delWg sync . WaitGroup
delWg . Add ( 1 )
go func ( ) {
if ! Delete {
return
}
defer func ( ) {
Debug ( fdst , "Deletion finished" )
delWg . Done ( )
} ( )
_ = <- startDeletion
Debug ( fdst , "Starting deletion" )
if Stats . Errored ( ) {
ErrorLog ( fdst , "Not deleting files as there were IO errors" )
return
}
// Delete the spare files
toDelete := make ( ObjectsChan , Config . Transfers )
go func ( ) {
for key , fs := range dstFiles {
_ , exists := srcFiles [ key ]
if ! exists {
toDelete <- fs
}
}
close ( toDelete )
} ( )
DeleteFiles ( toDelete )
} ( )
// Wait for all files to be read
listWg . Wait ( )
// Start deleting, unless we must delete after transfer
if Delete && ! Config . DeleteAfter {
close ( startDeletion )
}
// If deletes must finish before starting transfers, we must wait now.
if Delete && Config . DeleteBefore {
Log ( fdst , "Waiting for deletes to finish (before)" )
delWg . Wait ( )
}
2014-03-28 17:56:04 +00:00
// Read source files checking them off against dest files
2015-09-22 17:47:16 +00:00
toBeChecked := make ( ObjectPairChan , Config . Transfers )
toBeUploaded := make ( ObjectPairChan , Config . Transfers )
2014-03-28 17:56:04 +00:00
var checkerWg sync . WaitGroup
checkerWg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
2015-09-22 17:47:16 +00:00
go PairChecker ( toBeChecked , toBeUploaded , & checkerWg )
2014-03-28 17:56:04 +00:00
}
var copierWg sync . WaitGroup
copierWg . Add ( Config . Transfers )
for i := 0 ; i < Config . Transfers ; i ++ {
2015-08-24 20:42:23 +00:00
if DoMove {
2015-09-22 17:47:16 +00:00
go PairMover ( toBeUploaded , fdst , & copierWg )
2015-08-24 20:42:23 +00:00
} else {
2015-09-22 17:47:16 +00:00
go PairCopier ( toBeUploaded , fdst , & copierWg )
2015-08-24 20:42:23 +00:00
}
2014-03-28 17:56:04 +00:00
}
go func ( ) {
2016-01-12 13:33:03 +00:00
for src := range srcObjects {
2016-01-23 18:26:01 +00:00
remote := src . Remote ( )
if dst , dstFound := dstFiles [ remote ] ; dstFound {
toBeChecked <- ObjectPair { src , dst }
2014-03-28 17:56:04 +00:00
} else {
2016-01-23 18:26:01 +00:00
// No need to check since doesn't exist
toBeUploaded <- ObjectPair { src , nil }
2014-03-28 17:56:04 +00:00
}
}
2015-09-22 17:47:16 +00:00
close ( toBeChecked )
2014-03-28 17:56:04 +00:00
} ( )
Log ( fdst , "Waiting for checks to finish" )
checkerWg . Wait ( )
2015-09-22 17:47:16 +00:00
close ( toBeUploaded )
2014-03-28 17:56:04 +00:00
Log ( fdst , "Waiting for transfers to finish" )
copierWg . Wait ( )
2016-01-12 13:33:03 +00:00
// If deleting after, start deletion now
if Delete && Config . DeleteAfter {
close ( startDeletion )
}
// Unless we have already waited, wait for deletion to finish.
if Delete && ! Config . DeleteBefore {
Log ( fdst , "Waiting for deletes to finish (during+after)" )
delWg . Wait ( )
2014-04-18 15:34:59 +00:00
}
2016-01-12 13:33:03 +00:00
2014-03-28 17:56:04 +00:00
return nil
}
2015-09-22 17:47:16 +00:00
// Sync fsrc into fdst
2015-08-24 20:42:23 +00:00
func Sync ( fdst , fsrc Fs ) error {
return syncCopyMove ( fdst , fsrc , true , false )
}
2015-09-22 17:47:16 +00:00
// CopyDir copies fsrc into fdst
2015-08-24 20:42:23 +00:00
func CopyDir ( fdst , fsrc Fs ) error {
return syncCopyMove ( fdst , fsrc , false , false )
}
2015-09-22 17:47:16 +00:00
// MoveDir moves fsrc into fdst
2015-08-24 20:42:23 +00:00
func MoveDir ( fdst , fsrc Fs ) error {
2015-09-22 17:47:16 +00:00
if Same ( fdst , fsrc ) {
2015-09-01 19:50:28 +00:00
ErrorLog ( fdst , "Nothing to do as source and destination are the same" )
return nil
}
2015-08-24 20:42:23 +00:00
// First attempt to use DirMover
if fdstDirMover , ok := fdst . ( DirMover ) ; ok && fsrc . Name ( ) == fdst . Name ( ) {
err := fdstDirMover . DirMove ( fsrc )
Debug ( fdst , "Using server side directory move" )
switch err {
case ErrorCantDirMove , ErrorDirExists :
Debug ( fdst , "Server side directory move failed - fallback to copy/delete: %v" , err )
case nil :
Debug ( fdst , "Server side directory move succeeded" )
return nil
default :
Stats . Error ( )
ErrorLog ( fdst , "Server side directory move failed: %v" , err )
return err
}
}
// Now move the files
err := syncCopyMove ( fdst , fsrc , false , true )
if err != nil || Stats . Errored ( ) {
ErrorLog ( fdst , "Not deleting files as there were IO errors" )
return err
}
return Purge ( fsrc )
}
2016-01-11 12:39:33 +00:00
// Check the files in fsrc and fdst according to Size and hash
2014-03-28 17:56:04 +00:00
func Check ( fdst , fsrc Fs ) error {
2016-01-17 10:08:28 +00:00
differences := int32 ( 0 )
2015-11-24 16:54:12 +00:00
var (
wg sync . WaitGroup
dstFiles , srcFiles map [ string ] Object
)
2014-03-28 17:56:04 +00:00
2015-11-24 16:54:12 +00:00
wg . Add ( 2 )
go func ( ) {
defer wg . Done ( )
// Read the destination files
Log ( fdst , "Building file list" )
2016-01-12 13:33:03 +00:00
dstFiles = readFilesMap ( fdst , false )
2015-11-24 16:54:12 +00:00
Debug ( fdst , "Done building file list" )
} ( )
2014-03-28 17:56:04 +00:00
2015-11-24 16:54:12 +00:00
go func ( ) {
defer wg . Done ( )
// Read the source files
Log ( fsrc , "Building file list" )
2016-01-12 13:33:03 +00:00
srcFiles = readFilesMap ( fsrc , false )
2015-11-24 16:54:12 +00:00
Debug ( fdst , "Done building file list" )
} ( )
wg . Wait ( )
// FIXME could do this as it goes along and make it use less
// memory.
2015-03-14 17:11:24 +00:00
// Move all the common files into commonFiles and delete then
// from srcFiles and dstFiles
2014-03-28 17:56:04 +00:00
commonFiles := make ( map [ string ] [ ] Object )
2015-03-14 17:11:24 +00:00
for remote , src := range srcFiles {
2014-03-28 17:56:04 +00:00
if dst , ok := dstFiles [ remote ] ; ok {
commonFiles [ remote ] = [ ] Object { dst , src }
2015-03-14 17:11:24 +00:00
delete ( srcFiles , remote )
2014-03-28 17:56:04 +00:00
delete ( dstFiles , remote )
}
}
Log ( fdst , "%d files not in %v" , len ( dstFiles ) , fsrc )
for _ , dst := range dstFiles {
Stats . Error ( )
2015-08-08 19:10:31 +00:00
ErrorLog ( dst , "File not in %v" , fsrc )
2016-01-17 10:08:28 +00:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 17:56:04 +00:00
}
Log ( fsrc , "%d files not in %s" , len ( srcFiles ) , fdst )
for _ , src := range srcFiles {
Stats . Error ( )
2015-08-08 19:10:31 +00:00
ErrorLog ( src , "File not in %v" , fdst )
2016-01-17 10:08:28 +00:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 17:56:04 +00:00
}
checks := make ( chan [ ] Object , Config . Transfers )
go func ( ) {
for _ , check := range commonFiles {
checks <- check
}
close ( checks )
} ( )
var checkerWg sync . WaitGroup
checkerWg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer checkerWg . Done ( )
for check := range checks {
dst , src := check [ 0 ] , check [ 1 ]
Stats . Checking ( src )
if src . Size ( ) != dst . Size ( ) {
Stats . DoneChecking ( src )
Stats . Error ( )
2015-08-08 19:10:31 +00:00
ErrorLog ( src , "Sizes differ" )
2016-01-17 10:08:28 +00:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 17:56:04 +00:00
continue
}
2016-01-11 12:39:33 +00:00
same , _ , err := CheckHashes ( src , dst )
2014-03-28 17:56:04 +00:00
Stats . DoneChecking ( src )
if err != nil {
continue
}
if ! same {
Stats . Error ( )
2016-01-17 10:08:28 +00:00
atomic . AddInt32 ( & differences , 1 )
2015-08-08 19:10:31 +00:00
ErrorLog ( src , "Md5sums differ" )
2014-03-28 17:56:04 +00:00
}
Debug ( src , "OK" )
}
} ( )
}
Log ( fdst , "Waiting for checks to finish" )
checkerWg . Wait ( )
Log ( fdst , "%d differences found" , Stats . GetErrors ( ) )
2016-01-17 10:08:28 +00:00
if differences > 0 {
return fmt . Errorf ( "%d differences found" , differences )
2014-03-28 17:56:04 +00:00
}
return nil
}
2015-09-22 17:47:16 +00:00
// ListFn lists the Fs to the supplied function
2014-03-28 17:56:04 +00:00
//
// Lists in parallel which may get them out of order
2014-07-12 11:09:20 +00:00
func ListFn ( f Fs , fn func ( Object ) ) error {
2014-03-28 17:56:04 +00:00
in := f . List ( )
var wg sync . WaitGroup
wg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer wg . Done ( )
for o := range in {
2015-11-24 16:54:12 +00:00
if Config . Filter . IncludeObject ( o ) {
fn ( o )
}
2014-03-28 17:56:04 +00:00
}
} ( )
}
wg . Wait ( )
return nil
}
2015-02-28 15:30:40 +00:00
// outMutex serialises the writes made through syncFprintf
var outMutex sync.Mutex

// syncFprintf is fmt.Fprintf performed while holding outMutex, so
// output from concurrent callers isn't interleaved.
//
// Errors from Fprintf are ignored.
func syncFprintf(w io.Writer, format string, a ...interface{}) {
	outMutex.Lock()
	defer outMutex.Unlock()
	// Deliberately best effort - there is nothing to do on failure
	_, _ = fmt.Fprintf(w, format, a...)
}
2015-09-15 14:46:06 +00:00
// List the Fs to the supplied writer
2014-07-12 11:09:20 +00:00
//
2015-11-24 16:54:12 +00:00
// Shows size and path - obeys includes and excludes
2014-07-12 11:09:20 +00:00
//
// Lists in parallel which may get them out of order
2014-08-01 16:58:39 +00:00
func List ( f Fs , w io . Writer ) error {
2014-07-12 11:09:20 +00:00
return ListFn ( f , func ( o Object ) {
2015-02-28 15:30:40 +00:00
syncFprintf ( w , "%9d %s\n" , o . Size ( ) , o . Remote ( ) )
2014-07-12 11:09:20 +00:00
} )
}
2015-09-22 17:47:16 +00:00
// ListLong lists the Fs to the supplied writer
2014-07-12 11:09:20 +00:00
//
2015-11-24 16:54:12 +00:00
// Shows size, mod time and path - obeys includes and excludes
2014-07-12 11:09:20 +00:00
//
// Lists in parallel which may get them out of order
2014-08-01 16:58:39 +00:00
func ListLong ( f Fs , w io . Writer ) error {
2014-07-12 11:09:20 +00:00
return ListFn ( f , func ( o Object ) {
Stats . Checking ( o )
modTime := o . ModTime ( )
Stats . DoneChecking ( o )
2015-09-22 18:04:12 +00:00
syncFprintf ( w , "%9d %s %s\n" , o . Size ( ) , modTime . Local ( ) . Format ( "2006-01-02 15:04:05.000000000" ) , o . Remote ( ) )
2014-07-12 11:09:20 +00:00
} )
}
2015-09-22 17:47:16 +00:00
// Md5sum list the Fs to the supplied writer
2014-07-12 11:09:20 +00:00
//
2015-11-24 16:54:12 +00:00
// Produces the same output as the md5sum command - obeys includes and
// excludes
2014-07-12 11:09:20 +00:00
//
// Lists in parallel which may get them out of order
2014-08-01 16:58:39 +00:00
func Md5sum ( f Fs , w io . Writer ) error {
2016-01-11 12:39:33 +00:00
return hashLister ( HashMD5 , f , w )
}
// Sha1sum lists the Fs to the supplied writer, one line per object
// with its SHA1 hash and path.
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
func Sha1sum(f Fs, w io.Writer) error {
	const hashType = HashSHA1
	return hashLister(hashType, f, w)
}
func hashLister ( ht HashType , f Fs , w io . Writer ) error {
2014-07-12 11:09:20 +00:00
return ListFn ( f , func ( o Object ) {
Stats . Checking ( o )
2016-01-11 12:39:33 +00:00
sum , err := o . Hash ( ht )
2014-07-12 11:09:20 +00:00
Stats . DoneChecking ( o )
2016-01-11 12:39:33 +00:00
if err == ErrHashUnsupported {
sum = "UNSUPPORTED"
} else if err != nil {
Debug ( o , "Failed to read %v: %v" , ht , err )
sum = "ERROR"
2014-07-12 11:09:20 +00:00
}
2016-01-17 13:56:00 +00:00
syncFprintf ( w , "%*s %s\n" , HashWidth [ ht ] , sum , o . Remote ( ) )
2014-07-12 11:09:20 +00:00
} )
}
2015-10-02 18:48:48 +00:00
// Count counts the objects and their sizes in the Fs
2015-11-24 16:54:12 +00:00
//
// Obeys includes and excludes
2015-10-02 18:48:48 +00:00
func Count ( f Fs ) ( objects int64 , size int64 , err error ) {
err = ListFn ( f , func ( o Object ) {
atomic . AddInt64 ( & objects , 1 )
atomic . AddInt64 ( & size , o . Size ( ) )
} )
return
}
2015-09-22 17:47:16 +00:00
// ListDir lists the directories/buckets/containers in the Fs to the supplied writer
2014-08-01 16:58:39 +00:00
func ListDir ( f Fs , w io . Writer ) error {
2014-03-28 17:56:04 +00:00
for dir := range f . ListDir ( ) {
2015-02-28 15:30:40 +00:00
syncFprintf ( w , "%12d %13s %9d %s\n" , dir . Bytes , dir . When . Format ( "2006-01-02 15:04:05" ) , dir . Count , dir . Name )
2014-03-28 17:56:04 +00:00
}
return nil
}
2015-09-22 17:47:16 +00:00
// Mkdir makes a destination directory or container
2014-03-28 17:56:04 +00:00
func Mkdir ( f Fs ) error {
err := f . Mkdir ( )
if err != nil {
Stats . Error ( )
return err
}
return nil
}
2015-09-22 17:47:16 +00:00
// Rmdir removes a container but not if not empty
2014-03-28 17:56:04 +00:00
func Rmdir ( f Fs ) error {
if Config . DryRun {
Log ( f , "Not deleting as dry run is set" )
} else {
err := f . Rmdir ( )
if err != nil {
Stats . Error ( )
return err
}
}
return nil
}
2015-09-22 17:47:16 +00:00
// Purge removes a container and all of its contents
2014-03-28 17:56:04 +00:00
//
// FIXME doesn't delete local directories
func Purge ( f Fs ) error {
2015-11-08 14:16:00 +00:00
doFallbackPurge := true
2014-07-25 17:19:49 +00:00
var err error
2014-03-28 17:56:04 +00:00
if purger , ok := f . ( Purger ) ; ok {
2015-11-08 14:16:00 +00:00
doFallbackPurge = false
2014-07-13 09:45:13 +00:00
if Config . DryRun {
2016-01-31 15:53:09 +00:00
Log ( f , "Not purging as --dry-run set" )
2014-07-13 09:45:13 +00:00
} else {
2014-07-25 17:19:49 +00:00
err = purger . Purge ( )
2015-11-08 14:16:00 +00:00
if err == ErrorCantPurge {
doFallbackPurge = true
}
2014-03-28 17:56:04 +00:00
}
2015-11-08 14:16:00 +00:00
}
if doFallbackPurge {
2014-07-25 17:19:49 +00:00
// DeleteFiles and Rmdir observe --dry-run
2014-03-28 17:56:04 +00:00
DeleteFiles ( f . List ( ) )
2014-07-25 17:19:49 +00:00
err = Rmdir ( f )
}
if err != nil {
Stats . Error ( )
return err
2014-03-28 17:56:04 +00:00
}
return nil
}
2015-12-02 22:25:32 +00:00
// Delete removes all the contents of a container. Unlike Purge, it
// obeys includes and excludes.
func Delete ( f Fs ) error {
wg := new ( sync . WaitGroup )
delete := make ( ObjectsChan , Config . Transfers )
wg . Add ( 1 )
go func ( ) {
defer wg . Done ( )
DeleteFiles ( delete )
} ( )
err := ListFn ( f , func ( o Object ) {
delete <- o
} )
close ( delete )
2016-02-15 16:43:59 +00:00
wg . Wait ( )
2015-12-02 22:25:32 +00:00
return err
}
2016-01-31 12:58:41 +00:00
// Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names.
//
// Objects are grouped by remote name.  For each name listed more than
// once the copies are printed and the user picks one of: skip, keep
// one copy (the others are removed), or rename every copy by adding
// -N before the extension.  Failures to delete or rename are logged
// and the rest carry on.
//
// Returns an error if f can't Move files, otherwise nil.
func Deduplicate(f Fs) error {
	mover, ok := f.(Mover)
	if !ok {
		return fmt.Errorf("%v can't Move files", f)
	}
	Log(f, "Looking for duplicates")

	// Group the listing by remote name
	byRemote := make(map[string][]Object)
	for o := range f.List() {
		name := o.Remote()
		byRemote[name] = append(byRemote[name], o)
	}

	for remote, objs := range byRemote {
		if len(objs) <= 1 {
			continue
		}

		fmt.Printf("%s: Found %d duplicates\n", remote, len(objs))
		for i, o := range objs {
			md5sum, err := o.Hash(HashMD5)
			if err != nil {
				// Show the error where the hash would have been
				md5sum = err.Error()
			}
			fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum)
		}

		choice := Command([]string{"sSkip and do nothing", "kKeep just one (choose which in next step)", "rRename all to be different (by changing file.jpg to file-1.jpg)"})
		switch choice {
		case 's':
			// Leave the duplicates alone
		case 'k':
			keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
			deleted := 0
			for i, o := range objs {
				if i+1 == keep {
					continue
				}
				if err := o.Remove(); err != nil {
					ErrorLog(o, "Failed to delete: %v", err)
					continue
				}
				deleted++
			}
			fmt.Printf("%s: Deleted %d extra copies\n", remote, deleted)
		case 'r':
			ext := path.Ext(remote)
			base := remote[:len(remote)-len(ext)]
			for i, o := range objs {
				newName := fmt.Sprintf("%s-%d%s", base, i+1, ext)
				newObj, err := mover.Move(o, newName)
				if err != nil {
					ErrorLog(o, "Failed to rename: %v", err)
					continue
				}
				fmt.Printf("%v: renamed from: %v\n", newObj, o)
			}
		}
	}
	return nil
}