jottacloud: docs, fixes and tests for MD5 calculation

* Add docs for --jottacloud-md5-memory-limit
  * Factor out readMD5 function and add tests
  * Fix accounting
  * Make sure temp file is deleted at the start (not Windows)
This commit is contained in:
Nick Craig-Wood 2018-08-20 15:38:21 +01:00
parent ee4485a316
commit f9cf70e3aa
3 changed files with 154 additions and 51 deletions

View file

@ -17,6 +17,7 @@ import (
"github.com/ncw/rclone/backend/jottacloud/api"
"github.com/ncw/rclone/fs"
"github.com/ncw/rclone/fs/accounting"
"github.com/ncw/rclone/fs/config"
"github.com/ncw/rclone/fs/config/configmap"
"github.com/ncw/rclone/fs/config/configstruct"
@ -67,7 +68,7 @@ func init() {
}},
}, {
Name: "md5_memory_limit",
Help: "Files with a size bigger than this value will be cached on disk otherwise they just read into memory ",
Help: "Files bigger than this will be cached on disk to calculate the MD5 if required.",
Default: fs.SizeSuffix(10 * 1024 * 1024),
Advanced: true,
}},
@ -753,67 +754,86 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
return resp.Body, err
}
// Read the md5 of in returning a reader which will read the same contents
//
// The cleanup function should be called when out is finished with
// regardless of whether this function returned an error or not.
func readMD5(in io.Reader, size, threshold int64) (md5sum string, out io.Reader, cleanup func(), err error) {
// we need a MD5
md5Hasher := md5.New()
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
teeReader := io.TeeReader(in, md5Hasher)
// nothing to clean up by default
cleanup = func() {}
// don't cache small files on disk to reduce wear of the disk
if size > threshold {
var tempFile *os.File
// create the cache file
tempFile, err = ioutil.TempFile("", cachePrefix)
if err != nil {
return
}
_ = os.Remove(tempFile.Name()) // Delete the file - may not work on Windows
// clean up the file after we are done downloading
cleanup = func() {
// the file should normally already be close, but just to make sure
_ = tempFile.Close()
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
}
// copy the ENTIRE file to disc and calculate the MD5 in the process
if _, err = io.Copy(tempFile, teeReader); err != nil {
return
}
// jump to the start of the local file so we can pass it along
if _, err = tempFile.Seek(0, 0); err != nil {
return
}
// replace the already read source with a reader of our cached file
out = tempFile
} else {
// that's a small file, just read it into memory
var inData []byte
inData, err = ioutil.ReadAll(teeReader)
if err != nil {
return
}
// set the reader to our read memory block
out = bytes.NewReader(inData)
}
return hex.EncodeToString(md5Hasher.Sum(nil)), out, cleanup, nil
}
// Update the object with the contents of the io.Reader, modTime and size
//
// If existing is set then it updates the object rather than creating a new one
//
// The new object may have been created if an error is returned
func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
size := src.Size()
md5String, err := src.Hash(hash.MD5)
if err != nil || md5String == "" {
// if the source can't provide a MD5 hash we are screwed and need to calculate it ourselfs
// we read the entire file and cache it in the localdir and send it AFTERwards
// we need a MD5
md5Hasher := md5.New()
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
teeReader := io.TeeReader(in, md5Hasher)
// don't cache small files on disk to reduce wear of the disk
if src.Size() > int64(o.fs.opt.MD5MemoryThreshold) {
// create the cache file
tempFile, err := ioutil.TempFile("", cachePrefix)
if err != nil {
return err
}
// clean up the file after we are done downloading
defer func() {
// the file should normally already be close, but just to make sure
_ = tempFile.Close()
// delete the cache file after we are done
_ = os.Remove(tempFile.Name())
}()
// reade the ENTIRE file to disc and calculate the MD5 in the process
if _, err = io.Copy(tempFile, teeReader); err != nil {
return err
}
// jump to the start of the local file so we can pass it along
if _, err = tempFile.Seek(0, 0); err != nil {
return err
}
// replace the already read source with a reader of our cached file
in = tempFile
} else {
// that's a small file, just read it into memory
var inData []byte
inData, err = ioutil.ReadAll(teeReader)
if err != nil {
return err
}
// set the reader to our read memory block
in = bytes.NewReader(inData)
// unwrap the accounting from the input, we use wrap to put it
// back on after the buffering
var wrap accounting.WrapFn
in, wrap = accounting.UnWrap(in)
var cleanup func()
md5String, in, cleanup, err = readMD5(in, size, int64(o.fs.opt.MD5MemoryThreshold))
defer cleanup()
if err != nil {
return errors.Wrap(err, "failed to calculate MD5")
}
// YEAH, the thing we need the MD5 string
md5String = hex.EncodeToString(md5Hasher.Sum(nil))
// Wrap the accounting back onto the stream
in = wrap(in)
}
size := src.Size()
var resp *http.Response
var result api.JottaFile
opts := rest.Opts{