crypt: calculate hashes for uploads from local disk

Before this change crypt would not calculate hashes for files it was
uploading. This is because, in the general case, they have to be
downloaded, encrypted and hashed which is too resource intensive.

However this causes backends which need the hash first before
uploading (eg s3/b2 when uploading chunked files) not to have a hash
of the file. This causes cryptcheck to complain about missing hashes
on large files uploaded via s3/b2.

This change calculates hashes for the upload if the upload is coming
from a local filesystem. It does this by encrypting and hashing the
local file re-using the code used by cryptcheck. For a local disk this
is not a lot more intensive than calculating the hash.

See: https://forum.rclone.org/t/strange-output-for-cryptcheck/15437
Fixes: #2809
This commit is contained in:
Nick Craig-Wood 2020-04-09 16:43:34 +01:00
parent 44b1a591a8
commit 1648c1a0f3
3 changed files with 210 additions and 33 deletions

View file

@ -510,6 +510,7 @@ func (c *Cipher) DecryptDirName(in string) (string, error) {
return c.decryptFileName(in)
}
// NameEncryptionMode returns the encryption mode in use for names
func (c *Cipher) NameEncryptionMode() NameEncryptionMode {
return c.mode
}
@ -660,13 +661,19 @@ func (fh *encrypter) finish(err error) (int, error) {
}
// Encrypt data encrypts the data stream
func (c *Cipher) EncryptData(in io.Reader) (io.Reader, error) {
func (c *Cipher) encryptData(in io.Reader) (io.Reader, *encrypter, error) {
in, wrap := accounting.UnWrap(in) // unwrap the accounting off the Reader
out, err := c.newEncrypter(in, nil)
if err != nil {
return nil, err
return nil, nil, err
}
return wrap(out), nil // and wrap the accounting back on
return wrap(out), out, nil // and wrap the accounting back on
}
// EncryptData encrypts the data stream
func (c *Cipher) EncryptData(in io.Reader) (io.Reader, error) {
out, _, err := c.encryptData(in)
return out, err
}
// decrypter decrypts an io.ReaderCloser on the fly

View file

@ -327,7 +327,7 @@ type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ..
// put implements Put or PutStream
func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, put putFn) (fs.Object, error) {
// Encrypt the data into wrappedIn
wrappedIn, err := f.cipher.EncryptData(in)
wrappedIn, encrypter, err := f.cipher.encryptData(in)
if err != nil {
return nil, err
}
@ -351,7 +351,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options [
}
// Transfer the data
o, err := put(ctx, wrappedIn, f.newObjectInfo(src), options...)
o, err := put(ctx, wrappedIn, f.newObjectInfo(src, encrypter.nonce), options...)
if err != nil {
return nil, err
}
@ -504,11 +504,11 @@ func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo,
if do == nil {
return nil, errors.New("can't PutUnchecked")
}
wrappedIn, err := f.cipher.EncryptData(in)
wrappedIn, encrypter, err := f.cipher.encryptData(in)
if err != nil {
return nil, err
}
o, err := do(ctx, wrappedIn, f.newObjectInfo(src))
o, err := do(ctx, wrappedIn, f.newObjectInfo(src, encrypter.nonce))
if err != nil {
return nil, err
}
@ -561,6 +561,37 @@ func (f *Fs) DecryptFileName(encryptedFileName string) (string, error) {
return f.cipher.DecryptFileName(encryptedFileName)
}
// computeHashWithNonce takes the nonce and encrypts the contents of
// src with it, and calculates the hash given by HashType on the fly
//
// Note that we break lots of encapsulation in this function.
func (f *Fs) computeHashWithNonce(ctx context.Context, nonce nonce, src fs.Object, hashType hash.Type) (hashStr string, err error) {
// Open the src for input
in, err := src.Open(ctx)
if err != nil {
return "", errors.Wrap(err, "failed to open src")
}
defer fs.CheckClose(in, &err)
// Now encrypt the src with the nonce
out, err := f.cipher.newEncrypter(in, &nonce)
if err != nil {
return "", errors.Wrap(err, "failed to make encrypter")
}
// pipe into hash
m, err := hash.NewMultiHasherTypes(hash.NewHashSet(hashType))
if err != nil {
return "", errors.Wrap(err, "failed to make hasher")
}
_, err = io.Copy(m, out)
if err != nil {
return "", errors.Wrap(err, "failed to hash data")
}
return m.Sums()[hashType], nil
}
// ComputeHash takes the nonce from o, and encrypts the contents of
// src with it, and calculates the hash given by HashType on the fly
//
@ -597,30 +628,7 @@ func (f *Fs) ComputeHash(ctx context.Context, o *Object, src fs.Object, hashType
return "", errors.Wrap(err, "failed to close nonce read")
}
// Open the src for input
in, err = src.Open(ctx)
if err != nil {
return "", errors.Wrap(err, "failed to open src")
}
defer fs.CheckClose(in, &err)
// Now encrypt the src with the nonce
out, err := f.cipher.newEncrypter(in, &nonce)
if err != nil {
return "", errors.Wrap(err, "failed to make encrypter")
}
// pipe into hash
m, err := hash.NewMultiHasherTypes(hash.NewHashSet(hashType))
if err != nil {
return "", errors.Wrap(err, "failed to make hasher")
}
_, err = io.Copy(m, out)
if err != nil {
return "", errors.Wrap(err, "failed to hash data")
}
return m.Sums()[hashType], nil
return f.computeHashWithNonce(ctx, nonce, src, hashType)
}
// MergeDirs merges the contents of all the directories passed
@ -834,12 +842,14 @@ func (f *Fs) Disconnect(ctx context.Context) error {
type ObjectInfo struct {
fs.ObjectInfo
f *Fs
nonce nonce
}
func (f *Fs) newObjectInfo(src fs.ObjectInfo) *ObjectInfo {
func (f *Fs) newObjectInfo(src fs.ObjectInfo, nonce nonce) *ObjectInfo {
return &ObjectInfo{
ObjectInfo: src,
f: f,
nonce: nonce,
}
}
@ -865,6 +875,23 @@ func (o *ObjectInfo) Size() int64 {
// Hash returns the selected checksum of the file
// If no checksum is available it returns ""
func (o *ObjectInfo) Hash(ctx context.Context, hash hash.Type) (string, error) {
var srcObj fs.Object
var ok bool
// Get the underlying object if there is one
if srcObj, ok = o.ObjectInfo.(fs.Object); ok {
// Prefer direct interface assertion
} else if do, ok := o.ObjectInfo.(fs.ObjectUnWrapper); ok {
// Otherwise likely is a operations.OverrideRemote
srcObj = do.UnWrap()
} else {
return "", nil
}
// if this is wrapping a local object then we work out the hash
if srcObj.Fs().Features().IsLocal {
// Read the data and encrypt it to calculate the hash
fs.Debugf(o, "Computing %v hash of encrypted source", hash)
return o.f.computeHashWithNonce(ctx, o.nonce, srcObj, hash)
}
return "", nil
}

View file

@ -0,0 +1,143 @@
package crypt
import (
"bytes"
"context"
"crypto/md5"
"fmt"
"io"
"testing"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/object"
"github.com/rclone/rclone/lib/random"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type testWrapper struct {
fs.ObjectInfo
}
// UnWrap returns the Object that this Object is wrapping or nil if it
// isn't wrapping anything
func (o testWrapper) UnWrap() fs.Object {
if o, ok := o.ObjectInfo.(fs.Object); ok {
return o
}
return nil
}
// Create a temporary local fs to upload things from
func makeTempLocalFs(t *testing.T) (localFs fs.Fs, cleanup func()) {
localFs, err := fs.TemporaryLocalFs()
require.NoError(t, err)
cleanup = func() {
require.NoError(t, localFs.Rmdir(context.Background(), ""))
}
return localFs, cleanup
}
// Upload a file to a remote
func uploadFile(t *testing.T, f fs.Fs, remote, contents string) (obj fs.Object, cleanup func()) {
inBuf := bytes.NewBufferString(contents)
t1 := time.Date(2012, time.December, 17, 18, 32, 31, 0, time.UTC)
upSrc := object.NewStaticObjectInfo(remote, t1, int64(len(contents)), true, nil, nil)
obj, err := f.Put(context.Background(), inBuf, upSrc)
require.NoError(t, err)
cleanup = func() {
require.NoError(t, obj.Remove(context.Background()))
}
return obj, cleanup
}
// Test the ObjectInfo
func testObjectInfo(t *testing.T, f *Fs, wrap bool) {
var (
contents = random.String(100)
path = "hash_test_object"
ctx = context.Background()
)
if wrap {
path = "_wrap"
}
localFs, cleanupLocalFs := makeTempLocalFs(t)
defer cleanupLocalFs()
obj, cleanupObj := uploadFile(t, localFs, path, contents)
defer cleanupObj()
// encrypt the data
inBuf := bytes.NewBufferString(contents)
var outBuf bytes.Buffer
enc, err := f.cipher.newEncrypter(inBuf, nil)
require.NoError(t, err)
nonce := enc.nonce // read the nonce at the start
_, err = io.Copy(&outBuf, enc)
require.NoError(t, err)
var oi fs.ObjectInfo = obj
if wrap {
// wrap the object in a fs.ObjectUnwrapper if required
oi = testWrapper{oi}
}
// wrap the object in a crypt for upload using the nonce we
// saved from the encryptor
src := f.newObjectInfo(oi, nonce)
// Test ObjectInfo methods
assert.Equal(t, int64(outBuf.Len()), src.Size())
assert.Equal(t, f, src.Fs())
assert.NotEqual(t, path, src.Remote())
// Test ObjectInfo.Hash
wantHash := md5.Sum(outBuf.Bytes())
gotHash, err := src.Hash(ctx, hash.MD5)
require.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%x", wantHash), gotHash)
}
func testComputeHash(t *testing.T, f *Fs) {
var (
contents = random.String(100)
path = "compute_hash_test"
ctx = context.Background()
hashType = f.Fs.Hashes().GetOne()
)
if hashType == hash.None {
t.Skipf("%v: does not support hashes", f.Fs)
}
localFs, cleanupLocalFs := makeTempLocalFs(t)
defer cleanupLocalFs()
// Upload a file to localFs as a test object
localObj, cleanupLocalObj := uploadFile(t, localFs, path, contents)
defer cleanupLocalObj()
// Upload the same data to the remote Fs also
remoteObj, cleanupRemoteObj := uploadFile(t, f, path, contents)
defer cleanupRemoteObj()
// Calculate the expected Hash of the remote object
computedHash, err := f.ComputeHash(ctx, remoteObj.(*Object), localObj, hashType)
require.NoError(t, err)
// Test computed hash matches remote object hash
remoteObjHash, err := remoteObj.(*Object).Object.Hash(ctx, hashType)
require.NoError(t, err)
assert.Equal(t, remoteObjHash, computedHash)
}
// InternalTest is called by fstests.Run to extra tests
func (f *Fs) InternalTest(t *testing.T) {
t.Run("ObjectInfo", func(t *testing.T) { testObjectInfo(t, f, false) })
t.Run("ObjectInfoWrap", func(t *testing.T) { testObjectInfo(t, f, true) })
t.Run("ComputeHash", func(t *testing.T) { testComputeHash(t, f) })
}