Add support for multiple hash types.

Add support for multiple hash types with negotiation of common hash types for comparison.

Manually rebased version of #277 (see discussion there)
This commit is contained in:
klauspost 2016-01-11 13:39:33 +01:00
parent 2142c75846
commit 78c3a5ccfa
22 changed files with 815 additions and 135 deletions

View file

@ -533,6 +533,11 @@ func (f *Fs) Precision() time.Duration {
return fs.ModTimeNotSupported
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// Copy src to this remote using server side copy operations.
//
// This is stored with the remote path given
@ -585,8 +590,11 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
if o.info.ContentProperties.Md5 != nil {
return *o.info.ContentProperties.Md5, nil
}

View file

@ -74,6 +74,7 @@ type Object struct {
remote string // The remote path
info api.File // Info from the b2 object if known
modTime time.Time // The modified time of the object if known
sha1 string // SHA-1 hash if known
}
// ------------------------------------------------------------
@ -580,6 +581,11 @@ func (f *Fs) Purge() error {
return errReturn
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashSHA1)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -600,9 +606,16 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
return "", nil
// Hash returns the Sha-1 of an object returning a lowercase hex string
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashSHA1 {
return "", fs.ErrHashUnsupported
}
// Error is logged in readFileMetadata
_ = o.readFileMetadata()
return o.sha1, nil
}
// Size returns the size of an object in bytes
@ -652,23 +665,40 @@ func parseTimeString(timeString string) (result time.Time, err error) {
//
// It attempts to read the objects mtime and if that isn't present the
// LastModified returned in the http headers
//
// SHA-1 will also be updated once the request has completed.
func (o *Object) ModTime() (result time.Time) {
if !o.modTime.IsZero() {
// The error is logged in readFileMetadata
_ = o.readFileMetadata()
return o.modTime
}
// readFileMetadata attempts to read the modified time and
// SHA-1 hash of the remote object.
//
// If the objects mtime and if that isn't present the
// LastModified returned in the http headers.
//
// It is safe to call this function multiple times, and the
// result is cached between calls.
func (o *Object) readFileMetadata() error {
// Return if already know it
if !o.modTime.IsZero() && o.sha1 != "" {
return nil
}
// Return the current time if can't read metadata
result = time.Now()
// Set modtime to now, as default value.
o.modTime = time.Now()
// Read metadata (need ID)
// Read metadata (we need the ID)
err := o.readMetaData()
if err != nil {
fs.Debug(o, "Failed to read metadata: %v", err)
return result
fs.Debug(o, "Failed to get file metadata: %v", err)
return err
}
// Return the UploadTimestamp if can't get file info
result = time.Time(o.info.UploadTimestamp)
// Use the UploadTimestamp if can't get file info
o.modTime = time.Time(o.info.UploadTimestamp)
// Now read the metadata for the modified time
opts := rest.Opts{
@ -682,17 +712,20 @@ func (o *Object) ModTime() (result time.Time) {
_, err = o.fs.srv.CallJSON(&opts, &request, &response)
if err != nil {
fs.Debug(o, "Failed to get file info: %v", err)
return result
return err
}
o.sha1 = response.SHA1
// Parse the result
timeString := response.Info[timeKey]
parsed, err := parseTimeString(timeString)
if err != nil {
fs.Debug(o, "Failed to parse mod time string %q: %v", timeString, err)
return result
return err
}
return parsed
o.modTime = parsed
return nil
}
// SetModTime sets the modification time of the local fs object
@ -785,6 +818,9 @@ func (o *Object) Open() (in io.ReadCloser, err error) {
} else {
o.modTime = parsed
}
if o.sha1 == "" {
o.sha1 = resp.Header.Get(sha1Header)
}
return newOpenFile(o, resp), nil
}
@ -939,6 +975,7 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) (err error)
o.info.Action = "upload"
o.info.Size = response.Size
o.info.UploadTimestamp = api.Timestamp(time.Now()) // FIXME not quite right
o.sha1 = response.SHA1
return nil
}

View file

@ -84,6 +84,11 @@ size and path.
Produces an md5sum file for all the objects in the path. This
is in the same format as the standard md5sum tool produces.
### rclone sha1sum remote:path ###
Produces an sha1sum file for all the objects in the path. This
is in the same format as the standard sha1sum tool produces.
### rclone size remote:path ###
Prints the total size of objects in remote:path and the number of

View file

@ -10,8 +10,8 @@ Rclone has a sophisticated set of include and exclude rules. Some of
these are based on patterns and some on other things like file size.
The filters are applied for the `copy`, `sync`, `move`, `ls`, `lsl`,
`md5sum`, `size` and `check` operations. Note that `purge` does not
obey the filters.
`md5sum`, `sha1sum`, `size` and `check` operations.
Note that `purge` does not obey the filters.
Each path as it passes through rclone is matched against the include
and exclude rules. The paths are matched without a leading `/`.

View file

@ -782,6 +782,11 @@ func (f *Fs) DirMove(src fs.Fs) error {
return nil
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -802,8 +807,11 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
return o.md5sum, nil
}

View file

@ -523,6 +523,11 @@ func (f *Fs) DirMove(src fs.Fs) error {
return nil
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashNone)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -543,9 +548,9 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
return "", nil
// Hash is unsupported on Dropbox
func (o *Object) Hash(t fs.HashType) (string, error) {
return "", fs.ErrHashUnsupported
}
// Size returns the size of an object in bytes

View file

@ -106,6 +106,9 @@ type Fs interface {
// Precision of the ModTimes in this Fs
Precision() time.Duration
// Returns the supported hash types of the filesystem
Hashes() HashSet
}
// Object is a filesystem like object provided by an Fs
@ -121,7 +124,7 @@ type Object interface {
// Md5sum returns the md5 checksum of the file
// If no Md5sum is available it returns ""
Md5sum() (string, error)
Hash(HashType) (string, error)
// ModTime returns the modification date of the file
// It should return a best guess if one isn't available

235
fs/hash.go Normal file
View file

@ -0,0 +1,235 @@
package fs
import (
"crypto/md5"
"crypto/sha1"
"encoding/hex"
"fmt"
"hash"
"io"
"strings"
)
// HashType indicates a standard hashing algorithm
type HashType int
// ErrHashUnsupported should be returned by filesystem,
// if it is requested to deliver an unsupported hash type.
var ErrHashUnsupported = fmt.Errorf("hash type not supported")
const (
// HashNone indicates no hashes are supported
HashNone HashType = 0
// HashMD5 indicates MD5 support
HashMD5 HashType = 1 << iota
// HashSHA1 indicates SHA-1 support
HashSHA1
)
// SupportedHashes returns a set of all the supported hashes by
// HashStream and MultiHasher.
var SupportedHashes = NewHashSet(HashMD5, HashSHA1)
// HashStream will calculate hashes of all supported hash types.
func HashStream(r io.Reader) (map[HashType]string, error) {
return HashStreamTypes(r, SupportedHashes)
}
// HashStreamTypes will calculate hashes of the requested hash types.
func HashStreamTypes(r io.Reader, set HashSet) (map[HashType]string, error) {
hashers, err := hashFromTypes(set)
if err != nil {
return nil, err
}
_, err = io.Copy(hashToMultiWriter(hashers), r)
if err != nil {
return nil, err
}
var ret = make(map[HashType]string)
for k, v := range hashers {
ret[k] = hex.EncodeToString(v.Sum(nil))
}
return ret, nil
}
// String returns a string representation of the hash type.
// The function will panic if the hash type is unknown.
func (h HashType) String() string {
switch h {
case HashNone:
return "None"
case HashMD5:
return "MD5"
case HashSHA1:
return "SHA-1"
default:
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
panic(err)
}
}
// hashFromTypes will return hashers for all the requested types.
// The types must be a subset of SupportedHashes,
// and this function must support all types.
func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
if !set.SubsetOf(SupportedHashes) {
return nil, fmt.Errorf("Requested set %08x contains unknown hash types", int(set))
}
var hashers = make(map[HashType]hash.Hash)
types := set.Array()
for _, t := range types {
switch t {
case HashMD5:
hashers[t] = md5.New()
case HashSHA1:
hashers[t] = sha1.New()
default:
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
panic(err)
}
}
return hashers, nil
}
// hashToMultiWriter will return a set of hashers into a
// single multiwriter, where one write will update all
// the hashers.
func hashToMultiWriter(h map[HashType]hash.Hash) io.Writer {
// Convert to to slice
var w = make([]io.Writer, 0, len(h))
for _, v := range h {
w = append(w, v)
}
return io.MultiWriter(w...)
}
// A MultiHasher will construct various hashes on
// all incoming writes.
type MultiHasher struct {
io.Writer
h map[HashType]hash.Hash // Hashes
}
// NewMultiHasher will return a hash writer that will write all
// supported hash types.
func NewMultiHasher() *MultiHasher {
h, err := NewMultiHasherTypes(SupportedHashes)
if err != nil {
panic("internal error: could not create multihasher")
}
return h
}
// NewMultiHasherTypes will return a hash writer that will write
// the requested hash types.
func NewMultiHasherTypes(set HashSet) (*MultiHasher, error) {
hashers, err := hashFromTypes(set)
if err != nil {
return nil, err
}
m := MultiHasher{h: hashers, Writer: hashToMultiWriter(hashers)}
return &m, nil
}
// Sums returns the sums of all accumulated hashes as hex encoded
// strings.
func (m *MultiHasher) Sums() map[HashType]string {
dst := make(map[HashType]string)
for k, v := range m.h {
dst[k] = hex.EncodeToString(v.Sum(nil))
}
return dst
}
// A HashSet Indicates one or more hash types.
type HashSet int
// NewHashSet will create a new hash set with the hash types supplied
func NewHashSet(t ...HashType) HashSet {
h := HashSet(HashNone)
return h.Add(t...)
}
// Add one or more hash types to the set.
// Returns the modified hash set.
func (h *HashSet) Add(t ...HashType) HashSet {
for _, v := range t {
*h |= HashSet(v)
}
return *h
}
// Contains returns true if the
func (h HashSet) Contains(t HashType) bool {
return int(h)&int(t) != 0
}
// Overlap returns the overlapping hash types
func (h HashSet) Overlap(t HashSet) HashSet {
return HashSet(int(h) & int(t))
}
// SubsetOf will return true if all types of h
// is present in the set c
func (h HashSet) SubsetOf(c HashSet) bool {
return int(h)|int(c) == int(c)
}
// GetOne will return a hash type.
// Currently the first is returned, but it could be
// improved to return the strongest.
func (h HashSet) GetOne() HashType {
v := int(h)
i := uint(0)
for v != 0 {
if v&1 != 0 {
return HashType(1 << i)
}
i++
v >>= 1
}
return HashType(HashNone)
}
// Array returns an array of all hash types in the set
func (h HashSet) Array() (ht []HashType) {
v := int(h)
i := uint(0)
for v != 0 {
if v&1 != 0 {
ht = append(ht, HashType(1<<i))
}
i++
v >>= 1
}
return ht
}
// Count returns the number of hash types in the set
func (h HashSet) Count() int {
if int(h) == 0 {
return 0
}
// credit: https://code.google.com/u/arnehormann/
x := uint64(h)
x -= (x >> 1) & 0x5555555555555555
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
x += x >> 4
x &= 0x0f0f0f0f0f0f0f0f
x *= 0x0101010101010101
return int(x >> 56)
}
// String returns a string representation of the hash set.
// The function will panic if it contains an unknown type.
func (h HashSet) String() string {
a := h.Array()
var r []string
for _, v := range a {
r = append(r, v.String())
}
return "[" + strings.Join(r, ", ") + "]"
}

260
fs/hash_test.go Normal file
View file

@ -0,0 +1,260 @@
package fs_test
import (
"bytes"
"io"
"testing"
"github.com/ncw/rclone/fs"
)
func TestHashSet(t *testing.T) {
var h fs.HashSet
if h.Count() != 0 {
t.Fatalf("expected empty set to have 0 elements, got %d", h.Count())
}
a := h.Array()
if len(a) != 0 {
t.Fatalf("expected empty slice, got %d", len(a))
}
h = h.Add(fs.HashMD5)
if h.Count() != 1 {
t.Fatalf("expected 1 element, got %d", h.Count())
}
if h.GetOne() != fs.HashMD5 {
t.Fatalf("expected HashMD5, got %v", h.GetOne())
}
a = h.Array()
if len(a) != 1 {
t.Fatalf("expected 1 element, got %d", len(a))
}
if a[0] != fs.HashMD5 {
t.Fatalf("expected HashMD5, got %v", a[0])
}
// Test overlap, with all hashes
h = h.Overlap(fs.SupportedHashes)
if h.Count() != 1 {
t.Fatalf("expected 1 element, got %d", h.Count())
}
if h.GetOne() != fs.HashMD5 {
t.Fatalf("expected HashMD5, got %v", h.GetOne())
}
if !h.SubsetOf(fs.SupportedHashes) {
t.Fatalf("expected to be subset of all hashes")
}
if !h.SubsetOf(fs.NewHashSet(fs.HashMD5)) {
t.Fatalf("expected to be subset of itself")
}
h = h.Add(fs.HashSHA1)
if h.Count() != 2 {
t.Fatalf("expected 2 elements, got %d", h.Count())
}
one := h.GetOne()
if !(one == fs.HashMD5 || one == fs.HashSHA1) {
t.Fatalf("expected to be either MD5 or SHA1, got %v", one)
}
if !h.SubsetOf(fs.SupportedHashes) {
t.Fatalf("expected to be subset of all hashes")
}
if h.SubsetOf(fs.NewHashSet(fs.HashMD5)) {
t.Fatalf("did not expect to be subset of only MD5")
}
if h.SubsetOf(fs.NewHashSet(fs.HashSHA1)) {
t.Fatalf("did not expect to be subset of only SHA1")
}
if !h.SubsetOf(fs.NewHashSet(fs.HashMD5, fs.HashSHA1)) {
t.Fatalf("expected to be subset of MD5/SHA1")
}
a = h.Array()
if len(a) != 2 {
t.Fatalf("expected 2 elements, got %d", len(a))
}
ol := h.Overlap(fs.NewHashSet(fs.HashMD5))
if ol.Count() != 1 {
t.Fatalf("expected 1 element overlap, got %d", ol.Count())
}
if !ol.Contains(fs.HashMD5) {
t.Fatalf("expected overlap to be MD5, got %v", ol)
}
if ol.Contains(fs.HashSHA1) {
t.Fatalf("expected overlap NOT to contain SHA1, got %v", ol)
}
ol = h.Overlap(fs.NewHashSet(fs.HashMD5, fs.HashSHA1))
if ol.Count() != 2 {
t.Fatalf("expected 2 element overlap, got %d", ol.Count())
}
if !ol.Contains(fs.HashMD5) {
t.Fatalf("expected overlap to contain MD5, got %v", ol)
}
if !ol.Contains(fs.HashSHA1) {
t.Fatalf("expected overlap to contain SHA1, got %v", ol)
}
}
type hashTest struct {
input []byte
output map[fs.HashType]string
}
var hashTestSet = []hashTest{
hashTest{
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
output: map[fs.HashType]string{
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
},
},
// Empty data set
hashTest{
input: []byte{},
output: map[fs.HashType]string{
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
},
},
}
func TestMultiHasher(t *testing.T) {
for _, test := range hashTestSet {
mh := fs.NewMultiHasher()
n, err := io.Copy(mh, bytes.NewBuffer(test.input))
if err != nil {
t.Fatal(err)
}
if int(n) != len(test.input) {
t.Fatalf("copy mismatch: %d != %d", n, len(test.input))
}
sums := mh.Sums()
for k, v := range sums {
expect, ok := test.output[k]
if !ok {
t.Errorf("Unknown hash type %v, sum: %q", k, v)
}
if expect != v {
t.Errorf("hash %v mismatch %q != %q", k, v, expect)
}
}
// Test that all are present
for k, v := range test.output {
expect, ok := sums[k]
if !ok {
t.Errorf("did not calculate hash type %v, sum: %q", k, v)
}
if expect != v {
t.Errorf("hash %d mismatch %q != %q", k, v, expect)
}
}
}
}
func TestMultiHasherTypes(t *testing.T) {
h := fs.HashSHA1
for _, test := range hashTestSet {
mh, err := fs.NewMultiHasherTypes(fs.NewHashSet(h))
if err != nil {
t.Fatal(err)
}
n, err := io.Copy(mh, bytes.NewBuffer(test.input))
if err != nil {
t.Fatal(err)
}
if int(n) != len(test.input) {
t.Fatalf("copy mismatch: %d != %d", n, len(test.input))
}
sums := mh.Sums()
if len(sums) != 1 {
t.Fatalf("expected 1 sum, got %d", len(sums))
}
expect := test.output[h]
if expect != sums[h] {
t.Errorf("hash %v mismatch %q != %q", h, sums[h], expect)
}
}
}
func TestHashStream(t *testing.T) {
for _, test := range hashTestSet {
sums, err := fs.HashStream(bytes.NewBuffer(test.input))
if err != nil {
t.Fatal(err)
}
for k, v := range sums {
expect, ok := test.output[k]
if !ok {
t.Errorf("Unknown hash type %v, sum: %q", k, v)
}
if expect != v {
t.Errorf("hash %v mismatch %q != %q", k, v, expect)
}
}
// Test that all are present
for k, v := range test.output {
expect, ok := sums[k]
if !ok {
t.Errorf("did not calculate hash type %v, sum: %q", k, v)
}
if expect != v {
t.Errorf("hash %v mismatch %q != %q", k, v, expect)
}
}
}
}
func TestHashStreamTypes(t *testing.T) {
h := fs.HashSHA1
for _, test := range hashTestSet {
sums, err := fs.HashStreamTypes(bytes.NewBuffer(test.input), fs.NewHashSet(h))
if err != nil {
t.Fatal(err)
}
if len(sums) != 1 {
t.Fatalf("expected 1 sum, got %d", len(sums))
}
expect := test.output[h]
if expect != sums[h] {
t.Errorf("hash %d mismatch %q != %q", h, sums[h], expect)
}
}
}
func TestHashSetStringer(t *testing.T) {
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5)
s := h.String()
expect := "[MD5, SHA-1]"
if s != expect {
t.Errorf("unexpected stringer: was %q, expected %q", s, expect)
}
h = fs.NewHashSet(fs.HashSHA1)
s = h.String()
expect = "[SHA-1]"
if s != expect {
t.Errorf("unexpected stringer: was %q, expected %q", s, expect)
}
h = fs.NewHashSet()
s = h.String()
expect = "[]"
if s != expect {
t.Errorf("unexpected stringer: was %q, expected %q", s, expect)
}
}
func TestHashStringer(t *testing.T) {
h := fs.HashMD5
s := h.String()
expect := "MD5"
if s != expect {
t.Errorf("unexpected stringer: was %q, expected %q", s, expect)
}
h = fs.HashNone
s = h.String()
expect = "None"
if s != expect {
t.Errorf("unexpected stringer: was %q, expected %q", s, expect)
}
}

View file

@ -96,6 +96,11 @@ func (f *Limited) Precision() time.Duration {
return f.fs.Precision()
}
// Hashes returns the supported hash sets.
func (f *Limited) Hashes() HashSet {
return f.fs.Hashes()
}
// Copy src to this remote using server side copy operations.
//
// This is stored with the remote path given

View file

@ -33,48 +33,54 @@ func CalculateModifyWindow(fs ...Fs) {
Debug(fs[0], "Modify window is %s", Config.ModifyWindow)
}
// Md5sumsEqual checks to see if src == dst, but ignores empty strings
func Md5sumsEqual(src, dst string) bool {
// HashEquals checks to see if src == dst, but ignores empty strings
// and returns true if either is empty.
func HashEquals(src, dst string) bool {
if src == "" || dst == "" {
return true
}
return src == dst
}
// CheckMd5sums checks the two files to see if the MD5sums are the same
// CheckHashes checks the two files to see if they have common
// known hash types and compares them
//
// Returns two bools, the first of which is equality and the second of
// which is true if either of the MD5SUMs were unset.
// which is true if either of the hashes were unset.
//
// May return an error which will already have been logged
//
// If an error is returned it will return equal as false
func CheckMd5sums(src, dst Object) (equal bool, unset bool, err error) {
srcMd5, err := src.Md5sum()
if err != nil {
Stats.Error()
ErrorLog(src, "Failed to calculate src md5: %s", err)
return false, false, err
}
if srcMd5 == "" {
func CheckHashes(src, dst Object) (equal bool, unset bool, err error) {
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
Debug(nil, "Shared hashes: %v", common)
if common.Count() == 0 {
return true, true, nil
}
dstMd5, err := dst.Md5sum()
usehash := common.GetOne()
srcHash, err := src.Hash(usehash)
if err != nil {
Stats.Error()
ErrorLog(dst, "Failed to calculate dst md5: %s", err)
ErrorLog(src, "Failed to calculate src hash: %s", err)
return false, false, err
}
if dstMd5 == "" {
if srcHash == "" {
return true, true, nil
}
// Debug("Src MD5 %s", srcMd5)
// Debug("Dst MD5 %s", obj.Hash)
return Md5sumsEqual(srcMd5, dstMd5), false, nil
dstHash, err := dst.Hash(usehash)
if err != nil {
Stats.Error()
ErrorLog(dst, "Failed to calculate dst hash: %s", err)
return false, false, err
}
if dstHash == "" {
return true, true, nil
}
return srcHash == dstHash, false, nil
}
// Equal checks to see if the src and dst objects are equal by looking at
// size, mtime and MD5SUM
// size, mtime and hash
//
// If the src and dst size are different then it is considered to be
// not equal. If --size-only is in effect then this is the only check
@ -84,7 +90,7 @@ func CheckMd5sums(src, dst Object) (equal bool, unset bool, err error) {
// considered to be equal. This check is skipped if using --checksum.
//
// If the size is the same and mtime is different, unreadable or
// --checksum is set and the MD5SUM is the same then the file is
// --checksum is set and the hash is the same then the file is
// considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
//
@ -120,23 +126,23 @@ func Equal(src, dst Object) bool {
}
// mtime is unreadable or different but size is the same so
// check the MD5SUM
same, md5unset, _ := CheckMd5sums(src, dst)
// check the hash
same, hashunset, _ := CheckHashes(src, dst)
if !same {
Debug(src, "Md5sums differ")
Debug(src, "Hash differ")
return false
}
if !Config.CheckSum {
// Size and MD5 the same but mtime different so update the
// Size and hash the same but mtime different so update the
// mtime of the dst object here
dst.SetModTime(srcModTime)
}
if md5unset {
if hashunset {
Debug(src, "Size of src and dst objects identical")
} else {
Debug(src, "Size and MD5SUM of src and dst objects identical")
Debug(src, "Size and hash of src and dst objects identical")
}
return true
}
@ -245,20 +251,27 @@ tryAgain:
return
}
// Verify md5sums are the same after transfer - ignoring blank md5sums
if !Config.SizeOnly {
srcMd5sum, md5sumErr := src.Md5sum()
if md5sumErr != nil {
// Verify hashes are the same after transfer - ignoring blank hashes
// TODO(klauspost): This could be extended, so we always create a hash type matching
// the destination, and calculate it while sending.
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
Debug(src, "common hashes: %v", common)
if !Config.SizeOnly && common.Count() > 0 {
// Get common hash type
hashType := common.GetOne()
srcSum, err := src.Hash(hashType)
if err != nil {
Stats.Error()
ErrorLog(src, "Failed to read md5sum: %s", md5sumErr)
} else if srcMd5sum != "" {
dstMd5sum, md5sumErr := dst.Md5sum()
if md5sumErr != nil {
ErrorLog(src, "Failed to read src hash: %s", err)
} else if srcSum != "" {
dstSum, err := dst.Hash(hashType)
if err != nil {
Stats.Error()
ErrorLog(dst, "Failed to read md5sum: %s", md5sumErr)
} else if !Md5sumsEqual(srcMd5sum, dstMd5sum) {
ErrorLog(dst, "Failed to read hash: %s", err)
} else if !HashEquals(srcSum, dstSum) {
Stats.Error()
err = fmt.Errorf("Corrupted on transfer: md5sums differ %q vs %q", srcMd5sum, dstMd5sum)
err = fmt.Errorf("Corrupted on transfer: %v hash differ %q vs %q", hashType, srcSum, dstSum)
ErrorLog(dst, "%s", err)
removeFailedCopy(dst)
return
@ -296,7 +309,7 @@ func checkOne(pair ObjectPair, out ObjectPairChan) {
// PairChecker reads Objects~s on in send to out if they need transferring.
//
// FIXME potentially doing lots of MD5SUMS at once
// FIXME potentially doing lots of hashes at once
func PairChecker(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) {
defer wg.Done()
for pair := range in {
@ -540,7 +553,7 @@ func MoveDir(fdst, fsrc Fs) error {
return Purge(fsrc)
}
// Check the files in fsrc and fdst according to Size and MD5SUM
// Check the files in fsrc and fdst according to Size and hash
func Check(fdst, fsrc Fs) error {
var (
wg sync.WaitGroup
@ -614,7 +627,7 @@ func Check(fdst, fsrc Fs) error {
ErrorLog(src, "Sizes differ")
continue
}
same, _, err := CheckMd5sums(src, dst)
same, _, err := CheckHashes(src, dst)
Stats.DoneChecking(src)
if err != nil {
continue
@ -702,15 +715,30 @@ func ListLong(f Fs, w io.Writer) error {
//
// Lists in parallel which may get them out of order
func Md5sum(f Fs, w io.Writer) error {
return hashLister(HashMD5, f, w)
}
// Sha1sum list the Fs to the supplied writer
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
func Sha1sum(f Fs, w io.Writer) error {
return hashLister(HashSHA1, f, w)
}
func hashLister(ht HashType, f Fs, w io.Writer) error {
return ListFn(f, func(o Object) {
Stats.Checking(o)
md5sum, err := o.Md5sum()
sum, err := o.Hash(ht)
Stats.DoneChecking(o)
if err != nil {
Debug(o, "Failed to read MD5: %v", err)
md5sum = "ERROR"
if err == ErrHashUnsupported {
sum = "UNSUPPORTED"
} else if err != nil {
Debug(o, "Failed to read %v: %v", ht, err)
sum = "ERROR"
}
syncFprintf(w, "%32s %s\n", md5sum, o.Remote())
syncFprintf(w, "%32s %s\n", sum, o.Remote())
})
}

View file

@ -365,7 +365,7 @@ func TestSyncAfterChangingFilesSizeOnly(t *testing.T) {
fstest.CheckListingWithPrecision(t, fremote, items, fs.Config.ModifyWindow)
}
// Sync after changing a file's contents, modtime but not length
// Sync after changing a file's contents, maintaining modtime and length
func TestSyncAfterChangingContentsOnly(t *testing.T) {
if fremote.Precision() == fs.ModTimeNotSupported {
t.Logf("ModTimeNotSupported so forcing file to be a different size")

View file

@ -54,16 +54,21 @@ func (i *Item) Check(t *testing.T, obj fs.Object, precision time.Duration) {
if obj == nil {
t.Fatalf("Object is nil")
}
types := obj.Fs().Hashes().Array()
for _, hash := range types {
// Check attributes
Md5sum, err := obj.Md5sum()
sum, err := obj.Hash(hash)
if err != nil {
t.Fatalf("Failed to read md5sum for %q: %v", obj.Remote(), err)
t.Fatalf("%s: Failed to read hash %v for %q: %v", obj.Fs().String(), hash, obj.Remote(), err)
}
if hash == fs.HashMD5 {
if !fs.HashEquals(i.Md5sum, sum) {
t.Errorf("%s/%s: md5 hash incorrect - expecting %q got %q", obj.Fs().String(), obj.Remote(), i.Md5sum, sum)
}
}
if !fs.Md5sumsEqual(i.Md5sum, Md5sum) {
t.Errorf("%s: Md5sum incorrect - expecting %q got %q", obj.Remote(), i.Md5sum, Md5sum)
}
if i.Size != obj.Size() {
t.Errorf("%s: Size incorrect - expecting %d got %d", obj.Remote(), i.Size, obj.Size())
t.Errorf("%s/%s: Size incorrect - expecting %d got %d", obj.Fs().String(), obj.Remote(), i.Size, obj.Size())
}
i.CheckModTime(t, obj, obj.ModTime(), precision)
}

View file

@ -469,11 +469,11 @@ func TestObjectRemote(t *testing.T) {
func TestObjectMd5sum(t *testing.T) {
skipIfNotOk(t)
obj := findObject(t, file1.Path)
Md5sum, err := obj.Md5sum()
if err != nil {
Md5sum, err := obj.Hash(fs.HashMD5)
if err != nil && err != fs.ErrHashUnsupported {
t.Errorf("Error in Md5sum: %v", err)
}
if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) {
if !fs.HashEquals(Md5sum, file1.Md5sum) {
t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum)
}
}
@ -527,7 +527,7 @@ func TestObjectOpen(t *testing.T) {
t.Fatalf("in.Close() return error: %v", err)
}
Md5sum := hex.EncodeToString(hash.Sum(nil))
if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) {
if !fs.HashEquals(Md5sum, file1.Md5sum) {
t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum)
}
}

View file

@ -458,6 +458,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) {
return dstObj, nil
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -478,8 +483,11 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
return o.md5sum, nil
}

View file

@ -207,6 +207,12 @@ func (f *Fs) UnWrap() fs.Fs {
return f.Fs
}
// Hashes returns the supported hash sets.
// Inherited from swift
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// Check the interfaces are satisfied
var (
_ fs.Fs = (*Fs)(nil)

View file

@ -2,10 +2,7 @@
package local
import (
"crypto/md5"
"encoding/hex"
"fmt"
"hash"
"io"
"io/ioutil"
"os"
@ -54,7 +51,7 @@ type Object struct {
remote string // The remote path
path string // The local path
info os.FileInfo // Interface for file info (always present)
md5sum string // the md5sum of the object or "" if not calculated
hashes map[fs.HashType]string // Hashes
}
// ------------------------------------------------------------
@ -417,6 +414,11 @@ func (f *Fs) DirMove(src fs.Fs) error {
return os.Rename(srcFs.root, f.root)
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.SupportedHashes
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -437,19 +439,26 @@ func (o *Object) Remote() string {
return o.fs.cleanUtf8(o.remote)
}
// Md5sum calculates the Md5sum of a file returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
if o.md5sum != "" {
return o.md5sum, nil
// Hash returns the requested hash of a file as a lowercase hex string
func (o *Object) Hash(r fs.HashType) (string, error) {
// Check that the underlying file hasn't changed
oldtime := o.info.ModTime()
oldsize := o.info.Size()
_ = o.lstat()
if !o.info.ModTime().Equal(oldtime) || oldsize != o.info.Size() {
o.hashes = nil
}
if o.hashes == nil {
o.hashes = make(map[fs.HashType]string)
in, err := os.Open(o.path)
if err != nil {
fs.Stats.Error()
fs.ErrorLog(o, "Failed to open: %s", err)
return "", err
}
hash := md5.New()
_, err = io.Copy(hash, in)
o.hashes, err = fs.HashStream(in)
closeErr := in.Close()
if err != nil {
fs.Stats.Error()
@ -461,8 +470,8 @@ func (o *Object) Md5sum() (string, error) {
fs.ErrorLog(o, "Failed to close: %s", closeErr)
return "", closeErr
}
o.md5sum = hex.EncodeToString(hash.Sum(nil))
return o.md5sum, nil
}
return o.hashes[r], nil
}
// Size returns the size of an object in bytes
@ -508,7 +517,7 @@ func (o *Object) Storable() bool {
type localOpenFile struct {
o *Object // object that is open
in io.ReadCloser // handle we are wrapping
hash hash.Hash // currently accumulating MD5
hash *fs.MultiHasher // currently accumulating hashes
}
// Read bytes from the object - see io.Reader
@ -525,9 +534,9 @@ func (file *localOpenFile) Read(p []byte) (n int, err error) {
func (file *localOpenFile) Close() (err error) {
err = file.in.Close()
if err == nil {
file.o.md5sum = hex.EncodeToString(file.hash.Sum(nil))
file.o.hashes = file.hash.Sums()
} else {
file.o.md5sum = ""
file.o.hashes = nil
}
return err
}
@ -542,7 +551,7 @@ func (o *Object) Open() (in io.ReadCloser, err error) {
in = &localOpenFile{
o: o,
in: in,
hash: md5.New(),
hash: fs.NewMultiHasher(),
}
return
}
@ -566,7 +575,7 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error {
}
// Calculate the md5sum of the object we are reading as we go along
hash := md5.New()
hash := fs.NewMultiHasher()
in = io.TeeReader(in, hash)
_, err = io.Copy(out, in)
@ -578,8 +587,8 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error {
return outErr
}
// All successful so update the md5sum
o.md5sum = hex.EncodeToString(hash.Sum(nil))
// All successful so update the hashes
o.hashes = hash.Sums()
// Set the mtime
o.SetModTime(modTime)

View file

@ -4,6 +4,7 @@ package onedrive
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"log"
@ -13,6 +14,8 @@ import (
"sync"
"time"
"encoding/hex"
"github.com/ncw/rclone/dircache"
"github.com/ncw/rclone/fs"
"github.com/ncw/rclone/oauthutil"
@ -95,6 +98,7 @@ type Object struct {
size int64 // size of the object
modTime time.Time // modification time of the object
id string // ID of the object
sha1 string // SHA-1 of the object content
}
// ------------------------------------------------------------
@ -670,6 +674,11 @@ func (f *Fs) Purge() error {
return f.purgeCheck(false)
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashSHA1)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -695,9 +704,12 @@ func (o *Object) srvPath() string {
return replaceReservedChars(o.fs.rootSlash() + o.remote)
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
return "", nil // not supported by one drive
// Hash returns the SHA-1 of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashSHA1 {
return "", fs.ErrHashUnsupported
}
return o.sha1, nil
}
// Size returns the size of an object in bytes
@ -714,6 +726,16 @@ func (o *Object) Size() int64 {
func (o *Object) setMetaData(info *api.Item) {
o.hasMetaData = true
o.size = info.Size
// In OneDrive for Business, SHA1 and CRC32 hash values are not returned for files.
if info.File != nil && info.File.Hashes.Sha1Hash != "" {
sha1sumData, err := base64.StdEncoding.DecodeString(info.File.Hashes.Sha1Hash)
if err != nil {
fs.Log(o, "Bad SHA1 decode: %v", err)
} else {
o.sha1 = hex.EncodeToString(sha1sumData)
}
}
if info.FileSystemInfo != nil {
o.modTime = time.Time(info.FileSystemInfo.LastModifiedDateTime)
} else {

View file

@ -147,6 +147,18 @@ var Commands = []Command{
MinArgs: 1,
MaxArgs: 1,
},
{
Name: "sha1sum",
ArgsHelp: "remote:path",
Help: `
Produces an sha1sum file for all the objects in the path. This
is in the same format as the standard sha1sum tool produces.`,
Run: func(fdst, fsrc fs.Fs) error {
return fs.Sha1sum(fdst, os.Stdout)
},
MinArgs: 1,
MaxArgs: 1,
},
{
Name: "size",
ArgsHelp: "remote:path",

View file

@ -537,6 +537,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) {
return f.NewFsObject(remote), err
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -559,8 +564,11 @@ func (o *Object) Remote() string {
var matchMd5 = regexp.MustCompile(`^[0-9a-f]{32}$`)
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
etag := strings.Trim(strings.ToLower(o.etag), `"`)
// Check the etag is a valid md5sum
if !matchMd5.MatchString(etag) {

View file

@ -431,6 +431,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) {
return f.NewFsObject(remote), nil
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -451,8 +456,11 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
isManifest, err := o.isManifestFile()
if err != nil {
return "", err

View file

@ -382,6 +382,11 @@ func (f *Fs) Purge() error {
return f.purgeCheck(false)
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashMD5)
}
// ------------------------------------------------------------
// Fs returns the parent Fs
@ -402,8 +407,11 @@ func (o *Object) Remote() string {
return o.remote
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Md5sum() (string, error) {
// Hash returns the Md5sum of an object returning a lowercase hex string
func (o *Object) Hash(t fs.HashType) (string, error) {
if t != fs.HashMD5 {
return "", fs.ErrHashUnsupported
}
return o.md5sum, nil
}