forked from TrueCloudLab/rclone
sftp: Add support for md5 and sha1 hashes where available
This commit is contained in:
parent
ee13ea74f1
commit
80ce569874
4 changed files with 159 additions and 32 deletions
|
@ -15,25 +15,25 @@ show through.
|
|||
|
||||
Here is an overview of the major features of each cloud storage system.
|
||||
|
||||
| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type |
|
||||
| ---------------------------- |:-------:|:-------:|:----------------:|:---------------:|:---------:|
|
||||
| Amazon Drive | MD5 | No | Yes | No | R |
|
||||
| Amazon S3 | MD5 | Yes | No | No | R/W |
|
||||
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
|
||||
| Box | SHA1 | Yes | Yes | No | - |
|
||||
| Dropbox | DBHASH †| Yes | Yes | No | - |
|
||||
| FTP | - | No | No | No | - |
|
||||
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
|
||||
| Google Drive | MD5 | Yes | No | Yes | R/W |
|
||||
| HTTP | - | No | No | No | R |
|
||||
| Hubic | MD5 | Yes | No | No | R/W |
|
||||
| Microsoft Azure Blob Storage | MD5 | Yes | No | No | R/W |
|
||||
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
|
||||
| Openstack Swift | MD5 | Yes | No | No | R/W |
|
||||
| QingStor | - | No | No | No | R/W |
|
||||
| SFTP | - | Yes | Depends | No | - |
|
||||
| Yandex Disk | MD5 | Yes | No | No | R/W |
|
||||
| The local filesystem | All | Yes | Depends | No | - |
|
||||
| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type |
|
||||
| ---------------------------- |:-----------:|:-------:|:----------------:|:---------------:|:---------:|
|
||||
| Amazon Drive | MD5 | No | Yes | No | R |
|
||||
| Amazon S3 | MD5 | Yes | No | No | R/W |
|
||||
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
|
||||
| Box | SHA1 | Yes | Yes | No | - |
|
||||
| Dropbox | DBHASH † | Yes | Yes | No | - |
|
||||
| FTP | - | No | No | No | - |
|
||||
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
|
||||
| Google Drive | MD5 | Yes | No | Yes | R/W |
|
||||
| HTTP | - | No | No | No | R |
|
||||
| Hubic | MD5 | Yes | No | No | R/W |
|
||||
| Microsoft Azure Blob Storage | MD5 | Yes | No | No | R/W |
|
||||
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
|
||||
| Openstack Swift | MD5 | Yes | No | No | R/W |
|
||||
| QingStor | - | No | No | No | R/W |
|
||||
| SFTP | MD5, SHA1 * | Yes | Depends | No | - |
|
||||
| Yandex Disk | MD5 | Yes | No | No | R/W |
|
||||
| The local filesystem | All | Yes | Depends | No | - |
|
||||
|
||||
### Hash ###
|
||||
|
||||
|
@ -49,6 +49,9 @@ systems they must support a common hash type.
|
|||
hash](https://www.dropbox.com/developers/reference/content-hash).
|
||||
This is an SHA256 sum of all the 4MB block SHA256s.
|
||||
|
||||
* SFTP supports checksums if the same login has shell access and `md5sum`
|
||||
or `sha1sum` as well as `echo` are in the remote's PATH.
|
||||
|
||||
### ModTime ###
|
||||
|
||||
The cloud storage system supports setting modification times on
|
||||
|
|
|
@ -149,7 +149,8 @@ Modified times are used in syncing and are fully supported.
|
|||
|
||||
### Limitations ###
|
||||
|
||||
SFTP does not support any checksums.
|
||||
SFTP supports checksums if the same login has shell access and `md5sum`
|
||||
or `sha1sum` as well as `echo` are in the remote's PATH.
|
||||
|
||||
The only ssh agent supported under Windows is Putty's pagent.
|
||||
|
||||
|
|
113
sftp/sftp.go
113
sftp/sftp.go
|
@ -14,8 +14,10 @@ import (
|
|||
"github.com/ncw/rclone/fs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/pkg/sftp"
|
||||
sshagent "github.com/xanzy/ssh-agent"
|
||||
"github.com/xanzy/ssh-agent"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"strings"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -55,13 +57,14 @@ func init() {
|
|||
|
||||
// Fs stores the interface to the remote SFTP files
|
||||
type Fs struct {
|
||||
name string
|
||||
root string
|
||||
features *fs.Features // optional features
|
||||
url string
|
||||
sshClient *ssh.Client
|
||||
sftpClient *sftp.Client
|
||||
mkdirLock *stringLock
|
||||
name string
|
||||
root string
|
||||
features *fs.Features // optional features
|
||||
url string
|
||||
sshClient *ssh.Client
|
||||
sftpClient *sftp.Client
|
||||
mkdirLock *stringLock
|
||||
cachedHashes *fs.HashSet
|
||||
}
|
||||
|
||||
// Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading)
|
||||
|
@ -71,6 +74,8 @@ type Object struct {
|
|||
size int64 // size of the object
|
||||
modTime time.Time // modification time of the object
|
||||
mode os.FileMode // mode bits from the file
|
||||
md5sum *string // Cached MD5 checksum
|
||||
sha1sum *string // Cached SHA1 checksum
|
||||
}
|
||||
|
||||
// ObjectReader holds the sftp.File interface to a remote SFTP file opened for reading
|
||||
|
@ -432,9 +437,44 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Hashes returns fs.HashNone to indicate remote hashing is unavailable
|
||||
func (f *Fs) Hashes() fs.HashSet {
|
||||
return fs.HashSet(fs.HashNone)
|
||||
if f.cachedHashes != nil {
|
||||
return *f.cachedHashes
|
||||
}
|
||||
|
||||
session, err := f.sshClient.NewSession()
|
||||
if err != nil {
|
||||
return fs.HashSet(fs.HashNone)
|
||||
}
|
||||
sha1Output, _ := session.Output("echo 'abc' | sha1sum")
|
||||
expectedSha1 := "03cfd743661f07975fa2f1220c5194cbaff48451"
|
||||
_ = session.Close()
|
||||
|
||||
session, err = f.sshClient.NewSession()
|
||||
if err != nil {
|
||||
return fs.HashSet(fs.HashNone)
|
||||
}
|
||||
md5Output, _ := session.Output("echo 'abc' | md5sum")
|
||||
expectedMd5 := "0bee89b07a248e27c83fc3d5951213c1"
|
||||
_ = session.Close()
|
||||
|
||||
sha1Works := parseHash(sha1Output) == expectedSha1
|
||||
md5Works := parseHash(md5Output) == expectedMd5
|
||||
|
||||
var set fs.HashSet = fs.NewHashSet()
|
||||
if !sha1Works && !md5Works {
|
||||
set.Add(fs.HashNone)
|
||||
}
|
||||
if sha1Works {
|
||||
set.Add(fs.HashSHA1)
|
||||
}
|
||||
if md5Works {
|
||||
set.Add(fs.HashMD5)
|
||||
}
|
||||
|
||||
_ = session.Close()
|
||||
f.cachedHashes = &set
|
||||
return set
|
||||
}
|
||||
|
||||
// Fs is the filesystem this remote sftp file object is located within
|
||||
|
@ -455,9 +495,58 @@ func (o *Object) Remote() string {
|
|||
return o.remote
|
||||
}
|
||||
|
||||
// Hash returns "" since SFTP (in Go or OpenSSH) doesn't support remote calculation of hashes
|
||||
func (o *Object) Hash(r fs.HashType) (string, error) {
|
||||
return "", fs.ErrHashUnsupported
|
||||
if r == fs.HashMD5 && o.md5sum != nil {
|
||||
return *o.md5sum, nil
|
||||
} else if r == fs.HashSHA1 && o.sha1sum != nil {
|
||||
return *o.sha1sum, nil
|
||||
}
|
||||
|
||||
session, err := o.fs.sshClient.NewSession()
|
||||
if err != nil {
|
||||
o.fs.cachedHashes = nil // Something has changed on the remote system
|
||||
return "", fs.ErrHashUnsupported
|
||||
}
|
||||
|
||||
err = fs.ErrHashUnsupported
|
||||
var outputBytes []byte
|
||||
escapedPath := shellEscape(o.path())
|
||||
if r == fs.HashMD5 {
|
||||
outputBytes, err = session.Output("md5sum " + escapedPath)
|
||||
} else if r == fs.HashSHA1 {
|
||||
outputBytes, err = session.Output("sha1sum " + escapedPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
o.fs.cachedHashes = nil // Something has changed on the remote system
|
||||
_ = session.Close()
|
||||
return "", fs.ErrHashUnsupported
|
||||
}
|
||||
|
||||
_ = session.Close()
|
||||
str := parseHash(outputBytes)
|
||||
if r == fs.HashMD5 {
|
||||
o.md5sum = &str
|
||||
} else if r == fs.HashSHA1 {
|
||||
o.sha1sum = &str
|
||||
}
|
||||
return str, nil
|
||||
}
|
||||
|
||||
var shellEscapeRegex = regexp.MustCompile(`[^A-Za-z0-9_.,:/@\n-]`)
|
||||
|
||||
// Escape a string s.t. it cannot cause unintended behavior
|
||||
// when sending it to a shell.
|
||||
func shellEscape(str string) string {
|
||||
safe := shellEscapeRegex.ReplaceAllString(str, `\$0`)
|
||||
return strings.Replace(safe, "\n", "'\n'", -1)
|
||||
}
|
||||
|
||||
// Converts a byte array from the SSH session returned by
|
||||
// an invocation of md5sum/sha1sum to a hash string
|
||||
// as expected by the rest of this application
|
||||
func parseHash(bytes []byte) string {
|
||||
return strings.Split(string(bytes), " ")[0] // Split at hash / filename separator
|
||||
}
|
||||
|
||||
// Size returns the size in bytes of the remote sftp file
|
||||
|
|
34
sftp/sftp_internal_test.go
Normal file
34
sftp/sftp_internal_test.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
package sftp
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func TestShellEscape(t *testing.T) {
|
||||
for i, test := range []struct {
|
||||
unescaped, escaped string
|
||||
}{
|
||||
{"", ""},
|
||||
{"/this/is/harmless", "/this/is/harmless"},
|
||||
{"$(rm -rf /)", "\\$\\(rm\\ -rf\\ /\\)"},
|
||||
{"/test/\n", "/test/'\n'"},
|
||||
{":\"'", ":\\\"\\'"},
|
||||
} {
|
||||
got := shellEscape(test.unescaped)
|
||||
assert.Equal(t, test.escaped, got, fmt.Sprintf("Test %d unescaped = %q", i, test.unescaped))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHash(t *testing.T) {
|
||||
for i, test := range []struct {
|
||||
sshOutput, checksum string
|
||||
}{
|
||||
{"8dbc7733dbd10d2efc5c0a0d8dad90f958581821 RELEASE.md\n", "8dbc7733dbd10d2efc5c0a0d8dad90f958581821"},
|
||||
{"03cfd743661f07975fa2f1220c5194cbaff48451 -\n", "03cfd743661f07975fa2f1220c5194cbaff48451"},
|
||||
} {
|
||||
got := parseHash([]byte(test.sshOutput))
|
||||
assert.Equal(t, test.checksum, got, fmt.Sprintf("Test %d sshOutput = %q", i, test.sshOutput))
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue