sftp: add support for more hashes (crc32, sha256, blake3, xxh3, xxh128)

This commit is contained in:
albertony 2024-04-15 11:35:37 +02:00
parent 203f115661
commit 3cb22501ac
2 changed files with 238 additions and 82 deletions

View file

@ -221,15 +221,45 @@ E.g. the second example above should be rewritten as:
Help: "Windows Command Prompt",
},
},
}, {
Name: "hashes",
Help: `Comma separated list of supported checksum types.`,
Default: fs.CommaSepList{},
Advanced: true,
}, {
Name: "md5sum_command",
Default: "",
Help: "The command used to read md5 hashes.\n\nLeave blank for autodetect.",
Help: "The command used to read MD5 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "sha1sum_command",
Default: "",
Help: "The command used to read sha1 hashes.\n\nLeave blank for autodetect.",
Help: "The command used to read SHA-1 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "crc32sum_command",
Default: "",
Help: "The command used to read CRC-32 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "sha256sum_command",
Default: "",
Help: "The command used to read SHA-256 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "blake3sum_command",
Default: "",
Help: "The command used to read BLAKE3 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "xxh3sum_command",
Default: "",
Help: "The command used to read XXH3 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "xxh128sum_command",
Default: "",
Help: "The command used to read XXH128 hashes.\n\nLeave blank for autodetect.",
Advanced: true,
}, {
Name: "skip_links",
@ -526,8 +556,14 @@ type Options struct {
PathOverride string `config:"path_override"`
SetModTime bool `config:"set_modtime"`
ShellType string `config:"shell_type"`
Hashes fs.CommaSepList `config:"hashes"`
Md5sumCommand string `config:"md5sum_command"`
Sha1sumCommand string `config:"sha1sum_command"`
Crc32sumCommand string `config:"crc32sum_command"`
Sha256sumCommand string `config:"sha256sum_command"`
Blake3sumCommand string `config:"blake3sum_command"`
Xxh3sumCommand string `config:"xxh3sum_command"`
Xxh128sumCommand string `config:"xxh128sum_command"`
SkipLinks bool `config:"skip_links"`
Subsystem string `config:"subsystem"`
ServerCommand string `config:"server_command"`
@ -574,13 +610,18 @@ type Fs struct {
// Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading)
type Object struct {
fs *Fs
remote string
size int64 // size of the object
modTime uint32 // modification time of the object as unix time
mode os.FileMode // mode bits from the file
md5sum *string // Cached MD5 checksum
sha1sum *string // Cached SHA1 checksum
fs *Fs
remote string
size int64 // size of the object
modTime uint32 // modification time of the object as unix time
mode os.FileMode // mode bits from the file
md5sum *string // Cached MD5 checksum
sha1sum *string // Cached SHA-1 checksum
crc32sum *string // Cached CRC-32 checksum
sha256sum *string // Cached SHA-256 checksum
blake3sum *string // Cached BLAKE3 checksum
xxh3sum *string // Cached XXH3 checksum
xxh128sum *string // Cached XXH128 checksum
}
// conn encapsulates an ssh client and corresponding sftp client
@ -1623,14 +1664,113 @@ func (f *Fs) Hashes() hash.Set {
return *f.cachedHashes
}
hashSet := hash.NewHashSet()
f.cachedHashes = &hashSet
hashTypesSupported := hash.NewHashSet()
f.cachedHashes = &hashTypesSupported
if f.opt.DisableHashCheck || f.shellType == shellTypeNotSupported {
return hashSet
return hashTypesSupported
}
hashTypes := hash.NewHashSet()
if len(f.opt.Hashes) > 0 {
for _, hashName := range f.opt.Hashes {
var hashType hash.Type
if err := hashType.Set(hashName); err != nil {
fs.Infof(nil, "Invalid token %q in hash string %q", hashName, f.opt.Hashes.String())
}
hashTypes.Add(hashType)
}
} else {
hashTypes.Add(hash.MD5, hash.SHA1)
}
hashCommands := map[hash.Type]struct {
option *string
emptyHash string
hashCommands []struct{ hashFile, hashEmpty string }
}{
hash.MD5: {
&f.opt.Md5sumCommand,
"d41d8cd98f00b204e9800998ecf8427e",
[]struct{ hashFile, hashEmpty string }{
{"md5sum", "md5sum"},
{"md5 -r", "md5 -r"},
{"rclone md5sum", "rclone md5sum"},
},
},
hash.SHA1: {
&f.opt.Sha1sumCommand,
"da39a3ee5e6b4b0d3255bfef95601890afd80709",
[]struct{ hashFile, hashEmpty string }{
{"sha1sum", "sha1sum"},
{"sha1 -r", "sha1 -r"},
{"rclone sha1sum", "rclone sha1sum"},
},
},
hash.CRC32: {
&f.opt.Sha1sumCommand,
"00000000",
[]struct{ hashFile, hashEmpty string }{
{"crc32", "crc32"},
{"rclone hashsum crc32", "rclone hashsum crc32"},
},
},
hash.SHA256: {
&f.opt.Sha256sumCommand,
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
[]struct{ hashFile, hashEmpty string }{
{"sha256sum", "sha1sum"},
{"sha256 -r", "sha1 -r"},
{"rclone hashsum sha256", "rclone hashsum sha256"},
},
},
hash.BLAKE3: {
&f.opt.Blake3sumCommand,
"af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262",
[]struct{ hashFile, hashEmpty string }{
{"b3sum", "b3sum"},
{"rclone hashsum blake3", "rclone hashsum blake3"},
},
},
hash.XXH3: {
&f.opt.Xxh3sumCommand,
"2d06800538d394c2",
[]struct{ hashFile, hashEmpty string }{
// The xxhsum tool uses an alternative BSD style output format for the 64-bit variant of XXH3,
// otherwise optional with argument --tag, to avoid confusion with the older 64-bit algorithm XXH64.
// For the same reason there is no algorithm-specific alias, xxh3sum, either. We are currently not able
// to parse this output format. Next release of xxHash after 0.8.2 will change to GNU style, classic
// md5sum, output format, but will use a non-standard prefix "XXH3_" preceding the hash, so we still
// need additional changes to be able to support it.
//{"xxh3sum", "xxh3sum"},
//{"xxhsum -H3", "xxhsum -H3"},
{"rclone hashsum xxh3", "rclone hashsum xxh3"},
},
},
hash.XXH128: {
&f.opt.Xxh128sumCommand,
"99aa06d3014798d86001c324468d497f",
[]struct{ hashFile, hashEmpty string }{
{"xxh128sum", "xxh128sum"},
{"xxhsum -H2", "xxhsum -H2"},
{"rclone hashsum xxh128", "rclone hashsum xxh128"},
},
},
}
if f.shellType == "powershell" {
for _, hashType := range []hash.Type{hash.MD5, hash.SHA1, hash.SHA256} {
if entry, ok := hashCommands[hashType]; ok {
entry.hashCommands = append(hashCommands[hashType].hashCommands, struct {
hashFile, hashEmpty string
}{
fmt.Sprintf("&{param($Path);Get-FileHash -Algorithm %v -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}", hashType),
fmt.Sprintf("Get-FileHash -Algorithm %v -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}", hashType),
})
hashCommands[hashType] = entry
}
}
}
// look for a hash command which works
checkHash := func(hashType hash.Type, commands []struct{ hashFile, hashEmpty string }, expected string, hashCommand *string, changed *bool) bool {
if *hashCommand == hashCommandNotSupported {
return false
@ -1659,55 +1799,25 @@ func (f *Fs) Hashes() hash.Set {
}
changed := false
md5Commands := []struct {
hashFile, hashEmpty string
}{
{"md5sum", "md5sum"},
{"md5 -r", "md5 -r"},
{"rclone md5sum", "rclone md5sum"},
for _, hashType := range hashTypes.Array() {
if entry, ok := hashCommands[hashType]; ok {
if works := checkHash(hashType, entry.hashCommands, entry.emptyHash, entry.option, &changed); works {
hashTypesSupported.Add(hashType)
}
}
}
sha1Commands := []struct {
hashFile, hashEmpty string
}{
{"sha1sum", "sha1sum"},
{"sha1 -r", "sha1 -r"},
{"rclone sha1sum", "rclone sha1sum"},
}
if f.shellType == "powershell" {
md5Commands = append(md5Commands, struct {
hashFile, hashEmpty string
}{
"&{param($Path);Get-FileHash -Algorithm MD5 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}",
"Get-FileHash -Algorithm MD5 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}",
})
sha1Commands = append(sha1Commands, struct {
hashFile, hashEmpty string
}{
"&{param($Path);Get-FileHash -Algorithm SHA1 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}",
"Get-FileHash -Algorithm SHA1 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}",
})
}
md5Works := checkHash(hash.MD5, md5Commands, "d41d8cd98f00b204e9800998ecf8427e", &f.opt.Md5sumCommand, &changed)
sha1Works := checkHash(hash.SHA1, sha1Commands, "da39a3ee5e6b4b0d3255bfef95601890afd80709", &f.opt.Sha1sumCommand, &changed)
if changed {
// Save permanently in config to avoid the extra work next time
fs.Debugf(f, "Setting hash command for %v to %q (set sha1sum_command to override)", hash.MD5, f.opt.Md5sumCommand)
f.m.Set("md5sum_command", f.opt.Md5sumCommand)
fs.Debugf(f, "Setting hash command for %v to %q (set md5sum_command to override)", hash.SHA1, f.opt.Sha1sumCommand)
f.m.Set("sha1sum_command", f.opt.Sha1sumCommand)
for _, hashType := range hashTypes.Array() {
if entry, ok := hashCommands[hashType]; ok {
fs.Debugf(f, "Setting hash command for %v to %q (set %vsum_command to override)", hashType, *entry.option, hashType)
f.m.Set(fmt.Sprintf("%vsum_command", hashType), *entry.option)
}
}
}
if sha1Works {
hashSet.Add(hash.SHA1)
}
if md5Works {
hashSet.Add(hash.MD5)
}
return hashSet
return hashTypesSupported
}
// About gets usage stats
@ -1842,17 +1952,43 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
_ = o.fs.Hashes()
var hashCmd string
if r == hash.MD5 {
switch r {
case hash.MD5:
if o.md5sum != nil {
return *o.md5sum, nil
}
hashCmd = o.fs.opt.Md5sumCommand
} else if r == hash.SHA1 {
case hash.SHA1:
if o.sha1sum != nil {
return *o.sha1sum, nil
}
hashCmd = o.fs.opt.Sha1sumCommand
} else {
case hash.CRC32:
if o.crc32sum != nil {
return *o.crc32sum, nil
}
hashCmd = o.fs.opt.Crc32sumCommand
case hash.SHA256:
if o.sha256sum != nil {
return *o.sha256sum, nil
}
hashCmd = o.fs.opt.Sha256sumCommand
case hash.BLAKE3:
if o.blake3sum != nil {
return *o.blake3sum, nil
}
hashCmd = o.fs.opt.Blake3sumCommand
case hash.XXH3:
if o.xxh3sum != nil {
return *o.xxh3sum, nil
}
hashCmd = o.fs.opt.Xxh3sumCommand
case hash.XXH128:
if o.xxh128sum != nil {
return *o.xxh128sum, nil
}
hashCmd = o.fs.opt.Xxh128sumCommand
default:
return "", hash.ErrUnsupported
}
if hashCmd == "" || hashCmd == hashCommandNotSupported {
@ -1869,10 +2005,21 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
}
hashString := parseHash(outBytes)
fs.Debugf(o, "Parsed hash: %s", hashString)
if r == hash.MD5 {
switch r {
case hash.MD5:
o.md5sum = &hashString
} else if r == hash.SHA1 {
case hash.SHA1:
o.sha1sum = &hashString
case hash.CRC32:
o.crc32sum = &hashString
case hash.SHA256:
o.sha256sum = &hashString
case hash.BLAKE3:
o.blake3sum = &hashString
case hash.XXH3:
o.xxh3sum = &hashString
case hash.XXH128:
o.xxh128sum = &hashString
}
return hashString, nil
}
@ -1937,7 +2084,7 @@ func (f *Fs) remoteShellPath(remote string) string {
}
// Converts a byte array from the SSH session returned by
// an invocation of md5sum/sha1sum to a hash string
// an invocation of hash command to a hash string
// as expected by the rest of this application
func parseHash(bytes []byte) string {
// For strings with backslash *sum writes a leading \
@ -2166,6 +2313,11 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
// Clear the hash cache since we are about to update the object
o.md5sum = nil
o.sha1sum = nil
o.crc32sum = nil
o.sha256sum = nil
o.blake3sum = nil
o.xxh3sum = nil
o.xxh128sum = nil
c, err := o.fs.getSftpConnection(ctx)
if err != nil {
return fmt.Errorf("Update: %w", err)

View file

@ -318,29 +318,30 @@ is able to use checksumming if the same login has shell access,
and can execute remote commands. If there is a command that can
calculate compatible checksums on the remote system, Rclone can
then be configured to execute this whenever a checksum is needed,
and read back the results. Currently MD5 and SHA-1 are supported.
and read back the results. By default MD5 and SHA-1 are considered,
but also CRC32, SHA-256, BLAKE3, XXH3 and XXH128 are supported,
option `hashes` can be set to specify which to consider.
Normally this requires an external utility being available on
the server. By default rclone will try commands `md5sum`, `md5`
and `rclone md5sum` for MD5 checksums, and the first one found usable
will be picked. Same with `sha1sum`, `sha1` and `rclone sha1sum`
commands for SHA-1 checksums. These utilities normally need to
be in the remote's PATH to be found.
the server. E.g. for MD5 checksums, by default rclone will try commands
`md5sum`, `md5` and `rclone md5sum`, and the first one found
usable will be picked. These utilities normally need to be in the
remote's PATH to be found.
In some cases the shell itself is capable of calculating checksums.
PowerShell is an example of such a shell. If rclone detects that the
remote shell is PowerShell, which means it most probably is a
Windows OpenSSH server, rclone will use a predefined script block
to produce the checksums when no external checksum commands are found
(see [shell access](#shell-access)). This assumes PowerShell version
4.0 or newer.
to produce the checksums for MD5, SHA-1 and SHA-256 when no external
checksum commands are found (see [shell access](#shell-access)). This
assumes PowerShell version 4.0 or newer.
The options `md5sum_command` and `sha1_command` can be used to customize
the command to be executed for calculation of checksums. You can for
example set a specific path to where md5sum and sha1sum executables
are located, or use them to specify some other tools that print checksums
in compatible format. The value can include command-line arguments,
or even shell script blocks as with PowerShell. Rclone has subcommands
The options `md5sum_command`, `sha1_command`, etc. can be used to customize
the commands to be executed for calculation of checksums. You can for
example set a specific path to where the md5sum executable are located,
or specify some other tool that print checksums in compatible format.
The value can include command-line arguments, or even shell script blocks
as with PowerShell. Rclone has subcommands [hashsum](/commands/rclone_hashsum/),
[md5sum](/commands/rclone_md5sum/) and [sha1sum](/commands/rclone_sha1sum/)
that use compatible format, which means if you have an rclone executable
on the server it can be used. As mentioned above, they will be automatically
@ -356,11 +357,14 @@ configuration, so next time it will use the same. Value `none`
will be set if none of the default commands could be used for a specific
algorithm, and this algorithm will not be supported by the remote.
Disabling the checksumming may be required if you are connecting to SFTP servers
which are not under your control, and to which the execution of remote shell
commands is prohibited. Set the configuration option `disable_hashcheck`
to `true` to disable checksumming entirely, or set `shell_type` to `none`
to disable all functionality based on remote shell command execution.
Disabling the checksumming completely may be required if you are connecting to
SFTP servers which are not under your control, and to which the execution of
remote shell commands is prohibited. Set the configuration option `disable_hashcheck`
to `true` to disable checksumming entirely (you get the same effect by setting
option `hashes` to `none` or options `md5sum_command`, `sha1_command` etc.
to `none`). Set option `shell_type` to `none` to not only disable checksumming,
but also disable all other functionality that are based on remote shell command
execution.
### Modification times and hashes