From 3cb22501ac795dbae73c40ce83e8296a40fb110d Mon Sep 17 00:00:00 2001 From: albertony <12441419+albertony@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:35:37 +0200 Subject: [PATCH] sftp: add support for more hashes (crc32, sha256, blake3, xxh3, xxh128) --- backend/sftp/sftp.go | 276 +++++++++++++++++++++++++++++++++---------- docs/content/sftp.md | 44 +++---- 2 files changed, 238 insertions(+), 82 deletions(-) diff --git a/backend/sftp/sftp.go b/backend/sftp/sftp.go index 3becf7b5c..47b3886ee 100644 --- a/backend/sftp/sftp.go +++ b/backend/sftp/sftp.go @@ -221,15 +221,45 @@ E.g. the second example above should be rewritten as: Help: "Windows Command Prompt", }, }, + }, { + Name: "hashes", + Help: `Comma separated list of supported checksum types.`, + Default: fs.CommaSepList{}, + Advanced: true, }, { Name: "md5sum_command", Default: "", - Help: "The command used to read md5 hashes.\n\nLeave blank for autodetect.", + Help: "The command used to read MD5 hashes.\n\nLeave blank for autodetect.", Advanced: true, }, { Name: "sha1sum_command", Default: "", - Help: "The command used to read sha1 hashes.\n\nLeave blank for autodetect.", + Help: "The command used to read SHA-1 hashes.\n\nLeave blank for autodetect.", + Advanced: true, + }, { + Name: "crc32sum_command", + Default: "", + Help: "The command used to read CRC-32 hashes.\n\nLeave blank for autodetect.", + Advanced: true, + }, { + Name: "sha256sum_command", + Default: "", + Help: "The command used to read SHA-256 hashes.\n\nLeave blank for autodetect.", + Advanced: true, + }, { + Name: "blake3sum_command", + Default: "", + Help: "The command used to read BLAKE3 hashes.\n\nLeave blank for autodetect.", + Advanced: true, + }, { + Name: "xxh3sum_command", + Default: "", + Help: "The command used to read XXH3 hashes.\n\nLeave blank for autodetect.", + Advanced: true, + }, { + Name: "xxh128sum_command", + Default: "", + Help: "The command used to read XXH128 hashes.\n\nLeave blank for autodetect.", Advanced: 
true, }, { Name: "skip_links", @@ -526,8 +556,14 @@ type Options struct { PathOverride string `config:"path_override"` SetModTime bool `config:"set_modtime"` ShellType string `config:"shell_type"` + Hashes fs.CommaSepList `config:"hashes"` Md5sumCommand string `config:"md5sum_command"` Sha1sumCommand string `config:"sha1sum_command"` + Crc32sumCommand string `config:"crc32sum_command"` + Sha256sumCommand string `config:"sha256sum_command"` + Blake3sumCommand string `config:"blake3sum_command"` + Xxh3sumCommand string `config:"xxh3sum_command"` + Xxh128sumCommand string `config:"xxh128sum_command"` SkipLinks bool `config:"skip_links"` Subsystem string `config:"subsystem"` ServerCommand string `config:"server_command"` @@ -574,13 +610,18 @@ type Fs struct { // Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading) type Object struct { - fs *Fs - remote string - size int64 // size of the object - modTime uint32 // modification time of the object as unix time - mode os.FileMode // mode bits from the file - md5sum *string // Cached MD5 checksum - sha1sum *string // Cached SHA1 checksum + fs *Fs + remote string + size int64 // size of the object + modTime uint32 // modification time of the object as unix time + mode os.FileMode // mode bits from the file + md5sum *string // Cached MD5 checksum + sha1sum *string // Cached SHA-1 checksum + crc32sum *string // Cached CRC-32 checksum + sha256sum *string // Cached SHA-256 checksum + blake3sum *string // Cached BLAKE3 checksum + xxh3sum *string // Cached XXH3 checksum + xxh128sum *string // Cached XXH128 checksum } // conn encapsulates an ssh client and corresponding sftp client @@ -1623,14 +1664,113 @@ func (f *Fs) Hashes() hash.Set { return *f.cachedHashes } - hashSet := hash.NewHashSet() - f.cachedHashes = &hashSet + hashTypesSupported := hash.NewHashSet() + f.cachedHashes = &hashTypesSupported if f.opt.DisableHashCheck || f.shellType == shellTypeNotSupported { - return 
hashSet + return hashTypesSupported + } + + hashTypes := hash.NewHashSet() + if len(f.opt.Hashes) > 0 { + for _, hashName := range f.opt.Hashes { + var hashType hash.Type + if err := hashType.Set(hashName); err != nil { + fs.Infof(nil, "Invalid token %q in hash string %q", hashName, f.opt.Hashes.String()) + } + hashTypes.Add(hashType) + } + } else { + hashTypes.Add(hash.MD5, hash.SHA1) + } + + hashCommands := map[hash.Type]struct { + option *string + emptyHash string + hashCommands []struct{ hashFile, hashEmpty string } + }{ + hash.MD5: { + &f.opt.Md5sumCommand, + "d41d8cd98f00b204e9800998ecf8427e", + []struct{ hashFile, hashEmpty string }{ + {"md5sum", "md5sum"}, + {"md5 -r", "md5 -r"}, + {"rclone md5sum", "rclone md5sum"}, + }, + }, + hash.SHA1: { + &f.opt.Sha1sumCommand, + "da39a3ee5e6b4b0d3255bfef95601890afd80709", + []struct{ hashFile, hashEmpty string }{ + {"sha1sum", "sha1sum"}, + {"sha1 -r", "sha1 -r"}, + {"rclone sha1sum", "rclone sha1sum"}, + }, + }, + hash.CRC32: { + &f.opt.Sha1sumCommand, + "00000000", + []struct{ hashFile, hashEmpty string }{ + {"crc32", "crc32"}, + {"rclone hashsum crc32", "rclone hashsum crc32"}, + }, + }, + hash.SHA256: { + &f.opt.Sha256sumCommand, + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + []struct{ hashFile, hashEmpty string }{ + {"sha256sum", "sha1sum"}, + {"sha256 -r", "sha1 -r"}, + {"rclone hashsum sha256", "rclone hashsum sha256"}, + }, + }, + hash.BLAKE3: { + &f.opt.Blake3sumCommand, + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", + []struct{ hashFile, hashEmpty string }{ + {"b3sum", "b3sum"}, + {"rclone hashsum blake3", "rclone hashsum blake3"}, + }, + }, + hash.XXH3: { + &f.opt.Xxh3sumCommand, + "2d06800538d394c2", + []struct{ hashFile, hashEmpty string }{ + // The xxhsum tool uses an alternative BSD style output format for the 64-bit variant of XXH3, + // otherwise optional with argument --tag, to avoid confusion with the older 64-bit algorithm XXH64. 
+ // For the same reason there is no algorithm-specific alias, xxh3sum, either. We are currently not able + // to parse this output format. Next release of xxHash after 0.8.2 will change to GNU style, classic + // md5sum, output format, but will use a non-standard prefix "XXH3_" preceding the hash, so we still + // need additional changes to be able to support it. + //{"xxh3sum", "xxh3sum"}, + //{"xxhsum -H3", "xxhsum -H3"}, + {"rclone hashsum xxh3", "rclone hashsum xxh3"}, + }, + }, + hash.XXH128: { + &f.opt.Xxh128sumCommand, + "99aa06d3014798d86001c324468d497f", + []struct{ hashFile, hashEmpty string }{ + {"xxh128sum", "xxh128sum"}, + {"xxhsum -H2", "xxhsum -H2"}, + {"rclone hashsum xxh128", "rclone hashsum xxh128"}, + }, + }, + } + if f.shellType == "powershell" { + for _, hashType := range []hash.Type{hash.MD5, hash.SHA1, hash.SHA256} { + if entry, ok := hashCommands[hashType]; ok { + entry.hashCommands = append(hashCommands[hashType].hashCommands, struct { + hashFile, hashEmpty string + }{ + fmt.Sprintf("&{param($Path);Get-FileHash -Algorithm %v -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}", hashType), + fmt.Sprintf("Get-FileHash -Algorithm %v -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}", hashType), + }) + hashCommands[hashType] = entry + } + } } - // look for a hash command which works checkHash := func(hashType hash.Type, commands []struct{ hashFile, hashEmpty string }, expected string, hashCommand *string, changed *bool) bool { if *hashCommand == hashCommandNotSupported { return false @@ -1659,55 +1799,25 @@ func (f *Fs) Hashes() hash.Set { } changed := false - md5Commands := []struct { - hashFile, hashEmpty string - }{ - {"md5sum", "md5sum"}, - {"md5 -r", "md5 -r"}, - {"rclone md5sum", "rclone md5sum"}, + for _, hashType := range hashTypes.Array() { + if entry, ok := 
hashCommands[hashType]; ok { + if works := checkHash(hashType, entry.hashCommands, entry.emptyHash, entry.option, &changed); works { + hashTypesSupported.Add(hashType) + } + } } - sha1Commands := []struct { - hashFile, hashEmpty string - }{ - {"sha1sum", "sha1sum"}, - {"sha1 -r", "sha1 -r"}, - {"rclone sha1sum", "rclone sha1sum"}, - } - if f.shellType == "powershell" { - md5Commands = append(md5Commands, struct { - hashFile, hashEmpty string - }{ - "&{param($Path);Get-FileHash -Algorithm MD5 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}", - "Get-FileHash -Algorithm MD5 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}", - }) - - sha1Commands = append(sha1Commands, struct { - hashFile, hashEmpty string - }{ - "&{param($Path);Get-FileHash -Algorithm SHA1 -LiteralPath $Path -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{\"$($_.ToLower()) ${Path}\"}}", - "Get-FileHash -Algorithm SHA1 -InputStream ([System.IO.MemoryStream]::new()) -ErrorAction Stop|Select-Object -First 1 -ExpandProperty Hash|ForEach-Object{$_.ToLower()}", - }) - } - - md5Works := checkHash(hash.MD5, md5Commands, "d41d8cd98f00b204e9800998ecf8427e", &f.opt.Md5sumCommand, &changed) - sha1Works := checkHash(hash.SHA1, sha1Commands, "da39a3ee5e6b4b0d3255bfef95601890afd80709", &f.opt.Sha1sumCommand, &changed) if changed { // Save permanently in config to avoid the extra work next time - fs.Debugf(f, "Setting hash command for %v to %q (set sha1sum_command to override)", hash.MD5, f.opt.Md5sumCommand) - f.m.Set("md5sum_command", f.opt.Md5sumCommand) - fs.Debugf(f, "Setting hash command for %v to %q (set md5sum_command to override)", hash.SHA1, f.opt.Sha1sumCommand) - f.m.Set("sha1sum_command", f.opt.Sha1sumCommand) + for _, hashType := range hashTypes.Array() { + if entry, ok := hashCommands[hashType]; ok { + 
fs.Debugf(f, "Setting hash command for %v to %q (set %vsum_command to override)", hashType, *entry.option, hashType) + f.m.Set(fmt.Sprintf("%vsum_command", hashType), *entry.option) + } + } } - if sha1Works { - hashSet.Add(hash.SHA1) - } - if md5Works { - hashSet.Add(hash.MD5) - } - - return hashSet + return hashTypesSupported } // About gets usage stats @@ -1842,17 +1952,43 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) { _ = o.fs.Hashes() var hashCmd string - if r == hash.MD5 { + switch r { + case hash.MD5: if o.md5sum != nil { return *o.md5sum, nil } hashCmd = o.fs.opt.Md5sumCommand - } else if r == hash.SHA1 { + case hash.SHA1: if o.sha1sum != nil { return *o.sha1sum, nil } hashCmd = o.fs.opt.Sha1sumCommand - } else { + case hash.CRC32: + if o.crc32sum != nil { + return *o.crc32sum, nil + } + hashCmd = o.fs.opt.Crc32sumCommand + case hash.SHA256: + if o.sha256sum != nil { + return *o.sha256sum, nil + } + hashCmd = o.fs.opt.Sha256sumCommand + case hash.BLAKE3: + if o.blake3sum != nil { + return *o.blake3sum, nil + } + hashCmd = o.fs.opt.Blake3sumCommand + case hash.XXH3: + if o.xxh3sum != nil { + return *o.xxh3sum, nil + } + hashCmd = o.fs.opt.Xxh3sumCommand + case hash.XXH128: + if o.xxh128sum != nil { + return *o.xxh128sum, nil + } + hashCmd = o.fs.opt.Xxh128sumCommand + default: return "", hash.ErrUnsupported } if hashCmd == "" || hashCmd == hashCommandNotSupported { @@ -1869,10 +2005,21 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) { } hashString := parseHash(outBytes) fs.Debugf(o, "Parsed hash: %s", hashString) - if r == hash.MD5 { + switch r { + case hash.MD5: o.md5sum = &hashString - } else if r == hash.SHA1 { + case hash.SHA1: o.sha1sum = &hashString + case hash.CRC32: + o.crc32sum = &hashString + case hash.SHA256: + o.sha256sum = &hashString + case hash.BLAKE3: + o.blake3sum = &hashString + case hash.XXH3: + o.xxh3sum = &hashString + case hash.XXH128: + o.xxh128sum = &hashString } return 
hashString, nil } @@ -1937,7 +2084,7 @@ func (f *Fs) remoteShellPath(remote string) string { } // Converts a byte array from the SSH session returned by -// an invocation of md5sum/sha1sum to a hash string +// an invocation of hash command to a hash string // as expected by the rest of this application func parseHash(bytes []byte) string { // For strings with backslash *sum writes a leading \ @@ -2166,6 +2313,11 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op // Clear the hash cache since we are about to update the object o.md5sum = nil o.sha1sum = nil + o.crc32sum = nil + o.sha256sum = nil + o.blake3sum = nil + o.xxh3sum = nil + o.xxh128sum = nil c, err := o.fs.getSftpConnection(ctx) if err != nil { return fmt.Errorf("Update: %w", err) diff --git a/docs/content/sftp.md b/docs/content/sftp.md index 56f4d98dc..5776510df 100644 --- a/docs/content/sftp.md +++ b/docs/content/sftp.md @@ -318,29 +318,30 @@ is able to use checksumming if the same login has shell access, and can execute remote commands. If there is a command that can calculate compatible checksums on the remote system, Rclone can then be configured to execute this whenever a checksum is needed, -and read back the results. Currently MD5 and SHA-1 are supported. +and read back the results. By default MD5 and SHA-1 are considered, +but also CRC32, SHA-256, BLAKE3, XXH3 and XXH128 are supported, +option `hashes` can be set to specify which to consider. Normally this requires an external utility being available on -the server. By default rclone will try commands `md5sum`, `md5` -and `rclone md5sum` for MD5 checksums, and the first one found usable -will be picked. Same with `sha1sum`, `sha1` and `rclone sha1sum` -commands for SHA-1 checksums. These utilities normally need to -be in the remote's PATH to be found. +the server. E.g. for MD5 checksums, by default rclone will try commands +`md5sum`, `md5` and `rclone md5sum`, and the first one found +usable will be picked. 
These utilities normally need to be in the +remote's PATH to be found. In some cases the shell itself is capable of calculating checksums. PowerShell is an example of such a shell. If rclone detects that the remote shell is PowerShell, which means it most probably is a Windows OpenSSH server, rclone will use a predefined script block -to produce the checksums when no external checksum commands are found -(see [shell access](#shell-access)). This assumes PowerShell version -4.0 or newer. +to produce the checksums for MD5, SHA-1 and SHA-256 when no external +checksum commands are found (see [shell access](#shell-access)). This +assumes PowerShell version 4.0 or newer. -The options `md5sum_command` and `sha1_command` can be used to customize -the command to be executed for calculation of checksums. You can for -example set a specific path to where md5sum and sha1sum executables -are located, or use them to specify some other tools that print checksums -in compatible format. The value can include command-line arguments, -or even shell script blocks as with PowerShell. Rclone has subcommands +The options `md5sum_command`, `sha1sum_command`, etc. can be used to customize +the commands to be executed for calculation of checksums. You can for +example set a specific path to where the md5sum executable is located, +or specify some other tool that prints checksums in compatible format. +The value can include command-line arguments, or even shell script blocks +as with PowerShell. Rclone has subcommands [hashsum](/commands/rclone_hashsum/), [md5sum](/commands/rclone_md5sum/) and [sha1sum](/commands/rclone_sha1sum/) that use compatible format, which means if you have an rclone executable on the server it can be used. As mentioned above, they will be automatically @@ -356,11 +357,14 @@ configuration, so next time it will use the same.
Value `none` will be set if none of the default commands could be used for a specific algorithm, and this algorithm will not be supported by the remote. -Disabling the checksumming may be required if you are connecting to SFTP servers -which are not under your control, and to which the execution of remote shell -commands is prohibited. Set the configuration option `disable_hashcheck` -to `true` to disable checksumming entirely, or set `shell_type` to `none` -to disable all functionality based on remote shell command execution. +Disabling the checksumming completely may be required if you are connecting to +SFTP servers which are not under your control, and to which the execution of +remote shell commands is prohibited. Set the configuration option `disable_hashcheck` +to `true` to disable checksumming entirely (you get the same effect by setting +option `hashes` to `none` or options `md5sum_command`, `sha1sum_command` etc. +to `none`). Set option `shell_type` to `none` to not only disable checksumming, +but also disable all other functionality that is based on remote shell command +execution. ### Modification times and hashes