cmd: add hashSUM file support (#5352)
Currently rclone check supports matching two file trees by sizes and hashes. This change adds support for SUM files produced by GNU utilities like sha1sum. Fixes #1005 Note: `checksum` verifies sums by default, whereas `hashsum` prints sums by default and only verifies them when `--checkfile` is given. The new `--checkfile` flag takes a hash name for `rclone check` (where the SUM file is passed as the source argument) and the path of a SUM file for `hashsum`, `sha1sum` and `md5sum`. Summary of introduced command forms: ``` rclone check sums.sha1 remote:path --checkfile sha1 rclone checksum sha1 sums.sha1 remote:path rclone hashsum sha1 remote:path --checkfile sums.sha1 rclone sha1sum remote:path --checkfile sums.sha1 rclone md5sum remote:path --checkfile sums.md5 ```
This commit is contained in:
parent
4680c0776d
commit
b40d9bd4c4
17 changed files with 685 additions and 41 deletions
|
@ -10,6 +10,7 @@ import (
|
|||
_ "github.com/rclone/rclone/cmd/cachestats"
|
||||
_ "github.com/rclone/rclone/cmd/cat"
|
||||
_ "github.com/rclone/rclone/cmd/check"
|
||||
_ "github.com/rclone/rclone/cmd/checksum"
|
||||
_ "github.com/rclone/rclone/cmd/cleanup"
|
||||
_ "github.com/rclone/rclone/cmd/cmount"
|
||||
_ "github.com/rclone/rclone/cmd/config"
|
||||
|
|
|
@ -2,6 +2,7 @@ package check
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
@ -17,20 +18,22 @@ import (
|
|||
|
||||
// Globals
|
||||
var (
|
||||
download = false
|
||||
oneway = false
|
||||
combined = ""
|
||||
missingOnSrc = ""
|
||||
missingOnDst = ""
|
||||
match = ""
|
||||
differ = ""
|
||||
errFile = ""
|
||||
download = false
|
||||
oneway = false
|
||||
combined = ""
|
||||
missingOnSrc = ""
|
||||
missingOnDst = ""
|
||||
match = ""
|
||||
differ = ""
|
||||
errFile = ""
|
||||
checkFileHashType = ""
|
||||
)
|
||||
|
||||
func init() {
|
||||
cmd.Root.AddCommand(commandDefinition)
|
||||
cmdFlags := commandDefinition.Flags()
|
||||
flags.BoolVarP(cmdFlags, &download, "download", "", download, "Check by downloading rather than with hash.")
|
||||
flags.StringVarP(cmdFlags, &checkFileHashType, "checkfile", "C", checkFileHashType, "Treat source:path as a SUM file with hashes of given type")
|
||||
AddFlags(cmdFlags)
|
||||
}
|
||||
|
||||
|
@ -126,7 +129,6 @@ func GetCheckOpt(fsrc, fdst fs.Fs) (opt *operations.CheckOpt, close func(), err
|
|||
}
|
||||
|
||||
return opt, close, nil
|
||||
|
||||
}
|
||||
|
||||
var commandDefinition = &cobra.Command{
|
||||
|
@ -144,16 +146,39 @@ If you supply the |--download| flag, it will download the data from
|
|||
both remotes and check them against each other on the fly. This can
|
||||
be useful for remotes that don't support hashes or if you really want
|
||||
to check all the data.
|
||||
|
||||
If you supply the |--checkfile HASH| flag with a valid hash name,
|
||||
the |source:path| must point to a text file in the SUM format.
|
||||
`, "|", "`") + FlagsHelp,
|
||||
Run: func(command *cobra.Command, args []string) {
|
||||
RunE: func(command *cobra.Command, args []string) error {
|
||||
cmd.CheckArgs(2, 2, command, args)
|
||||
fsrc, fdst := cmd.NewFsSrcDst(args)
|
||||
var (
|
||||
fsrc, fdst fs.Fs
|
||||
hashType hash.Type
|
||||
fsum fs.Fs
|
||||
sumFile string
|
||||
)
|
||||
if checkFileHashType != "" {
|
||||
if err := hashType.Set(checkFileHashType); err != nil {
|
||||
fmt.Println(hash.HelpString(0))
|
||||
return err
|
||||
}
|
||||
fsum, sumFile, fsrc = cmd.NewFsSrcFileDst(args)
|
||||
} else {
|
||||
fsrc, fdst = cmd.NewFsSrcDst(args)
|
||||
}
|
||||
|
||||
cmd.Run(false, true, command, func() error {
|
||||
opt, close, err := GetCheckOpt(fsrc, fdst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer close()
|
||||
|
||||
if checkFileHashType != "" {
|
||||
return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hashType, opt, download)
|
||||
}
|
||||
|
||||
if download {
|
||||
return operations.CheckDownload(context.Background(), opt)
|
||||
}
|
||||
|
@ -165,5 +190,6 @@ to check all the data.
|
|||
}
|
||||
return operations.Check(context.Background(), opt)
|
||||
})
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
|
57
cmd/checksum/checksum.go
Normal file
57
cmd/checksum/checksum.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
package checksum
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/rclone/rclone/cmd"
|
||||
"github.com/rclone/rclone/cmd/check" // for common flags
|
||||
"github.com/rclone/rclone/fs/config/flags"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// download backs the --download flag of the checksum command and is
// passed to operations.CheckSum as its download argument.
var download = false
|
||||
|
||||
func init() {
|
||||
cmd.Root.AddCommand(commandDefinition)
|
||||
cmdFlags := commandDefinition.Flags()
|
||||
flags.BoolVarP(cmdFlags, &download, "download", "", download, "Check by hashing the contents.")
|
||||
check.AddFlags(cmdFlags)
|
||||
}
|
||||
|
||||
// commandDefinition is the cobra definition of `rclone checksum`.
//
// The command takes exactly three arguments: the hash name, the SUM file
// and the source to verify. The hash name is validated first; on an
// unknown name the list of supported hashes is printed and the error is
// returned. The actual check is delegated to operations.CheckSum.
var commandDefinition = &cobra.Command{
	Use:   "checksum <hash> sumfile src:path",
	Short: `Checks the files in the source against a SUM file.`,
	// "|" stands in for a backtick, which cannot appear inside a raw string.
	Long: strings.ReplaceAll(`
Checks that hashsums of source files match the SUM file.
It compares hashes (MD5, SHA1, etc) and logs a report of files which
don't match. It doesn't alter the file system.

If you supply the |--download| flag, it will download the data from remote
and calculate the contents hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data.
`, "|", "`") + check.FlagsHelp,
	RunE: func(command *cobra.Command, args []string) error {
		cmd.CheckArgs(3, 3, command, args)
		// args[0] is the hash name; reject unknown names before touching any remote.
		var hashType hash.Type
		if err := hashType.Set(args[0]); err != nil {
			fmt.Println(hash.HelpString(0))
			return err
		}
		// args[1] is the SUM file and args[2] the file system to verify.
		fsum, sumFile, fsrc := cmd.NewFsSrcFileDst(args[1:])

		cmd.Run(false, true, command, func() error {
			// No source file system is passed here: the sums come from the SUM file.
			opt, close, err := check.GetCheckOpt(nil, fsrc)
			if err != nil {
				return err
			}
			defer close()

			return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hashType, opt, download)
		})
		return nil
	},
}
|
|
@ -4,7 +4,6 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rclone/rclone/cmd"
|
||||
|
@ -21,6 +20,7 @@ var (
|
|||
OutputBase64 = false
|
||||
DownloadFlag = false
|
||||
HashsumOutfile = ""
|
||||
ChecksumFile = ""
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -33,6 +33,7 @@ func init() {
|
|||
func AddHashFlags(cmdFlags *pflag.FlagSet) {
|
||||
flags.BoolVarP(cmdFlags, &OutputBase64, "base64", "", OutputBase64, "Output base64 encoded hashsum")
|
||||
flags.StringVarP(cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile, "Output hashsums to a file rather than the terminal")
|
||||
flags.StringVarP(cmdFlags, &ChecksumFile, "checkfile", "C", ChecksumFile, "Validate hashes against a given SUM file instead of printing them")
|
||||
flags.BoolVarP(cmdFlags, &DownloadFlag, "download", "", DownloadFlag, "Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote")
|
||||
}
|
||||
|
||||
|
@ -70,7 +71,7 @@ hashed locally enabling any hash for any remote.
|
|||
Run without a hash to see the list of all supported hashes, e.g.
|
||||
|
||||
$ rclone hashsum
|
||||
` + hashListHelp(" ") + `
|
||||
` + hash.HelpString(4) + `
|
||||
Then
|
||||
|
||||
$ rclone hashsum MD5 remote:path
|
||||
|
@ -80,7 +81,7 @@ Note that hash names are case insensitive.
|
|||
RunE: func(command *cobra.Command, args []string) error {
|
||||
cmd.CheckArgs(0, 2, command, args)
|
||||
if len(args) == 0 {
|
||||
fmt.Print(hashListHelp(""))
|
||||
fmt.Print(hash.HelpString(0))
|
||||
return nil
|
||||
} else if len(args) == 1 {
|
||||
return errors.New("need hash type and remote")
|
||||
|
@ -88,12 +89,16 @@ Note that hash names are case insensitive.
|
|||
var ht hash.Type
|
||||
err := ht.Set(args[0])
|
||||
if err != nil {
|
||||
fmt.Println(hashListHelp(""))
|
||||
fmt.Println(hash.HelpString(0))
|
||||
return err
|
||||
}
|
||||
fsrc := cmd.NewFsSrc(args[1:])
|
||||
|
||||
cmd.Run(false, false, command, func() error {
|
||||
if ChecksumFile != "" {
|
||||
fsum, sumFile := cmd.NewFsFile(ChecksumFile)
|
||||
return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, ht, nil, DownloadFlag)
|
||||
}
|
||||
if HashsumOutfile == "" {
|
||||
return operations.HashLister(context.Background(), ht, OutputBase64, DownloadFlag, fsrc, nil)
|
||||
}
|
||||
|
@ -107,14 +112,3 @@ Note that hash names are case insensitive.
|
|||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
func hashListHelp(indent string) string {
|
||||
var help strings.Builder
|
||||
help.WriteString(indent)
|
||||
help.WriteString("Supported hashes are:\n")
|
||||
for _, ht := range hash.Supported().Array() {
|
||||
help.WriteString(indent)
|
||||
fmt.Fprintf(&help, " * %v\n", ht.String())
|
||||
}
|
||||
return help.String()
|
||||
}
|
||||
|
|
|
@ -32,6 +32,10 @@ hashed locally enabling MD5 for any remote.
|
|||
cmd.CheckArgs(1, 1, command, args)
|
||||
fsrc := cmd.NewFsSrc(args)
|
||||
cmd.Run(false, false, command, func() error {
|
||||
if hashsum.ChecksumFile != "" {
|
||||
fsum, sumFile := cmd.NewFsFile(hashsum.ChecksumFile)
|
||||
return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hash.MD5, nil, hashsum.DownloadFlag)
|
||||
}
|
||||
if hashsum.HashsumOutfile == "" {
|
||||
return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil)
|
||||
}
|
||||
|
|
|
@ -32,6 +32,10 @@ hashed locally enabling SHA-1 for any remote.
|
|||
cmd.CheckArgs(1, 1, command, args)
|
||||
fsrc := cmd.NewFsSrc(args)
|
||||
cmd.Run(false, false, command, func() error {
|
||||
if hashsum.ChecksumFile != "" {
|
||||
fsum, sumFile := cmd.NewFsFile(hashsum.ChecksumFile)
|
||||
return operations.CheckSum(context.Background(), fsrc, fsum, sumFile, hash.SHA1, nil, hashsum.DownloadFlag)
|
||||
}
|
||||
if hashsum.HashsumOutfile == "" {
|
||||
return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, nil)
|
||||
}
|
||||
|
|
|
@ -24,6 +24,9 @@ both remotes and check them against each other on the fly. This can
|
|||
be useful for remotes that don't support hashes or if you really want
|
||||
to check all the data.
|
||||
|
||||
If you supply the `--checkfile HASH` flag with a valid hash name,
|
||||
the `source:path` must point to a text file in the SUM format.
|
||||
|
||||
If you supply the `--one-way` flag, it will only check that files in
|
||||
the source match the files in the destination, not the other way
|
||||
around. This means that extra files in the destination that are not in
|
||||
|
@ -53,6 +56,7 @@ rclone check source:path dest:path [flags]
|
|||
## Options
|
||||
|
||||
```
|
||||
-C, --checkfile string Treat source:path as a SUM file with hashes of given type
|
||||
--combined string Make a combined report of changes to this file
|
||||
--differ string Report all non-matching files to this file
|
||||
--download Check by downloading rather than with hash.
|
||||
|
|
68
docs/content/commands/rclone_checksum.md
Normal file
68
docs/content/commands/rclone_checksum.md
Normal file
|
@ -0,0 +1,68 @@
|
|||
---
|
||||
title: "rclone checksum"
|
||||
description: "Checks the files in the source against a SUM file."
|
||||
slug: rclone_checksum
|
||||
url: /commands/rclone_checksum/
|
||||
# autogenerated - DO NOT EDIT, instead edit the source code in cmd/checksum/ and as part of making a release run "make commanddocs"
|
||||
---
|
||||
# rclone checksum
|
||||
|
||||
Checks the files in the source against a SUM file.
|
||||
|
||||
## Synopsis
|
||||
|
||||
|
||||
Checks that hashsums of source files match the SUM file.
|
||||
It compares hashes (MD5, SHA1, etc) and logs a report of files which
|
||||
don't match. It doesn't alter the file system.
|
||||
|
||||
If you supply the `--download` flag, it will download the data from remote
|
||||
and calculate the contents hash on the fly. This can be useful for remotes
|
||||
that don't support hashes or if you really want to check all the data.
|
||||
|
||||
If you supply the `--one-way` flag, it will only check that files in
|
||||
the source match the files in the destination, not the other way
|
||||
around. This means that extra files in the destination that are not in
|
||||
the source will not be detected.
|
||||
|
||||
The `--differ`, `--missing-on-dst`, `--missing-on-src`, `--match`
|
||||
and `--error` flags write paths, one per line, to the file name (or
|
||||
stdout if it is `-`) supplied. What they write is described in the
|
||||
help below. For example `--differ` will write all paths which are
|
||||
present on both the source and destination but different.
|
||||
|
||||
The `--combined` flag will write a file (or stdout) which contains all
|
||||
file paths with a symbol and then a space and then the path to tell
|
||||
you what happened to it. These are reminiscent of diff files.
|
||||
|
||||
- `= path` means path was found in source and destination and was identical
|
||||
- `- path` means path was missing on the source, so only in the destination
|
||||
- `+ path` means path was missing on the destination, so only in the source
|
||||
- `* path` means path was present in source and destination but different.
|
||||
- `! path` means there was an error reading or hashing the source or dest.
|
||||
|
||||
|
||||
```
|
||||
rclone checksum <hash> sumfile src:path [flags]
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
```
|
||||
--combined string Make a combined report of changes to this file
|
||||
--differ string Report all non-matching files to this file
|
||||
--download Check by hashing the contents.
|
||||
--error string Report all files with errors (hashing or reading) to this file
|
||||
-h, --help help for checksum
|
||||
--match string Report all matching files to this file
|
||||
--missing-on-dst string Report all files missing from the destination to this file
|
||||
--missing-on-src string Report all files missing from the source to this file
|
||||
--one-way Check one way only, source files must exist on remote
|
||||
```
|
||||
|
||||
See the [global flags page](/flags/) for global options not listed here.
|
||||
|
||||
## SEE ALSO
|
||||
|
||||
* [rclone](/commands/rclone/) - Show help for rclone commands, flags and backends.
|
||||
|
|
@ -48,6 +48,7 @@ rclone hashsum <hash> remote:path [flags]
|
|||
|
||||
```
|
||||
--base64 Output base64 encoded hashsum
|
||||
-C, --checkfile string Validate hashes against a given SUM file instead of printing them
|
||||
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
|
||||
-h, --help help for hashsum
|
||||
--output-file string Output hashsums to a file rather than the terminal
|
||||
|
|
|
@ -29,6 +29,7 @@ rclone md5sum remote:path [flags]
|
|||
|
||||
```
|
||||
--base64 Output base64 encoded hashsum
|
||||
-C, --checkfile string Validate hashes against a given SUM file instead of printing them
|
||||
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
|
||||
-h, --help help for md5sum
|
||||
--output-file string Output hashsums to a file rather than the terminal
|
||||
|
|
|
@ -29,6 +29,7 @@ rclone sha1sum remote:path [flags]
|
|||
|
||||
```
|
||||
--base64 Output base64 encoded hashsum
|
||||
-C, --checkfile string Validate hashes against a given SUM file instead of printing them
|
||||
--download Download the file and hash it locally; if this flag is not specified, the hash is requested from the remote
|
||||
-h, --help help for sha1sum
|
||||
--output-file string Output hashsums to a file rather than the terminal
|
||||
|
|
|
@ -373,8 +373,8 @@ func (f *Filter) InActive() bool {
|
|||
len(f.Opt.ExcludeFile) == 0)
|
||||
}
|
||||
|
||||
// includeRemote returns whether this remote passes the filter rules.
|
||||
func (f *Filter) includeRemote(remote string) bool {
|
||||
// IncludeRemote returns whether this remote passes the filter rules.
|
||||
func (f *Filter) IncludeRemote(remote string) bool {
|
||||
for _, rule := range f.fileRules.rules {
|
||||
if rule.Match(remote) {
|
||||
return rule.Include
|
||||
|
@ -467,7 +467,7 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
|
|||
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
|
||||
return false
|
||||
}
|
||||
return f.includeRemote(remote)
|
||||
return f.IncludeRemote(remote)
|
||||
}
|
||||
|
||||
// IncludeObject returns whether this object should be included into
|
||||
|
|
|
@ -341,3 +341,15 @@ func Equals(src, dst string) bool {
|
|||
}
|
||||
return src == dst
|
||||
}
|
||||
|
||||
// HelpString returns help message with supported hashes
|
||||
func HelpString(indent int) string {
|
||||
padding := strings.Repeat(" ", indent)
|
||||
var help strings.Builder
|
||||
help.WriteString(padding)
|
||||
help.WriteString("Supported hashes are:\n")
|
||||
for _, h := range supported {
|
||||
fmt.Fprintf(&help, "%s * %v\n", padding, h.String())
|
||||
}
|
||||
return help.String()
|
||||
}
|
||||
|
|
|
@ -1,16 +1,19 @@
|
|||
package operations
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/accounting"
|
||||
"github.com/rclone/rclone/fs/filter"
|
||||
"github.com/rclone/rclone/fs/fserrors"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/fs/march"
|
||||
|
@ -56,15 +59,15 @@ type checkMarch struct {
|
|||
|
||||
// report outputs the fileName to out if required and to the combined log
|
||||
func (c *checkMarch) report(o fs.DirEntry, out io.Writer, sigil rune) {
|
||||
c.reportFilename(o.String(), out, sigil)
|
||||
}
|
||||
|
||||
func (c *checkMarch) reportFilename(filename string, out io.Writer, sigil rune) {
|
||||
if out != nil {
|
||||
c.ioMu.Lock()
|
||||
_, _ = fmt.Fprintf(out, "%v\n", o)
|
||||
c.ioMu.Unlock()
|
||||
syncFprintf(out, "%s\n", filename)
|
||||
}
|
||||
if c.opt.Combined != nil {
|
||||
c.ioMu.Lock()
|
||||
_, _ = fmt.Fprintf(c.opt.Combined, "%c %v\n", sigil, o)
|
||||
c.ioMu.Unlock()
|
||||
syncFprintf(c.opt.Combined, "%c %s\n", sigil, filename)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -224,11 +227,19 @@ func CheckFn(ctx context.Context, opt *CheckOpt) error {
|
|||
err := m.Run(ctx)
|
||||
c.wg.Wait() // wait for background go-routines
|
||||
|
||||
return c.reportResults(ctx, err)
|
||||
}
|
||||
|
||||
func (c *checkMarch) reportResults(ctx context.Context, err error) error {
|
||||
if c.dstFilesMissing > 0 {
|
||||
fs.Logf(c.opt.Fdst, "%d files missing", c.dstFilesMissing)
|
||||
}
|
||||
if c.srcFilesMissing > 0 {
|
||||
fs.Logf(c.opt.Fsrc, "%d files missing", c.srcFilesMissing)
|
||||
entity := "files"
|
||||
if c.opt.Fsrc == nil {
|
||||
entity = "hashes"
|
||||
}
|
||||
fs.Logf(c.opt.Fsrc, "%d %s missing", c.srcFilesMissing, entity)
|
||||
}
|
||||
|
||||
fs.Logf(c.opt.Fdst, "%d differences found", accounting.Stats(ctx).GetErrors())
|
||||
|
@ -358,3 +369,210 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error {
|
|||
}
|
||||
return CheckFn(ctx, &optCopy)
|
||||
}
|
||||
|
||||
// CheckSum checks filesystem hashes against a SUM file
|
||||
func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType hash.Type, opt *CheckOpt, download bool) error {
|
||||
var options CheckOpt
|
||||
if opt != nil {
|
||||
options = *opt
|
||||
} else {
|
||||
// default options for hashsum -c
|
||||
options.Combined = os.Stdout
|
||||
}
|
||||
// CheckSum treats Fsrc and Fdst specially:
|
||||
options.Fsrc = nil // no file system here, corresponds to the sum list
|
||||
options.Fdst = fsrc // denotes the file system to check
|
||||
opt = &options // override supplied argument
|
||||
|
||||
if !download && (hashType == hash.None || !opt.Fdst.Hashes().Contains(hashType)) {
|
||||
return errors.Errorf("%s: hash type is not supported by file system: %s", hashType, opt.Fdst)
|
||||
}
|
||||
|
||||
if sumFile == "" {
|
||||
return errors.Errorf("not a sum file: %s", fsum)
|
||||
}
|
||||
sumObj, err := fsum.NewObject(ctx, sumFile)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot open sum file")
|
||||
}
|
||||
hashes, err := ParseSumFile(ctx, sumObj)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse sum file")
|
||||
}
|
||||
|
||||
ci := fs.GetConfig(ctx)
|
||||
c := &checkMarch{
|
||||
tokens: make(chan struct{}, ci.Checkers),
|
||||
opt: *opt,
|
||||
}
|
||||
lastErr := ListFn(ctx, opt.Fdst, func(obj fs.Object) {
|
||||
c.checkSum(ctx, obj, download, hashes, hashType)
|
||||
})
|
||||
c.wg.Wait() // wait for background go-routines
|
||||
|
||||
// make census of unhandled sums
|
||||
fi := filter.GetConfig(ctx)
|
||||
for filename, hash := range hashes {
|
||||
if hash == "" { // the sum has been successfully consumed
|
||||
continue
|
||||
}
|
||||
if !fi.IncludeRemote(filename) { // the file was filtered out
|
||||
continue
|
||||
}
|
||||
// filesystem missed the file, sum wasn't consumed
|
||||
err := errors.Errorf("File not in %v", opt.Fdst)
|
||||
fs.Errorf(filename, "%v", err)
|
||||
_ = fs.CountError(err)
|
||||
if lastErr == nil {
|
||||
lastErr = err
|
||||
}
|
||||
atomic.AddInt32(&c.dstFilesMissing, 1)
|
||||
c.reportFilename(filename, opt.MissingOnDst, '+')
|
||||
}
|
||||
|
||||
return c.reportResults(ctx, lastErr)
|
||||
}
|
||||
|
||||
// checkSum checks single object against golden hashes
//
// The sum recorded for the object's remote name is looked up in hashes
// and the entry is overwritten with "" to mark it as consumed. Objects
// without a sum are ignored when c.opt.OneWay is set and are otherwise
// reported as missing on the source side.
//
// Without download the hash is requested from the object and compared
// before returning. With download the object is read and hashed in a
// background goroutine; c.tokens bounds how many of these run at once
// and c.wg lets the caller wait for them.
func (c *checkMarch) checkSum(ctx context.Context, obj fs.Object, download bool, hashes HashSums, hashType hash.Type) {
	remote := obj.Remote()
	// The map is shared between calls, so read and update it under the lock.
	c.ioMu.Lock()
	sumHash, sumFound := hashes[remote]
	hashes[remote] = "" // mark sum as consumed
	c.ioMu.Unlock()

	if !sumFound && c.opt.OneWay {
		return
	}

	var err error
	tr := accounting.Stats(ctx).NewCheckingTransfer(obj)
	// NOTE(review): the arguments of a deferred call are evaluated here, while
	// err is still nil, so Done never sees the errors assigned below. Confirm
	// whether that is intended (errors are counted via fs.CountError).
	defer tr.Done(ctx, err)

	if !sumFound {
		err = errors.New("sum not found")
		_ = fs.CountError(err)
		fs.Errorf(obj, "%v", err)
		atomic.AddInt32(&c.differences, 1)
		atomic.AddInt32(&c.srcFilesMissing, 1)
		c.report(obj, c.opt.MissingOnSrc, '-')
		return
	}

	if !download {
		// Ask the object for its hash and compare synchronously.
		var objHash string
		objHash, err = obj.Hash(ctx, hashType)
		c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
		return
	}

	c.wg.Add(1)
	c.tokens <- struct{}{} // put a token to limit concurrency
	go func() {
		var (
			objHash string
			err     error
			in      io.ReadCloser
		)
		// Registered first so it runs last: the result is matched after the
		// transfer below has been finished, on every return path.
		defer func() {
			c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
			<-c.tokens // get the token back to free up a slot
			c.wg.Done()
		}()
		if in, err = obj.Open(ctx); err != nil {
			return
		}
		tr := accounting.Stats(ctx).NewTransfer(obj)
		in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer
		defer func() {
			tr.Done(ctx, nil) // will close the stream
		}()
		hashVals, err2 := hash.StreamTypes(in, hash.NewHashSet(hashType))
		if err2 != nil {
			err = err2 // pass to matchSum
			return
		}
		objHash = hashVals[hashType]
	}()
}
|
||||
|
||||
// matchSum sums up the results of hashsum matching for an object
|
||||
func (c *checkMarch) matchSum(ctx context.Context, sumHash, objHash string, obj fs.Object, err error, hashType hash.Type) {
|
||||
switch {
|
||||
case err != nil:
|
||||
_ = fs.CountError(err)
|
||||
fs.Errorf(obj, "Failed to calculate hash: %v", err)
|
||||
c.report(obj, c.opt.Error, '!')
|
||||
case sumHash == "":
|
||||
err = errors.New("duplicate file")
|
||||
_ = fs.CountError(err)
|
||||
fs.Errorf(obj, "%v", err)
|
||||
c.report(obj, c.opt.Error, '!')
|
||||
case objHash == "":
|
||||
fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
|
||||
fs.Debugf(obj, "%v - could not check hash (%v)", hashType, c.opt.Fdst)
|
||||
atomic.AddInt32(&c.noHashes, 1)
|
||||
atomic.AddInt32(&c.matches, 1)
|
||||
c.report(obj, c.opt.Match, '=')
|
||||
case objHash == sumHash:
|
||||
fs.Debugf(obj, "%v = %s OK", hashType, sumHash)
|
||||
atomic.AddInt32(&c.matches, 1)
|
||||
c.report(obj, c.opt.Match, '=')
|
||||
default:
|
||||
err = errors.New("files differ")
|
||||
_ = fs.CountError(err)
|
||||
fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
|
||||
fs.Debugf(obj, "%v = %s (%v)", hashType, objHash, c.opt.Fdst)
|
||||
fs.Errorf(obj, "%v", err)
|
||||
atomic.AddInt32(&c.differences, 1)
|
||||
c.report(obj, c.opt.Differ, '*')
|
||||
}
|
||||
}
|
||||
|
||||
// HashSums represents a parsed SUM file: it maps a file name to the hash
// recorded for it. checkSum overwrites an entry with "" once it has been
// looked up, so an empty value marks a sum as consumed.
type HashSums map[string]string
|
||||
|
||||
// ParseSumFile parses a hash SUM file and returns hashes as a map
|
||||
func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
|
||||
rd, err := sumFile.Open(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parser := bufio.NewReader(rd)
|
||||
|
||||
const maxWarn = 4
|
||||
numWarn := 0
|
||||
|
||||
re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`)
|
||||
hashes := HashSums{}
|
||||
for lineNo := 0; true; lineNo++ {
|
||||
lineBytes, _, err := parser.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
line := string(lineBytes)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if fields := re.FindStringSubmatch(line); fields != nil {
|
||||
hashes[fields[2]] = fields[1]
|
||||
continue
|
||||
}
|
||||
|
||||
numWarn++
|
||||
if numWarn < maxWarn {
|
||||
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
|
||||
} else if numWarn == maxWarn {
|
||||
fs.Logf(sumFile, "more warnings suppressed...")
|
||||
}
|
||||
}
|
||||
|
||||
if err = rd.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return hashes, nil
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"github.com/pkg/errors"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/accounting"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
"github.com/rclone/rclone/fstest"
|
||||
"github.com/rclone/rclone/lib/readers"
|
||||
|
@ -275,3 +276,255 @@ func TestCheckEqualReaders(t *testing.T) {
|
|||
assert.Equal(t, myErr, err)
|
||||
assert.Equal(t, differ, true)
|
||||
}
|
||||
|
||||
func TestParseSumFile(t *testing.T) {
|
||||
r := fstest.NewRun(t)
|
||||
defer r.Finalise()
|
||||
ctx := context.Background()
|
||||
|
||||
const sumFile = "test.sum"
|
||||
|
||||
samples := []struct {
|
||||
hash, sep, name string
|
||||
ok bool
|
||||
}{
|
||||
{"1", " ", "file1", true},
|
||||
{"2", " *", "file2", true},
|
||||
{"3", " ", " file3 ", true},
|
||||
{"4", " ", "\tfile3\t", true},
|
||||
{"5", " ", "file5", false},
|
||||
{"6", "\t", "file6", false},
|
||||
{"7", " \t", " file7 ", false},
|
||||
{"", " ", "file8", false},
|
||||
{"", "", "file9", false},
|
||||
}
|
||||
|
||||
for _, eol := range []string{"\n", "\r\n"} {
|
||||
data := &bytes.Buffer{}
|
||||
wantNum := 0
|
||||
for _, s := range samples {
|
||||
_, _ = data.WriteString(s.hash + s.sep + s.name + eol)
|
||||
if s.ok {
|
||||
wantNum++
|
||||
}
|
||||
}
|
||||
|
||||
_ = r.WriteObject(ctx, sumFile, data.String(), t1)
|
||||
file, err := r.Fremote.NewObject(ctx, sumFile)
|
||||
assert.NoError(t, err)
|
||||
sums, err := operations.ParseSumFile(ctx, file)
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, wantNum, len(sums))
|
||||
for _, s := range samples {
|
||||
if s.ok {
|
||||
assert.Equal(t, s.hash, sums[s.name])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testCheckSum(t *testing.T, download bool) {
|
||||
const dataDir = "data"
|
||||
const sumFile = "test.sum"
|
||||
|
||||
hashType := hash.MD5
|
||||
const (
|
||||
testString1 = "Hello, World!"
|
||||
testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4"
|
||||
testString2 = "I am the walrus"
|
||||
testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3"
|
||||
)
|
||||
|
||||
type wantType map[string]string
|
||||
|
||||
ctx := context.Background()
|
||||
r := fstest.NewRun(t)
|
||||
defer r.Finalise()
|
||||
|
||||
subRemote := r.FremoteName
|
||||
if !strings.HasSuffix(subRemote, ":") {
|
||||
subRemote += "/"
|
||||
}
|
||||
subRemote += dataDir
|
||||
dataFs, err := fs.NewFs(ctx, subRemote)
|
||||
require.NoError(t, err)
|
||||
|
||||
if !download && !dataFs.Hashes().Contains(hashType) {
|
||||
t.Skipf("%s lacks %s, skipping", dataFs, hashType)
|
||||
}
|
||||
|
||||
makeFile := func(name, content string) fstest.Item {
|
||||
remote := dataDir + "/" + name
|
||||
return r.WriteObject(ctx, remote, content, t1)
|
||||
}
|
||||
|
||||
// makeSums writes the given sums to the SUM file on the remote, one
// "hash  name" line per entry in name order. The separator is two
// characters (a space plus the mode character), as ParseSumFile requires.
makeSums := func(sums operations.HashSums) fstest.Item {
	files := make([]string, 0, len(sums))
	for name := range sums {
		files = append(files, name)
	}
	sort.Strings(files)
	buf := &bytes.Buffer{}
	for _, name := range files {
		_, _ = fmt.Fprintf(buf, "%s  %s\n", sums[name], name)
	}
	return r.WriteObject(ctx, sumFile, buf.String(), t1)
}
|
||||
|
||||
sortLines := func(in string) []string {
|
||||
if in == "" {
|
||||
return []string{}
|
||||
}
|
||||
lines := strings.Split(in, "\n")
|
||||
sort.Strings(lines)
|
||||
return lines
|
||||
}
|
||||
|
||||
checkResult := func(runNo int, want wantType, name string, out io.Writer) {
|
||||
expected := want[name]
|
||||
buf, ok := out.(*bytes.Buffer)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, sortLines(expected), sortLines(buf.String()), "wrong %s result in run %d", name, runNo)
|
||||
}
|
||||
|
||||
checkRun := func(runNo, wantChecks, wantErrors int, want wantType) {
|
||||
accounting.GlobalStats().ResetCounters()
|
||||
buf := new(bytes.Buffer)
|
||||
log.SetOutput(buf)
|
||||
defer log.SetOutput(os.Stderr)
|
||||
|
||||
opt := operations.CheckOpt{
|
||||
Combined: new(bytes.Buffer),
|
||||
Match: new(bytes.Buffer),
|
||||
Differ: new(bytes.Buffer),
|
||||
Error: new(bytes.Buffer),
|
||||
MissingOnSrc: new(bytes.Buffer),
|
||||
MissingOnDst: new(bytes.Buffer),
|
||||
}
|
||||
err := operations.CheckSum(ctx, dataFs, r.Fremote, sumFile, hashType, &opt, download)
|
||||
|
||||
gotErrors := int(accounting.GlobalStats().GetErrors())
|
||||
if wantErrors == 0 {
|
||||
assert.NoError(t, err, "unexpected error in run %d", runNo)
|
||||
}
|
||||
if wantErrors > 0 {
|
||||
assert.Error(t, err, "no expected error in run %d", runNo)
|
||||
}
|
||||
assert.Equal(t, wantErrors, gotErrors, "wrong error count in run %d", runNo)
|
||||
|
||||
gotChecks := int(accounting.GlobalStats().GetChecks())
|
||||
if wantChecks > 0 || gotChecks > 0 {
|
||||
assert.Contains(t, buf.String(), "matching files", "missing matching files in run %d", runNo)
|
||||
}
|
||||
assert.Equal(t, wantChecks, gotChecks, "wrong number of checks in run %d", runNo)
|
||||
|
||||
checkResult(runNo, want, "combined", opt.Combined)
|
||||
checkResult(runNo, want, "missingonsrc", opt.MissingOnSrc)
|
||||
checkResult(runNo, want, "missingondst", opt.MissingOnDst)
|
||||
checkResult(runNo, want, "match", opt.Match)
|
||||
checkResult(runNo, want, "differ", opt.Differ)
|
||||
checkResult(runNo, want, "error", opt.Error)
|
||||
}
|
||||
|
||||
check := func(runNo, wantChecks, wantErrors int, wantResults wantType) {
|
||||
runName := fmt.Sprintf("move%d", runNo)
|
||||
t.Run(runName, func(t *testing.T) {
|
||||
checkRun(runNo, wantChecks, wantErrors, wantResults)
|
||||
})
|
||||
}
|
||||
|
||||
file1 := makeFile("banana", testString1)
|
||||
fcsums := makeSums(operations.HashSums{
|
||||
"banana": testDigest1,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1)
|
||||
check(1, 1, 0, wantType{
|
||||
"combined": "= banana\n",
|
||||
"missingonsrc": "",
|
||||
"missingondst": "",
|
||||
"match": "banana\n",
|
||||
"differ": "",
|
||||
"error": "",
|
||||
})
|
||||
|
||||
file2 := makeFile("potato", testString2)
|
||||
fcsums = makeSums(operations.HashSums{
|
||||
"banana": testDigest1,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
|
||||
check(2, 2, 1, wantType{
|
||||
"combined": "- potato\n= banana\n",
|
||||
"missingonsrc": "potato\n",
|
||||
"missingondst": "",
|
||||
"match": "banana\n",
|
||||
"differ": "",
|
||||
"error": "",
|
||||
})
|
||||
|
||||
fcsums = makeSums(operations.HashSums{
|
||||
"banana": testDigest1,
|
||||
"potato": testDigest2,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
|
||||
check(3, 2, 0, wantType{
|
||||
"combined": "= potato\n= banana\n",
|
||||
"missingonsrc": "",
|
||||
"missingondst": "",
|
||||
"match": "banana\npotato\n",
|
||||
"differ": "",
|
||||
"error": "",
|
||||
})
|
||||
|
||||
fcsums = makeSums(operations.HashSums{
|
||||
"banana": testDigest2,
|
||||
"potato": testDigest2,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
|
||||
check(4, 2, 1, wantType{
|
||||
"combined": "* banana\n= potato\n",
|
||||
"missingonsrc": "",
|
||||
"missingondst": "",
|
||||
"match": "potato\n",
|
||||
"differ": "banana\n",
|
||||
"error": "",
|
||||
})
|
||||
|
||||
fcsums = makeSums(operations.HashSums{
|
||||
"banana": testDigest1,
|
||||
"potato": testDigest2,
|
||||
"orange": testDigest2,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
|
||||
check(5, 2, 1, wantType{
|
||||
"combined": "+ orange\n= potato\n= banana\n",
|
||||
"missingonsrc": "",
|
||||
"missingondst": "orange\n",
|
||||
"match": "banana\npotato\n",
|
||||
"differ": "",
|
||||
"error": "",
|
||||
})
|
||||
|
||||
fcsums = makeSums(operations.HashSums{
|
||||
"banana": testDigest1,
|
||||
"potato": testDigest1,
|
||||
"orange": testDigest2,
|
||||
})
|
||||
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
|
||||
check(6, 2, 2, wantType{
|
||||
"combined": "+ orange\n* potato\n= banana\n",
|
||||
"missingonsrc": "",
|
||||
"missingondst": "orange\n",
|
||||
"match": "banana\n",
|
||||
"differ": "potato\n",
|
||||
"error": "",
|
||||
})
|
||||
}
|
||||
|
||||
// TestCheckSum runs the shared testCheckSum scenario with its second
// argument set to false.
// NOTE(review): the second argument is presumably the "download" switch, so
// this variant would exercise the hash-based comparison — confirm against
// testCheckSum's signature, which is not visible here.
func TestCheckSum(t *testing.T) {
|
||||
testCheckSum(t, false)
|
||||
}
|
||||
|
||||
// TestCheckSumDownload runs the shared testCheckSum scenario with its second
// argument set to true.
// NOTE(review): judging by this test's name, true presumably selects checking
// by downloading file contents rather than comparing hashes — confirm against
// testCheckSum's signature, which is not visible here.
func TestCheckSumDownload(t *testing.T) {
|
||||
testCheckSum(t, true)
|
||||
}
|
||||
|
|
|
@ -853,7 +853,7 @@ var SyncPrintf = func(format string, a ...interface{}) {
|
|||
func syncFprintf(w io.Writer, format string, a ...interface{}) {
|
||||
outMutex.Lock()
|
||||
defer outMutex.Unlock()
|
||||
if w == nil {
|
||||
if w == nil || w == os.Stdout {
|
||||
SyncPrintf(format, a...)
|
||||
} else {
|
||||
_, _ = fmt.Fprintf(w, format, a...)
|
||||
|
@ -1751,7 +1751,7 @@ func moveOrCopyFile(ctx context.Context, fdst fs.Fs, fsrc fs.Fs, dstFileName str
|
|||
// This will move the file to a temporary name then
|
||||
// move it back to the intended destination. This is required
|
||||
// to avoid issues with certain remotes and avoid file deletion.
|
||||
if !cp && fdst.Name() == fsrc.Name() && fdst.Features().CaseInsensitive && dstFileName != srcFileName && strings.ToLower(dstFilePath) == strings.ToLower(srcFilePath) {
|
||||
if !cp && fdst.Name() == fsrc.Name() && fdst.Features().CaseInsensitive && dstFileName != srcFileName && strings.EqualFold(dstFilePath, srcFilePath) {
|
||||
// Create random name to temporarily move file to
|
||||
tmpObjName := dstFileName + "-rclone-move-" + random.String(8)
|
||||
_, err := fdst.NewObject(ctx, tmpObjName)
|
||||
|
|
|
@ -713,7 +713,7 @@ func TestCopyURL(t *testing.T) {
|
|||
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported)
|
||||
|
||||
// Check file clobbering
|
||||
o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true)
|
||||
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, false, true)
|
||||
require.Error(t, err)
|
||||
|
||||
// Check auto file naming
|
||||
|
@ -725,7 +725,7 @@ func TestCopyURL(t *testing.T) {
|
|||
assert.Equal(t, urlFileName, o.Remote())
|
||||
|
||||
// Check auto file naming when url without file name
|
||||
o, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false)
|
||||
_, err = operations.CopyURL(ctx, r.Fremote, "file1", ts.URL, true, false)
|
||||
require.Error(t, err)
|
||||
|
||||
// Check an error is returned for a 404
|
||||
|
|
Loading…
Reference in a new issue