From a752563842b3a603c7d73b00e71c6ff1fd120382 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 24 May 2023 10:51:30 +0100 Subject: [PATCH] operations: add operations/check to the rc API Fixes #7015 --- cmd/checksum/checksum.go | 13 +-- fs/operations/rc.go | 179 +++++++++++++++++++++++++++++++++++++++ fs/operations/rc_test.go | 121 ++++++++++++++++++++++++++ 3 files changed, 308 insertions(+), 5 deletions(-) diff --git a/cmd/checksum/checksum.go b/cmd/checksum/checksum.go index 2e2c7cc1d..16b83429e 100644 --- a/cmd/checksum/checksum.go +++ b/cmd/checksum/checksum.go @@ -24,15 +24,18 @@ func init() { } var commandDefinition = &cobra.Command{ - Use: "checksum sumfile src:path", - Short: `Checks the files in the source against a SUM file.`, + Use: "checksum sumfile dst:path", + Short: `Checks the files in the destination against a SUM file.`, Long: strings.ReplaceAll(` -Checks that hashsums of source files match the SUM file. +Checks that hashsums of destination files match the SUM file. It compares hashes (MD5, SHA1, etc) and logs a report of files which don't match. It doesn't alter the file system. -If you supply the |--download| flag, it will download the data from remote -and calculate the contents hash on the fly. This can be useful for remotes +The sumfile is treated as the source and the dst:path is treated as +the destination for the purposes of the output. + +If you supply the |--download| flag, it will download the data from the remote +and calculate the content hash on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. Note that hash values in the SUM file are treated as case insensitive. 
diff --git a/fs/operations/rc.go b/fs/operations/rc.go
index bff50dccd..533c3601e 100644
--- a/fs/operations/rc.go
+++ b/fs/operations/rc.go
@@ -2,6 +2,7 @@ package operations
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"mime"
@@ -13,6 +14,7 @@ import (
 
 	"github.com/rclone/rclone/fs"
 	"github.com/rclone/rclone/fs/config"
+	"github.com/rclone/rclone/fs/hash"
 	"github.com/rclone/rclone/fs/rc"
 	"github.com/rclone/rclone/lib/diskusage"
 )
@@ -697,3 +699,180 @@ func rcDu(ctx context.Context, in rc.Params) (out rc.Params, err error) {
 	}
 	return out, nil
 }
+
+func init() {
+	rc.Add(rc.Call{
+		Path:         "operations/check",
+		AuthRequired: true,
+		Fn:           rcCheck,
+		Title:        "check the source and destination are the same",
+		Help: `Checks the files in the source and destination match. It compares
+sizes and hashes and logs a report of files that don't
+match. It doesn't alter the source or destination.
+
+This takes the following parameters:
+
+- srcFs - a remote name string e.g. "drive:" for the source, "/" for local filesystem
+- dstFs - a remote name string e.g. "drive2:" for the destination, "/" for local filesystem
+- download - check by downloading rather than with hash
+- checkFileHash - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
+- checkFileFs - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
+- checkFileRemote - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
+- oneWay - check one way only, source files must exist on remote
+- combined - make a combined report of changes (default false)
+- missingOnSrc - report all files missing from the source (default true)
+- missingOnDst - report all files missing from the destination (default true)
+- match - report all matching files (default false)
+- differ - report all non-matching files (default true)
+- error - report all files with errors (hashing or reading) (default true)
+
+If you supply the download flag, it will download the data from
+both remotes and check them against each other on the fly. This can
+be useful for remotes that don't support hashes or if you really want
+to check all the data.
+
+If you supply the size-only global flag, it will only compare the sizes not
+the hashes as well. Use this for a quick check.
+
+If you supply the checkFileHash option with a valid hash name, the
+checkFileFs:checkFileRemote must point to a text file in the SUM
+format. This treats the checksum file as the source and dstFs as the
+destination. Note that srcFs is not used and should not be supplied in
+this case.
+
+Returns:
+
+- success - true if no error, false otherwise
+- status - textual summary of check, OK or text string
+- hashType - hash used in check, may be missing
+- combined - array of strings of combined report of changes
+- missingOnSrc - array of strings of all files missing from the source
+- missingOnDst - array of strings of all files missing from the destination
+- match - array of strings of all matching files
+- differ - array of strings of all non-matching files
+- error - array of strings of all files with errors (hashing or reading)
+
+`,
+	})
+}
+
+// stringWriter is an io.Writer which appends each write to the slice provided
+type stringWriter struct {
+	out *[]string
+}
+
+// Write appends p to the slice that s.out points at as a single new element,
+// after converting it to a string and removing at most one trailing "\n".
+// Each call therefore produces exactly one entry, however many lines p holds.
+// It always reports len(p) bytes written and a nil error, and it does not
+// retain p. There is no locking here, so concurrent callers must serialise
+// their writes themselves.
+// NOTE(review): presumably the check code emits one line per Write - confirm.
+func (s stringWriter) Write(p []byte) (n int, err error) {
+	result := string(p)
+	result = strings.TrimSuffix(result, "\n")
+	*s.out = append(*s.out, result)
+	return len(p), nil
+}
+
+// rcCheck implements operations/check, reporting the outcome in out rather than err
+func rcCheck(ctx context.Context, in rc.Params) (out rc.Params, err error) {
+	srcFs, err := rc.GetFsNamed(ctx, in, "srcFs")
+	if err != nil && !rc.IsErrParamNotFound(err) {
+		return nil, err
+	}
+
+	dstFs, err := rc.GetFsNamed(ctx, in, "dstFs")
+	if err != nil {
+		return nil, err
+	}
+
+	checkFileFs, checkFileRemote, err := rc.GetFsAndRemoteNamed(ctx, in, "checkFileFs", "checkFileRemote")
+	if err != nil && !rc.IsErrParamNotFound(err) {
+		return nil, err
+	}
+
+	checkFileHash, err := in.GetString("checkFileHash")
+	if err != nil && !rc.IsErrParamNotFound(err) {
+		return nil, err
+	}
+
+	checkFileSet := 0
+	if checkFileHash != "" {
+		checkFileSet++
+	}
+	if checkFileFs != nil {
+		checkFileSet++
+	}
+	if checkFileRemote != "" {
+		checkFileSet++
+	}
+	if checkFileSet > 0 && checkFileSet < 3 { // some but not all of the three were supplied
+		return nil, rc.NewErrParamInvalid(errors.New("need all of checkFileFs, checkFileRemote, checkFileHash to be set together"))
+	}
+
+	var checkFileHashType hash.Type
+	if checkFileHash != "" {
+		if err := checkFileHashType.Set(checkFileHash); err != nil {
+			return nil, err
+		}
+		if srcFs != nil {
+			return nil, rc.NewErrParamInvalid(errors.New("only supply dstFs when using checkFileHash"))
+		}
+	} else {
+		if srcFs == nil {
+			return nil, rc.NewErrParamInvalid(errors.New("need srcFs parameter when not using checkFileHash"))
+		}
+	}
+
+	oneway, _ := in.GetBool("oneWay")     // optional, so the error is deliberately ignored
+	download, _ := in.GetBool("download") // optional, so the error is deliberately ignored
+
+	opt := &CheckOpt{
+		Fsrc:   srcFs,
+		Fdst:   dstFs,
+		OneWay: oneway,
+	}
+
+	out = rc.Params{}
+
+	getOutput := func(name string, Default bool) io.Writer {
+		active, err := in.GetBool(name)
+		if err != nil {
+			active = Default
+		}
+		if !active {
+			return nil
+		}
+		result := []string{}
+		out[name] = &result // pointer, so lines appended later are visible through out
+		return stringWriter{&result}
+	}
+
+	opt.Combined = getOutput("combined", false)
+	opt.MissingOnSrc = getOutput("missingOnSrc", true)
+	opt.MissingOnDst = getOutput("missingOnDst", true)
+	opt.Match = getOutput("match", false)
+	opt.Differ = getOutput("differ", true)
+	opt.Error = getOutput("error", true)
+
+	if checkFileHash != "" { // checks run with the caller's ctx so its cancellation and values are honoured
+		out["hashType"] = checkFileHashType.String()
+		err = CheckSum(ctx, dstFs, checkFileFs, checkFileRemote, checkFileHashType, opt, download)
+	} else {
+		if download {
+			err = CheckDownload(ctx, opt)
+		} else {
+			out["hashType"] = srcFs.Hashes().Overlap(dstFs.Hashes()).GetOne().String()
+			err = Check(ctx, opt)
+		}
+	}
+	if err != nil {
+		out["status"] = err.Error()
+		out["success"] = false
+	} else {
+		out["status"] = "OK"
+		out["success"] = true
+	}
+	return out, nil
+}
diff --git a/fs/operations/rc_test.go b/fs/operations/rc_test.go
index d0684215f..5883b4f03 100644
--- a/fs/operations/rc_test.go
+++ b/fs/operations/rc_test.go
@@ -7,6 +7,8 @@ import (
 	"net/url"
 	"os"
 	"path"
+	"sort"
+	"strings"
 	"testing"
 	"time"
 
@@ -658,3 +660,122 @@ func TestRcDu(t *testing.T) {
 	assert.True(t, info.Total > info.Available)
 	assert.True(t, info.Free >= info.Available)
 }
+
+// operations/check: check the source and destination are the same
+func TestRcCheck(t *testing.T) {
+	ctx := context.Background()
+	r, call := rcNewRun(t, "operations/check")
+	r.Mkdir(ctx, r.Fremote)
+
+	MD5SUMS := `
+0ef726ce9b1a7692357ff70dd321d595  file1
+deadbeefcafe00000000000000000000  subdir/file2
+0386a8b8fcf672c326845c00ba41b9e2  subdir/subsubdir/file4
+`
+
+	file1 := r.WriteBoth(ctx, "file1", "file1 contents", t1)
+	file2 := r.WriteFile("subdir/file2", MD5SUMS, t2)
+	file3 := r.WriteObject(ctx, "subdir/subsubdir/file3", "file3 contents", t3)
+	file4a := r.WriteFile("subdir/subsubdir/file4", "file4 contents", t3)
+	file4b := r.WriteObject(ctx, "subdir/subsubdir/file4", "file4 different contents", t3)
+	// operations.HashLister(ctx, hash.MD5, false, false, r.Fremote, os.Stdout)
+
+	r.CheckLocalItems(t, file1, file2, file4a)
+	r.CheckRemoteItems(t, file1, file3, file4b)
+
+	pstring := func(items ...fstest.Item) *[]string {
+		xs := make([]string, len(items))
+		for i, item := range items {
+			xs[i] = item.Path
+		}
+		return &xs
+	}
+
+	for _, testName := range []string{"Normal", "Download"} {
+		t.Run(testName, func(t *testing.T) {
+			in := rc.Params{
+				"srcFs":        r.LocalName,
+				"dstFs":        r.FremoteName,
+				"combined":     true,
+				"missingOnSrc": true,
+				"missingOnDst": true,
+				"match":        true,
+				"differ":       true,
+				"error":        true,
+			}
+			if testName == "Download" {
+				in["download"] = true
+			}
+			out, err := call.Fn(ctx, in)
+			require.NoError(t, err)
+
+			combined := []string{
+				"= " + file1.Path,
+				"+ " + file2.Path,
+				"- " + file3.Path,
+				"* " + file4a.Path,
+			}
+			sort.Strings(combined)
+			sort.Strings(*out["combined"].(*[]string))
+			want := rc.Params{
+				"missingOnSrc": pstring(file3),
+				"missingOnDst": pstring(file2),
+				"differ":       pstring(file4a),
+				"error":        pstring(),
+				"match":        pstring(file1),
+				"combined":     &combined,
+				"status":       "3 differences found",
+				"success":      false,
+			}
+			if testName == "Normal" {
+				want["hashType"] = "md5"
+			}
+
+			assert.Equal(t, want, out)
+		})
+	}
+
+	t.Run("CheckFile", func(t *testing.T) {
+		// The checksum file is treated as the source and srcFs is not used
+		in := rc.Params{
+			"dstFs":           r.FremoteName,
+			"combined":        true,
+			"missingOnSrc":    true,
+			"missingOnDst":    true,
+			"match":           true,
+			"differ":          true,
+			"error":           true,
+			"checkFileFs":     r.LocalName,
+			"checkFileRemote": file2.Path,
+			"checkFileHash":   "md5",
+		}
+		out, err := call.Fn(ctx, in)
+		require.NoError(t, err)
+
+		combined := []string{
+			"= " + file1.Path,
+			"+ " + file2.Path,
+			"- " + file3.Path,
+			"* " + file4a.Path,
+		}
+		sort.Strings(combined)
+		sort.Strings(*out["combined"].(*[]string))
+		if strings.HasPrefix(out["status"].(string), "file not in") {
+			out["status"] = "file not in"
+		}
+		want := rc.Params{
+			"missingOnSrc": pstring(file3),
+			"missingOnDst": pstring(file2),
+			"differ":       pstring(file4a),
+			"error":        pstring(),
+			"match":        pstring(file1),
+			"combined":     &combined,
+			"hashType":     "md5",
+			"status":       "file not in",
+			"success":      false,
+		}
+
+		assert.Equal(t, want, out)
+	})
+
+}