hashsum: treat hash values in sum file as case insensitive

Also warn about duplicate file paths in sum files.

Fixes https://forum.rclone.org/t/rclone-check-sum/25566/45
This commit is contained in:
Ivan Andreev 2021-10-13 15:02:49 +03:00
parent cf9b82b8db
commit 16fb608bee
6 changed files with 51 additions and 15 deletions

View file

@ -33,6 +33,8 @@ don't match. It doesn't alter the file system.
If you supply the |--download| flag, it will download the data from remote If you supply the |--download| flag, it will download the data from remote
and calculate the contents hash on the fly. This can be useful for remotes and calculate the contents hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data. that don't support hashes or if you really want to check all the data.
Note that hash values in the SUM file are treated as case insensitive.
`, "|", "`") + check.FlagsHelp, `, "|", "`") + check.FlagsHelp,
RunE: func(command *cobra.Command, args []string) error { RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(3, 3, command, args) cmd.CheckArgs(3, 3, command, args)

View file

@ -76,7 +76,7 @@ Then
$ rclone hashsum MD5 remote:path $ rclone hashsum MD5 remote:path
Note that hash names are case insensitive. Note that hash names are case insensitive and values are output in lower case.
`, `,
RunE: func(command *cobra.Command, args []string) error { RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(0, 2, command, args) cmd.CheckArgs(0, 2, command, args)

View file

@ -20,6 +20,8 @@ If you supply the `--download` flag, it will download the data from remote
and calculate the contents hash on the fly. This can be useful for remotes and calculate the contents hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data. that don't support hashes or if you really want to check all the data.
Note that hash values in the SUM file are treated as case insensitive.
If you supply the `--one-way` flag, it will only check that files in If you supply the `--one-way` flag, it will only check that files in
the source match the files in the destination, not the other way the source match the files in the destination, not the other way
around. This means that extra files in the destination that are not in around. This means that extra files in the destination that are not in

View file

@ -38,7 +38,7 @@ Then
$ rclone hashsum MD5 remote:path $ rclone hashsum MD5 remote:path
Note that hash names are case insensitive. Note that hash names are case insensitive and values are output in lower case.
``` ```

View file

@ -7,6 +7,7 @@ import (
"io" "io"
"os" "os"
"regexp" "regexp"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
@ -540,7 +541,7 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
} }
parser := bufio.NewReader(rd) parser := bufio.NewReader(rd)
const maxWarn = 4 const maxWarn = 3
numWarn := 0 numWarn := 0
re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`) re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`)
@ -558,19 +559,31 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
continue continue
} }
if fields := re.FindStringSubmatch(line); fields != nil { fields := re.FindStringSubmatch(line)
hashes[fields[2]] = fields[1] if fields == nil {
numWarn++
if numWarn <= maxWarn {
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
}
continue continue
} }
sum, file := fields[1], fields[2]
if hashes[file] != "" {
numWarn++ numWarn++
if numWarn < maxWarn { if numWarn <= maxWarn {
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo) fs.Logf(sumFile, "duplicate file on checksum line %d", lineNo)
} else if numWarn == maxWarn {
fs.Logf(sumFile, "more warnings suppressed...")
} }
continue
} }
// We've standardised on lower case checksums in rclone internals.
hashes[file] = strings.ToLower(sum)
}
if numWarn > maxWarn {
fs.Logf(sumFile, "%d warning(s) suppressed...", numWarn-maxWarn)
}
if err = rd.Close(); err != nil { if err = rd.Close(); err != nil {
return nil, err return nil, err
} }

View file

@ -332,8 +332,10 @@ func testCheckSum(t *testing.T, download bool) {
const ( const (
testString1 = "Hello, World!" testString1 = "Hello, World!"
testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4" testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4"
testDigest1Upper = "65A8E27D8879283831B664BD8B7F0AD4"
testString2 = "I am the walrus" testString2 = "I am the walrus"
testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3" testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3"
testDigest2Mixed = "87396e030EF3f5b35BBf85c0a09a4FB3"
) )
type wantType map[string]string type wantType map[string]string
@ -428,7 +430,7 @@ func testCheckSum(t *testing.T, download bool) {
} }
check := func(runNo, wantChecks, wantErrors int, wantResults wantType) { check := func(runNo, wantChecks, wantErrors int, wantResults wantType) {
runName := fmt.Sprintf("move%d", runNo) runName := fmt.Sprintf("subtest%d", runNo)
t.Run(runName, func(t *testing.T) { t.Run(runName, func(t *testing.T) {
checkRun(runNo, wantChecks, wantErrors, wantResults) checkRun(runNo, wantChecks, wantErrors, wantResults)
}) })
@ -519,6 +521,23 @@ func testCheckSum(t *testing.T, download bool) {
"differ": "potato\n", "differ": "potato\n",
"error": "", "error": "",
}) })
// test mixed-case checksums
file1 = makeFile("banana", testString1)
file2 = makeFile("potato", testString2)
fcsums = makeSums(operations.HashSums{
"banana": testDigest1Upper,
"potato": testDigest2Mixed,
})
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
check(7, 2, 0, wantType{
"combined": "= banana\n= potato\n",
"missingonsrc": "",
"missingondst": "",
"match": "banana\npotato\n",
"differ": "",
"error": "",
})
} }
func TestCheckSum(t *testing.T) { func TestCheckSum(t *testing.T) {