hashsum: support creating hash from data received on stdin

See #5781
This commit is contained in:
albertony 2021-11-03 00:34:20 +01:00
parent 663b2d9c46
commit 29abbd2032
7 changed files with 166 additions and 35 deletions

View file

@ -294,7 +294,7 @@ func (f *Fs) dumpLine(r *hashRecord, path string, include bool, err error) strin
if hashVal == "" || err != nil { if hashVal == "" || err != nil {
hashVal = "-" hashVal = "-"
} }
hashVal = fmt.Sprintf("%-*s", hash.Width(hashType), hashVal) hashVal = fmt.Sprintf("%-*s", hash.Width(hashType, false), hashVal)
hashes = append(hashes, hashName+":"+hashVal) hashes = append(hashes, hashName+":"+hashVal)
} }
hashesStr := strings.Join(hashes, " ") hashesStr := strings.Join(hashes, " ")

View file

@ -2,7 +2,6 @@ package hashsum
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"os" "os"
@ -26,11 +25,11 @@ var (
func init() { func init() {
cmd.Root.AddCommand(commandDefinition) cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags() cmdFlags := commandDefinition.Flags()
AddHashFlags(cmdFlags) AddHashsumFlags(cmdFlags)
} }
// AddHashFlags is a convenience function to add the command flags OutputBase64 and DownloadFlag to hashsum, md5sum, sha1sum // AddHashsumFlags is a convenience function to add the command flags OutputBase64 and DownloadFlag to hashsum, md5sum, sha1sum
func AddHashFlags(cmdFlags *pflag.FlagSet) { func AddHashsumFlags(cmdFlags *pflag.FlagSet) {
flags.BoolVarP(cmdFlags, &OutputBase64, "base64", "", OutputBase64, "Output base64 encoded hashsum") flags.BoolVarP(cmdFlags, &OutputBase64, "base64", "", OutputBase64, "Output base64 encoded hashsum")
flags.StringVarP(cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile, "Output hashsums to a file rather than the terminal") flags.StringVarP(cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile, "Output hashsums to a file rather than the terminal")
flags.StringVarP(cmdFlags, &ChecksumFile, "checkfile", "C", ChecksumFile, "Validate hashes against a given SUM file instead of printing them") flags.StringVarP(cmdFlags, &ChecksumFile, "checkfile", "C", ChecksumFile, "Validate hashes against a given SUM file instead of printing them")
@ -41,7 +40,7 @@ func AddHashFlags(cmdFlags *pflag.FlagSet) {
func GetHashsumOutput(filename string) (out *os.File, close func(), err error) { func GetHashsumOutput(filename string) (out *os.File, close func(), err error) {
out, err = os.Create(filename) out, err = os.Create(filename)
if err != nil { if err != nil {
err = fmt.Errorf("Failed to open output file %v: %w", filename, err) err = fmt.Errorf("failed to open output file %v: %w", filename, err)
return nil, nil, err return nil, nil, err
} }
@ -55,6 +54,32 @@ func GetHashsumOutput(filename string) (out *os.File, close func(), err error) {
return out, close, nil return out, close, nil
} }
// CreateFromStdinArg checks args and produces hashsum from standard input if it is requested.
//
// ht is the hash type to compute, args are the command line arguments and
// startArg is the index in args at which the remote:path argument is expected.
//
// Standard input is hashed when either:
//   - there is no argument at startArg (len(args) == startArg), or
//   - the argument at startArg is "-" and stdin is not a character device.
//
// The first return value reports whether stdin was selected as the input. When
// it is false the caller should carry on and treat args as a remote:path, and
// the error is always nil. When it is true the error is whatever creating the
// output file or hashing the stream returned.
func CreateFromStdinArg(ht hash.Type, args []string, startArg int) (bool, error) {
	var stdinArg bool
	switch {
	case len(args) == startArg:
		// Missing arg: Always read from stdin
		stdinArg = true
	case len(args) > startArg && args[startArg] == "-":
		// Special arg: Read from stdin only if there is data available.
		// Stat may fail (e.g. stdin is closed), in which case the returned
		// FileInfo is nil and must not be dereferenced; treat that as "no
		// data" so the hyphen falls through to being a literal path.
		if fi, err := os.Stdin.Stat(); err == nil && fi.Mode()&os.ModeCharDevice == 0 {
			stdinArg = true
		}
	}
	if !stdinArg {
		return false, nil
	}
	if HashsumOutfile == "" {
		// No output file configured: a nil writer is handed to HashSumStream
		// (presumably it then writes to the terminal - confirm in syncFprintf).
		return true, operations.HashSumStream(ht, OutputBase64, os.Stdin, nil)
	}
	output, closeOutput, err := GetHashsumOutput(HashsumOutfile)
	if err != nil {
		return true, err
	}
	defer closeOutput()
	return true, operations.HashSumStream(ht, OutputBase64, os.Stdin, output)
}
var commandDefinition = &cobra.Command{ var commandDefinition = &cobra.Command{
Use: "hashsum <hash> remote:path", Use: "hashsum <hash> remote:path",
Short: `Produces a hashsum file for all the objects in the path.`, Short: `Produces a hashsum file for all the objects in the path.`,
@ -68,6 +93,11 @@ not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and download flag, the file will be downloaded from the remote and
hashed locally enabling any hash for any remote. hashed locally enabling any hash for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hyphen will be treated literally,
as a relative path).
Run without a hash to see the list of all supported hashes, e.g. Run without a hash to see the list of all supported hashes, e.g.
$ rclone hashsum $ rclone hashsum
@ -83,8 +113,6 @@ Note that hash names are case insensitive and values are output in lower case.
if len(args) == 0 { if len(args) == 0 {
fmt.Print(hash.HelpString(0)) fmt.Print(hash.HelpString(0))
return nil return nil
} else if len(args) == 1 {
return errors.New("need hash type and remote")
} }
var ht hash.Type var ht hash.Type
err := ht.Set(args[0]) err := ht.Set(args[0])
@ -92,8 +120,10 @@ Note that hash names are case insensitive and values are output in lower case.
fmt.Println(hash.HelpString(0)) fmt.Println(hash.HelpString(0))
return err return err
} }
if found, err := CreateFromStdinArg(ht, args, 1); found {
return err
}
fsrc := cmd.NewFsSrc(args[1:]) fsrc := cmd.NewFsSrc(args[1:])
cmd.Run(false, false, command, func() error { cmd.Run(false, false, command, func() error {
if ChecksumFile != "" { if ChecksumFile != "" {
fsum, sumFile := cmd.NewFsFile(ChecksumFile) fsum, sumFile := cmd.NewFsFile(ChecksumFile)

View file

@ -13,7 +13,7 @@ import (
func init() { func init() {
cmd.Root.AddCommand(commandDefinition) cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags() cmdFlags := commandDefinition.Flags()
hashsum.AddHashFlags(cmdFlags) hashsum.AddHashsumFlags(cmdFlags)
} }
var commandDefinition = &cobra.Command{ var commandDefinition = &cobra.Command{
@ -27,9 +27,17 @@ By default, the hash is requested from the remote. If MD5 is
not supported by the remote, no hash will be returned. With the not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and download flag, the file will be downloaded from the remote and
hashed locally enabling MD5 for any remote. hashed locally enabling MD5 for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hyphen will be treated literally,
as a relative path).
`, `,
Run: func(command *cobra.Command, args []string) { RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(1, 1, command, args) cmd.CheckArgs(0, 1, command, args)
if found, err := hashsum.CreateFromStdinArg(hash.MD5, args, 0); found {
return err
}
fsrc := cmd.NewFsSrc(args) fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error { cmd.Run(false, false, command, func() error {
if hashsum.ChecksumFile != "" { if hashsum.ChecksumFile != "" {
@ -46,5 +54,6 @@ hashed locally enabling MD5 for any remote.
defer close() defer close()
return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output) return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
}) })
return nil
}, },
} }

View file

@ -13,7 +13,7 @@ import (
func init() { func init() {
cmd.Root.AddCommand(commandDefinition) cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags() cmdFlags := commandDefinition.Flags()
hashsum.AddHashFlags(cmdFlags) hashsum.AddHashsumFlags(cmdFlags)
} }
var commandDefinition = &cobra.Command{ var commandDefinition = &cobra.Command{
@ -27,9 +27,20 @@ By default, the hash is requested from the remote. If SHA-1 is
not supported by the remote, no hash will be returned. With the not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and download flag, the file will be downloaded from the remote and
hashed locally enabling SHA-1 for any remote. hashed locally enabling SHA-1 for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hyphen will be treated literally,
as a relative path).
This command can also hash data received on STDIN, if not passing
a remote:path.
`, `,
Run: func(command *cobra.Command, args []string) { RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(1, 1, command, args) cmd.CheckArgs(0, 1, command, args)
if found, err := hashsum.CreateFromStdinArg(hash.SHA1, args, 0); found {
return err
}
fsrc := cmd.NewFsSrc(args) fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error { cmd.Run(false, false, command, func() error {
if hashsum.ChecksumFile != "" { if hashsum.ChecksumFile != "" {
@ -46,5 +57,6 @@ hashed locally enabling SHA-1 for any remote.
defer close() defer close()
return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output) return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
}) })
return nil
}, },
} }

View file

@ -4,6 +4,7 @@ import (
"crypto/md5" "crypto/md5"
"crypto/sha1" "crypto/sha1"
"crypto/sha256" "crypto/sha256"
"encoding/base64"
"encoding/hex" "encoding/hex"
"errors" "errors"
"fmt" "fmt"
@ -92,8 +93,11 @@ func Supported() Set {
} }
// Width returns the width in characters for any HashType // Width returns the width in characters for any HashType
func Width(hashType Type) int { func Width(hashType Type, base64Encoded bool) int {
if hash := type2hash[hashType]; hash != nil { if hash := type2hash[hashType]; hash != nil {
if base64Encoded {
return base64.URLEncoding.EncodedLen(hash.width / 2)
}
return hash.width return hash.width
} }
return 0 return 0
@ -243,6 +247,18 @@ func (m *MultiHasher) Sum(hashType Type) ([]byte, error) {
return h.Sum(nil), nil return h.Sum(nil), nil
} }
// SumString returns the digest of the requested hash type from the
// multihasher as text.
//
// The raw digest is obtained from Sum. It is encoded with base64 URL encoding
// when base64Encoded is true and as hexadecimal otherwise. If Sum fails, the
// empty string and that error are returned.
func (m *MultiHasher) SumString(hashType Type, base64Encoded bool) (string, error) {
	digest, err := m.Sum(hashType)
	switch {
	case err != nil:
		return "", err
	case base64Encoded:
		return base64.URLEncoding.EncodeToString(digest), nil
	default:
		return hex.EncodeToString(digest), nil
	}
}
// Size returns the number of bytes written // Size returns the number of bytes written
func (m *MultiHasher) Size() int64 { func (m *MultiHasher) Size() int64 {
return m.size return m.size

View file

@ -946,7 +946,7 @@ func ListLong(ctx context.Context, f fs.Fs, w io.Writer) error {
// hashSum returns the human-readable hash for ht passed in. This may // hashSum returns the human-readable hash for ht passed in. This may
// be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will // be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will
// return an error. // return an error.
func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object) (string, error) { func hashSum(ctx context.Context, ht hash.Type, base64Encoded bool, downloadFlag bool, o fs.Object) (string, error) {
var sum string var sum string
var err error var err error
@ -968,7 +968,7 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
} }
in, err := NewReOpen(ctx, o, fs.GetConfig(ctx).LowLevelRetries, options...) in, err := NewReOpen(ctx, o, fs.GetConfig(ctx).LowLevelRetries, options...)
if err != nil { if err != nil {
return "ERROR", fmt.Errorf("Failed to open file %v: %w", o, err) return "ERROR", fmt.Errorf("failed to open file %v: %w", o, err)
} }
// Account and buffer the transfer // Account and buffer the transfer
@ -977,21 +977,20 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
// Setup hasher // Setup hasher
hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht)) hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil { if err != nil {
return "UNSUPPORTED", fmt.Errorf("Hash unsupported: %w", err) return "UNSUPPORTED", fmt.Errorf("hash unsupported: %w", err)
} }
// Copy to hasher, downloading the file and passing directly to hash // Copy to hasher, downloading the file and passing directly to hash
_, err = io.Copy(hasher, in) _, err = io.Copy(hasher, in)
if err != nil { if err != nil {
return "ERROR", fmt.Errorf("Failed to copy file to hasher: %w", err) return "ERROR", fmt.Errorf("failed to copy file to hasher: %w", err)
} }
// Get hash and encode as hex // Get hash as hex or base64 encoded string
byteSum, err := hasher.Sum(ht) sum, err = hasher.SumString(ht, base64Encoded)
if err != nil { if err != nil {
return "ERROR", fmt.Errorf("Hasher returned an error: %w", err) return "ERROR", fmt.Errorf("hasher returned an error: %w", err)
} }
sum = hex.EncodeToString(byteSum)
} else { } else {
tr := accounting.Stats(ctx).NewCheckingTransfer(o) tr := accounting.Stats(ctx).NewCheckingTransfer(o)
defer func() { defer func() {
@ -999,11 +998,15 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
}() }()
sum, err = o.Hash(ctx, ht) sum, err = o.Hash(ctx, ht)
if base64Encoded {
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
}
if err == hash.ErrUnsupported { if err == hash.ErrUnsupported {
return "", fmt.Errorf("Hash unsupported: %w", err) return "", fmt.Errorf("hash unsupported: %w", err)
} }
if err != nil { if err != nil {
return "", fmt.Errorf("Failed to get hash %v from backend: %v: %w", ht, err, err) return "", fmt.Errorf("failed to get hash %v from backend: %v: %w", ht, err, err)
} }
} }
@ -1014,10 +1017,7 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
// Updated to handle both standard hex encoding and base64 // Updated to handle both standard hex encoding and base64
// Updated to perform multiple hashes concurrently // Updated to perform multiple hashes concurrently
func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error { func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error {
width := hash.Width(ht) width := hash.Width(ht, outputBase64)
if outputBase64 {
width = base64.URLEncoding.EncodedLen(width / 2)
}
concurrencyControl := make(chan struct{}, fs.GetConfig(ctx).Transfers) concurrencyControl := make(chan struct{}, fs.GetConfig(ctx).Transfers)
var wg sync.WaitGroup var wg sync.WaitGroup
err := ListFn(ctx, f, func(o fs.Object) { err := ListFn(ctx, f, func(o fs.Object) {
@ -1028,15 +1028,11 @@ func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFl
<-concurrencyControl <-concurrencyControl
wg.Done() wg.Done()
}() }()
sum, err := hashSum(ctx, ht, downloadFlag, o) sum, err := hashSum(ctx, ht, outputBase64, downloadFlag, o)
if err != nil { if err != nil {
fs.Errorf(o, "%v", fs.CountError(err)) fs.Errorf(o, "%v", fs.CountError(err))
return return
} }
if outputBase64 {
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
}
syncFprintf(w, "%*s %s\n", width, sum, o.Remote()) syncFprintf(w, "%*s %s\n", width, sum, o.Remote())
}() }()
}) })
@ -1044,6 +1040,28 @@ func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFl
return err return err
} }
// HashSumStream reads the input stream in to its end, hashes it with hash
// type ht and writes a single md5sum-compatible line to w, using "-" in
// place of a file name.
//
// If outputBase64 is set the hash is written base64 encoded instead of
// hexadecimal. The stream is not closed by this function.
//
// An error is returned if the hash type is unsupported, if reading the
// stream fails, or if the hasher cannot produce the sum.
func HashSumStream(ht hash.Type, outputBase64 bool, in io.ReadCloser, w io.Writer) error {
	hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
	if err != nil {
		return fmt.Errorf("hash unsupported: %w", err)
	}

	// Feed the whole stream through the hasher. The byte count is logged
	// before the error check, so it is reported even when the copy failed.
	nBytes, copyErr := io.Copy(hasher, in)
	fs.Debugf(nil, "Creating %s hash of %d bytes read from input stream", ht, nBytes)
	if copyErr != nil {
		return fmt.Errorf("failed to copy input to hasher: %w", copyErr)
	}

	encoded, err := hasher.SumString(ht, outputBase64)
	if err != nil {
		return fmt.Errorf("hasher returned an error: %w", err)
	}

	// Pad the sum to the width of the hash in the chosen encoding.
	syncFprintf(w, "%*s -\n", hash.Width(ht, outputBase64), encoded)
	return nil
}
// Count counts the objects and their sizes in the Fs // Count counts the objects and their sizes in the Fs
// //
// Obeys includes and excludes // Obeys includes and excludes

View file

@ -316,6 +316,52 @@ func TestHashSumsWithErrors(t *testing.T) {
// TODO mock an unreadable file // TODO mock an unreadable file
} }
// TestHashStream checks the line written by operations.HashSumStream for
// empty and non-empty input, for MD5 and SHA1, in both hexadecimal and
// base64 output modes.
func TestHashStream(t *testing.T) {
	type streamCase struct {
		input      string
		ht         hash.Type
		wantHex    string
		wantBase64 string
	}
	cases := []streamCase{
		{
			input:      "",
			ht:         hash.MD5,
			wantHex:    "d41d8cd98f00b204e9800998ecf8427e -\n",
			wantBase64: "1B2M2Y8AsgTpgAmY7PhCfg== -\n",
		},
		{
			input:      "",
			ht:         hash.SHA1,
			wantHex:    "da39a3ee5e6b4b0d3255bfef95601890afd80709 -\n",
			wantBase64: "2jmj7l5rSw0yVb_vlWAYkK_YBwk= -\n",
		},
		{
			input:      "Hello world!",
			ht:         hash.MD5,
			wantHex:    "86fb269d190d2c85f6e0468ceca42a20 -\n",
			wantBase64: "hvsmnRkNLIX24EaM7KQqIA== -\n",
		},
		{
			input:      "Hello world!",
			ht:         hash.SHA1,
			wantHex:    "d3486ae9136e7856bc42212385ea797094475802 -\n",
			wantBase64: "00hq6RNueFa8QiEjhep5cJRHWAI= -\n",
		},
	}

	// One reader and one output buffer are shared by every case and are
	// reset between runs.
	src := strings.NewReader("")
	in := ioutil.NopCloser(src)
	var out bytes.Buffer

	for _, tc := range cases {
		src.Reset(tc.input)

		// Hexadecimal output.
		require.NoError(t, operations.HashSumStream(tc.ht, false, in, &out))
		assert.Equal(t, tc.wantHex, out.String())

		// Rewind the input so the same data can be hashed again.
		_, _ = src.Seek(0, io.SeekStart)
		out.Reset()

		// Base64 output.
		require.NoError(t, operations.HashSumStream(tc.ht, true, in, &out))
		assert.Equal(t, tc.wantBase64, out.String())
		out.Reset()
	}
}
func TestSuffixName(t *testing.T) { func TestSuffixName(t *testing.T) {
ctx := context.Background() ctx := context.Background()
ctx, ci := fs.AddConfig(ctx) ctx, ci := fs.AddConfig(ctx)