hashsum: support creating hash from data received on stdin

See #5781
This commit is contained in:
albertony 2021-11-03 00:34:20 +01:00
parent 663b2d9c46
commit 29abbd2032
7 changed files with 166 additions and 35 deletions

View file

@ -294,7 +294,7 @@ func (f *Fs) dumpLine(r *hashRecord, path string, include bool, err error) strin
if hashVal == "" || err != nil {
hashVal = "-"
}
hashVal = fmt.Sprintf("%-*s", hash.Width(hashType), hashVal)
hashVal = fmt.Sprintf("%-*s", hash.Width(hashType, false), hashVal)
hashes = append(hashes, hashName+":"+hashVal)
}
hashesStr := strings.Join(hashes, " ")

View file

@ -2,7 +2,6 @@ package hashsum
import (
"context"
"errors"
"fmt"
"os"
@ -26,11 +25,11 @@ var (
func init() {
cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags()
AddHashFlags(cmdFlags)
AddHashsumFlags(cmdFlags)
}
// AddHashFlags is a convenience function to add the command flags OutputBase64 and DownloadFlag to hashsum, md5sum, sha1sum
func AddHashFlags(cmdFlags *pflag.FlagSet) {
// AddHashsumFlags is a convenience function to add the command flags OutputBase64 and DownloadFlag to hashsum, md5sum, sha1sum
func AddHashsumFlags(cmdFlags *pflag.FlagSet) {
flags.BoolVarP(cmdFlags, &OutputBase64, "base64", "", OutputBase64, "Output base64 encoded hashsum")
flags.StringVarP(cmdFlags, &HashsumOutfile, "output-file", "", HashsumOutfile, "Output hashsums to a file rather than the terminal")
flags.StringVarP(cmdFlags, &ChecksumFile, "checkfile", "C", ChecksumFile, "Validate hashes against a given SUM file instead of printing them")
@ -41,7 +40,7 @@ func AddHashFlags(cmdFlags *pflag.FlagSet) {
func GetHashsumOutput(filename string) (out *os.File, close func(), err error) {
out, err = os.Create(filename)
if err != nil {
err = fmt.Errorf("Failed to open output file %v: %w", filename, err)
err = fmt.Errorf("failed to open output file %v: %w", filename, err)
return nil, nil, err
}
@ -55,6 +54,32 @@ func GetHashsumOutput(filename string) (out *os.File, close func(), err error) {
return out, close, nil
}
// CreateFromStdinArg checks args and produces hashsum from standard input if it is requested
func CreateFromStdinArg(ht hash.Type, args []string, startArg int) (bool, error) {
var stdinArg bool
if len(args) == startArg {
// Missing arg: Always read from stdin
stdinArg = true
} else if len(args) > startArg && args[startArg] == "-" {
// Special arg: Read from stdin only if there is data available
if fi, _ := os.Stdin.Stat(); fi.Mode()&os.ModeCharDevice == 0 {
stdinArg = true
}
}
if !stdinArg {
return false, nil
}
if HashsumOutfile == "" {
return true, operations.HashSumStream(ht, OutputBase64, os.Stdin, nil)
}
output, close, err := GetHashsumOutput(HashsumOutfile)
if err != nil {
return true, err
}
defer close()
return true, operations.HashSumStream(ht, OutputBase64, os.Stdin, output)
}
var commandDefinition = &cobra.Command{
Use: "hashsum <hash> remote:path",
Short: `Produces a hashsum file for all the objects in the path.`,
@ -68,6 +93,11 @@ not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling any hash for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hypen will be treated literaly,
as a relative path).
Run without a hash to see the list of all supported hashes, e.g.
$ rclone hashsum
@ -83,8 +113,6 @@ Note that hash names are case insensitive and values are output in lower case.
if len(args) == 0 {
fmt.Print(hash.HelpString(0))
return nil
} else if len(args) == 1 {
return errors.New("need hash type and remote")
}
var ht hash.Type
err := ht.Set(args[0])
@ -92,8 +120,10 @@ Note that hash names are case insensitive and values are output in lower case.
fmt.Println(hash.HelpString(0))
return err
}
if found, err := CreateFromStdinArg(ht, args, 1); found {
return err
}
fsrc := cmd.NewFsSrc(args[1:])
cmd.Run(false, false, command, func() error {
if ChecksumFile != "" {
fsum, sumFile := cmd.NewFsFile(ChecksumFile)

View file

@ -13,7 +13,7 @@ import (
func init() {
cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags()
hashsum.AddHashFlags(cmdFlags)
hashsum.AddHashsumFlags(cmdFlags)
}
var commandDefinition = &cobra.Command{
@ -27,9 +27,17 @@ By default, the hash is requested from the remote. If MD5 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling MD5 for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hypen will be treated literaly,
as a relative path).
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args)
RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(0, 1, command, args)
if found, err := hashsum.CreateFromStdinArg(hash.MD5, args, 0); found {
return err
}
fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error {
if hashsum.ChecksumFile != "" {
@ -46,5 +54,6 @@ hashed locally enabling MD5 for any remote.
defer close()
return operations.HashLister(context.Background(), hash.MD5, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
})
return nil
},
}

View file

@ -13,7 +13,7 @@ import (
func init() {
cmd.Root.AddCommand(commandDefinition)
cmdFlags := commandDefinition.Flags()
hashsum.AddHashFlags(cmdFlags)
hashsum.AddHashsumFlags(cmdFlags)
}
var commandDefinition = &cobra.Command{
@ -27,9 +27,20 @@ By default, the hash is requested from the remote. If SHA-1 is
not supported by the remote, no hash will be returned. With the
download flag, the file will be downloaded from the remote and
hashed locally enabling SHA-1 for any remote.
This command can also hash data received on standard input (stdin),
by not passing a remote:path, or by passing a hyphen as remote:path
when there is data to read (if not, the hypen will be treated literaly,
as a relative path).
This command can also hash data received on STDIN, if not passing
a remote:path.
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(1, 1, command, args)
RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(0, 1, command, args)
if found, err := hashsum.CreateFromStdinArg(hash.SHA1, args, 0); found {
return err
}
fsrc := cmd.NewFsSrc(args)
cmd.Run(false, false, command, func() error {
if hashsum.ChecksumFile != "" {
@ -46,5 +57,6 @@ hashed locally enabling SHA-1 for any remote.
defer close()
return operations.HashLister(context.Background(), hash.SHA1, hashsum.OutputBase64, hashsum.DownloadFlag, fsrc, output)
})
return nil
},
}

View file

@ -4,6 +4,7 @@ import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
@ -92,8 +93,11 @@ func Supported() Set {
}
// Width returns the width in characters for any HashType
func Width(hashType Type) int {
func Width(hashType Type, base64Encoded bool) int {
if hash := type2hash[hashType]; hash != nil {
if base64Encoded {
return base64.URLEncoding.EncodedLen(hash.width / 2)
}
return hash.width
}
return 0
@ -243,6 +247,18 @@ func (m *MultiHasher) Sum(hashType Type) ([]byte, error) {
return h.Sum(nil), nil
}
// SumString returns the specified hash from the multihasher as a hex or base64 encoded string
func (m *MultiHasher) SumString(hashType Type, base64Encoded bool) (string, error) {
sum, err := m.Sum(hashType)
if err != nil {
return "", err
}
if base64Encoded {
return base64.URLEncoding.EncodeToString(sum), nil
}
return hex.EncodeToString(sum), nil
}
// Size returns the number of bytes written
func (m *MultiHasher) Size() int64 {
return m.size

View file

@ -946,7 +946,7 @@ func ListLong(ctx context.Context, f fs.Fs, w io.Writer) error {
// hashSum returns the human-readable hash for ht passed in. This may
// be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will
// return an error.
func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object) (string, error) {
func hashSum(ctx context.Context, ht hash.Type, base64Encoded bool, downloadFlag bool, o fs.Object) (string, error) {
var sum string
var err error
@ -968,7 +968,7 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
}
in, err := NewReOpen(ctx, o, fs.GetConfig(ctx).LowLevelRetries, options...)
if err != nil {
return "ERROR", fmt.Errorf("Failed to open file %v: %w", o, err)
return "ERROR", fmt.Errorf("failed to open file %v: %w", o, err)
}
// Account and buffer the transfer
@ -977,21 +977,20 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
// Setup hasher
hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return "UNSUPPORTED", fmt.Errorf("Hash unsupported: %w", err)
return "UNSUPPORTED", fmt.Errorf("hash unsupported: %w", err)
}
// Copy to hasher, downloading the file and passing directly to hash
_, err = io.Copy(hasher, in)
if err != nil {
return "ERROR", fmt.Errorf("Failed to copy file to hasher: %w", err)
return "ERROR", fmt.Errorf("failed to copy file to hasher: %w", err)
}
// Get hash and encode as hex
byteSum, err := hasher.Sum(ht)
// Get hash as hex or base64 encoded string
sum, err = hasher.SumString(ht, base64Encoded)
if err != nil {
return "ERROR", fmt.Errorf("Hasher returned an error: %w", err)
return "ERROR", fmt.Errorf("hasher returned an error: %w", err)
}
sum = hex.EncodeToString(byteSum)
} else {
tr := accounting.Stats(ctx).NewCheckingTransfer(o)
defer func() {
@ -999,11 +998,15 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
}()
sum, err = o.Hash(ctx, ht)
if base64Encoded {
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
}
if err == hash.ErrUnsupported {
return "", fmt.Errorf("Hash unsupported: %w", err)
return "", fmt.Errorf("hash unsupported: %w", err)
}
if err != nil {
return "", fmt.Errorf("Failed to get hash %v from backend: %v: %w", ht, err, err)
return "", fmt.Errorf("failed to get hash %v from backend: %v: %w", ht, err, err)
}
}
@ -1014,10 +1017,7 @@ func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object)
// Updated to handle both standard hex encoding and base64
// Updated to perform multiple hashes concurrently
func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error {
width := hash.Width(ht)
if outputBase64 {
width = base64.URLEncoding.EncodedLen(width / 2)
}
width := hash.Width(ht, outputBase64)
concurrencyControl := make(chan struct{}, fs.GetConfig(ctx).Transfers)
var wg sync.WaitGroup
err := ListFn(ctx, f, func(o fs.Object) {
@ -1028,15 +1028,11 @@ func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFl
<-concurrencyControl
wg.Done()
}()
sum, err := hashSum(ctx, ht, downloadFlag, o)
sum, err := hashSum(ctx, ht, outputBase64, downloadFlag, o)
if err != nil {
fs.Errorf(o, "%v", fs.CountError(err))
return
}
if outputBase64 {
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
}
syncFprintf(w, "%*s %s\n", width, sum, o.Remote())
}()
})
@ -1044,6 +1040,28 @@ func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFl
return err
}
// HashSumStream outputs a line compatible with md5sum to w based on the
// input stream in and the hash type ht passed in. If outputBase64 is
// set then the hash will be base64 instead of hexadecimal.
func HashSumStream(ht hash.Type, outputBase64 bool, in io.ReadCloser, w io.Writer) error {
hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return fmt.Errorf("hash unsupported: %w", err)
}
written, err := io.Copy(hasher, in)
fs.Debugf(nil, "Creating %s hash of %d bytes read from input stream", ht, written)
if err != nil {
return fmt.Errorf("failed to copy input to hasher: %w", err)
}
sum, err := hasher.SumString(ht, outputBase64)
if err != nil {
return fmt.Errorf("hasher returned an error: %w", err)
}
width := hash.Width(ht, outputBase64)
syncFprintf(w, "%*s -\n", width, sum)
return nil
}
// Count counts the objects and their sizes in the Fs
//
// Obeys includes and excludes

View file

@ -316,6 +316,52 @@ func TestHashSumsWithErrors(t *testing.T) {
// TODO mock an unreadable file
}
func TestHashStream(t *testing.T) {
reader := strings.NewReader("")
in := ioutil.NopCloser(reader)
out := &bytes.Buffer{}
for _, test := range []struct {
input string
ht hash.Type
wantHex string
wantBase64 string
}{
{
input: "",
ht: hash.MD5,
wantHex: "d41d8cd98f00b204e9800998ecf8427e -\n",
wantBase64: "1B2M2Y8AsgTpgAmY7PhCfg== -\n",
},
{
input: "",
ht: hash.SHA1,
wantHex: "da39a3ee5e6b4b0d3255bfef95601890afd80709 -\n",
wantBase64: "2jmj7l5rSw0yVb_vlWAYkK_YBwk= -\n",
},
{
input: "Hello world!",
ht: hash.MD5,
wantHex: "86fb269d190d2c85f6e0468ceca42a20 -\n",
wantBase64: "hvsmnRkNLIX24EaM7KQqIA== -\n",
},
{
input: "Hello world!",
ht: hash.SHA1,
wantHex: "d3486ae9136e7856bc42212385ea797094475802 -\n",
wantBase64: "00hq6RNueFa8QiEjhep5cJRHWAI= -\n",
},
} {
reader.Reset(test.input)
require.NoError(t, operations.HashSumStream(test.ht, false, in, out))
assert.Equal(t, test.wantHex, out.String())
_, _ = reader.Seek(0, io.SeekStart)
out.Reset()
require.NoError(t, operations.HashSumStream(test.ht, true, in, out))
assert.Equal(t, test.wantBase64, out.String())
out.Reset()
}
}
func TestSuffixName(t *testing.T) {
ctx := context.Background()
ctx, ci := fs.AddConfig(ctx)