diff --git a/digest/digest.go b/digest/digest.go new file mode 100644 index 00000000..f2ce021a --- /dev/null +++ b/digest/digest.go @@ -0,0 +1,145 @@ +package digest + +import ( + "bytes" + "crypto/sha256" + "fmt" + "hash" + "io" + "io/ioutil" + "strings" + + "github.com/docker/docker-registry/common" + "github.com/docker/docker/pkg/tarsum" +) + +// Digest allows simple protection of hex formatted digest strings, prefixed +// by their algorithm. Strings of type Digest have some guarantee of being in +// the correct format and it provides quick access to the components of a +// digest string. +// +// The following is an example of the contents of Digest types: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// More important for this code base, this type is compatible with tarsum +// digests. For example, the following would be a valid Digest: +// +// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b +// +// This allows to abstract the digest behind this type and work only in those +// terms. +type Digest string + +// NewDigest returns a Digest from alg and a hash.Hash object. +func NewDigest(alg string, h hash.Hash) Digest { + return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil))) +} + +var ( + // ErrDigestInvalidFormat returned when digest format invalid. + ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format") + + // ErrDigestUnsupported returned when the digest algorithm is unsupported by registry. + ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm") +) + +// ParseDigest parses s and returns the validated digest object. An error will +// be returned if the format is invalid. +func ParseDigest(s string) (Digest, error) { + // Common case will be tarsum + _, err := common.ParseTarSum(s) + if err == nil { + return Digest(s), nil + } + + // Continue on for general parser + + i := strings.Index(s, ":") + if i < 0 { + return "", ErrDigestInvalidFormat + } + + // case: "sha256:" with no hex. + if i+1 == len(s) { + return "", ErrDigestInvalidFormat + } + + switch s[:i] { + case "md5", "sha1", "sha256": + break + default: + return "", ErrDigestUnsupported + } + + return Digest(s), nil +} + +// DigestReader returns the most valid digest for the underlying content. +func DigestReader(rd io.Reader) (Digest, error) { + + // TODO(stevvooe): This is pretty inefficient to always be calculating a + // sha256 hash to provide fallback, but it provides some nice semantics in + // that we never worry about getting the right digest for a given reader. + // For the most part, we can detect tar vs non-tar with only a few bytes, + // so a scheme that saves those bytes would probably be better here. + + h := sha256.New() + tr := io.TeeReader(rd, h) + + ts, err := tarsum.NewTarSum(tr, true, tarsum.Version1) + if err != nil { + return "", err + } + + // Try to copy from the tarsum, if we fail, copy the remaining bytes into + // hash directly. + if _, err := io.Copy(ioutil.Discard, ts); err != nil { + if err.Error() != "archive/tar: invalid tar header" { + return "", err + } + + if _, err := io.Copy(h, rd); err != nil { + return "", err + } + + return NewDigest("sha256", h), nil + } + + d, err := ParseDigest(ts.Sum(nil)) + if err != nil { + return "", err + } + + return d, nil +} + +func DigestBytes(p []byte) (Digest, error) { + return DigestReader(bytes.NewReader(p)) +} + +// Algorithm returns the algorithm portion of the digest. This will panic if +// the underlying digest is not in a valid format. +func (d Digest) Algorithm() string { + return string(d[:d.sepIndex()]) +} + +// Hex returns the hex digest portion of the digest. This will panic if the +// underlying digest is not in a valid format. +func (d Digest) Hex() string { + return string(d[d.sepIndex()+1:]) +} + +func (d Digest) String() string { + return string(d) +} + +func (d Digest) sepIndex() int { + i := strings.Index(string(d), ":") + + if i < 0 { + panic("invalid digest: " + d) + } + + return i +} diff --git a/digest/digest_test.go b/digest/digest_test.go new file mode 100644 index 00000000..127f7873 --- /dev/null +++ b/digest/digest_test.go @@ -0,0 +1,80 @@ +package digest + +import "testing" + +func TestParseDigest(t *testing.T) { + for _, testcase := range []struct { + input string + err error + algorithm string + hex string + }{ + { + input: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "tarsum+sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "tarsum.dev+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "tarsum.dev+sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e", + algorithm: "tarsum.v1+sha256", + hex: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e", + }, + { + input: "sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "md5:d41d8cd98f00b204e9800998ecf8427e", + algorithm: "md5", + hex: "d41d8cd98f00b204e9800998ecf8427e", + }, + { + // empty hex + input: "sha256:", + err: ErrDigestInvalidFormat, + }, + { + // just hex + input: "d41d8cd98f00b204e9800998ecf8427e", + err: ErrDigestInvalidFormat, + }, + { + input: "foo:d41d8cd98f00b204e9800998ecf8427e", + err: ErrDigestUnsupported, + }, + } { + digest, err := ParseDigest(testcase.input) + if err != testcase.err { + t.Fatalf("error differed from expected while parsing %q: %v != %v", testcase.input, err, testcase.err) + } + + if testcase.err != nil { + continue + } + + if digest.Algorithm() != testcase.algorithm { + t.Fatalf("incorrect algorithm for parsed digest: %q != %q", digest.Algorithm(), testcase.algorithm) + } + + if digest.Hex() != testcase.hex { + t.Fatalf("incorrect hex for parsed digest: %q != %q", digest.Hex(), testcase.hex) + } + + // Parse string return value and check equality + newParsed, err := ParseDigest(digest.String()) + + if err != nil { + t.Fatalf("unexpected error parsing input %q: %v", testcase.input, err) + } + + if newParsed != digest { + t.Fatalf("expected equal: %q != %q", newParsed, digest) + } + } +} diff --git a/digest/doc.go b/digest/doc.go new file mode 100644 index 00000000..2ce7698c --- /dev/null +++ b/digest/doc.go @@ -0,0 +1,52 @@ +// This package provides a generalized type to opaquely represent message +// digests and their operations within the registry. The Digest type is +// designed to serve as a flexible identifier in a content-addressable system. +// More importantly, it provides tools and wrappers to work with tarsums and +// hash.Hash-based digests with little effort. +// +// Basics +// +// The format of a digest is simply a string with two parts, dubbed the +// "algorithm" and the "digest", separated by a colon: +// +// : +// +// An example of a sha256 digest representation follows: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// In this case, the string "sha256" is the algorithm and the hex bytes are +// the "digest". A tarsum example will be more illustrative of the use case +// involved in the registry: +// +// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b +// +// For this, we consider the algorithm to be "tarsum+sha256". Prudent +// applications will favor the ParseDigest function to verify the format over +// using simple type casts. However, a normal string can be cast as a digest +// with a simple type conversion: +// +// Digest("tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b") +// +// Because the Digest type is simply a string, once a valid Digest is +// obtained, comparisons are cheap, quick and simple to express with the +// standard equality operator. +// +// Verification +// +// The main benefit of using the Digest type is simple verification against a +// given digest. The Verifier interface, modeled after the stdlib hash.Hash +// interface, provides a common write sink for digest verification. After +// writing is complete, calling the Verifier.Verified method will indicate +// whether or not the stream of bytes matches the target digest. +// +// Missing Features +// +// In addition to the above, we intend to add the following features to this +// package: +// +// 1. A Digester type that supports write sink digest calculation. +// +// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry. +// +package digest diff --git a/digest/verifiers.go b/digest/verifiers.go new file mode 100644 index 00000000..e738026a --- /dev/null +++ b/digest/verifiers.go @@ -0,0 +1,131 @@ +package digest + +import ( + "crypto/md5" + "crypto/sha1" + "crypto/sha256" + "hash" + "io" + "io/ioutil" + + "github.com/docker/docker/pkg/tarsum" +) + +type Verifier interface { + io.Writer + + // Verified will return true if the content written to Verifier matches + // the digest. + Verified() bool + + // Planned methods: + // Err() error + // Reset() +} + +func DigestVerifier(d Digest) Verifier { + alg := d.Algorithm() + switch alg { + case "md5", "sha1", "sha256": + return hashVerifier{ + hash: newHash(alg), + digest: d, + } + default: + // Assume we have a tarsum. + version, err := tarsum.GetVersionFromTarsum(string(d)) + if err != nil { + panic(err) // Always assume valid tarsum at this point. + } + + pr, pw := io.Pipe() + + // TODO(stevvooe): We may actually want to ban the earlier versions of + // tarsum. That decision may not be the place of the verifier. + + ts, err := tarsum.NewTarSum(pr, true, version) + if err != nil { + panic(err) + } + + // TODO(sday): Ick! A goroutine per digest verification? We'll have to + // get the tarsum library to export an io.Writer variant. + go func() { + io.Copy(ioutil.Discard, ts) + pw.Close() + }() + + return &tarsumVerifier{ + digest: d, + ts: ts, + pr: pr, + pw: pw, + } + } + + panic("unsupported digest: " + d) +} + +// LengthVerifier returns a verifier that returns true when the number of read +// bytes equals the expected parameter. +func LengthVerifier(expected int64) Verifier { + return &lengthVerifier{ + expected: expected, + } +} + +type lengthVerifier struct { + expected int64 // expected bytes read + len int64 // bytes read +} + +func (lv *lengthVerifier) Write(p []byte) (n int, err error) { + n = len(p) + lv.len += int64(n) + return n, err +} + +func (lv *lengthVerifier) Verified() bool { + return lv.expected == lv.len +} + +func newHash(name string) hash.Hash { + switch name { + case "sha256": + return sha256.New() + case "sha1": + return sha1.New() + case "md5": + return md5.New() + default: + panic("unsupport algorithm: " + name) + } +} + +type hashVerifier struct { + digest Digest + hash hash.Hash +} + +func (hv hashVerifier) Write(p []byte) (n int, err error) { + return hv.hash.Write(p) +} + +func (hv hashVerifier) Verified() bool { + return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash) +} + +type tarsumVerifier struct { + digest Digest + ts tarsum.TarSum + pr *io.PipeReader + pw *io.PipeWriter +} + +func (tv *tarsumVerifier) Write(p []byte) (n int, err error) { + return tv.pw.Write(p) +} + +func (tv *tarsumVerifier) Verified() bool { + return tv.digest == Digest(tv.ts.Sum(nil)) +} diff --git a/digest/verifiers_test.go b/digest/verifiers_test.go new file mode 100644 index 00000000..77b02ed0 --- /dev/null +++ b/digest/verifiers_test.go @@ -0,0 +1,71 @@ +package digest + +import ( + "bytes" + "crypto/rand" + "io" + "os" + "testing" + + "github.com/docker/docker-registry/common/testutil" +) + +func TestDigestVerifier(t *testing.T) { + p := make([]byte, 1<<20) + rand.Read(p) + digest, err := DigestBytes(p) + if err != nil { + t.Fatalf("unexpected error digesting bytes: %#v", err) + } + + verifier := DigestVerifier(digest) + io.Copy(verifier, bytes.NewReader(p)) + + if !verifier.Verified() { + t.Fatalf("bytes not verified") + } + + tf, tarSum, err := testutil.CreateRandomTarFile() + if err != nil { + t.Fatalf("error creating tarfile: %v", err) + } + + digest, err = DigestReader(tf) + if err != nil { + t.Fatalf("error digesting tarsum: %v", err) + } + + if digest.String() != tarSum { + t.Fatalf("unexpected digest: %q != %q", digest.String(), tarSum) + } + + expectedSize, _ := tf.Seek(0, os.SEEK_END) // Get tar file size + tf.Seek(0, os.SEEK_SET) // seek back + + // This is the most relevant example for the registry application. It's + // effectively a read through pipeline, where the final sink is the digest + // verifier. + verifier = DigestVerifier(digest) + lengthVerifier := LengthVerifier(expectedSize) + rd := io.TeeReader(tf, lengthVerifier) + io.Copy(verifier, rd) + + if !lengthVerifier.Verified() { + t.Fatalf("verifier detected incorrect length") + } + + if !verifier.Verified() { + t.Fatalf("bytes not verified") + } +} + +// TODO(stevvooe): Add benchmarks to measure bytes/second throughput for +// DigestVerifier. We should be tarsum/gzip limited for common cases but we +// want to verify this. +// +// The relevant benchmarks for comparison can be run with the following +// commands: +// +// go test -bench . crypto/sha1 +// go test -bench . github.com/docker/docker/pkg/tarsum +//