From 3cfe9aede51ed09e2763cc08afc53bd5c652704f Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Wed, 19 Nov 2014 13:23:01 -0800 Subject: [PATCH 1/4] Move Digest type into discrete package The Digest type will be fairly central for blob and layer management. The type presented in this package provides a number of core features that should enable reliable use within the registry. This commit will be followed by others that convert the storage layer and webapp to use this type as the primary layer/blob CAS identifier. --- digest/digest.go | 145 +++++++++++++++++++++++++++++++++++++++ digest/digest_test.go | 80 +++++++++++++++++++++ digest/doc.go | 52 ++++++++++++++ digest/verifiers.go | 131 +++++++++++++++++++++++++++++++++++ digest/verifiers_test.go | 71 +++++++++++++++++++ 5 files changed, 479 insertions(+) create mode 100644 digest/digest.go create mode 100644 digest/digest_test.go create mode 100644 digest/doc.go create mode 100644 digest/verifiers.go create mode 100644 digest/verifiers_test.go diff --git a/digest/digest.go b/digest/digest.go new file mode 100644 index 000000000..f2ce021ae --- /dev/null +++ b/digest/digest.go @@ -0,0 +1,145 @@ +package digest + +import ( + "bytes" + "crypto/sha256" + "fmt" + "hash" + "io" + "io/ioutil" + "strings" + + "github.com/docker/docker-registry/common" + "github.com/docker/docker/pkg/tarsum" +) + +// Digest allows simple protection of hex formatted digest strings, prefixed +// by their algorithm. Strings of type Digest have some guarantee of being in +// the correct format and it provides quick access to the components of a +// digest string. +// +// The following is an example of the contents of Digest types: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// More important for this code base, this type is compatible with tarsum +// digests. For example, the following would be a valid Digest: +// +// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b +// +// This allows to abstract the digest behind this type and work only in those +// terms. +type Digest string + +// NewDigest returns a Digest from alg and a hash.Hash object. +func NewDigest(alg string, h hash.Hash) Digest { + return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil))) +} + +var ( + // ErrDigestInvalidFormat returned when digest format invalid. + ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format") + + // ErrDigestUnsupported returned when the digest algorithm is unsupported by registry. + ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm") +) + +// ParseDigest parses s and returns the validated digest object. An error will +// be returned if the format is invalid. +func ParseDigest(s string) (Digest, error) { + // Common case will be tarsum + _, err := common.ParseTarSum(s) + if err == nil { + return Digest(s), nil + } + + // Continue on for general parser + + i := strings.Index(s, ":") + if i < 0 { + return "", ErrDigestInvalidFormat + } + + // case: "sha256:" with no hex. + if i+1 == len(s) { + return "", ErrDigestInvalidFormat + } + + switch s[:i] { + case "md5", "sha1", "sha256": + break + default: + return "", ErrDigestUnsupported + } + + return Digest(s), nil +} + +// DigestReader returns the most valid digest for the underlying content. +func DigestReader(rd io.Reader) (Digest, error) { + + // TODO(stevvooe): This is pretty inefficient to always be calculating a + // sha256 hash to provide fallback, but it provides some nice semantics in + // that we never worry about getting the right digest for a given reader. + // For the most part, we can detect tar vs non-tar with only a few bytes, + // so a scheme that saves those bytes would probably be better here. + + h := sha256.New() + tr := io.TeeReader(rd, h) + + ts, err := tarsum.NewTarSum(tr, true, tarsum.Version1) + if err != nil { + return "", err + } + + // Try to copy from the tarsum, if we fail, copy the remaining bytes into + // hash directly. + if _, err := io.Copy(ioutil.Discard, ts); err != nil { + if err.Error() != "archive/tar: invalid tar header" { + return "", err + } + + if _, err := io.Copy(h, rd); err != nil { + return "", err + } + + return NewDigest("sha256", h), nil + } + + d, err := ParseDigest(ts.Sum(nil)) + if err != nil { + return "", err + } + + return d, nil +} + +func DigestBytes(p []byte) (Digest, error) { + return DigestReader(bytes.NewReader(p)) +} + +// Algorithm returns the algorithm portion of the digest. This will panic if +// the underlying digest is not in a valid format. +func (d Digest) Algorithm() string { + return string(d[:d.sepIndex()]) +} + +// Hex returns the hex digest portion of the digest. This will panic if the +// underlying digest is not in a valid format. +func (d Digest) Hex() string { + return string(d[d.sepIndex()+1:]) +} + +func (d Digest) String() string { + return string(d) +} + +func (d Digest) sepIndex() int { + i := strings.Index(string(d), ":") + + if i < 0 { + panic("invalid digest: " + d) + } + + return i +} diff --git a/digest/digest_test.go b/digest/digest_test.go new file mode 100644 index 000000000..127f7873d --- /dev/null +++ b/digest/digest_test.go @@ -0,0 +1,80 @@ +package digest + +import "testing" + +func TestParseDigest(t *testing.T) { + for _, testcase := range []struct { + input string + err error + algorithm string + hex string + }{ + { + input: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "tarsum+sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "tarsum.dev+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "tarsum.dev+sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e", + algorithm: "tarsum.v1+sha256", + hex: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e", + }, + { + input: "sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + algorithm: "sha256", + hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b", + }, + { + input: "md5:d41d8cd98f00b204e9800998ecf8427e", + algorithm: "md5", + hex: "d41d8cd98f00b204e9800998ecf8427e", + }, + { + // empty hex + input: "sha256:", + err: ErrDigestInvalidFormat, + }, + { + // just hex + input: "d41d8cd98f00b204e9800998ecf8427e", + err: ErrDigestInvalidFormat, + }, + { + input: "foo:d41d8cd98f00b204e9800998ecf8427e", + err: ErrDigestUnsupported, + }, + } { + digest, err := ParseDigest(testcase.input) + if err != testcase.err { + t.Fatalf("error differed from expected while parsing %q: %v != %v", testcase.input, err, testcase.err) + } + + if testcase.err != nil { + continue + } + + if digest.Algorithm() != testcase.algorithm { + t.Fatalf("incorrect algorithm for parsed digest: %q != %q", digest.Algorithm(), testcase.algorithm) + } + + if digest.Hex() != testcase.hex { + t.Fatalf("incorrect hex for parsed digest: %q != %q", digest.Hex(), testcase.hex) + } + + // Parse string return value and check equality + newParsed, err := ParseDigest(digest.String()) + + if err != nil { + t.Fatalf("unexpected error parsing input %q: %v", testcase.input, err) + } + + if newParsed != digest { + t.Fatalf("expected equal: %q != %q", newParsed, digest) + } + } +} diff --git a/digest/doc.go b/digest/doc.go new file mode 100644 index 000000000..2ce7698c2 --- /dev/null +++ b/digest/doc.go @@ -0,0 +1,52 @@ +// This package provides a generalized type to opaquely represent message +// digests and their operations within the registry. The Digest type is +// designed to serve as a flexible identifier in a content-addressable system. +// More importantly, it provides tools and wrappers to work with tarsums and +// hash.Hash-based digests with little effort. +// +// Basics +// +// The format of a digest is simply a string with two parts, dubbed the +// "algorithm" and the "digest", separated by a colon: +// +// : +// +// An example of a sha256 digest representation follows: +// +// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc +// +// In this case, the string "sha256" is the algorithm and the hex bytes are +// the "digest". A tarsum example will be more illustrative of the use case +// involved in the registry: +// +// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b +// +// For this, we consider the algorithm to be "tarsum+sha256". Prudent +// applications will favor the ParseDigest function to verify the format over +// using simple type casts. However, a normal string can be cast as a digest +// with a simple type conversion: +// +// Digest("tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b") +// +// Because the Digest type is simply a string, once a valid Digest is +// obtained, comparisons are cheap, quick and simple to express with the +// standard equality operator. +// +// Verification +// +// The main benefit of using the Digest type is simple verification against a +// given digest. The Verifier interface, modeled after the stdlib hash.Hash +// interface, provides a common write sink for digest verification. After +// writing is complete, calling the Verifier.Verified method will indicate +// whether or not the stream of bytes matches the target digest. +// +// Missing Features +// +// In addition to the above, we intend to add the following features to this +// package: +// +// 1. A Digester type that supports write sink digest calculation. +// +// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry. +// +package digest diff --git a/digest/verifiers.go b/digest/verifiers.go new file mode 100644 index 000000000..e738026ac --- /dev/null +++ b/digest/verifiers.go @@ -0,0 +1,131 @@ +package digest + +import ( + "crypto/md5" + "crypto/sha1" + "crypto/sha256" + "hash" + "io" + "io/ioutil" + + "github.com/docker/docker/pkg/tarsum" +) + +type Verifier interface { + io.Writer + + // Verified will return true if the content written to Verifier matches + // the digest. + Verified() bool + + // Planned methods: + // Err() error + // Reset() +} + +func DigestVerifier(d Digest) Verifier { + alg := d.Algorithm() + switch alg { + case "md5", "sha1", "sha256": + return hashVerifier{ + hash: newHash(alg), + digest: d, + } + default: + // Assume we have a tarsum. + version, err := tarsum.GetVersionFromTarsum(string(d)) + if err != nil { + panic(err) // Always assume valid tarsum at this point. + } + + pr, pw := io.Pipe() + + // TODO(stevvooe): We may actually want to ban the earlier versions of + // tarsum. That decision may not be the place of the verifier. + + ts, err := tarsum.NewTarSum(pr, true, version) + if err != nil { + panic(err) + } + + // TODO(sday): Ick! A goroutine per digest verification? We'll have to + // get the tarsum library to export an io.Writer variant. + go func() { + io.Copy(ioutil.Discard, ts) + pw.Close() + }() + + return &tarsumVerifier{ + digest: d, + ts: ts, + pr: pr, + pw: pw, + } + } + + panic("unsupported digest: " + d) +} + +// LengthVerifier returns a verifier that returns true when the number of read +// bytes equals the expected parameter. +func LengthVerifier(expected int64) Verifier { + return &lengthVerifier{ + expected: expected, + } +} + +type lengthVerifier struct { + expected int64 // expected bytes read + len int64 // bytes read +} + +func (lv *lengthVerifier) Write(p []byte) (n int, err error) { + n = len(p) + lv.len += int64(n) + return n, err +} + +func (lv *lengthVerifier) Verified() bool { + return lv.expected == lv.len +} + +func newHash(name string) hash.Hash { + switch name { + case "sha256": + return sha256.New() + case "sha1": + return sha1.New() + case "md5": + return md5.New() + default: + panic("unsupport algorithm: " + name) + } +} + +type hashVerifier struct { + digest Digest + hash hash.Hash +} + +func (hv hashVerifier) Write(p []byte) (n int, err error) { + return hv.hash.Write(p) +} + +func (hv hashVerifier) Verified() bool { + return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash) +} + +type tarsumVerifier struct { + digest Digest + ts tarsum.TarSum + pr *io.PipeReader + pw *io.PipeWriter +} + +func (tv *tarsumVerifier) Write(p []byte) (n int, err error) { + return tv.pw.Write(p) +} + +func (tv *tarsumVerifier) Verified() bool { + return tv.digest == Digest(tv.ts.Sum(nil)) +} diff --git a/digest/verifiers_test.go b/digest/verifiers_test.go new file mode 100644 index 000000000..77b02ed07 --- /dev/null +++ b/digest/verifiers_test.go @@ -0,0 +1,71 @@ +package digest + +import ( + "bytes" + "crypto/rand" + "io" + "os" + "testing" + + "github.com/docker/docker-registry/common/testutil" +) + +func TestDigestVerifier(t *testing.T) { + p := make([]byte, 1<<20) + rand.Read(p) + digest, err := DigestBytes(p) + if err != nil { + t.Fatalf("unexpected error digesting bytes: %#v", err) + } + + verifier := DigestVerifier(digest) + io.Copy(verifier, bytes.NewReader(p)) + + if !verifier.Verified() { + t.Fatalf("bytes not verified") + } + + tf, tarSum, err := testutil.CreateRandomTarFile() + if err != nil { + t.Fatalf("error creating tarfile: %v", err) + } + + digest, err = DigestReader(tf) + if err != nil { + t.Fatalf("error digesting tarsum: %v", err) + } + + if digest.String() != tarSum { + t.Fatalf("unexpected digest: %q != %q", digest.String(), tarSum) + } + + expectedSize, _ := tf.Seek(0, os.SEEK_END) // Get tar file size + tf.Seek(0, os.SEEK_SET) // seek back + + // This is the most relevant example for the registry application. It's + // effectively a read through pipeline, where the final sink is the digest + // verifier. + verifier = DigestVerifier(digest) + lengthVerifier := LengthVerifier(expectedSize) + rd := io.TeeReader(tf, lengthVerifier) + io.Copy(verifier, rd) + + if !lengthVerifier.Verified() { + t.Fatalf("verifier detected incorrect length") + } + + if !verifier.Verified() { + t.Fatalf("bytes not verified") + } +} + +// TODO(stevvooe): Add benchmarks to measure bytes/second throughput for +// DigestVerifier. We should be tarsum/gzip limited for common cases but we +// want to verify this. +// +// The relevant benchmarks for comparison can be run with the following +// commands: +// +// go test -bench . crypto/sha1 +// go test -bench . github.com/docker/docker/pkg/tarsum +// From 1a508d67d959daa427c46488f74380561c844713 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Wed, 19 Nov 2014 14:39:32 -0800 Subject: [PATCH 2/4] Move storage package to use Digest type Mostly, we've made superficial changes to the storage package to start using the Digest type. Many of the exported interface methods have been changed to reflect this in addition to changes in the way layer uploads will be initiated. Further work here is necessary but will come with a separate PR. --- storage/digest.go | 59 --------------- storage/layer.go | 38 ++++------ storage/layer_test.go | 162 ++++++++++------------------------------- storage/layerreader.go | 8 +- storage/layerstore.go | 41 ++++------- storage/layerupload.go | 121 +++++++++++++----------------- storage/paths.go | 26 ++++++- storage/paths_test.go | 10 ++- 8 files changed, 156 insertions(+), 309 deletions(-) delete mode 100644 storage/digest.go diff --git a/storage/digest.go b/storage/digest.go deleted file mode 100644 index db5c884be..000000000 --- a/storage/digest.go +++ /dev/null @@ -1,59 +0,0 @@ -package storage - -import ( - "fmt" - "hash" - "strings" -) - -// Digest allows simple protection of hex formatted digest strings, prefixed -// by their algorithm. Strings of type Digest have some guarantee of being in -// the correct format and it provides quick access to the components of a -// digest string. -// -// The following is an example of the contents of Digest types: -// -// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc -// -type Digest string - -// NewDigest returns a Digest from alg and a hash.Hash object. -func NewDigest(alg string, h hash.Hash) Digest { - return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil))) -} - -var ( - // ErrDigestInvalidFormat returned when digest format invalid. - ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format") - - // ErrDigestUnsupported returned when the digest algorithm is unsupported by registry. - ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm") -) - -// ParseDigest parses s and returns the validated digest object. An error will -// be returned if the format is invalid. -func ParseDigest(s string) (Digest, error) { - parts := strings.SplitN(s, ":", 2) - if len(parts) != 2 { - return "", ErrDigestInvalidFormat - } - - switch parts[0] { - case "sha256": - break - default: - return "", ErrDigestUnsupported - } - - return Digest(s), nil -} - -// Algorithm returns the algorithm portion of the digest. -func (d Digest) Algorithm() string { - return strings.SplitN(string(d), ":", 2)[0] -} - -// Hex returns the hex digest portion of the digest. -func (d Digest) Hex() string { - return strings.SplitN(string(d), ":", 2)[1] -} diff --git a/storage/layer.go b/storage/layer.go index bae697015..6c45f401a 100644 --- a/storage/layer.go +++ b/storage/layer.go @@ -4,24 +4,25 @@ import ( "fmt" "io" "time" + + "github.com/docker/docker-registry/digest" ) // LayerService provides operations on layer files in a backend storage. type LayerService interface { // Exists returns true if the layer exists. - Exists(tarSum string) (bool, error) + Exists(name string, digest digest.Digest) (bool, error) // Fetch the layer identifed by TarSum. - Fetch(tarSum string) (Layer, error) + Fetch(name string, digest digest.Digest) (Layer, error) - // Upload begins a layer upload, returning a handle. If the layer upload - // is already in progress or the layer has already been uploaded, this - // will return an error. - Upload(name, tarSum string) (LayerUpload, error) + // Upload begins a layer upload to repository identified by name, + // returning a handle. + Upload(name string) (LayerUpload, error) // Resume continues an in progress layer upload, returning the current // state of the upload. - Resume(name, tarSum, uuid string) (LayerUpload, error) + Resume(uuid string) (LayerUpload, error) } // Layer provides a readable and seekable layer object. Typically, @@ -35,8 +36,9 @@ type Layer interface { // Name returns the repository under which this layer is linked. Name() string // TODO(stevvooe): struggling with nomenclature: should this be "repo" or "name"? - // TarSum returns the unique tarsum of the layer. - TarSum() string + // Digest returns the unique digest of the blob, which is the tarsum for + // layers. + Digest() digest.Digest // CreatedAt returns the time this layer was created. Until we implement // Stat call on storagedriver, this just returns the zero time. @@ -55,18 +57,13 @@ type LayerUpload interface { // Name of the repository under which the layer will be linked. Name() string - // TarSum identifier of the proposed layer. Resulting data must match this - // tarsum. - TarSum() string - // Offset returns the position of the last byte written to this layer. Offset() int64 // Finish marks the upload as completed, returning a valid handle to the - // uploaded layer. The final size and checksum are validated against the - // contents of the uploaded layer. The checksum should be provided in the - // format :. - Finish(size int64, digest string) (Layer, error) + // uploaded layer. The final size and digest are validated against the + // contents of the uploaded layer. + Finish(size int64, digest digest.Digest) (Layer, error) // Cancel the layer upload process. Cancel() error @@ -85,11 +82,8 @@ var ( // ErrLayerUploadUnknown returned when upload is not found. ErrLayerUploadUnknown = fmt.Errorf("layer upload unknown") - // ErrLayerInvalidChecksum returned when checksum/digest check fails. - ErrLayerInvalidChecksum = fmt.Errorf("invalid layer checksum") - - // ErrLayerInvalidTarsum returned when tarsum check fails. - ErrLayerInvalidTarsum = fmt.Errorf("invalid layer tarsum") + // ErrLayerInvalidDigest returned when tarsum check fails. + ErrLayerInvalidDigest = fmt.Errorf("invalid layer digest") // ErrLayerInvalidLength returned when length check fails. ErrLayerInvalidLength = fmt.Errorf("invalid layer length") diff --git a/storage/layer_test.go b/storage/layer_test.go index 721878102..335793d28 100644 --- a/storage/layer_test.go +++ b/storage/layer_test.go @@ -1,20 +1,16 @@ package storage import ( - "archive/tar" "bytes" - "crypto/rand" "crypto/sha256" "fmt" "io" "io/ioutil" - mrand "math/rand" "os" "testing" - "time" - - "github.com/docker/docker/pkg/tarsum" + "github.com/docker/docker-registry/common/testutil" + "github.com/docker/docker-registry/digest" "github.com/docker/docker-registry/storagedriver" "github.com/docker/docker-registry/storagedriver/inmemory" ) @@ -22,12 +18,14 @@ import ( // TestSimpleLayerUpload covers the layer upload process, exercising common // error paths that might be seen during an upload. func TestSimpleLayerUpload(t *testing.T) { - randomDataReader, tarSum, err := createRandomReader() + randomDataReader, tarSumStr, err := testutil.CreateRandomTarFile() if err != nil { t.Fatalf("error creating random reader: %v", err) } + dgst := digest.Digest(tarSumStr) + uploadStore, err := newTemporaryLocalFSLayerUploadStore() if err != nil { t.Fatalf("error allocating upload store: %v", err) @@ -48,7 +46,7 @@ func TestSimpleLayerUpload(t *testing.T) { h := sha256.New() rd := io.TeeReader(randomDataReader, h) - layerUpload, err := ls.Upload(imageName, tarSum) + layerUpload, err := ls.Upload(imageName) if err != nil { t.Fatalf("unexpected error starting layer upload: %s", err) @@ -60,13 +58,13 @@ func TestSimpleLayerUpload(t *testing.T) { } // Do a resume, get unknown upload - layerUpload, err = ls.Resume(imageName, tarSum, layerUpload.UUID()) + layerUpload, err = ls.Resume(layerUpload.UUID()) if err != ErrLayerUploadUnknown { t.Fatalf("unexpected error resuming upload, should be unkown: %v", err) } // Restart! - layerUpload, err = ls.Upload(imageName, tarSum) + layerUpload, err = ls.Upload(imageName) if err != nil { t.Fatalf("unexpected error starting layer upload: %s", err) } @@ -92,25 +90,25 @@ func TestSimpleLayerUpload(t *testing.T) { layerUpload.Close() // Do a resume, for good fun - layerUpload, err = ls.Resume(imageName, tarSum, layerUpload.UUID()) + layerUpload, err = ls.Resume(layerUpload.UUID()) if err != nil { t.Fatalf("unexpected error resuming upload: %v", err) } - digest := NewDigest("sha256", h) - layer, err := layerUpload.Finish(randomDataSize, string(digest)) + sha256Digest := digest.NewDigest("sha256", h) + layer, err := layerUpload.Finish(randomDataSize, dgst) if err != nil { t.Fatalf("unexpected error finishing layer upload: %v", err) } // After finishing an upload, it should no longer exist. - if _, err := ls.Resume(imageName, tarSum, layerUpload.UUID()); err != ErrLayerUploadUnknown { + if _, err := ls.Resume(layerUpload.UUID()); err != ErrLayerUploadUnknown { t.Fatalf("expected layer upload to be unknown, got %v", err) } // Test for existence. - exists, err := ls.Exists(layer.TarSum()) + exists, err := ls.Exists(layer.Name(), layer.Digest()) if err != nil { t.Fatalf("unexpected error checking for existence: %v", err) } @@ -129,8 +127,8 @@ func TestSimpleLayerUpload(t *testing.T) { t.Fatalf("incorrect read length") } - if NewDigest("sha256", h) != digest { - t.Fatalf("unexpected digest from uploaded layer: %q != %q", NewDigest("sha256", h), digest) + if digest.NewDigest("sha256", h) != sha256Digest { + t.Fatalf("unexpected digest from uploaded layer: %q != %q", digest.NewDigest("sha256", h), sha256Digest) } } @@ -148,13 +146,15 @@ func TestSimpleLayerRead(t *testing.T) { }, } - randomLayerReader, tarSum, err := createRandomReader() + randomLayerReader, tarSumStr, err := testutil.CreateRandomTarFile() if err != nil { t.Fatalf("error creating random data: %v", err) } + dgst := digest.Digest(tarSumStr) + // Test for existence. - exists, err := ls.Exists(tarSum) + exists, err := ls.Exists(imageName, dgst) if err != nil { t.Fatalf("unexpected error checking for existence: %v", err) } @@ -164,7 +164,7 @@ func TestSimpleLayerRead(t *testing.T) { } // Try to get the layer and make sure we get a not found error - layer, err := ls.Fetch(tarSum) + layer, err := ls.Fetch(imageName, dgst) if err == nil { t.Fatalf("error expected fetching unknown layer") } @@ -174,8 +174,7 @@ func TestSimpleLayerRead(t *testing.T) { } else { err = nil } - - randomLayerDigest, err := writeTestLayer(driver, ls.pathMapper, imageName, tarSum, randomLayerReader) + randomLayerDigest, err := writeTestLayer(driver, ls.pathMapper, imageName, dgst, randomLayerReader) if err != nil { t.Fatalf("unexpected error writing test layer: %v", err) } @@ -185,7 +184,7 @@ func TestSimpleLayerRead(t *testing.T) { t.Fatalf("error getting seeker size for random layer: %v", err) } - layer, err = ls.Fetch(tarSum) + layer, err = ls.Fetch(imageName, dgst) if err != nil { t.Fatal(err) } @@ -202,9 +201,9 @@ func TestSimpleLayerRead(t *testing.T) { t.Fatalf("stored incorrect number of bytes in layer: %d != %d", nn, randomLayerSize) } - digest := NewDigest("sha256", h) - if digest != randomLayerDigest { - t.Fatalf("fetched digest does not match: %q != %q", digest, randomLayerDigest) + sha256Digest := digest.NewDigest("sha256", h) + if sha256Digest != randomLayerDigest { + t.Fatalf("fetched digest does not match: %q != %q", sha256Digest, randomLayerDigest) } // Now seek back the layer, read the whole thing and check against randomLayerData @@ -270,12 +269,14 @@ func TestLayerReadErrors(t *testing.T) { // writeRandomLayer creates a random layer under name and tarSum using driver // and pathMapper. An io.ReadSeeker with the data is returned, along with the // sha256 hex digest. -func writeRandomLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string) (rs io.ReadSeeker, tarSum string, digest Digest, err error) { - reader, tarSum, err := createRandomReader() +func writeRandomLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string) (rs io.ReadSeeker, tarSum digest.Digest, sha256digest digest.Digest, err error) { + reader, tarSumStr, err := testutil.CreateRandomTarFile() if err != nil { return nil, "", "", err } + tarSum = digest.Digest(tarSumStr) + // Now, actually create the layer. randomLayerDigest, err := writeTestLayer(driver, pathMapper, name, tarSum, ioutil.NopCloser(reader)) @@ -312,91 +313,10 @@ func seekerSize(seeker io.ReadSeeker) (int64, error) { return end, nil } -// createRandomReader returns a random read seeker and its tarsum. The -// returned content will be a valid tar file with a random number of files and -// content. -func createRandomReader() (rs io.ReadSeeker, tarSum string, err error) { - nFiles := mrand.Intn(10) + 10 - target := &bytes.Buffer{} - wr := tar.NewWriter(target) - - // Perturb this on each iteration of the loop below. - header := &tar.Header{ - Mode: 0644, - ModTime: time.Now(), - Typeflag: tar.TypeReg, - Uname: "randocalrissian", - Gname: "cloudcity", - AccessTime: time.Now(), - ChangeTime: time.Now(), - } - - for fileNumber := 0; fileNumber < nFiles; fileNumber++ { - fileSize := mrand.Int63n(1<<20) + 1<<20 - - header.Name = fmt.Sprint(fileNumber) - header.Size = fileSize - - if err := wr.WriteHeader(header); err != nil { - return nil, "", err - } - - randomData := make([]byte, fileSize) - - // Fill up the buffer with some random data. - n, err := rand.Read(randomData) - - if n != len(randomData) { - return nil, "", fmt.Errorf("short read creating random reader: %v bytes != %v bytes", n, len(randomData)) - } - - if err != nil { - return nil, "", err - } - - nn, err := io.Copy(wr, bytes.NewReader(randomData)) - if nn != fileSize { - return nil, "", fmt.Errorf("short copy writing random file to tar") - } - - if err != nil { - return nil, "", err - } - - if err := wr.Flush(); err != nil { - return nil, "", err - } - } - - if err := wr.Close(); err != nil { - return nil, "", err - } - - reader := bytes.NewReader(target.Bytes()) - - // A tar builder that supports tarsum inline calculation would be awesome - // here. - ts, err := tarsum.NewTarSum(reader, true, tarsum.Version1) - if err != nil { - return nil, "", err - } - - nn, err := io.Copy(ioutil.Discard, ts) - if nn != int64(len(target.Bytes())) { - return nil, "", fmt.Errorf("short copy when getting tarsum of random layer: %v != %v", nn, len(target.Bytes())) - } - - if err != nil { - return nil, "", err - } - - return bytes.NewReader(target.Bytes()), ts.Sum(nil), nil -} - // createTestLayer creates a simple test layer in the provided driver under -// tarsum, returning the string digest. This is implemented peicemeal and -// should probably be replaced by the uploader when it's ready. -func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name, tarSum string, content io.Reader) (Digest, error) { +// tarsum dgst, returning the sha256 digest location. This is implemented +// peicemeal and should probably be replaced by the uploader when it's ready. +func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string, dgst digest.Digest, content io.Reader) (digest.Digest, error) { h := sha256.New() rd := io.TeeReader(content, h) @@ -406,11 +326,11 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, return "", nil } - digest := NewDigest("sha256", h) + blobDigestSHA := digest.NewDigest("sha256", h) blobPath, err := pathMapper.path(blobPathSpec{ - alg: digest.Algorithm(), - digest: digest.Hex(), + alg: blobDigestSHA.Algorithm(), + digest: blobDigestSHA.Hex(), }) if err := driver.PutContent(blobPath, p); err != nil { @@ -418,7 +338,7 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, } layerIndexLinkPath, err := pathMapper.path(layerIndexLinkPathSpec{ - tarSum: tarSum, + digest: dgst, }) if err != nil { @@ -427,18 +347,14 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, layerLinkPath, err := pathMapper.path(layerLinkPathSpec{ name: name, - tarSum: tarSum, + digest: dgst, }) if err != nil { return "", err } - if err != nil { - return "", err - } - - if err := driver.PutContent(layerLinkPath, []byte(string(NewDigest("sha256", h)))); err != nil { + if err := driver.PutContent(layerLinkPath, []byte(blobDigestSHA.String())); err != nil { return "", nil } @@ -446,5 +362,5 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, return "", nil } - return NewDigest("sha256", h), err + return blobDigestSHA, err } diff --git a/storage/layerreader.go b/storage/layerreader.go index df05c3675..396940d0f 100644 --- a/storage/layerreader.go +++ b/storage/layerreader.go @@ -6,6 +6,8 @@ import ( "io" "os" "time" + + "github.com/docker/docker-registry/digest" ) // layerReadSeeker implements Layer and provides facilities for reading and @@ -16,7 +18,7 @@ type layerReader struct { brd *bufio.Reader name string // repo name of this layer - tarSum string + digest digest.Digest path string createdAt time.Time @@ -35,8 +37,8 @@ func (lrs *layerReader) Name() string { return lrs.name } -func (lrs *layerReader) TarSum() string { - return lrs.tarSum +func (lrs *layerReader) Digest() digest.Digest { + return lrs.digest } func (lrs *layerReader) CreatedAt() time.Time { diff --git a/storage/layerstore.go b/storage/layerstore.go index e2821a839..c9662ffde 100644 --- a/storage/layerstore.go +++ b/storage/layerstore.go @@ -6,6 +6,7 @@ import ( "time" "github.com/Sirupsen/logrus" + "github.com/docker/docker-registry/digest" "github.com/docker/docker-registry/storagedriver" ) @@ -15,10 +16,10 @@ type layerStore struct { uploadStore layerUploadStore } -func (ls *layerStore) Exists(tarSum string) (bool, error) { +func (ls *layerStore) Exists(name string, digest digest.Digest) (bool, error) { // Because this implementation just follows blob links, an existence check // is pretty cheap by starting and closing a fetch. - _, err := ls.Fetch(tarSum) + _, err := ls.Fetch(name, digest) if err != nil { if err == ErrLayerUnknown { @@ -31,8 +32,8 @@ func (ls *layerStore) Exists(tarSum string) (bool, error) { return true, nil } -func (ls *layerStore) Fetch(tarSum string) (Layer, error) { - repos, err := ls.resolveContainingRepositories(tarSum) +func (ls *layerStore) Fetch(name string, digest digest.Digest) (Layer, error) { + repos, err := ls.resolveContainingRepositories(digest) if err != nil { // TODO(stevvooe): Unknown tarsum error: need to wrap. @@ -44,7 +45,7 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) { // against the list of repos to which we have pull access. The argument // repos needs to be filtered against that access list. - name, blobPath, err := ls.resolveBlobPath(repos, tarSum) + _, blobPath, err := ls.resolveBlobPath(repos, digest) if err != nil { // TODO(stevvooe): Map this error correctly, perhaps in the callee. @@ -72,7 +73,7 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) { layerStore: ls, path: p, name: name, - tarSum: tarSum, + digest: digest, // TODO(stevvooe): Storage backend does not support modification time // queries yet. Layers "never" change, so just return the zero value. @@ -88,25 +89,13 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) { // Upload begins a layer upload, returning a handle. If the layer upload // is already in progress or the layer has already been uploaded, this // will return an error. -func (ls *layerStore) Upload(name, tarSum string) (LayerUpload, error) { - exists, err := ls.Exists(tarSum) - if err != nil { - return nil, err - } - - if exists { - // TODO(stevvoe): This looks simple now, but we really should only - // return the layer exists error when the layer exists AND the current - // client has access to the layer. If the client doesn't have access - // to the layer, the upload should proceed. - return nil, ErrLayerExists - } +func (ls *layerStore) Upload(name string) (LayerUpload, error) { // NOTE(stevvooe): Consider the issues with allowing concurrent upload of // the same two layers. Should it be disallowed? For now, we allow both // parties to proceed and the the first one uploads the layer. - lus, err := ls.uploadStore.New(name, tarSum) + lus, err := ls.uploadStore.New(name) if err != nil { return nil, err } @@ -116,7 +105,7 @@ func (ls *layerStore) Upload(name, tarSum string) (LayerUpload, error) { // Resume continues an in progress layer upload, returning the current // state of the upload. -func (ls *layerStore) Resume(name, tarSum, uuid string) (LayerUpload, error) { +func (ls *layerStore) Resume(uuid string) (LayerUpload, error) { lus, err := ls.uploadStore.GetState(uuid) if err != nil { @@ -135,9 +124,9 @@ func (ls *layerStore) newLayerUpload(lus LayerUploadState) LayerUpload { } } -func (ls *layerStore) resolveContainingRepositories(tarSum string) ([]string, error) { +func (ls *layerStore) resolveContainingRepositories(digest digest.Digest) ([]string, error) { // Lookup the layer link in the index by tarsum id. - layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{tarSum: tarSum}) + layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{digest: digest}) if err != nil { return nil, err } @@ -164,10 +153,10 @@ func (ls *layerStore) resolveContainingRepositories(tarSum string) ([]string, er // resolveBlobId lookups up the tarSum in the various repos to find the blob // link, returning the repo name and blob path spec or an error on failure. -func (ls *layerStore) resolveBlobPath(repos []string, tarSum string) (name string, bps blobPathSpec, err error) { +func (ls *layerStore) resolveBlobPath(repos []string, digest digest.Digest) (name string, bps blobPathSpec, err error) { for _, repo := range repos { - pathSpec := layerLinkPathSpec{name: repo, tarSum: tarSum} + pathSpec := layerLinkPathSpec{name: repo, digest: digest} layerLinkPath, err := ls.pathMapper.path(pathSpec) if err != nil { @@ -199,5 +188,5 @@ func (ls *layerStore) resolveBlobPath(repos []string, tarSum string) (name strin // TODO(stevvooe): Map this error to repo not found, but it basically // means we exited the loop above without finding a blob link. - return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and tarSum=%q", repos, tarSum) + return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and digest=%v", repos, digest) } diff --git a/storage/layerupload.go b/storage/layerupload.go index 7ad32d753..4ad021626 100644 --- a/storage/layerupload.go +++ b/storage/layerupload.go @@ -1,7 +1,6 @@ package storage import ( - "crypto/sha256" "encoding/json" "fmt" "io/ioutil" @@ -12,6 +11,7 @@ import ( "code.google.com/p/go-uuid/uuid" + "github.com/docker/docker-registry/digest" "github.com/docker/docker-registry/storagedriver" "github.com/docker/docker/pkg/tarsum" @@ -23,11 +23,6 @@ type LayerUploadState struct { // name is the primary repository under which the layer will be linked. Name string - // tarSum identifies the target layer. Provided by the client. If the - // resulting tarSum does not match this value, an error should be - // returned. - TarSum string - // UUID identifies the upload. UUID string @@ -64,7 +59,7 @@ type layerFile interface { // uploads. This interface will definitely change and will most likely end up // being exported to the app layer. Move the layer.go when it's ready to go. type layerUploadStore interface { - New(name, tarSum string) (LayerUploadState, error) + New(name string) (LayerUploadState, error) Open(uuid string) (layerFile, error) GetState(uuid string) (LayerUploadState, error) SaveState(lus LayerUploadState) error @@ -78,12 +73,6 @@ func (luc *layerUploadController) Name() string { return luc.LayerUploadState.Name } -// TarSum identifier of the proposed layer. Resulting data must match this -// tarsum. -func (luc *layerUploadController) TarSum() string { - return luc.LayerUploadState.TarSum -} - // UUID returns the identifier for this upload. func (luc *layerUploadController) UUID() string { return luc.LayerUploadState.UUID @@ -98,7 +87,7 @@ func (luc *layerUploadController) Offset() int64 { // uploaded layer. The final size and checksum are validated against the // contents of the uploaded layer. The checksum should be provided in the // format :. -func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, error) { +func (luc *layerUploadController) Finish(size int64, digest digest.Digest) (Layer, error) { // This section is going to be pretty ugly now. We will have to read the // file twice. First, to get the tarsum and checksum. When those are @@ -115,16 +104,11 @@ func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, e return nil, err } - digest, err := ParseDigest(digestStr) + digest, err = luc.validateLayer(fp, size, digest) if err != nil { return nil, err } - if err := luc.validateLayer(fp, size, digest); err != nil { - // Cleanup? - return nil, err - } - if err := luc.writeLayer(fp, size, digest); err != nil { // Cleanup? return nil, err @@ -142,7 +126,7 @@ func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, e return nil, err } - return luc.layerStore.Fetch(luc.TarSum()) + return luc.layerStore.Fetch(luc.Name(), digest) } // Cancel the layer upload process. @@ -239,69 +223,69 @@ func (luc *layerUploadController) reset() { } // validateLayer runs several checks on the layer file to ensure its validity. -// This is currently very expensive and relies on fast io and fast seek. -func (luc *layerUploadController) validateLayer(fp layerFile, size int64, digest Digest) error { +// This is currently very expensive and relies on fast io and fast seek on the +// local host. If successful, the latest digest is returned, which should be +// used over the passed in value. +func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst digest.Digest) (digest.Digest, error) { + // First, check the incoming tarsum version of the digest. + version, err := tarsum.GetVersionFromTarsum(dgst.String()) + if err != nil { + return "", err + } + + // TODO(stevvooe): Should we push this down into the digest type? + switch version { + case tarsum.Version1: + default: + // version 0 and dev, for now. + return "", ErrLayerTarSumVersionUnsupported + } + + digestVerifier := digest.DigestVerifier(dgst) + lengthVerifier := digest.LengthVerifier(size) + // First, seek to the end of the file, checking the size is as expected. end, err := fp.Seek(0, os.SEEK_END) if err != nil { - return err + return "", err } if end != size { - return ErrLayerInvalidLength + // Fast path length check. + return "", ErrLayerInvalidLength } - // Now seek back to start and take care of tarsum and checksum. + // Now seek back to start and take care of the digest. if _, err := fp.Seek(0, os.SEEK_SET); err != nil { - return err + return "", err } - version, err := tarsum.GetVersionFromTarsum(luc.TarSum()) + tr := io.TeeReader(fp, lengthVerifier) + tr = io.TeeReader(tr, digestVerifier) + + // TODO(stevvooe): This is one of the places we need a Digester write + // sink. Instead, its read driven. This migth be okay. + + // Calculate an updated digest with the latest version. + dgst, err = digest.DigestReader(tr) if err != nil { - return ErrLayerTarSumVersionUnsupported + return "", err } - // // We only support tarsum version 1 for now. - if version != tarsum.Version1 { - return ErrLayerTarSumVersionUnsupported + if !lengthVerifier.Verified() { + return "", ErrLayerInvalidLength } - ts, err := tarsum.NewTarSum(fp, true, tarsum.Version1) - if err != nil { - return err + if !digestVerifier.Verified() { + return "", ErrLayerInvalidDigest } - h := sha256.New() - - // Pull the layer file through by writing it to a checksum. - nn, err := io.Copy(h, ts) - - if nn != int64(size) { - return fmt.Errorf("bad read while finishing upload(%s) %v: %v != %v, err=%v", luc.UUID(), fp, nn, size, err) - } - - if err != nil && err != io.EOF { - return err - } - - calculatedDigest := NewDigest("sha256", h) - - // Compare the digests! - if digest != calculatedDigest { - return ErrLayerInvalidChecksum - } - - // Compare the tarsums! - if ts.Sum(nil) != luc.TarSum() { - return ErrLayerInvalidTarsum - } - - return nil + return dgst, nil } // writeLayer actually writes the the layer file into its final destination. // The layer should be validated before commencing the write. -func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest Digest) error { +func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest digest.Digest) error { blobPath, err := luc.layerStore.pathMapper.path(blobPathSpec{ alg: digest.Algorithm(), digest: digest.Hex(), @@ -342,10 +326,10 @@ func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest Di // linkLayer links a valid, written layer blog into the registry, first // linking the repository namespace, then adding it to the layerindex. -func (luc *layerUploadController) linkLayer(digest Digest) error { +func (luc *layerUploadController) linkLayer(digest digest.Digest) error { layerLinkPath, err := luc.layerStore.pathMapper.path(layerLinkPathSpec{ name: luc.Name(), - tarSum: luc.TarSum(), + digest: digest, }) if err != nil { @@ -358,7 +342,7 @@ func (luc *layerUploadController) linkLayer(digest Digest) error { // Link the layer into the name index. layerIndexLinkPath, err := luc.layerStore.pathMapper.path(layerIndexLinkPathSpec{ - tarSum: luc.TarSum(), + digest: digest, }) if err != nil { @@ -435,11 +419,10 @@ func newTemporaryLocalFSLayerUploadStore() (layerUploadStore, error) { }, nil } -func (llufs *localFSLayerUploadStore) New(name, tarSum string) (LayerUploadState, error) { +func (llufs *localFSLayerUploadStore) New(name string) (LayerUploadState, error) { lus := LayerUploadState{ - Name: name, - TarSum: tarSum, - UUID: uuid.New(), + Name: name, + UUID: uuid.New(), } if err := os.Mkdir(llufs.path(lus.UUID, ""), 0755); err != nil { diff --git a/storage/paths.go b/storage/paths.go index 76991c1fc..aedba3207 100644 --- a/storage/paths.go +++ b/storage/paths.go @@ -1,6 +1,8 @@ package storage import ( + "strings" + "github.com/docker/docker-registry/digest" "fmt" "path" @@ -9,6 +11,11 @@ import ( const storagePathVersion = "v2" +// TODO(sday): This needs to be changed: all layers for an image will be +// linked under the repository. Lookup from tarsum to name is not necessary, +// so we can remove the layer index. For this to properly work, image push +// must link the images layers under the repo. + // pathMapper maps paths based on "object names" and their ids. The "object // names" mapped by pathMapper are internal to the storage system. // @@ -79,7 +86,12 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) { switch v := spec.(type) { case layerLinkPathSpec: - tsi, err := common.ParseTarSum(v.tarSum) + if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") { + // Only tarsum is supported, for now + return "", fmt.Errorf("unsupport content digest: %v", v.digest) + } + + tsi, err := common.ParseTarSum(v.digest.String()) if err != nil { // TODO(sday): This will return an InvalidTarSumError from @@ -93,7 +105,12 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) { return p, nil case layerIndexLinkPathSpec: - tsi, err := common.ParseTarSum(v.tarSum) + if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") { + // Only tarsum is supported, for now + return "", fmt.Errorf("unsupport content digest: %v", v.digest) + } + + tsi, err := common.ParseTarSum(v.digest.String()) if err != nil { // TODO(sday): This will return an InvalidTarSumError from @@ -136,7 +153,7 @@ type pathSpec interface { // sha256 that can be fetched from the blob store. type layerLinkPathSpec struct { name string - tarSum string + digest digest.Digest } func (layerLinkPathSpec) pathSpec() {} @@ -152,7 +169,7 @@ func (layerLinkPathSpec) pathSpec() {} // library/ubuntu repository. The storage layer should access the tarsum from // the first repository to which the client has access. type layerIndexLinkPathSpec struct { - tarSum string + digest digest.Digest } func (layerIndexLinkPathSpec) pathSpec() {} @@ -160,6 +177,7 @@ func (layerIndexLinkPathSpec) pathSpec() {} // blobPath contains the path for the registry global blob store. For now, // this contains layer data, exclusively. type blobPathSpec struct { + // TODO(stevvooe): Port this to make better use of Digest type. alg string digest string } diff --git a/storage/paths_test.go b/storage/paths_test.go index 376966c56..5dc4c07c5 100644 --- a/storage/paths_test.go +++ b/storage/paths_test.go @@ -1,6 +1,10 @@ package storage -import "testing" +import ( + "testing" + + "github.com/docker/docker-registry/digest" +) func TestPathMapper(t *testing.T) { pm := &pathMapper{ @@ -15,13 +19,13 @@ func TestPathMapper(t *testing.T) { { spec: layerLinkPathSpec{ name: "foo/bar", - tarSum: "tarsum.v1+test:abcdef", + digest: digest.Digest("tarsum.v1+test:abcdef"), }, expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef", }, { spec: layerIndexLinkPathSpec{ - tarSum: "tarsum.v1+test:abcdef", + digest: digest.Digest("tarsum.v1+test:abcdef"), }, expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef", }, From c0fe9d72d1b80bfd68e1f09ab04fa83367dd7666 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Wed, 19 Nov 2014 14:59:05 -0800 Subject: [PATCH 3/4] Various adjustments to digest package for govet/golint --- digest/digest.go | 9 +++++---- digest/doc.go | 2 +- digest/verifiers.go | 16 ++++++++++------ digest/verifiers_test.go | 10 +++++----- storage/layerupload.go | 6 +++--- storage/paths.go | 4 ++-- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/digest/digest.go b/digest/digest.go index f2ce021ae..cbd0ab6bc 100644 --- a/digest/digest.go +++ b/digest/digest.go @@ -75,8 +75,8 @@ func ParseDigest(s string) (Digest, error) { return Digest(s), nil } -// DigestReader returns the most valid digest for the underlying content. -func DigestReader(rd io.Reader) (Digest, error) { +// FromReader returns the most valid digest for the underlying content. +func FromReader(rd io.Reader) (Digest, error) { // TODO(stevvooe): This is pretty inefficient to always be calculating a // sha256 hash to provide fallback, but it provides some nice semantics in @@ -114,8 +114,9 @@ func DigestReader(rd io.Reader) (Digest, error) { return d, nil } -func DigestBytes(p []byte) (Digest, error) { - return DigestReader(bytes.NewReader(p)) +// FromBytes digests the input and returns a Digest. +func FromBytes(p []byte) (Digest, error) { + return FromReader(bytes.NewReader(p)) } // Algorithm returns the algorithm portion of the digest. This will panic if diff --git a/digest/doc.go b/digest/doc.go index 2ce7698c2..278c50e01 100644 --- a/digest/doc.go +++ b/digest/doc.go @@ -1,4 +1,4 @@ -// This package provides a generalized type to opaquely represent message +// Package digest provides a generalized type to opaquely represent message // digests and their operations within the registry. The Digest type is // designed to serve as a flexible identifier in a content-addressable system. // More importantly, it provides tools and wrappers to work with tarsums and diff --git a/digest/verifiers.go b/digest/verifiers.go index e738026ac..26b2b2b25 100644 --- a/digest/verifiers.go +++ b/digest/verifiers.go @@ -11,6 +11,10 @@ import ( "github.com/docker/docker/pkg/tarsum" ) +// Verifier presents a general verification interface to be used with message +// digests and other byte stream verifications. Users instantiate a Verifier +// from one of the various methods, write the data under test to it then check +// the result with the Verified method. type Verifier interface { io.Writer @@ -23,7 +27,9 @@ type Verifier interface { // Reset() } -func DigestVerifier(d Digest) Verifier { +// NewDigestVerifier returns a verifier that compares the written bytes +// against a passed in digest. +func NewDigestVerifier(d Digest) Verifier { alg := d.Algorithm() switch alg { case "md5", "sha1", "sha256": @@ -62,13 +68,11 @@ func DigestVerifier(d Digest) Verifier { pw: pw, } } - - panic("unsupported digest: " + d) } -// LengthVerifier returns a verifier that returns true when the number of read -// bytes equals the expected parameter. -func LengthVerifier(expected int64) Verifier { +// NewLengthVerifier returns a verifier that returns true when the number of +// read bytes equals the expected parameter. +func NewLengthVerifier(expected int64) Verifier { return &lengthVerifier{ expected: expected, } diff --git a/digest/verifiers_test.go b/digest/verifiers_test.go index 77b02ed07..fb176cc1a 100644 --- a/digest/verifiers_test.go +++ b/digest/verifiers_test.go @@ -13,12 +13,12 @@ import ( func TestDigestVerifier(t *testing.T) { p := make([]byte, 1<<20) rand.Read(p) - digest, err := DigestBytes(p) + digest, err := FromBytes(p) if err != nil { t.Fatalf("unexpected error digesting bytes: %#v", err) } - verifier := DigestVerifier(digest) + verifier := NewDigestVerifier(digest) io.Copy(verifier, bytes.NewReader(p)) if !verifier.Verified() { @@ -30,7 +30,7 @@ func TestDigestVerifier(t *testing.T) { t.Fatalf("error creating tarfile: %v", err) } - digest, err = DigestReader(tf) + digest, err = FromReader(tf) if err != nil { t.Fatalf("error digesting tarsum: %v", err) } @@ -45,8 +45,8 @@ func TestDigestVerifier(t *testing.T) { // This is the most relevant example for the registry application. It's // effectively a read through pipeline, where the final sink is the digest // verifier. - verifier = DigestVerifier(digest) - lengthVerifier := LengthVerifier(expectedSize) + verifier = NewDigestVerifier(digest) + lengthVerifier := NewLengthVerifier(expectedSize) rd := io.TeeReader(tf, lengthVerifier) io.Copy(verifier, rd) diff --git a/storage/layerupload.go b/storage/layerupload.go index 4ad021626..c07927f17 100644 --- a/storage/layerupload.go +++ b/storage/layerupload.go @@ -241,8 +241,8 @@ func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst d return "", ErrLayerTarSumVersionUnsupported } - digestVerifier := digest.DigestVerifier(dgst) - lengthVerifier := digest.LengthVerifier(size) + digestVerifier := digest.NewDigestVerifier(dgst) + lengthVerifier := digest.NewLengthVerifier(size) // First, seek to the end of the file, checking the size is as expected. end, err := fp.Seek(0, os.SEEK_END) @@ -267,7 +267,7 @@ func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst d // sink. Instead, its read driven. This migth be okay. // Calculate an updated digest with the latest version. - dgst, err = digest.DigestReader(tr) + dgst, err = digest.FromReader(tr) if err != nil { return "", err } diff --git a/storage/paths.go b/storage/paths.go index aedba3207..18aef17ec 100644 --- a/storage/paths.go +++ b/storage/paths.go @@ -1,12 +1,12 @@ package storage import ( - "strings" - "github.com/docker/docker-registry/digest" "fmt" "path" + "strings" "github.com/docker/docker-registry/common" + "github.com/docker/docker-registry/digest" ) const storagePathVersion = "v2" From 56118905b854d6cfb03cee995c0749393e81a63b Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Wed, 19 Nov 2014 15:10:23 -0800 Subject: [PATCH 4/4] Include testutil package needed for tar-based tests --- common/testutil/tarfile.go | 95 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 common/testutil/tarfile.go diff --git a/common/testutil/tarfile.go b/common/testutil/tarfile.go new file mode 100644 index 000000000..08b796f5e --- /dev/null +++ b/common/testutil/tarfile.go @@ -0,0 +1,95 @@ +package testutil + +import ( + "archive/tar" + "bytes" + "crypto/rand" + "fmt" + "io" + "io/ioutil" + mrand "math/rand" + "time" + + "github.com/docker/docker/pkg/tarsum" +) + +// CreateRandomTarFile creates a random tarfile, returning it as an +// io.ReadSeeker along with its tarsum. An error is returned if there is a +// problem generating valid content. +func CreateRandomTarFile() (rs io.ReadSeeker, tarSum string, err error) { + nFiles := mrand.Intn(10) + 10 + target := &bytes.Buffer{} + wr := tar.NewWriter(target) + + // Perturb this on each iteration of the loop below. + header := &tar.Header{ + Mode: 0644, + ModTime: time.Now(), + Typeflag: tar.TypeReg, + Uname: "randocalrissian", + Gname: "cloudcity", + AccessTime: time.Now(), + ChangeTime: time.Now(), + } + + for fileNumber := 0; fileNumber < nFiles; fileNumber++ { + fileSize := mrand.Int63n(1<<20) + 1<<20 + + header.Name = fmt.Sprint(fileNumber) + header.Size = fileSize + + if err := wr.WriteHeader(header); err != nil { + return nil, "", err + } + + randomData := make([]byte, fileSize) + + // Fill up the buffer with some random data. + n, err := rand.Read(randomData) + + if n != len(randomData) { + return nil, "", fmt.Errorf("short read creating random reader: %v bytes != %v bytes", n, len(randomData)) + } + + if err != nil { + return nil, "", err + } + + nn, err := io.Copy(wr, bytes.NewReader(randomData)) + if nn != fileSize { + return nil, "", fmt.Errorf("short copy writing random file to tar") + } + + if err != nil { + return nil, "", err + } + + if err := wr.Flush(); err != nil { + return nil, "", err + } + } + + if err := wr.Close(); err != nil { + return nil, "", err + } + + reader := bytes.NewReader(target.Bytes()) + + // A tar builder that supports tarsum inline calculation would be awesome + // here. + ts, err := tarsum.NewTarSum(reader, true, tarsum.Version1) + if err != nil { + return nil, "", err + } + + nn, err := io.Copy(ioutil.Discard, ts) + if nn != int64(len(target.Bytes())) { + return nil, "", fmt.Errorf("short copy when getting tarsum of random layer: %v != %v", nn, len(target.Bytes())) + } + + if err != nil { + return nil, "", err + } + + return bytes.NewReader(target.Bytes()), ts.Sum(nil), nil +}