diff --git a/digest/digest.go b/digest/digest.go index cbd0ab6bc..6a3fdfd55 100644 --- a/digest/digest.go +++ b/digest/digest.go @@ -47,32 +47,9 @@ var ( // ParseDigest parses s and returns the validated digest object. An error will // be returned if the format is invalid. func ParseDigest(s string) (Digest, error) { - // Common case will be tarsum - _, err := common.ParseTarSum(s) - if err == nil { - return Digest(s), nil - } + d := Digest(s) - // Continue on for general parser - - i := strings.Index(s, ":") - if i < 0 { - return "", ErrDigestInvalidFormat - } - - // case: "sha256:" with no hex. - if i+1 == len(s) { - return "", ErrDigestInvalidFormat - } - - switch s[:i] { - case "md5", "sha1", "sha256": - break - default: - return "", ErrDigestUnsupported - } - - return Digest(s), nil + return d, d.Validate() } // FromReader returns the most valid digest for the underlying content. @@ -119,6 +96,38 @@ func FromBytes(p []byte) (Digest, error) { return FromReader(bytes.NewReader(p)) } +// Validate checks that the contents of d is a valid digest, returning an +// error if not. +func (d Digest) Validate() error { + s := string(d) + // Common case will be tarsum + _, err := common.ParseTarSum(s) + if err == nil { + return nil + } + + // Continue on for general parser + + i := strings.Index(s, ":") + if i < 0 { + return ErrDigestInvalidFormat + } + + // case: "sha256:" with no hex. + if i+1 == len(s) { + return ErrDigestInvalidFormat + } + + switch s[:i] { + case "md5", "sha1", "sha256": + break + default: + return ErrDigestUnsupported + } + + return nil +} + // Algorithm returns the algorithm portion of the digest. This will panic if // the underlying digest is not in a valid format. func (d Digest) Algorithm() string { diff --git a/storage/layer_test.go b/storage/layer_test.go index 03cba9b94..f04115e74 100644 --- a/storage/layer_test.go +++ b/storage/layer_test.go @@ -304,18 +304,13 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, blobDigestSHA := digest.NewDigest("sha256", h) blobPath, err := pathMapper.path(blobPathSpec{ - alg: blobDigestSHA.Algorithm(), - digest: blobDigestSHA.Hex(), + digest: dgst, }) if err := driver.PutContent(blobPath, p); err != nil { return "", err } - layerIndexLinkPath, err := pathMapper.path(layerIndexLinkPathSpec{ - digest: dgst, - }) - if err != nil { return "", err } @@ -329,11 +324,7 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, return "", err } - if err := driver.PutContent(layerLinkPath, []byte(blobDigestSHA.String())); err != nil { - return "", nil - } - - if err = driver.PutContent(layerIndexLinkPath, []byte(name)); err != nil { + if err := driver.PutContent(layerLinkPath, []byte(dgst)); err != nil { return "", nil } diff --git a/storage/layerstore.go b/storage/layerstore.go index 6abd50e3b..2544ec4fb 100644 --- a/storage/layerstore.go +++ b/storage/layerstore.go @@ -1,11 +1,8 @@ package storage import ( - "fmt" - "strings" "time" - "github.com/Sirupsen/logrus" "github.com/docker/docker-registry/digest" "github.com/docker/docker-registry/storagedriver" ) @@ -33,31 +30,17 @@ func (ls *layerStore) Exists(name string, digest digest.Digest) (bool, error) { } func (ls *layerStore) Fetch(name string, digest digest.Digest) (Layer, error) { - repos, err := ls.resolveContainingRepositories(digest) - + blobPath, err := ls.resolveBlobPath(name, digest) if err != nil { - // TODO(stevvooe): Unknown tarsum error: need to wrap. - return nil, err + switch err := err.(type) { + case storagedriver.PathNotFoundError, *storagedriver.PathNotFoundError: + return nil, ErrLayerUnknown + default: + return nil, err + } } - // TODO(stevvooe): Access control for layer pulls need to happen here: we - // have a list of repos that "own" the tarsum that need to be checked - // against the list of repos to which we have pull access. The argument - // repos needs to be filtered against that access list. - - _, blobPath, err := ls.resolveBlobPath(repos, digest) - - if err != nil { - // TODO(stevvooe): Map this error correctly, perhaps in the callee. - return nil, err - } - - p, err := ls.pathMapper.path(blobPath) - if err != nil { - return nil, err - } - - fr, err := newFileReader(ls.driver, p) + fr, err := newFileReader(ls.driver, blobPath) if err != nil { switch err := err.(type) { case storagedriver.PathNotFoundError, *storagedriver.PathNotFoundError: @@ -117,69 +100,30 @@ func (ls *layerStore) newLayerUpload(lus LayerUploadState) LayerUpload { } } -func (ls *layerStore) resolveContainingRepositories(digest digest.Digest) ([]string, error) { - // Lookup the layer link in the index by tarsum id. - layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{digest: digest}) +// resolveBlobId looks up the blob location in the repositories from a +// layer/blob link file, returning blob path or an error on failure. +func (ls *layerStore) resolveBlobPath(name string, dgst digest.Digest) (string, error) { + pathSpec := layerLinkPathSpec{name: name, digest: dgst} + layerLinkPath, err := ls.pathMapper.path(pathSpec) + if err != nil { - return nil, err + return "", err } - layerIndexLinkContent, err := ls.driver.GetContent(layerIndexLinkPath) + layerLinkContent, err := ls.driver.GetContent(layerLinkPath) if err != nil { - switch err := err.(type) { - case storagedriver.PathNotFoundError: - return nil, ErrLayerUnknown - default: - return nil, err - } + return "", err } - results := strings.Split(string(layerIndexLinkContent), "\n") + // NOTE(stevvooe): The content of the layer link should match the digest. + // This layer of indirection is for name-based content protection. - // clean these up - for i, result := range results { - results[i] = strings.TrimSpace(result) + linked, err := digest.ParseDigest(string(layerLinkContent)) + if err != nil { + return "", err } - return results, nil -} - -// resolveBlobId lookups up the tarSum in the various repos to find the blob -// link, returning the repo name and blob path spec or an error on failure. -func (ls *layerStore) resolveBlobPath(repos []string, digest digest.Digest) (name string, bps blobPathSpec, err error) { - - for _, repo := range repos { - pathSpec := layerLinkPathSpec{name: repo, digest: digest} - layerLinkPath, err := ls.pathMapper.path(pathSpec) - - if err != nil { - // TODO(stevvooe): This looks very lazy, may want to collect these - // errors and report them if we exit this for loop without - // resolving the blob id. - logrus.Debugf("error building linkLayerPath (%V): %v", pathSpec, err) - continue - } - - layerLinkContent, err := ls.driver.GetContent(layerLinkPath) - if err != nil { - logrus.Debugf("error getting layerLink content (%V): %v", pathSpec, err) - continue - } - - // Yay! We've resolved our blob id and we're ready to go. - parts := strings.SplitN(strings.TrimSpace(string(layerLinkContent)), ":", 2) - - if len(parts) != 2 { - return "", bps, fmt.Errorf("invalid blob reference: %q", string(layerLinkContent)) - } - - name = repo - bp := blobPathSpec{alg: parts[0], digest: parts[1]} - - return repo, bp, nil - } - - // TODO(stevvooe): Map this error to repo not found, but it basically - // means we exited the loop above without finding a blob link. - return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and digest=%v", repos, digest) + bp := blobPathSpec{digest: linked} + + return ls.pathMapper.path(bp) } diff --git a/storage/layerupload.go b/storage/layerupload.go index f134aa191..e0336a0f9 100644 --- a/storage/layerupload.go +++ b/storage/layerupload.go @@ -6,8 +6,6 @@ import ( "io/ioutil" "os" "path/filepath" - "sort" - "strings" "code.google.com/p/go-uuid/uuid" @@ -285,10 +283,9 @@ func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst d // writeLayer actually writes the the layer file into its final destination. // The layer should be validated before commencing the write. -func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest digest.Digest) error { +func (luc *layerUploadController) writeLayer(fp layerFile, size int64, dgst digest.Digest) error { blobPath, err := luc.layerStore.pathMapper.path(blobPathSpec{ - alg: digest.Algorithm(), - digest: digest.Hex(), + digest: dgst, }) if err != nil { @@ -324,8 +321,8 @@ func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest di return nil } -// linkLayer links a valid, written layer blog into the registry, first -// linking the repository namespace, then adding it to the layerindex. +// linkLayer links a valid, written layer blob into the registry under the +// named repository for the upload controller. func (luc *layerUploadController) linkLayer(digest digest.Digest) error { layerLinkPath, err := luc.layerStore.pathMapper.path(layerLinkPathSpec{ name: luc.Name(), @@ -336,56 +333,7 @@ func (luc *layerUploadController) linkLayer(digest digest.Digest) error { return err } - if err := luc.layerStore.driver.PutContent(layerLinkPath, []byte(digest)); err != nil { - return nil - } - - // Link the layer into the name index. - layerIndexLinkPath, err := luc.layerStore.pathMapper.path(layerIndexLinkPathSpec{ - digest: digest, - }) - - if err != nil { - return err - } - - // Read back the name index file. If it exists, create it. If not, add the - // new repo to the name list. - - // TODO(stevvooe): This is very racy, as well. Reconsider using list for - // this operation? - layerIndexLinkContent, err := luc.layerStore.driver.GetContent(layerIndexLinkPath) - if err != nil { - switch err := err.(type) { - case storagedriver.PathNotFoundError: - layerIndexLinkContent = []byte(luc.Name()) - default: - return err - } - } - layerIndexLinkContent = luc.maybeAddNameToLayerIndexLinkContent(layerIndexLinkContent) - - // Write the index content back to the index. - return luc.layerStore.driver.PutContent(layerIndexLinkPath, layerIndexLinkContent) -} - -func (luc *layerUploadController) maybeAddNameToLayerIndexLinkContent(content []byte) []byte { - names := strings.Split(string(content), "\n") - var found bool - // Search the names and find ours - for _, name := range names { - if name == luc.Name() { - found = true - } - } - - if !found { - names = append(names, luc.Name()) - } - - sort.Strings(names) - - return []byte(strings.Join(names, "\n")) + return luc.layerStore.driver.PutContent(layerLinkPath, []byte(digest)) } // localFSLayerUploadStore implements a local layerUploadStore. There are some diff --git a/storage/paths.go b/storage/paths.go index 87c0b2fda..ecc3dd32a 100644 --- a/storage/paths.go +++ b/storage/paths.go @@ -11,11 +11,6 @@ import ( const storagePathVersion = "v2" -// TODO(sday): This needs to be changed: all layers for an image will be -// linked under the repository. Lookup from tarsum to name is not necessary, -// so we can remove the layer index. For this to properly work, image push -// must link the images layers under the repo. - // pathMapper maps paths based on "object names" and their ids. The "object // names" mapped by pathMapper are internal to the storage system. // @@ -27,31 +22,21 @@ const storagePathVersion = "v2" // -> manifests/ // // -> layers/ -// -> tarsum/ -// -> / -// -> / -// -// -> layerindex/ -// -> tarsum/ -// -> / -// -> / -// -// -> blob/sha256 -// +// +// -> blob/ +// // // There are few important components to this path layout. First, we have the // repository store identified by name. This contains the image manifests and // a layer store with links to CAS blob ids. Outside of the named repo area, -// we have the layerindex, which provides lookup from tarsum id to repo -// storage. The blob store contains the actual layer data and any other data -// that can be referenced by a CAS id. +// we have the the blob store. It contains the actual layer data and any other +// data that can be referenced by a CAS id. // // We cover the path formats implemented by this path mapper below. // // manifestPathSpec: /v2/repositories//manifests/ // layerLinkPathSpec: /v2/repositories//layers/tarsum/// -// layerIndexLinkPathSpec: /v2/layerindex/tarsum/// -// blobPathSpec: /v2/blob/sha256// +// blobPathSpec: /v2/blob/// // // For more information on the semantic meaning of each path and their // contents, please see the path spec documentation. @@ -60,16 +45,6 @@ type pathMapper struct { version string // should be a constant? } -// TODO(stevvooe): This storage layout currently allows lookup to layer stores -// by repo name via the tarsum. The layer index lookup could come with an -// access control check against the link contents before proceeding. The main -// problem with this comes with a collision in the tarsum algorithm: if party -// A uploads a layer before party B, with an identical tarsum, party B may -// never be able to get access to the tarsum stored under party A. We'll need -// a way for party B to associate with a "unique" version of their image. This -// may be as simple as forcing the client to re-upload images to which they -// don't have access. - // path returns the path identified by spec. func (pm *pathMapper) path(spec pathSpec) (string, error) { @@ -93,44 +68,34 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) { // TODO(sday): May need to store manifest by architecture. return path.Join(append(repoPrefix, v.name, "manifests", v.tag)...), nil case layerLinkPathSpec: - if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") { - // Only tarsum is supported, for now - return "", fmt.Errorf("unsupport content digest: %v", v.digest) - } - - tsi, err := common.ParseTarSum(v.digest.String()) - + components, err := digestPathComoponents(v.digest) if err != nil { - // TODO(sday): This will return an InvalidTarSumError from - // ParseTarSum but we may want to wrap this. This error should - // never be encountered in production, since the tarsum should be - // validated by this point. return "", err } - return path.Join(append(append(repoPrefix, v.name, "layers"), - tarSumInfoPathComponents(tsi)...)...), nil - case layerIndexLinkPathSpec: - if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") { + // For now, only map tarsum paths. + if components[0] != "tarsum" { // Only tarsum is supported, for now - return "", fmt.Errorf("unsupport content digest: %v", v.digest) + return "", fmt.Errorf("unsupported content digest: %v", v.digest) } - tsi, err := common.ParseTarSum(v.digest.String()) + layerLinkPathComponents := append(repoPrefix, v.name, "layers") - if err != nil { - // TODO(sday): This will return an InvalidTarSumError from - // ParseTarSum but we may want to wrap this. This error should - // never be encountered in production, since the tarsum should be - // validated by this point. - return "", err - } - - return path.Join(append(append(rootPrefix, "layerindex"), - tarSumInfoPathComponents(tsi)...)...), nil + return path.Join(append(layerLinkPathComponents, components...)...), nil case blobPathSpec: - p := path.Join([]string{pm.root, pm.version, "blob", v.alg, v.digest[:2], v.digest}...) - return p, nil + components, err := digestPathComoponents(v.digest) + if err != nil { + return "", err + } + + // For now, only map tarsum paths. + if components[0] != "tarsum" { + // Only tarsum is supported, for now + return "", fmt.Errorf("unsupported content digest: %v", v.digest) + } + + blobPathPrefix := append(rootPrefix, "blob") + return path.Join(append(blobPathPrefix, components...)...), nil default: // TODO(sday): This is an internal error. Ensure it doesn't escape (panic?). return "", fmt.Errorf("unknown path spec: %#v", v) @@ -172,40 +137,61 @@ type layerLinkPathSpec struct { func (layerLinkPathSpec) pathSpec() {} -// layerIndexLinkPath provides a path to a registry global layer store, -// indexed by tarsum. The target file will contain the repo name of the -// "owner" of the layer. An example name link file follows: -// -// library/ubuntu -// foo/bar -// -// The above file has the tarsum stored under the foo/bar repository and the -// library/ubuntu repository. The storage layer should access the tarsum from -// the first repository to which the client has access. -type layerIndexLinkPathSpec struct { - digest digest.Digest -} - -func (layerIndexLinkPathSpec) pathSpec() {} +// blobAlgorithmReplacer does some very simple path sanitization for user +// input. Mostly, this is to provide some heirachry for tarsum digests. Paths +// should be "safe" before getting this far due to strict digest requirements +// but we can add further path conversion here, if needed. +var blobAlgorithmReplacer = strings.NewReplacer( + "+", "/", + ".", "/", + ";", "/", +) // blobPath contains the path for the registry global blob store. For now, // this contains layer data, exclusively. type blobPathSpec struct { - // TODO(stevvooe): Port this to make better use of Digest type. - alg string - digest string + digest digest.Digest } func (blobPathSpec) pathSpec() {} -// tarSumInfoPath generates storage path components for the provided -// TarSumInfo. -func tarSumInfoPathComponents(tsi common.TarSumInfo) []string { - version := tsi.Version - - if version == "" { - version = "v0" +// digestPathComoponents provides a consistent path breakdown for a given +// digest. For a generic digest, it will be as follows: +// +// // +// +// Most importantly, for tarsum, the layout looks like this: +// +// tarsum//// +// +// This is slightly specialized to store an extra version path for version 0 +// tarsums. +func digestPathComoponents(dgst digest.Digest) ([]string, error) { + if err := dgst.Validate(); err != nil { + return nil, err } - return []string{"tarsum", version, tsi.Algorithm, tsi.Digest} + algorithm := blobAlgorithmReplacer.Replace(dgst.Algorithm()) + hex := dgst.Hex() + prefix := []string{algorithm} + suffix := []string{ + hex[:2], // Breaks heirarchy up. + hex, + } + + if tsi, err := common.ParseTarSum(dgst.String()); err == nil { + // We have a tarsum! + version := tsi.Version + if version == "" { + version = "v0" + } + + prefix = []string{ + "tarsum", + version, + tsi.Algorithm, + } + } + + return append(prefix, suffix...), nil } diff --git a/storage/paths_test.go b/storage/paths_test.go index d2ff542f5..33681f392 100644 --- a/storage/paths_test.go +++ b/storage/paths_test.go @@ -28,20 +28,25 @@ func TestPathMapper(t *testing.T) { name: "foo/bar", digest: digest.Digest("tarsum.v1+test:abcdef"), }, - expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef", - }, - { - spec: layerIndexLinkPathSpec{ - digest: digest.Digest("tarsum.v1+test:abcdef"), - }, - expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef", + expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/ab/abcdef", }, { spec: blobPathSpec{ - alg: "sha512", - digest: "abcdefabcdefabcdef908909909", + digest: digest.Digest("tarsum.dev+sha512:abcdefabcdefabcdef908909909"), }, - expected: "/pathmapper-test/blob/sha512/ab/abcdefabcdefabcdef908909909", + expected: "/pathmapper-test/blob/tarsum/dev/sha512/ab/abcdefabcdefabcdef908909909", + }, + { + spec: blobPathSpec{ + digest: digest.Digest("tarsum.v1+sha256:abcdefabcdefabcdef908909909"), + }, + expected: "/pathmapper-test/blob/tarsum/v1/sha256/ab/abcdefabcdefabcdef908909909", + }, + { + spec: blobPathSpec{ + digest: digest.Digest("tarsum+sha256:abcdefabcdefabcdef908909909"), + }, + expected: "/pathmapper-test/blob/tarsum/v0/sha256/ab/abcdefabcdefabcdef908909909", }, } { p, err := pm.path(testcase.spec)