From 8e44c1d2097c13de3ae929ddeaa77ba7452d0325 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Thu, 13 Nov 2014 15:16:54 -0800 Subject: [PATCH] Initial implementation of storage layer path mapper We've added a path mapper to support simple mapping between path objects used in the storage layer and the underlying file system. The target of this is to ensure that paths are only calculated in a single place and their format is separated from the data that makes up the path components. This commit only includes spec implementation to support layer reads. Further specs will come along with their implementations. --- common/names.go | 4 + storage/doc.go | 3 + storage/paths.go | 179 ++++++++++++++++++++++++++++++++++++++++++ storage/paths_test.go | 45 +++++++++++ 4 files changed, 231 insertions(+) create mode 100644 storage/doc.go create mode 100644 storage/paths.go create mode 100644 storage/paths_test.go diff --git a/common/names.go b/common/names.go index d856467bf..c16cbaad8 100644 --- a/common/names.go +++ b/common/names.go @@ -9,6 +9,10 @@ import ( // separated by one period, dash or underscore. var RepositoryNameComponentRegexp = regexp.MustCompile(`[a-z0-9]{2,}(?:[._-][a-z0-9]+)*`) +// TODO(stevvooe): RepositoryName needs to be limited to some fixed length. +// Looking at path prefixes and the s3 limitation of 1024, this should likely be +// around 512 bytes. 256 bytes might be more manageable. + // RepositoryNameRegexp builds on RepositoryNameComponentRegexp to allow 2 to // 5 path components, separated by a forward slash. var RepositoryNameRegexp = regexp.MustCompile(`(?:` + RepositoryNameComponentRegexp.String() + `/){1,4}` + RepositoryNameComponentRegexp.String()) diff --git a/storage/doc.go b/storage/doc.go new file mode 100644 index 000000000..ecc3fc0b8 --- /dev/null +++ b/storage/doc.go @@ -0,0 +1,3 @@ +// Package storage contains storage services for use in the registry application. +// It should be considered an internal package, as of Go 1.4. 
+package storage diff --git a/storage/paths.go b/storage/paths.go new file mode 100644 index 000000000..76991c1fc --- /dev/null +++ b/storage/paths.go @@ -0,0 +1,179 @@ +package storage + +import ( + "fmt" + "path" + + "github.com/docker/docker-registry/common" +) + +const storagePathVersion = "v2" + +// pathMapper maps paths based on "object names" and their ids. The "object +// names" mapped by pathMapper are internal to the storage system. +// +// The path layout in the storage backend will be roughly as follows: +// +// /v2 +// -> repositories/ +// -> <name>/ +// -> images/ +// <image manifests> +// -> layers/ +// -> tarsum/ +// -> <tarsum version>/ +// -> <tarsum hash alg>/ +// <layer links to blob store> +// -> layerindex/ +// -> tarsum/ +// -> <tarsum version>/ +// -> <tarsum hash alg>/ +// <repo name links> +// -> blob/sha256 +// <blob data> +// +// There are a few important components to this path layout. First, we have the +// repository store identified by name. This contains the image manifests and +// a layer store with links to CAS blob ids. Outside of the named repo area, +// we have the layerindex, which provides lookup from tarsum id to repo +// storage. The blob store contains the actual layer data and any other data +// that can be referenced by a CAS id. +// +// We cover the path formats implemented by this path mapper below. +// +// layerLinkPathSpec: <root>/v2/repositories/<name>/layers/tarsum/<tarsum version>/<tarsum hash alg>/<tarsum hash> +// layerIndexLinkPathSpec: <root>/v2/layerindex/tarsum/<tarsum version>/<tarsum hash alg>/<tarsum hash> +// blobPathSpec: <root>/v2/blob/sha256/<first two characters of digest>/<digest> +// +// For more information on the semantic meaning of each path and their +// contents, please see the path spec documentation. +type pathMapper struct { + root string + version string // should be a constant? +} + +// TODO(stevvooe): This storage layout currently allows lookup to layer stores +// by repo name via the tarsum. The layer index lookup could come with an +// access control check against the link contents before proceeding. 
The main +// problem with this comes with a collision in the tarsum algorithm: if party +// A uploads a layer before party B, with an identical tarsum, party B may +// never be able to get access to the tarsum stored under party A. We'll need +// a way for party B to associate with a "unique" version of their image. This +// may be as simple as forcing the client to re-upload images to which they +// don't have access. + +// path returns the path identified by spec. It returns an error if the spec's tarsum cannot be parsed or the spec type is unknown. +func (pm *pathMapper) path(spec pathSpec) (string, error) { + + // Switch on the path object type and return the appropriate path. At + // first glance, one may wonder why we don't use an interface to + // accomplish this. By keeping the formatting separate from the pathSpec, we + // keep the path generation componentized. These specs could be + // passed to a completely different mapper implementation and generate a + // different set of paths. + // + // For example, imagine migrating from one backend to the other: one could + // build a filesystem walker that converts a string path in one version, + // to an intermediate path object, that can be consumed and mapped by the + // other version. + + switch v := spec.(type) { + case layerLinkPathSpec: + tsi, err := common.ParseTarSum(v.tarSum) + + if err != nil { + // TODO(sday): This will return an InvalidTarSumError from + // ParseTarSum but we may want to wrap this. This error should + // never be encountered in production, since the tarsum should be + // validated by this point. + return "", err + } + + p := path.Join(append([]string{pm.root, pm.version, "repositories", v.name, "layers"}, tarSumInfoPathComponents(tsi)...)...) + + return p, nil + case layerIndexLinkPathSpec: + tsi, err := common.ParseTarSum(v.tarSum) + + if err != nil { + // TODO(sday): This will return an InvalidTarSumError from + // ParseTarSum but we may want to wrap this. 
This error should + // never be encountered in production, since the tarsum should be + // validated by this point. + return "", err + } + + p := path.Join(append([]string{pm.root, pm.version, "layerindex"}, tarSumInfoPathComponents(tsi)...)...) + + return p, nil + case blobPathSpec: + p := path.Join([]string{pm.root, pm.version, "blob", v.alg, v.digest[:2], v.digest}...) + return p, nil + default: + // TODO(sday): This is an internal error. Ensure it doesn't escape (panic?). + return "", fmt.Errorf("unknown path spec: %#v", v) + } +} + +// pathSpec is a type to mark structs as path specs. There is no +// implementation because we'd like to keep the specs and the mappers +// decoupled. +type pathSpec interface { + pathSpec() +} + +// layerLinkPathSpec specifies a path for a layer link, which is a file with a blob +// id. The layer link will contain a content addressable blob id reference +// into the blob store. The format of the contents is as follows: +// +// <algorithm>:<digest> +// +// The following example of the file contents is more illustrative: +// +// sha256:96443a84ce518ac22acb2e985eda402b58ac19ce6f91980bde63726a79d80b36 +// +// This indicates that there is a blob with the id/digest, calculated via +// sha256 that can be fetched from the blob store. +type layerLinkPathSpec struct { + name string + tarSum string +} + +func (layerLinkPathSpec) pathSpec() {} + +// layerIndexLinkPathSpec provides a path to a registry global layer store, +// indexed by tarsum. The target file will contain the repo name of the +// "owner" of the layer. An example name link file follows: +// +// library/ubuntu +// foo/bar +// +// The above file has the tarsum stored under the foo/bar repository and the +// library/ubuntu repository. The storage layer should access the tarsum from +// the first repository to which the client has access. 
+type layerIndexLinkPathSpec struct { + tarSum string +} + +func (layerIndexLinkPathSpec) pathSpec() {} + +// blobPathSpec contains the path for the registry global blob store. For now, +// this contains layer data, exclusively. NOTE(review): pathMapper.path slices digest[:2], so a digest shorter than two bytes will panic there. +type blobPathSpec struct { + alg string + digest string +} + +func (blobPathSpec) pathSpec() {} + +// tarSumInfoPathComponents generates storage path components for the provided +// TarSumInfo. An empty version is mapped to "v0". +func tarSumInfoPathComponents(tsi common.TarSumInfo) []string { + version := tsi.Version + + if version == "" { + version = "v0" + } + + return []string{"tarsum", version, tsi.Algorithm, tsi.Digest} +} diff --git a/storage/paths_test.go b/storage/paths_test.go new file mode 100644 index 000000000..376966c56 --- /dev/null +++ b/storage/paths_test.go @@ -0,0 +1,45 @@ +package storage + +import "testing" + +func TestPathMapper(t *testing.T) { + pm := &pathMapper{ + root: "/pathmapper-test", + } + + for _, testcase := range []struct { + spec pathSpec + expected string + err error + }{ + { + spec: layerLinkPathSpec{ + name: "foo/bar", + tarSum: "tarsum.v1+test:abcdef", + }, + expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef", + }, + { + spec: layerIndexLinkPathSpec{ + tarSum: "tarsum.v1+test:abcdef", + }, + expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef", + }, + { + spec: blobPathSpec{ + alg: "sha512", + digest: "abcdefabcdefabcdef908909909", + }, + expected: "/pathmapper-test/blob/sha512/ab/abcdefabcdefabcdef908909909", + }, + } { + p, err := pm.path(testcase.spec) + if err != nil { + t.Fatal(err) + } + + if p != testcase.expected { + t.Fatalf("unexpected path generated: %q != %q", p, testcase.expected) + } + } +}