diff --git a/common/names.go b/common/names.go
index d856467bf..c16cbaad8 100644
--- a/common/names.go
+++ b/common/names.go
@@ -9,6 +9,10 @@ import (
 // separated by one period, dash or underscore.
 var RepositoryNameComponentRegexp = regexp.MustCompile(`[a-z0-9]{2,}(?:[._-][a-z0-9]+)*`)

+// TODO(stevvooe): RepositoryName needs to be limited to some fixed length.
+// Looking at path prefixes and the S3 limitation of 1024, this should likely
+// be around 512 bytes. 256 bytes might be more manageable.
+
 // RepositoryNameRegexp builds on RepositoryNameComponentRegexp to allow 2 to
 // 5 path components, separated by a forward slash.
 var RepositoryNameRegexp = regexp.MustCompile(`(?:` + RepositoryNameComponentRegexp.String() + `/){1,4}` + RepositoryNameComponentRegexp.String())
diff --git a/storage/doc.go b/storage/doc.go
new file mode 100644
index 000000000..ecc3fc0b8
--- /dev/null
+++ b/storage/doc.go
@@ -0,0 +1,3 @@
+// Package storage contains storage services for use in the registry application.
+// It should be considered an internal package, as of Go 1.4.
+package storage
diff --git a/storage/paths.go b/storage/paths.go
new file mode 100644
index 000000000..76991c1fc
--- /dev/null
+++ b/storage/paths.go
@@ -0,0 +1,179 @@
+package storage
+
+import (
+	"fmt"
+	"path"
+
+	"github.com/docker/docker-registry/common"
+)
+
+const storagePathVersion = "v2"
+
+// pathMapper maps paths based on "object names" and their ids. The "object
+// names" mapped by pathMapper are internal to the storage system.
+//
+// The path layout in the storage backend will be roughly as follows:
+//
+//	<root>/v2
+//		-> repositories/
+//			-> <name>/
+//				-> images/
+//					<image manifests>
+//				-> layers/
+//					-> tarsum/
+//						-> <tarsum version>/
+//							-> <tarsum algorithm>/
+//								<tarsum digest> (link to a blob id)
+//		-> layerindex/
+//			-> tarsum/
+//				-> <tarsum version>/
+//					-> <tarsum algorithm>/
+//						<tarsum digest> (links to repo names)
+//		-> blob/sha256
+//			<first two characters of digest>/<digest>
+//
+// There are a few important components to this path layout. First, we have the
+// repository store identified by name. This contains the image manifests and
+// a layer store with links to CAS blob ids.
Outside of the named repo area, +// we have the layerindex, which provides lookup from tarsum id to repo +// storage. The blob store contains the actual layer data and any other data +// that can be referenced by a CAS id. +// +// We cover the path formats implemented by this path mapper below. +// +// layerLinkPathSpec: /v2/repositories//layers/tarsum/// +// layerIndexLinkPathSpec: /v2/layerindex/tarsum/// +// blobPathSpec: /v2/blob/sha256// +// +// For more information on the semantic meaning of each path and their +// contents, please see the path spec documentation. +type pathMapper struct { + root string + version string // should be a constant? +} + +// TODO(stevvooe): This storage layout currently allows lookup to layer stores +// by repo name via the tarsum. The layer index lookup could come with an +// access control check against the link contents before proceeding. The main +// problem with this comes with a collision in the tarsum algorithm: if party +// A uploads a layer before party B, with an identical tarsum, party B may +// never be able to get access to the tarsum stored under party A. We'll need +// a way for party B to associate with a "unique" version of their image. This +// may be as simple as forcing the client to re-upload images to which they +// don't have access. + +// path returns the path identified by spec. +func (pm *pathMapper) path(spec pathSpec) (string, error) { + + // Switch on the path object type and return the appropriate path. At + // first glance, one may wonder why we don't use an interface to + // accomplish this. By keep the formatting separate from the pathSpec, we + // keep separate the path generation componentized. These specs could be + // passed to a completely different mapper implementation and generate a + // different set of paths. 
+ // + // For example, imagine migrating from one backend to the other: one could + // build a filesystem walker that converts a string path in one version, + // to an intermediate path object, than can be consumed and mapped by the + // other version. + + switch v := spec.(type) { + case layerLinkPathSpec: + tsi, err := common.ParseTarSum(v.tarSum) + + if err != nil { + // TODO(sday): This will return an InvalidTarSumError from + // ParseTarSum but we may want to wrap this. This error should + // never be encountered in production, since the tarsum should be + // validated by this point. + return "", err + } + + p := path.Join(append([]string{pm.root, pm.version, "repositories", v.name, "layers"}, tarSumInfoPathComponents(tsi)...)...) + + return p, nil + case layerIndexLinkPathSpec: + tsi, err := common.ParseTarSum(v.tarSum) + + if err != nil { + // TODO(sday): This will return an InvalidTarSumError from + // ParseTarSum but we may want to wrap this. This error should + // never be encountered in production, since the tarsum should be + // validated by this point. + return "", err + } + + p := path.Join(append([]string{pm.root, pm.version, "layerindex"}, tarSumInfoPathComponents(tsi)...)...) + + return p, nil + case blobPathSpec: + p := path.Join([]string{pm.root, pm.version, "blob", v.alg, v.digest[:2], v.digest}...) + return p, nil + default: + // TODO(sday): This is an internal error. Ensure it doesn't escape (panic?). + return "", fmt.Errorf("unknown path spec: %#v", v) + } +} + +// pathSpec is a type to mark structs as path specs. There is no +// implementation because we'd like to keep the specs and the mappers +// decoupled. +type pathSpec interface { + pathSpec() +} + +// layerLink specifies a path for a layer link, which is a file with a blob +// id. The layer link will contain a content addressable blob id reference +// into the blob store. 
// The format of the contents is as follows:
//
//	<algorithm>:<hex digest>
//
// The following example of the file contents is more illustrative:
//
//	sha256:96443a84ce518ac22acb2e985eda402b58ac19ce6f91980bde63726a79d80b36
//
// This indicates that there is a blob with the given id/digest, calculated
// via sha256, that can be fetched from the blob store.
type layerLinkPathSpec struct {
	name, tarSum string
}

// layerIndexLinkPathSpec provides a path to a registry global layer store,
// indexed by tarsum. The target file will contain the repo name of the
// "owner" of the layer. An example name link file follows:
//
//	library/ubuntu
//	foo/bar
//
// The above file has the tarsum stored under the foo/bar repository and the
// library/ubuntu repository. The storage layer should access the tarsum from
// the first repository to which the client has access.
type layerIndexLinkPathSpec struct {
	tarSum string
}

// blobPathSpec contains the path for the registry global blob store. For now,
// this contains layer data, exclusively.
type blobPathSpec struct {
	alg, digest string
}

// The empty pathSpec methods below only mark each struct as a path spec;
// they carry no behavior.

func (layerLinkPathSpec) pathSpec()      {}
func (layerIndexLinkPathSpec) pathSpec() {}
func (blobPathSpec) pathSpec()           {}

// tarSumInfoPathComponents generates storage path components for the provided
// TarSumInfo.
+func tarSumInfoPathComponents(tsi common.TarSumInfo) []string { + version := tsi.Version + + if version == "" { + version = "v0" + } + + return []string{"tarsum", version, tsi.Algorithm, tsi.Digest} +} diff --git a/storage/paths_test.go b/storage/paths_test.go new file mode 100644 index 000000000..376966c56 --- /dev/null +++ b/storage/paths_test.go @@ -0,0 +1,45 @@ +package storage + +import "testing" + +func TestPathMapper(t *testing.T) { + pm := &pathMapper{ + root: "/pathmapper-test", + } + + for _, testcase := range []struct { + spec pathSpec + expected string + err error + }{ + { + spec: layerLinkPathSpec{ + name: "foo/bar", + tarSum: "tarsum.v1+test:abcdef", + }, + expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef", + }, + { + spec: layerIndexLinkPathSpec{ + tarSum: "tarsum.v1+test:abcdef", + }, + expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef", + }, + { + spec: blobPathSpec{ + alg: "sha512", + digest: "abcdefabcdefabcdef908909909", + }, + expected: "/pathmapper-test/blob/sha512/ab/abcdefabcdefabcdef908909909", + }, + } { + p, err := pm.path(testcase.spec) + if err != nil { + t.Fatal(err) + } + + if p != testcase.expected { + t.Fatalf("unexpected path generated: %q != %q", p, testcase.expected) + } + } +}