diff --git a/api/v2/descriptors.go b/api/v2/descriptors.go index 97e41b748..b70575206 100644 --- a/api/v2/descriptors.go +++ b/api/v2/descriptors.go @@ -1,9 +1,246 @@ package v2 -import "net/http" +import ( + "net/http" + "regexp" -// TODO(stevvooe): Add route descriptors for each named route, along with -// accepted methods, parameters, returned status codes and error codes. + "github.com/docker/docker-registry/common" + "github.com/docker/docker-registry/digest" +) + +var ( + nameParameterDescriptor = ParameterDescriptor{ + Name: "name", + Type: "string", + Format: common.RepositoryNameRegexp.String(), + Required: true, + Description: `Name of the target repository.`, + } + + tagParameterDescriptor = ParameterDescriptor{ + Name: "tag", + Type: "string", + Format: common.TagNameRegexp.String(), + Required: true, + Description: `Tag of the target manifiest.`, + } + + uuidParameterDescriptor = ParameterDescriptor{ + Name: "uuid", + Type: "opaque", + Required: true, + Description: `A uuid identifying the upload. 
This field can accept almost anything.`, + } + + digestPathParameter = ParameterDescriptor{ + Name: "digest", + Type: "path", + Required: true, + Format: digest.DigestRegexp.String(), + Description: `Digest of desired blob.`, + } + + authHeader = ParameterDescriptor{ + Name: "Authorization", + Type: "string", + Description: "rfc7235 compliant authorization header.", + Format: " ", + Examples: []string{"Bearer dGhpcyBpcyBhIGZha2UgYmVhcmVyIHRva2VuIQ=="}, + } + + authChallengeHeader = ParameterDescriptor{ + Name: "WWW-Authenticate", + Type: "string", + Description: "An RFC7235 compliant authentication challenge header.", + Format: ` realm="", ..."`, + Examples: []string{ + `Bearer realm="https://auth.docker.com/", service="registry.docker.com", scopes="repository:library/ubuntu:pull"`, + }, + } + + contentLengthZeroHeader = ParameterDescriptor{ + Name: "Content-Length", + Description: "The `Content-Length` header must be zero and the body must be empty.", + Type: "integer", + Format: "0", + } +) + +const ( + manifestBody = `{ + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": +}` + + errorsBody = `{ + "errors:" [{ + "code": , + "message": "", + "detail": ... + }, + ... + ] +}` +) + +// APIDescriptor exports descriptions of the layout of the v2 registry API. +var APIDescriptor = struct { + // RouteDescriptors provides a list of the routes available in the API. + RouteDescriptors []RouteDescriptor + + // ErrorDescriptors provides a list of the error codes and their + // associated documentation and metadata. + ErrorDescriptors []ErrorDescriptor +}{ + RouteDescriptors: routeDescriptors, + ErrorDescriptors: errorDescriptors, +} + +// RouteDescriptor describes a route specified by name. +type RouteDescriptor struct { + // Name is the name of the route, as specified in RouteNameXXX exports. + // These names a should be considered a unique reference for a route. 
If + // the route is registered with gorilla, this is the name that will be + // used. + Name string + + // Path is a gorilla/mux-compatible regexp that can be used to match the + // route. For any incoming method and path, only one route descriptor + // should match. + Path string + + // Entity should be a short, human-readable description of the object + // targeted by the endpoint. + Entity string + + // Description should provide an accurate overview of the functionality + // provided by the route. + Description string + + // Methods should describe the various HTTP methods that may be used on + // this route, including request and response formats. + Methods []MethodDescriptor +} + +// MethodDescriptor provides a description of the requests that may be +// conducted with the target method. +type MethodDescriptor struct { + + // Method is an HTTP method, such as GET, PUT or POST. + Method string + + // Description should provide an overview of the functionality provided by + // the covered method, suitable for use in documentation. Use of markdown + // here is encouraged. + Description string + + // Requests is a slice of request descriptors enumerating how this + // endpoint may be used. + Requests []RequestDescriptor +} + +// RequestDescriptor covers a particular set of headers and parameters that +// can be carried out with the parent method. It's most helpful to have one +// RequestDescriptor per API use case. +type RequestDescriptor struct { + // Name provides a short identifier for the request, usable as a title or + // to provide quick context for the particular request. + Name string + + // Description should cover the request's purpose, covering any details for + // this particular use case. + Description string + + // Headers describes headers that must be used with the HTTP request. + Headers []ParameterDescriptor + + // PathParameters enumerate the parameterized path components for the + // given request, as defined in the route's regular expression. 
+ PathParameters []ParameterDescriptor + + // QueryParameters provides a list of query parameters for the given + // request. + QueryParameters []ParameterDescriptor + + // Body describes the format of the request body. + Body BodyDescriptor + + // Successes enumerates the possible responses that are considered to be + // the result of a successful request. + Successes []ResponseDescriptor + + // Failures covers the possible failures from this particular request. + Failures []ResponseDescriptor +} + +// ResponseDescriptor describes the components of an API response. +type ResponseDescriptor struct { + // Name provides a short identifier for the response, usable as a title or + // to provide quick context for the particular response. + Name string + + // Description should provide a brief overview of the role of the + // response. + Description string + + // StatusCode specifies the status received by this particular response. + StatusCode int + + // Headers covers any headers that may be returned from the response. + Headers []ParameterDescriptor + + // ErrorCodes enumerates the error codes that may be returned along with + // the response. + ErrorCodes []ErrorCode + + // Body describes the body of the response, if any. + Body BodyDescriptor +} + +// BodyDescriptor describes a request body and its expected content type. For +// the most part, it should be example json or some placeholder for body +// data in documentation. +type BodyDescriptor struct { + ContentType string + Format string +} + +// ParameterDescriptor describes the format of a request parameter, which may +// be a header, path parameter or query parameter. +type ParameterDescriptor struct { + // Name is the name of the parameter, either of the path component or + // query parameter. + Name string + + // Type specifies the type of the parameter, such as string, integer, etc. + Type string + + // Description provides a human-readable description of the parameter. 
+ Description string + + // Required means the field is required when set. + Required bool + + // Format is a specifying the string format accepted by this parameter. + Format string + + // Regexp is a compiled regular expression that can be used to validate + // the contents of the parameter. + Regexp *regexp.Regexp + + // Examples provides multiple examples for the values that might be valid + // for this parameter. + Examples []string +} // ErrorDescriptor provides relevant information about a given error code. type ErrorDescriptor struct { @@ -29,9 +266,613 @@ type ErrorDescriptor struct { HTTPStatusCodes []int } +var routeDescriptors = []RouteDescriptor{ + { + Name: RouteNameBase, + Path: "/v2/", + Entity: "Base", + Description: `Base V2 API route. Typically, this can be used for lightweight version checks and to validate registry authorization.`, + Methods: []MethodDescriptor{ + { + Method: "GET", + Description: "Check that the endpoint implements Docker Registry API V2.", + Requests: []RequestDescriptor{ + { + Headers: []ParameterDescriptor{ + authHeader, + }, + Successes: []ResponseDescriptor{ + { + Description: "The API implements V2 protocol and is accessible.", + StatusCode: http.StatusOK, + }, + }, + Failures: []ResponseDescriptor{ + { + Description: "The client is not authorized to access the registry.", + StatusCode: http.StatusUnauthorized, + Headers: []ParameterDescriptor{ + authChallengeHeader, + }, + }, + { + Description: "The registry does not implement the V2 API.", + StatusCode: http.StatusNotFound, + }, + }, + }, + }, + }, + }, + }, + { + Name: RouteNameTags, + Path: "/v2/{name:" + common.RepositoryNameRegexp.String() + "}/tags/list", + Entity: "Tags", + Description: "Retrieve information about tags.", + Methods: []MethodDescriptor{ + { + Method: "GET", + Description: "Fetch the tags under the repository identified by `name`.", + Requests: []RequestDescriptor{ + { + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + }, + 
Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusOK, + Description: "A list of tags for the named repository.", + Body: BodyDescriptor{ + ContentType: "application/json", + Format: `{ + "name": , + "tags": [ + , + ... + ] +}`, + }, + }, + }, + Failures: []ResponseDescriptor{ + { + StatusCode: http.StatusNotFound, + Description: "The repository is not known to the registry.", + }, + { + StatusCode: http.StatusUnauthorized, + Description: "The client doesn't have access to repository.", + }, + }, + }, + }, + }, + }, + }, + { + Name: RouteNameManifest, + Path: "/v2/{name:" + common.RepositoryNameRegexp.String() + "}/manifests/{tag:" + common.TagNameRegexp.String() + "}", + Entity: "Manifest", + Description: "Create, update and retrieve manifests.", + Methods: []MethodDescriptor{ + { + Method: "GET", + Description: "Fetch the manifest identified by `name` and `tag`.", + Requests: []RequestDescriptor{ + { + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + tagParameterDescriptor, + }, + Successes: []ResponseDescriptor{ + { + Description: "The manifest idenfied by `name` and `tag`.", + StatusCode: http.StatusOK, + Body: BodyDescriptor{ + ContentType: "application/json", + Format: manifestBody, + }, + }, + }, + Failures: []ResponseDescriptor{ + { + Description: "The name or tag was invalid.", + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeNameInvalid, + ErrorCodeTagInvalid, + }, + Body: BodyDescriptor{ + ContentType: "application/json", + Format: errorsBody, + }, + }, + { + Description: "The named manifest is not known to the registry.", + StatusCode: http.StatusNotFound, + ErrorCodes: []ErrorCode{ + ErrorCodeNameUnknown, + ErrorCodeManifestUnknown, + }, + Body: BodyDescriptor{ + ContentType: "application/json", + Format: errorsBody, + }, + }, + }, + }, + }, + }, + { + Method: "PUT", + Description: "Put the manifest identified by `name` and `tag`.", + Requests: []RequestDescriptor{ + { + Headers: 
[]ParameterDescriptor{ + authHeader, + }, + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + tagParameterDescriptor, + }, + Body: BodyDescriptor{ + ContentType: "application/json", + Format: manifestBody, + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusAccepted, + }, + }, + Failures: []ResponseDescriptor{ + { + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeNameInvalid, + ErrorCodeTagInvalid, + ErrorCodeManifestInvalid, + ErrorCodeManifestUnverified, + ErrorCodeBlobUnknown, + }, + }, + { + Description: "One or more layers may be missing during a manifest upload. If so, the missing layers will be enumerated in the error response.", + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeBlobUnknown, + }, + Body: BodyDescriptor{ + ContentType: "application/json", + Format: `{ + "errors:" [{ + "code": "BLOB_UNKNOWN", + "message": "blob unknown to registry", + "detail": { + "digest": + } + }, + ... + ] +}`, + }, + }, + { + StatusCode: http.StatusUnauthorized, + Headers: []ParameterDescriptor{ + authChallengeHeader, + }, + }, + }, + }, + }, + }, + { + Method: "DELETE", + Description: "Delete the manifest identified by `name` and `tag`.", + Requests: []RequestDescriptor{ + { + Headers: []ParameterDescriptor{ + authHeader, + }, + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + tagParameterDescriptor, + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusAccepted, + }, + }, + Failures: []ResponseDescriptor{ + { + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeNameInvalid, + ErrorCodeTagInvalid, + }, + }, + { + StatusCode: http.StatusUnauthorized, + Headers: []ParameterDescriptor{ + authChallengeHeader, + }, + }, + { + StatusCode: http.StatusNotFound, + ErrorCodes: []ErrorCode{ + ErrorCodeNameUnknown, + ErrorCodeManifestUnknown, + }, + }, + }, + }, + }, + }, + }, + }, + + { + Name: RouteNameBlob, + Path: "/v2/{name:" + 
common.RepositoryNameRegexp.String() + "}/blobs/{digest:" + digest.DigestRegexp.String() + "}", + Entity: "Blob", + Description: "Fetch the blob identified by `name` and `digest`. Used to fetch layers by tarsum digest.", + Methods: []MethodDescriptor{ + + { + Method: "GET", + Description: "Retrieve the blob from the registry identified by `digest`.", + Requests: []RequestDescriptor{ + { + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + digestPathParameter, + }, + Successes: []ResponseDescriptor{ + { + Description: "The blob identified by `digest` is available. The blob content will be present in the body of the request.", + StatusCode: http.StatusOK, + Body: BodyDescriptor{ + ContentType: "application/octet-stream", + Format: "", + }, + }, + { + Description: "The blob identified by `digest` is available at the provided location.", + StatusCode: http.StatusTemporaryRedirect, + Headers: []ParameterDescriptor{ + { + Name: "Location", + Type: "url", + Description: "The location where the layer should be accessible.", + Format: "", + }, + }, + }, + }, + Failures: []ResponseDescriptor{ + { + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeNameInvalid, + ErrorCodeDigestInvalid, + }, + }, + { + StatusCode: http.StatusUnauthorized, + }, + { + StatusCode: http.StatusNotFound, + ErrorCodes: []ErrorCode{ + ErrorCodeNameUnknown, + ErrorCodeBlobUnknown, + }, + }, + }, + }, + }, + }, + { + Method: "HEAD", + Description: "Check if the blob is known to the registry.", + Requests: []RequestDescriptor{ + { + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + digestPathParameter, + }, + }, + }, + }, + // TODO(stevvooe): We may want to add a PUT request here to + // kickoff an upload of a blob, integrated with the blob upload + // API. 
+ }, + }, + + { + Name: RouteNameBlobUpload, + Path: "/v2/{name:" + common.RepositoryNameRegexp.String() + "}/blobs/uploads/", + Entity: "Intiate Blob Upload", + Description: "Initiate a blob upload. This endpoint can be used to create resumable uploads or monolithic uploads.", + Methods: []MethodDescriptor{ + { + Method: "POST", + Description: "Initiate a resumable blob upload. If successful, an upload location will be provided to complete the upload. Optionally, if the `digest` parameter is present, the request body will be used to complete the upload in a single request.", + Requests: []RequestDescriptor{ + { + Name: "Initiate Monolithic Blob Upload", + Description: "Upload a blob identified by the `digest` parameter in single request. This upload will not be resumable unless a recoverable error is returned.", + Headers: []ParameterDescriptor{ + authHeader, + { + Name: "Content-Length", + Type: "integer", + Format: "", + }, + }, + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + }, + QueryParameters: []ParameterDescriptor{ + { + Name: "digest", + Type: "query", + Format: "", + Regexp: digest.DigestRegexp, + Description: `Digest of uploaded blob. 
If present, the upload will be completed, in a single request, with contents of the request body as the resulting blob.`, + }, + }, + Body: BodyDescriptor{ + ContentType: "application/octect-stream", + Format: "", + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusCreated, + Headers: []ParameterDescriptor{ + { + Name: "Location", + Type: "url", + Format: "", + }, + contentLengthZeroHeader, + }, + }, + }, + Failures: []ResponseDescriptor{ + { + Name: "Invalid Name or Digest", + StatusCode: http.StatusBadRequest, + ErrorCodes: []ErrorCode{ + ErrorCodeDigestInvalid, + ErrorCodeNameInvalid, + }, + }, + { + Name: "Unauthorized", + StatusCode: http.StatusUnauthorized, + Headers: []ParameterDescriptor{ + authChallengeHeader, + }, + ErrorCodes: []ErrorCode{ + ErrorCodeDigestInvalid, + ErrorCodeNameInvalid, + }, + }, + }, + }, + { + Name: "Initiate Resumable Blob Upload", + Description: "Initiate a resumable blob upload with an empty request body.", + Headers: []ParameterDescriptor{ + authHeader, + contentLengthZeroHeader, + }, + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + }, + Successes: []ResponseDescriptor{ + { + Description: "The upload has been created. The `Location` header must be used to complete the upload. The response should identical to a `GET` request on the contents of the returned `Location` header.", + StatusCode: http.StatusAccepted, + Headers: []ParameterDescriptor{ + contentLengthZeroHeader, + { + Name: "Location", + Type: "url", + Format: "/v2//blobs/uploads/", + Description: "The location of the created upload. Clients should use the contents verbatim to complete the upload, adding parameters where required.", + }, + { + Name: "Range", + Format: "0-0", + Description: "Range header indicating the progress of the upload. 
When starting an upload, it will return an empty range, since no content has been received.", + }, + }, + }, + }, + }, + }, + }, + }, + }, + + { + Name: RouteNameBlobUploadChunk, + Path: "/v2/{name:" + common.RepositoryNameRegexp.String() + "}/blobs/uploads/{uuid}", + Entity: "Blob Upload", + Description: "Interact with blob uploads. Clients should never assemble URLs for this endpoint and should only take it through the `Location` header on related API requests.", + Methods: []MethodDescriptor{ + { + Method: "GET", + Description: "Retrieve status of upload identified by `uuid`. The primary purpose of this endpoint is to resolve the current status of a resumable upload.", + Requests: []RequestDescriptor{ + { + Description: "Retrieve the progress of the current upload, as reported by the `Range` header.", + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + uuidParameterDescriptor, + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusNoContent, + Headers: []ParameterDescriptor{ + { + Name: "Range", + Type: "header", + Format: "0-", + Description: "Range indicating the current progress of the upload.", + }, + }, + }, + }, + }, + }, + }, + { + Method: "HEAD", + Description: "Retrieve status of upload identified by `uuid`. 
This is identical to the GET request.", + Requests: []RequestDescriptor{ + { + Description: "Retrieve the progress of the current upload, as reported by the `Range` header.", + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + uuidParameterDescriptor, + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusNoContent, + Headers: []ParameterDescriptor{ + { + Name: "Range", + Type: "header", + Format: "0-", + Description: "Range indicating the current progress of the upload.", + }, + }, + }, + }, + }, + }, + }, + { + Method: "PATCH", + Description: "Upload a chunk of data for the specified upload.", + Requests: []RequestDescriptor{ + { + Description: "Upload a chunk of data to specified upload without completing the upload.", + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + uuidParameterDescriptor, + }, + Headers: []ParameterDescriptor{ + { + Name: "Content-Range", + Type: "header", + Format: "-", + Required: true, + Description: "Range of bytes identifying the desired block of content represented by the body. Start must the end offset retrieved via status check plus one. 
Note that this is a non-standard use of the `Content-Range` header.", + }, + { + Name: "Content-Length", + Type: "integer", + Format: "", + Description: "Length of the chunk being uploaded, corresponding the length of the request body.", + }, + }, + Body: BodyDescriptor{ + ContentType: "application/octet-stream", + Format: "", + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusNoContent, + Headers: []ParameterDescriptor{ + { + Name: "Range", + Type: "header", + Format: "0-", + Description: "Range indicating the current progress of the upload.", + }, + contentLengthZeroHeader, + }, + }, + }, + }, + }, + }, + { + Method: "PUT", + Description: "Complete the upload specified by `uuid`, optionally appending the body as the final chunk.", + Requests: []RequestDescriptor{ + { + // TODO(stevvooe): Break this down into three separate requests: + // 1. Complete an upload where all data has already been sent. + // 2. Complete an upload where the entire body is in the PUT. + // 3. Complete an upload where the final, partial chunk is the body. + + Description: "Upload the _final_ chunk of data.", + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + uuidParameterDescriptor, + }, + QueryParameters: []ParameterDescriptor{ + { + Name: "digest", + Type: "string", + Format: "", + Regexp: digest.DigestRegexp, + Required: true, + Description: `Digest of uploaded blob.`, + }, + }, + Successes: []ResponseDescriptor{ + { + StatusCode: http.StatusNoContent, + Headers: []ParameterDescriptor{ + { + Name: "Content-Range", + Type: "header", + Format: "-", + Description: "Range of bytes identifying the desired block of content represented by the body. Start must match the end of offset retrieved via status check. 
Note that this is a non-standard use of the `Content-Range` header.", + }, + { + Name: "Content-Length", + Type: "integer", + Format: "", + Description: "Length of the chunk being uploaded, corresponding the length of the request body.", + }, + }, + Body: BodyDescriptor{ + ContentType: "application/octet-stream", + Format: "", + }, + }, + }, + }, + }, + }, + { + Method: "DELETE", + Description: "Cancel outstanding upload processes, releasing associated resources. If this is not called, the unfinished uploads will eventually timeout.", + Requests: []RequestDescriptor{ + { + Description: "Cancel the upload specified by `uuid`.", + PathParameters: []ParameterDescriptor{ + nameParameterDescriptor, + uuidParameterDescriptor, + }, + }, + }, + }, + }, + }, +} + // ErrorDescriptors provides a list of HTTP API Error codes that may be // encountered when interacting with the registry API. -var ErrorDescriptors = []ErrorDescriptor{ +var errorDescriptors = []ErrorDescriptor{ { Code: ErrorCodeUnknown, Value: "UNKNOWN", @@ -142,10 +983,10 @@ var errorCodeToDescriptors map[ErrorCode]ErrorDescriptor var idToDescriptors map[string]ErrorDescriptor func init() { - errorCodeToDescriptors = make(map[ErrorCode]ErrorDescriptor, len(ErrorDescriptors)) - idToDescriptors = make(map[string]ErrorDescriptor, len(ErrorDescriptors)) + errorCodeToDescriptors = make(map[ErrorCode]ErrorDescriptor, len(errorDescriptors)) + idToDescriptors = make(map[string]ErrorDescriptor, len(errorDescriptors)) - for _, descriptor := range ErrorDescriptors { + for _, descriptor := range errorDescriptors { errorCodeToDescriptors[descriptor.Code] = descriptor idToDescriptors[descriptor.Value] = descriptor } diff --git a/api/v2/errors_test.go b/api/v2/errors_test.go index d2fc091ac..70f3c2a95 100644 --- a/api/v2/errors_test.go +++ b/api/v2/errors_test.go @@ -11,7 +11,7 @@ import ( // TestErrorCodes ensures that error code format, mappings and // marshaling/unmarshaling. round trips are stable. 
func TestErrorCodes(t *testing.T) { - for _, desc := range ErrorDescriptors { + for _, desc := range errorDescriptors { if desc.Code.String() != desc.Value { t.Fatalf("error code string incorrect: %q != %q", desc.Code.String(), desc.Value) } diff --git a/api/v2/routes.go b/api/v2/routes.go index 7ebe61d66..ef9336009 100644 --- a/api/v2/routes.go +++ b/api/v2/routes.go @@ -1,9 +1,6 @@ package v2 -import ( - "github.com/docker/docker-registry/common" - "github.com/gorilla/mux" -) +import "github.com/gorilla/mux" // The following are definitions of the name under which all V2 routes are // registered. These symbols can be used to look up a route based on the name. @@ -31,39 +28,9 @@ func Router() *mux.Router { router := mux.NewRouter(). StrictSlash(true) - // GET /v2/ Check Check that the registry implements API version 2(.1) - router. - Path("/v2/"). - Name(RouteNameBase) - - // GET /v2//manifest/ Image Manifest Fetch the image manifest identified by name and tag. - // PUT /v2//manifest/ Image Manifest Upload the image manifest identified by name and tag. - // DELETE /v2//manifest/ Image Manifest Delete the image identified by name and tag. - router. - Path("/v2/{name:" + common.RepositoryNameRegexp.String() + "}/manifests/{tag:" + common.TagNameRegexp.String() + "}"). - Name(RouteNameManifest) - - // GET /v2//tags/list Tags Fetch the tags under the repository identified by name. - router. - Path("/v2/{name:" + common.RepositoryNameRegexp.String() + "}/tags/list"). - Name(RouteNameTags) - - // GET /v2//blob/ Layer Fetch the blob identified by digest. - router. - Path("/v2/{name:" + common.RepositoryNameRegexp.String() + "}/blobs/{digest:[a-zA-Z0-9-_+.]+:[a-zA-Z0-9-_+.=]+}"). - Name(RouteNameBlob) - - // POST /v2//blob/upload/ Layer Upload Initiate an upload of the layer identified by tarsum. - router. - Path("/v2/{name:" + common.RepositoryNameRegexp.String() + "}/blobs/uploads/"). 
- Name(RouteNameBlobUpload) - - // GET /v2//blob/upload/ Layer Upload Get the status of the upload identified by tarsum and uuid. - // PUT /v2//blob/upload/ Layer Upload Upload all or a chunk of the upload identified by tarsum and uuid. - // DELETE /v2//blob/upload/ Layer Upload Cancel the upload identified by layer and uuid - router. - Path("/v2/{name:" + common.RepositoryNameRegexp.String() + "}/blobs/uploads/{uuid}"). - Name(RouteNameBlobUploadChunk) + for _, descriptor := range routeDescriptors { + router.Path(descriptor.Path).Name(descriptor.Name) + } return router } diff --git a/api/v2/routes_test.go b/api/v2/routes_test.go index 9969ebcc4..29cd81064 100644 --- a/api/v2/routes_test.go +++ b/api/v2/routes_test.go @@ -140,7 +140,13 @@ func TestRouter(t *testing.T) { }, } { // Register the endpoint - router.GetRoute(testcase.RouteName).Handler(testHandler) + route := router.GetRoute(testcase.RouteName) + if route == nil { + t.Fatalf("route for name %q not found", testcase.RouteName) + } + + route.Handler(testHandler) + u := server.URL + testcase.RequestURI resp, err := http.Get(u) diff --git a/cmd/registry-api-descriptor-template/main.go b/cmd/registry-api-descriptor-template/main.go new file mode 100644 index 000000000..07b955a2e --- /dev/null +++ b/cmd/registry-api-descriptor-template/main.go @@ -0,0 +1,118 @@ +// registry-api-descriptor-template uses the APIDescriptor defined in the +// api/v2 package to execute templates passed to the command line. +// +// For example, to generate a new API specification, one would execute the +// following command from the repo root: +// +// $ registry-api-descriptor-template doc/SPEC.md.tmpl > doc/SPEC.md +// +// The templates are passed in the api/v2.APIDescriptor object. Please see the +// package documentation for fields available on that object. The template +// syntax is from Go's standard library text/template package. For information +// on Go's template syntax, please see golang.org/pkg/text/template. 
+package main + +import ( + "log" + "net/http" + "os" + "path/filepath" + "regexp" + "text/template" + + "github.com/docker/docker-registry/api/v2" +) + +var spaceRegex = regexp.MustCompile(`\n\s*`) + +func main() { + + if len(os.Args) != 2 { + log.Fatalln("please specify a template to execute.") + } + + path := os.Args[1] + filename := filepath.Base(path) + + funcMap := template.FuncMap{ + "removenewlines": func(s string) string { + return spaceRegex.ReplaceAllString(s, " ") + }, + "statustext": http.StatusText, + "prettygorilla": prettyGorillaMuxPath, + } + + tmpl := template.Must(template.New(filename).Funcs(funcMap).ParseFiles(path)) + + if err := tmpl.Execute(os.Stdout, v2.APIDescriptor); err != nil { + log.Fatalln(err) + } +} + +// prettyGorillaMuxPath removes the regular expressions from a gorilla/mux +// route string, making it suitable for documentation. +func prettyGorillaMuxPath(s string) string { + // Stateful parser that removes regular expressions from gorilla + // routes. It correctly handles balanced bracket pairs. 
+ + var output string + var label string + var level int + +start: + if s[0] == '{' { + s = s[1:] + level++ + goto capture + } + + output += string(s[0]) + s = s[1:] + + goto end +capture: + switch s[0] { + case '{': + level++ + case '}': + level-- + + if level == 0 { + s = s[1:] + goto label + } + case ':': + s = s[1:] + goto skip + default: + label += string(s[0]) + } + s = s[1:] + goto capture +skip: + switch s[0] { + case '{': + level++ + case '}': + level-- + } + s = s[1:] + + if level == 0 { + goto label + } + + goto skip +label: + if label != "" { + output += "<" + label + ">" + label = "" + } +end: + if s != "" { + goto start + } + + return output + +} diff --git a/cmd/registry-api-doctable-gen/main.go b/cmd/registry-api-doctable-gen/main.go deleted file mode 100644 index a9e71ffff..000000000 --- a/cmd/registry-api-doctable-gen/main.go +++ /dev/null @@ -1,95 +0,0 @@ -// registry-api-doctable-gen uses various descriptors within the registry code -// base to generate markdown tables for use in documentation. This is only -// meant to facilitate updates to documentation and not as an automated tool. 
-// -// For now, this only includes support for error codes: -// -// $ registry-api-doctable-gen errors -// -package main - -import ( - "fmt" - "io" - "log" - "os" - "reflect" - "strings" - "text/tabwriter" - - "github.com/docker/docker-registry/api/v2" -) - -func main() { - - if len(os.Args) < 2 { - log.Fatalln("please specify a table to generate: (errors)") - } - - switch os.Args[1] { - case "errors": - dumpErrors(os.Stdout) - default: - log.Fatalln("unknown descriptor table:", os.Args[1]) - } - -} - -func dumpErrors(wr io.Writer) { - writer := tabwriter.NewWriter(os.Stdout, 8, 8, 0, '\t', 0) - defer writer.Flush() - - fmt.Fprint(writer, "|") - dtype := reflect.TypeOf(v2.ErrorDescriptor{}) - var fieldsPrinted int - for i := 0; i < dtype.NumField(); i++ { - field := dtype.Field(i) - if field.Name == "Value" { - continue - } - - fmt.Fprint(writer, field.Name, "|") - fieldsPrinted++ - } - - divider := strings.Repeat("-", 8) - var parts []string - for i := 0; i < fieldsPrinted; i++ { - parts = append(parts, divider) - } - divider = strings.Join(parts, "|") - - fmt.Fprintln(writer, "\n"+divider) - - for _, descriptor := range v2.ErrorDescriptors { - fmt.Fprint(writer, "|") - - v := reflect.ValueOf(descriptor) - for i := 0; i < dtype.NumField(); i++ { - value := v.Field(i).Interface() - field := v.Type().Field(i) - if field.Name == "Value" { - continue - } else if field.Name == "Description" { - value = strings.Replace(value.(string), "\n", " ", -1) - } else if field.Name == "Code" { - value = fmt.Sprintf("`%s`", value) - } else if field.Name == "HTTPStatusCodes" { - if len(value.([]int)) > 0 { - var codes []string - for _, code := range value.([]int) { - codes = append(codes, fmt.Sprint(code)) - } - value = strings.Join(codes, ", ") - } else { - value = "Any" - } - - } - - fmt.Fprint(writer, value, "|") - } - - fmt.Fprint(writer, "\n") - } -} diff --git a/digest/digest.go b/digest/digest.go index 6a3fdfd55..08abba5ca 100644 --- a/digest/digest.go +++ 
b/digest/digest.go @@ -7,6 +7,7 @@ import ( "hash" "io" "io/ioutil" + "regexp" "strings" "github.com/docker/docker-registry/common" @@ -36,6 +37,9 @@ func NewDigest(alg string, h hash.Hash) Digest { return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil))) } +// DigestRegexp matches valid digest types. +var DigestRegexp = regexp.MustCompile(`[a-zA-Z0-9-_+.]+:[a-zA-Z0-9-_+.=]+`) + var ( // ErrDigestInvalidFormat returned when digest format invalid. ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format") @@ -125,6 +129,8 @@ func (d Digest) Validate() error { return ErrDigestUnsupported } + // TODO(stevvooe): Use DigestRegexp to validate digest here. + return nil } diff --git a/doc/SPEC.md b/doc/SPEC.md new file mode 100644 index 000000000..efe90bdaf --- /dev/null +++ b/doc/SPEC.md @@ -0,0 +1,1630 @@ +# Docker Registry API V2.1 + +> **Note**: This specification has been ported over from the proposal on +> docker/docker#9015. Much of the language in this document is still written +> in the proposal tense and needs to be converted. + +## Abstract + +> **TODO**: Merge this section into the overview/introduction. + +The docker registry is a service to manage information about docker images and +enable their distribution. While the current registry is usable, there are +several problems with the architecture that have led to this proposal. For +relevant details, please see the following issues: + +- docker/docker#8093 +- docker/docker-registry#612 + +The main drivers of this proposal are changes to the docker image format, +covered in docker/docker#8093. The new, self-contained image manifest +simplifies the image definition and the underlying backend layout. To reduce +bandwidth usage, the new registry will be architected to avoid uploading +existing layers and will support resumable layer uploads. 
+ +While out of scope for this specification, the URI layout of the new API will +be structured to support a rich Authentication and Authorization model by +leveraging namespaces. + +Furthermore, to bring docker registry in line with docker core, the registry is written in Go. + +## Scope + +> **TODO**: Merge this section into the overview/introduction. + +This proposal covers the URL layout and protocols of the Docker Registry V2 +JSON API. This will affect the docker core registry API and the rewrite of +docker-registry. + +This includes the following features: + +- Namespace-oriented URI Layout +- PUSH/PULL registry server for V2 image manifest format +- Resumable layer PUSH support +- V2 Client library implementation + +While authentication and authorization support will influence this +specification, details of the protocol will be left to a future specification. +Other features marked as next generation will be incorporated when the initial +support is complete. Please see the road map for details. + +## Use Cases + +> **TODO**: Merge this section into the overview/introduction. + +For the most part, the use cases of the former registry API apply to the new +version. Differentiating use cases are covered below. + +### Resumable Push + +Company X's build servers lose connectivity to the docker registry before +completing an image layer transfer. After connectivity returns, the build +server attempts to re-upload the image. The registry notifies the build server +that the upload has already been partially attempted. The build server +responds by only sending the remaining data to complete the image file. + +### Resumable Pull + +Company X is having more connectivity problems but this time in their +deployment datacenter. When downloading an image, the connection is +interrupted before completion. The client keeps the partial data and uses http +`Range` requests to avoid downloading repeated data.
+ +### Layer Upload De-duplication + +Company Y's build system creates two identical docker layers from build +processes A and B. Build process A completes uploading the layer before B. +When process B attempts to upload the layer, the registry indicates that it's +not necessary because the layer is already known. + +If process A and B upload the same layer at the same time, both operations +will proceed and the first to complete will be stored in the registry (Note: +we may modify this to prevent dogpile with some locking mechanism). + +## Overview + +This section covers client flows and details of the API endpoints. All +endpoints will be prefixed by the API version and the repository name: + + /v2// + +For example, for an API endpoint that will work with the `library/ubuntu` +repository, the URI prefix will be: + + /v2/library/ubuntu/ + +This scheme provides rich access control over various operations and methods +using the URI prefix and http methods that can be controlled in a variety of +ways. + +Classically, repository names have always been two path components where each +path component is less than 30 characters. The V2 registry API does not +enforce this. The rules for a repository name are as follows: + +1. A repository name is broken up into _path components_. A component of a + repository name must be at least two characters, optionally separated by + periods, dashes or underscores. More strictly, it must match the regular + expression `[a-z0-9]+(?:[._-][a-z0-9]+)*` and the matched result must be 2 + or more characters in length. +2. The name of a repository must have at least two path components, separated + by a forward slash. +3. The total length of a repository name, including slashes, must be less than + 256 characters. + +These name requirements _only_ apply to the registry API and should accept a +superset of what is supported by other docker ecosystem components.
+ +All endpoints should support aggressive http caching, compression and range +headers, where appropriate. The new API attempts to leverage HTTP semantics +where possible but may break from standards to implement targeted features. + +For detail on individual endpoints, please see the _Detail_ section. + +### Errors + +Actionable failure conditions, covered in detail in their relevant sections, +are reported as part of 4xx responses, in a json response body. One or more +errors will be returned in the following format: + + { + "errors": [{ + "code": , + "message": , + "detail": + }, + ... + ] + } + +The `code` field will be a unique identifier, all caps with underscores by +convention. The `message` field will be a human readable string. The optional +`detail` field may contain arbitrary json data providing information the +client can use to resolve the issue. + +While the client can take action on certain error codes, the registry may add +new error codes over time. All client implementations should treat unknown +error codes as `UNKNOWN`, allowing future error codes to be added without +breaking API compatibility. For the purposes of the specification, error codes +will only be added and never removed. + +For a complete account of all error codes, please see the _Detail_ section. + +### API Version Check + +A minimal endpoint, mounted at `/v2/`, will provide version support information +based on its response statuses. The request format is as follows: + + GET /v2/ + +If a `200 OK` response is returned, the registry implements the V2(.1) +registry API and the client may proceed safely with other V2 operations. +Optionally, the response may contain information about the supported paths in +the response body. The client should be prepared to ignore this data. + +If a `401 Unauthorized` response is returned, the client should take action +based on the contents of the "WWW-Authenticate" header and try the endpoint +again.
Depending on access control setup, the client may still have to +authenticate against different resources, even if this check succeeds. + +If a `404 Not Found` response status, or other unexpected status, is returned, +the client should proceed with the assumption that the registry does not +implement V2 of the API. + +### Pulling An Image + +An "image" is a combination of a JSON manifest and individual layer files. The +process of pulling an image centers around retrieving these two components. + +The first step in pulling an image is to retrieve the manifest. For reference, +the relevant manifest fields for the registry are the following: + + field | description | +----------|------------------------------------------------| +name | The name of the image. | +tag | The tag for this version of the image. | +fsLayers | A list of layer descriptors (including tarsum) | +signature | A JWS used to verify the manifest content | + +For more information about the manifest format, please see +[docker/docker#8093](https://github.com/docker/docker/issues/8093). + +When the manifest is in hand, the client must verify the signature to ensure +the names and layers are valid. Once confirmed, the client will then use the +tarsums to download the individual layers. Layers are stored as blobs in +the V2 registry API, keyed by their tarsum digest. + +#### Pulling an Image Manifest + +The image manifest can be fetched with the following url: + +``` +GET /v2//manifests/ +``` + +The "name" and "tag" parameters identify the image and are required. + +A `404 Not Found` response will be returned if the image is unknown to the +registry. If the image exists and the response is successful, the image +manifest will be returned, with the following format (see docker/docker#8093 +for details): + + { + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ...
+ ] + ], + "history": , + "signature": + } + +The client should verify the returned manifest signature for authenticity +before fetching layers. + +#### Pulling a Layer + +Layers are stored in the blob portion of the registry, keyed by tarsum digest. +Pulling a layer is carried out by a standard http request. The URL is as +follows: + + GET /v2//blobs/ + +Access to a layer will be gated by the `name` of the repository but is +identified uniquely in the registry by `tarsum`. The `tarsum` parameter is an +opaque field, to be interpreted by the tarsum library. + +This endpoint may issue a 307 (302 for /blobs/uploads/ +``` + +The parameters of this request are the image namespace under which the layer +will be linked. Responses to this request are covered below. + +##### Existing Layers + +The existence of a layer can be checked via a `HEAD` request to the blob store +API. The request should be formatted as follows: + +``` +HEAD /v2//blobs/ +``` + +If the layer with the tarsum specified in `digest` is available, a 200 OK +response will be received, with no actual body content (this is according to +http specification). The response will look as follows: + +``` +200 OK +Content-Length: +``` + +When this response is received, the client can assume that the layer is +already available in the registry under the given name and should take no +further action to upload the layer. Note that the binary digests may differ +for the existing registry layer, but the tarsums will be guaranteed to match. + +##### Uploading the Layer + +If the POST request is successful, a `202 Accepted` response will be returned +with the upload URL in the `Location` header: + +``` +202 Accepted +Location: /v2//blobs/uploads/ +Range: bytes=0- +Content-Length: 0 +``` + +The rest of the upload process can be carried out with the returned url, +called the "Upload URL" from the `Location` header. All responses to the +upload url, whether sending data or getting status, will be in this format. 
+Though the URI format (`/v2//blobs/uploads/`) for the `Location` +header is specified, clients should treat it as an opaque url and should never +try to assemble it. While the `uuid` parameter may be an actual UUID, this +proposal imposes no constraints on the format and clients should never impose +any. + +##### Upload Progress + +The progress and chunk coordination of the upload process will be coordinated +through the `Range` header. While this is a non-standard use of the `Range` +header, there are examples of [similar approaches](https://developers.google.com/youtube/v3/guides/using_resumable_upload_protocol) +in APIs with heavy use. +For an upload that just started, for an example with a 1000 byte layer file, +the `Range` header would be as follows: + +``` +Range: bytes=0-0 +``` + +To get the status of an upload, issue a GET request to the upload URL: + +``` +GET /v2//blobs/uploads/ +Host: +``` + +The response will be similar to the above, except it will return a 204 status: + +``` +204 No Content +Location: /v2//blobs/uploads/ +Range: bytes=0- +``` + +Note that the HTTP `Range` header byte ranges are inclusive and that will be +honored, even in non-standard use cases. + +##### Monolithic Upload + +A monolithic upload is simply a chunked upload with a single chunk and may be +favored by clients that would like to avoid the complexity of chunking. To +carry out a "monolithic" upload, one can simply put the entire content blob to +the provided URL: + +``` +PUT /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Type: application/octet-stream + + +``` + +The "digest" parameter must be included with the PUT request. Please see the +_Completed Upload_ section for details on the parameters and expected +responses.
+ +Additionally, the upload can be completed with a single `POST` request to +the uploads endpoint, including the "size" and "digest" parameters: + +``` +POST /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Type: application/octet-stream + + +``` + +On the registry service, this should allocate an upload, accept and verify +the data and return the same response as the final chunk of an upload. If the +POST request fails collecting the data in any way, the registry should attempt +to return an error response to the client with the `Location` header providing +a place to continue the upload. + +The single `POST` method is provided for convenience and most clients should +implement `POST` + `PUT` to support reliable resume of uploads. + +##### Chunked Upload + +To carry out an upload of a chunk, the client can specify a range header and +only include that part of the layer file: + +``` +PATCH /v2//blobs/uploads/ +Content-Length: +Content-Range: - +Content-Type: application/octet-stream + + +``` + +There is no enforcement on layer chunk splits other than that the server must +receive them in order. The server may enforce a minimum chunk size. If the +server cannot accept the chunk, a `416 Requested Range Not Satisfiable` +response will be returned and will include a `Range` header indicating the +current status: + +``` +416 Requested Range Not Satisfiable +Location: /v2//blobs/uploads/ +Range: 0- +Content-Length: 0 +``` + +If this response is received, the client should resume from the "last valid +range" and upload the subsequent chunk. A 416 will be returned under the +following conditions: + +- Invalid Content-Range header format +- Out of order chunk: the range of the next chunk must start immediately after + the "last valid range" from the previous response.
+ +When a chunk is accepted as part of the upload, a `202 Accepted` response will +be returned, including a `Range` header with the current upload status: + +``` +202 Accepted +Location: /v2//blobs/uploads/ +Range: bytes=0- +Content-Length: 0 +``` + +##### Completed Upload + +For an upload to be considered complete, the client must submit a `PUT` +request on the upload endpoint with a digest parameter. If it is not provided, +the upload will not be considered complete. The format for the final chunk +will be as follows: + +``` +PUT /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Range: - +Content-Type: application/octet-stream + + +``` + +Optionally, if all chunks have already been uploaded, a `PUT` request with a +`digest` parameter and zero-length body may be sent to complete and validate +the upload. Multiple "digest" parameters may be provided with different +digests. The server may verify none or all of them but _must_ notify the +client if the content is rejected. + +When the last chunk is received and the layer has been validated, the client +will receive a `201 Created` response: + +``` +201 Created +Location: /v2//blobs/ +Content-Length: 0 +``` + +The `Location` header will contain the registry URL to access the accepted +layer file. + +###### Digest Parameter + +The "digest" parameter is designed as an opaque parameter to support +verification of a successful transfer. The initial version of the registry API +will support a tarsum digest, in the standard tarsum format. For example, an +HTTP URI parameter might be as follows: + +``` +tarsum.v1+sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b +``` + +Given this parameter, the registry will verify that the provided content does +result in this tarsum. Optionally, the registry can support other digest +parameters for non-tarfile content stored as a layer.
A regular hash digest +might be specified as follows: + +``` +sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b +``` + +Such a parameter would be used to verify the binary content (as opposed +to the tar content) at the end of the upload process. + +For the initial version, registry servers are only required to support the +tarsum format. + +##### Canceling an Upload + +An upload can be cancelled by issuing a DELETE request to the upload endpoint. +The format will be as follows: + +``` +DELETE /v2//blobs/uploads/ +``` + +After this request is issued, the upload uuid will no longer be valid and the +registry server will dump all intermediate data. While uploads will time out +if not completed, clients should issue this request if they encounter a fatal +error but still have the ability to issue an http request. + +##### Errors + +If a 502, 503 or 504 error is received, the client should assume that the +upload can proceed due to a temporary condition, honoring the appropriate +retry mechanism. Other 5xx errors should be treated as terminal. + +If there is a problem with the upload, a 4xx error will be returned indicating +the problem. After receiving a 4xx response (except 416, as called out above), +the upload will be considered failed and the client should take appropriate +action. + +Note that the upload url will not be available forever. If the upload uuid is +unknown to the registry, a `404 Not Found` response will be returned and the +client must restart the upload process. + +#### Pushing an Image Manifest + +Once all of the layers for an image are uploaded, the client can upload the +image manifest. An image can be pushed using the following request format: + + PUT /v2//manifests/ + + { + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": , + ... + } + +The `name` and `tag` fields of the request body must match those specified in +the URL.
+ +If there is a problem with pushing the manifest, a relevant 4xx response will +be returned with a JSON error message. Please see the _PUT Manifest_ section +for details on possible error codes that may be returned. + +If one or more layers are unknown to the registry, `BLOB_UNKNOWN` errors are +returned. The `detail` field of the error response will have a `digest` field +identifying the missing blob, which will be a tarsum. An error is returned for +each unknown blob. The response format is as follows: + + { + "errors": [{ + "code": "BLOB_UNKNOWN", + "message": "blob unknown to registry", + "detail": { + "digest": + } + }, + ... + ] + } + +#### Listing Image Tags + +It may be necessary to list all of the tags under a given repository. The tags +for an image repository can be retrieved with the following request: + + GET /v2//tags/list + +The response will be in the following format: + + 200 OK + Content-Type: application/json + + { + "name": , + "tags": [ + , + ... + ] + } + +For repositories with a large number of tags, this response may be quite +large, so care should be taken by the client when parsing the response to +reduce copying. + +### Deleting an Image + +An image may be deleted from the registry via its `name` and `tag`. A delete +may be issued with the following request format: + + DELETE /v2//manifests/ + +If the image exists and has been successfully deleted, the following response +will be issued: + + 202 Accepted + Content-Length: None + +If the image had already been deleted or did not exist, a `404 Not Found` +response will be issued instead. + +## Detail + +> **Note**: This section is still under construction. For the purposes of +> implementation, if any details below differ from the described request flows +> above, the section below should be corrected. When they match, this note +> should be removed. + +The behavior of the endpoints is covered in detail in this section, organized +by route and entity.
All aspects of the request and responses are covered, +including headers, parameters and body formats. Examples of requests and their +corresponding responses, with success and failure, are enumerated. + +> **Note**: The sections on endpoint detail are arranged with an example +> request, a description of the request, followed by information about that +> request. + +A list of methods and URIs are covered in the table below: + +|Method|Path|Entity|Description| +-------|----|------|------------ +| GET | `/v2/` | Base | Check that the endpoint implements Docker Registry API V2. | +| GET | `/v2//tags/list` | Tags | Fetch the tags under the repository identified by `name`. | +| GET | `/v2//manifests/` | Manifest | Fetch the manifest identified by `name` and `tag`. | +| PUT | `/v2//manifests/` | Manifest | Put the manifest identified by `name` and `tag`. | +| DELETE | `/v2//manifests/` | Manifest | Delete the manifest identified by `name` and `tag`. | +| GET | `/v2//blobs/` | Blob | Retrieve the blob from the registry identified by `digest`. | +| HEAD | `/v2//blobs/` | Blob | Check if the blob is known to the registry. | +| POST | `/v2//blobs/uploads/` | Intiate Blob Upload | Initiate a resumable blob upload. If successful, an upload location will be provided to complete the upload. Optionally, if the `digest` parameter is present, the request body will be used to complete the upload in a single request. | +| GET | `/v2//blobs/uploads/` | Blob Upload | Retrieve status of upload identified by `uuid`. The primary purpose of this endpoint is to resolve the current status of a resumable upload. | +| HEAD | `/v2//blobs/uploads/` | Blob Upload | Retrieve status of upload identified by `uuid`. This is identical to the GET request. | +| PATCH | `/v2//blobs/uploads/` | Blob Upload | Upload a chunk of data for the specified upload. | +| PUT | `/v2//blobs/uploads/` | Blob Upload | Complete the upload specified by `uuid`, optionally appending the body as the final chunk. 
| +| DELETE | `/v2//blobs/uploads/` | Blob Upload | Cancel outstanding upload processes, releasing associated resources. If this is not called, the unfinished uploads will eventually timeout. | + + +The detail for each endpoint is covered in the following sections. + +### Errors + +The error codes encountered via the API are enumerated in the following table: + +|Code|Message|Description| +-------|----|------|------------ + `UNKNOWN` | unknown error | Generic error returned when the error does not have an API classification. + `DIGEST_INVALID` | provided digest did not match uploaded content | When a blob is uploaded, the registry will check that the content matches the digest provided by the client. The error may include a detail structure with the key "digest", including the invalid digest string. This error may also be returned when a manifest includes an invalid layer digest. + `SIZE_INVALID` | provided length did not match content length | When a layer is uploaded, the provided size will be checked against the uploaded content. If they do not match, this error will be returned. + `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. + `TAG_INVALID` | manifest tag did not match URI | During a manifest upload, if the tag in the manifest does not match the uri tag, this error will be returned. + `NAME_UNKNOWN` | repository name not known to registry | This is returned if the name used during an operation is unknown to the registry. + `MANIFEST_UNKNOWN` | manifest unknown | This error is returned when the manifest, identified by name and tag is unknown to the repository. + `MANIFEST_INVALID` | manifest invalid | During upload, manifests undergo several checks ensuring validity. If those checks fail, this error may be returned, unless a more specific error is included. The detail will contain information the failed validation. 
+ `MANIFEST_UNVERIFIED` | manifest failed signature verification | During manifest upload, if the manifest fails signature verification, this error will be returned. + `BLOB_UNKNOWN` | blob unknown to registry | This error may be returned when a blob is unknown to the registry in a specified repository. This can be returned with a standard get or if a manifest references an unknown layer during upload. + `BLOB_UPLOAD_UNKNOWN` | blob upload unknown to registry | If a blob upload has been cancelled or was never started, this error code may be returned. + + + +### Base + +Base V2 API route. Typically, this can be used for lightweight version checks and to validate registry authorization. + + + +#### GET Base + +Check that the endpoint implements Docker Registry API V2. + + +##### + +``` +GET /v2/ +Authorization: +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Authorization`|header|rfc7235 compliant authorization header.| + + + + +###### On Success: OK + +``` +200 OK +``` + +The API implements V2 protocol and is accessible. + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +WWW-Authenticate: realm="", ..." +``` + +The client is not authorized to access the registry. + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`WWW-Authenticate`|An RFC7235 compliant authentication challenge header.| + + + +###### On Failure: Not Found + +``` +404 Not Found +``` + +The registry does not implement the V2 API. + + + + + +### Tags + +Retrieve information about tags. + + + +#### GET Tags + +Fetch the tags under the repository identified by `name`. 
+ + +##### + +``` +GET /v2//tags/list +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| + + + + +###### On Success: OK + +``` +200 OK +Content-Type: application/json + +{ + "name": , + "tags": [ + , + ... + ] +} +``` + +A list of tags for the named repository. + + + +###### On Failure: Not Found + +``` +404 Not Found +``` + +The repository is not known to the registry. + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +``` + +The client doesn't have access to repository. + + + + + +### Manifest + +Create, update and retrieve manifests. + + + +#### GET Manifest + +Fetch the manifest identified by `name` and `tag`. + + +##### + +``` +GET /v2//manifests/ +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`tag`|path|Tag of the target manifiest.| + + + + +###### On Success: OK + +``` +200 OK +Content-Type: application/json + +{ + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": +} +``` + +The manifest idenfied by `name` and `tag`. + + + +###### On Failure: Bad Request + +``` +400 Bad Request +Content-Type: application/json + +{ + "errors:" [{ + "code": , + "message": "", + "detail": ... + }, + ... + ] +} +``` + +The name or tag was invalid. + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | +| `TAG_INVALID` | manifest tag did not match URI | During a manifest upload, if the tag in the manifest does not match the uri tag, this error will be returned. 
| + + + +###### On Failure: Not Found + +``` +404 Not Found +Content-Type: application/json + +{ + "errors:" [{ + "code": , + "message": "", + "detail": ... + }, + ... + ] +} +``` + +The named manifest is not known to the registry. + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_UNKNOWN` | repository name not known to registry | This is returned if the name used during an operation is unknown to the registry. | +| `MANIFEST_UNKNOWN` | manifest unknown | This error is returned when the manifest, identified by name and tag is unknown to the repository. | + + + + +#### PUT Manifest + +Put the manifest identified by `name` and `tag`. + + +##### + +``` +PUT /v2//manifests/ +Authorization: +Content-Type: application/json + +{ + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": +} +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Authorization`|header|rfc7235 compliant authorization header.| +|`name`|path|Name of the target repository.| +|`tag`|path|Tag of the target manifiest.| + + + + +###### On Success: Accepted + +``` +202 Accepted +``` + + + + + +###### On Failure: Bad Request + +``` +400 Bad Request +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | +| `TAG_INVALID` | manifest tag did not match URI | During a manifest upload, if the tag in the manifest does not match the uri tag, this error will be returned. | +| `MANIFEST_INVALID` | manifest invalid | During upload, manifests undergo several checks ensuring validity. 
If those checks fail, this error may be returned, unless a more specific error is included. The detail will contain information the failed validation. | +| `MANIFEST_UNVERIFIED` | manifest failed signature verification | During manifest upload, if the manifest fails signature verification, this error will be returned. | +| `BLOB_UNKNOWN` | blob unknown to registry | This error may be returned when a blob is unknown to the registry in a specified repository. This can be returned with a standard get or if a manifest references an unknown layer during upload. | + + + +###### On Failure: Bad Request + +``` +400 Bad Request +Content-Type: application/json + +{ + "errors:" [{ + "code": "BLOB_UNKNOWN", + "message": "blob unknown to registry", + "detail": { + "digest": + } + }, + ... + ] +} +``` + +One or more layers may be missing during a manifest upload. If so, the missing layers will be enumerated in the error response. + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `BLOB_UNKNOWN` | blob unknown to registry | This error may be returned when a blob is unknown to the registry in a specified repository. This can be returned with a standard get or if a manifest references an unknown layer during upload. | + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +WWW-Authenticate: realm="", ..." +``` + + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`WWW-Authenticate`|An RFC7235 compliant authentication challenge header.| + + + + +#### DELETE Manifest + +Delete the manifest identified by `name` and `tag`. 
+ + +##### + +``` +DELETE /v2//manifests/ +Authorization: +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Authorization`|header|rfc7235 compliant authorization header.| +|`name`|path|Name of the target repository.| +|`tag`|path|Tag of the target manifiest.| + + + + +###### On Success: Accepted + +``` +202 Accepted +``` + + + + + +###### On Failure: Bad Request + +``` +400 Bad Request +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | +| `TAG_INVALID` | manifest tag did not match URI | During a manifest upload, if the tag in the manifest does not match the uri tag, this error will be returned. | + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +WWW-Authenticate: realm="", ..." +``` + + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`WWW-Authenticate`|An RFC7235 compliant authentication challenge header.| + + + +###### On Failure: Not Found + +``` +404 Not Found +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_UNKNOWN` | repository name not known to registry | This is returned if the name used during an operation is unknown to the registry. | +| `MANIFEST_UNKNOWN` | manifest unknown | This error is returned when the manifest, identified by name and tag is unknown to the repository. | + + + + + +### Blob + +Fetch the blob identified by `name` and `digest`. Used to fetch layers by tarsum digest. + + + +#### GET Blob + +Retrieve the blob from the registry identified by `digest`. 
+ + +##### + +``` +GET /v2//blobs/ +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`digest`|path|Digest of desired blob.| + + + + +###### On Success: OK + +``` +200 OK +Content-Type: application/octet-stream + + +``` + +The blob identified by `digest` is available. The blob content will be present in the body of the request. +###### On Success: Temporary Redirect + +``` +307 Temporary Redirect +Location: +``` + +The blob identified by `digest` is available at the provided location. +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Location`|The location where the layer should be accessible.| + + + + +###### On Failure: Bad Request + +``` +400 Bad Request +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | +| `DIGEST_INVALID` | provided digest did not match uploaded content | When a blob is uploaded, the registry will check that the content matches the digest provided by the client. The error may include a detail structure with the key "digest", including the invalid digest string. This error may also be returned when a manifest includes an invalid layer digest. | + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +``` + + + + + +###### On Failure: Not Found + +``` +404 Not Found +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `NAME_UNKNOWN` | repository name not known to registry | This is returned if the name used during an operation is unknown to the registry. 
| +| `BLOB_UNKNOWN` | blob unknown to registry | This error may be returned when a blob is unknown to the registry in a specified repository. This can be returned with a standard get or if a manifest references an unknown layer during upload. | + + + + +#### HEAD Blob + +Check if the blob is known to the registry. + + +##### + +``` +HEAD /v2//blobs/ +``` + + + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`digest`|path|Digest of desired blob.| + + + + + + + +### Initiate Blob Upload + +Initiate a blob upload. This endpoint can be used to create resumable uploads or monolithic uploads. + + + +#### POST Initiate Blob Upload + +Initiate a resumable blob upload. If successful, an upload location will be provided to complete the upload. Optionally, if the `digest` parameter is present, the request body will be used to complete the upload in a single request. + + +##### Initiate Monolithic Blob Upload + +``` +POST /v2//blobs/uploads/?digest= +Authorization: +Content-Length: +Content-Type: application/octet-stream + + +``` + +Upload a blob identified by the `digest` parameter in a single request. This upload will not be resumable unless a recoverable error is returned. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Authorization`|header|rfc7235 compliant authorization header.| +|`Content-Length`|header|| +|`name`|path|Name of the target repository.| +|`digest`|query|Digest of uploaded blob. 
If present, the upload will be completed, in a single request, with contents of the request body as the resulting blob.| + + + + +###### On Success: Created + +``` +201 Created +Location: +Content-Length: 0 +``` + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Location`|| +|`Content-Length`|The `Content-Length` header must be zero and the body must be empty.| + + + + +###### On Failure: Invalid Name or Digest + +``` +400 Bad Request +``` + + + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `DIGEST_INVALID` | provided digest did not match uploaded content | When a blob is uploaded, the registry will check that the content matches the digest provided by the client. The error may include a detail structure with the key "digest", including the invalid digest string. This error may also be returned when a manifest includes an invalid layer digest. | +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | + + + +###### On Failure: Unauthorized + +``` +401 Unauthorized +WWW-Authenticate: realm="", ..." +``` + + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`WWW-Authenticate`|An RFC7235 compliant authentication challenge header.| + + + +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +-------|----|------|------------ +| `DIGEST_INVALID` | provided digest did not match uploaded content | When a blob is uploaded, the registry will check that the content matches the digest provided by the client. The error may include a detail structure with the key "digest", including the invalid digest string. This error may also be returned when a manifest includes an invalid layer digest. 
| +| `NAME_INVALID` | manifest name did not match URI | During a manifest upload, if the name in the manifest does not match the uri name, this error will be returned. | + + + +##### Initiate Resumable Blob Upload + +``` +POST /v2//blobs/uploads/ +Authorization: +Content-Length: 0 +``` + +Initiate a resumable blob upload with an empty request body. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Authorization`|header|rfc7235 compliant authorization header.| +|`Content-Length`|header|The `Content-Length` header must be zero and the body must be empty.| +|`name`|path|Name of the target repository.| + + + + +###### On Success: Accepted + +``` +202 Accepted +Content-Length: 0 +Location: /v2//blobs/uploads/ +Range: 0-0 +``` + +The upload has been created. The `Location` header must be used to complete the upload. The response should be identical to a `GET` request on the contents of the returned `Location` header. +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Content-Length`|The `Content-Length` header must be zero and the body must be empty.| +|`Location`|The location of the created upload. Clients should use the contents verbatim to complete the upload, adding parameters where required.| +|`Range`|Range header indicating the progress of the upload. When starting an upload, it will return an empty range, since no content has been received.| + + + + + +### Blob Upload + +Interact with blob uploads. Clients should never assemble URLs for this endpoint and should only take it through the `Location` header on related API requests. + + + +#### GET Blob Upload + +Retrieve status of upload identified by `uuid`. The primary purpose of this endpoint is to resolve the current status of a resumable upload. + + +##### + +``` +GET /v2//blobs/uploads/ +``` + +Retrieve the progress of the current upload, as reported by the `Range` header. 
+ + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`uuid`|path|A uuid identifying the upload. This field can accept almost anything.| + + + + +###### On Success: No Content + +``` +204 No Content +Range: 0- +``` + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Range`|Range indicating the current progress of the upload.| + + + + +#### HEAD Blob Upload + +Retrieve status of upload identified by `uuid`. This is identical to the GET request. + + +##### + +``` +HEAD /v2//blobs/uploads/ +``` + +Retrieve the progress of the current upload, as reported by the `Range` header. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`uuid`|path|A uuid identifying the upload. This field can accept almost anything.| + + + + +###### On Success: No Content + +``` +204 No Content +Range: 0- +``` + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Range`|Range indicating the current progress of the upload.| + + + + +#### PATCH Blob Upload + +Upload a chunk of data for the specified upload. + + +##### + +``` +PATCH /v2//blobs/uploads/ +Content-Range: - +Content-Length: +Content-Type: application/octet-stream + + +``` + +Upload a chunk of data to specified upload without completing the upload. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`Content-Range`|header|Range of bytes identifying the desired block of content represented by the body. Start must be the end offset retrieved via status check plus one. 
Note that this is a non-standard use of the `Content-Range` header.| +|`Content-Length`|header|Length of the chunk being uploaded, corresponding to the length of the request body.| +|`name`|path|Name of the target repository.| +|`uuid`|path|A uuid identifying the upload. This field can accept almost anything.| + + + + +###### On Success: No Content + +``` +204 No Content +Range: 0- +Content-Length: 0 +``` + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Range`|Range indicating the current progress of the upload.| +|`Content-Length`|The `Content-Length` header must be zero and the body must be empty.| + + + + +#### PUT Blob Upload + +Complete the upload specified by `uuid`, optionally appending the body as the final chunk. + + +##### + +``` +PUT /v2//blobs/uploads/?digest= +``` + +Upload the _final_ chunk of data. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`uuid`|path|A uuid identifying the upload. This field can accept almost anything.| +|`digest`|query|Digest of uploaded blob.| + + + + +###### On Success: No Content + +``` +204 No Content +Content-Range: - +Content-Length: +Content-Type: application/octet-stream + + +``` + + +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +|`Content-Range`|Range of bytes identifying the desired block of content represented by the body. Start must match the end of offset retrieved via status check. Note that this is a non-standard use of the `Content-Range` header.| +|`Content-Length`|Length of the chunk being uploaded, corresponding to the length of the request body.| + + + + +#### DELETE Blob Upload + +Cancel outstanding upload processes, releasing associated resources. If this is not called, the unfinished uploads will eventually timeout. 
+ + +##### + +``` +DELETE /v2//blobs/uploads/ +``` + +Cancel the upload specified by `uuid`. + + +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +|`name`|path|Name of the target repository.| +|`uuid`|path|A uuid identifying the upload. This field can accept almost anything.| + + + + + + + diff --git a/doc/SPEC.md.tmpl b/doc/SPEC.md.tmpl new file mode 100644 index 000000000..cf2fbb2cf --- /dev/null +++ b/doc/SPEC.md.tmpl @@ -0,0 +1,733 @@ +# Docker Registry API V2.1 + +> **Note**: This specification has been ported over from the proposal on +> docker/docker#9015. Much of the language in this document is still written +> in the proposal tense and needs to be converted. + +## Abstract + +> **TODO**: Merge this section into the overview/introduction. + +The docker registry is a service to manage information about docker images and +enable their distribution. While the current registry is usable, there are +several problems with the architecture that have led to this proposal. For +relevant details, please see the following issues: + +- docker/docker#8093 +- docker/docker-registry#612 + +The main drivers of this proposal are changes to the docker image format, +covered in docker/docker#8093. The new, self-contained image manifest +simplifies the image definition and the underlying backend layout. To reduce +bandwidth usage, the new registry will be architected to avoid uploading +existing layers and will support resumable layer uploads. + +While out of scope for this specification, the URI layout of the new API will +be structured to support a rich Authentication and Authorization model by +leveraging namespaces. + +Furthermore, to bring docker registry in line with docker core, the registry is written in Go. + +## Scope + +> **TODO**: Merge this section into the overview/introduction. + +This proposal covers the URL layout and protocols of the Docker Registry V2 +JSON API. 
This will affect the docker core registry API and the rewrite of +docker-registry. + +This includes the following features: + +- Namespace-oriented URI Layout +- PUSH/PULL registry server for V2 image manifest format +- Resumable layer PUSH support +- V2 Client library implementation + +While authentication and authorization support will influence this +specification, details of the protocol will be left to a future specification. +Other features marked as next generation will be incorporated when the initial +support is complete. Please see the road map for details. + +## Use Cases + +> **TODO**: Merge this section into the overview/introduction. + +For the most part, the use cases of the former registry API apply to the new +version. Differentiating use cases are covered below. + +### Resumable Push + +Company X's build servers lose connectivity to docker registry before +completing an image layer transfer. After connectivity returns, the build +server attempts to re-upload the image. The registry notifies the build server +that the upload has already been partially attempted. The build server +responds by only sending the remaining data to complete the image file. + +### Resumable Pull + +Company X is having more connectivity problems but this time in their +deployment datacenter. When downloading an image, the connection is +interrupted before completion. The client keeps the partial data and uses http +`Range` requests to avoid downloading repeated data. + +### Layer Upload De-duplication + +Company Y's build system creates two identical docker layers from build +processes A and B. Build process A completes uploading the layer before B. +When process B attempts to upload the layer, the registry indicates that it's +not necessary because the layer is already known. 
+ +If process A and B upload the same layer at the same time, both operations +will proceed and the first to complete will be stored in the registry (Note: +we may modify this to prevent dogpile with some locking mechanism). + +## Overview + +This section covers client flows and details of the API endpoints. All +endpoints will be prefixed by the API version and the repository name: + + /v2// + +For example, for an API endpoint that will work with the `library/ubuntu` +repository, the URI prefix will be: + + /v2/library/ubuntu/ + +This scheme provides rich access control over various operations and methods +using the URI prefix and http methods that can be controlled in a variety of +ways. + +Classically, repository names have always been two path components where each +path component is less than 30 characters. The V2 registry API does not +enforce this. The rules for a repository name are as follows: + +1. A repository name is broken up into _path components_. A component of a + repository name must be at least two characters, optionally separated by + periods, dashes or underscores. More strictly, it must match the regular + expression `[a-z0-9]+(?:[._-][a-z0-9]+)*` and the matched result must be 2 + or more characters in length. +2. The name of a repository must have at least two path components, separated + by a forward slash. +3. The total length of a repository name, including slashes, must be less than + 256 characters. + +These name requirements _only_ apply to the registry API and should accept a +superset of what is supported by other docker ecosystem components. + +All endpoints should support aggressive http caching, compression and range +headers, where appropriate. The new API attempts to leverage HTTP semantics +where possible but may break from standards to implement targeted features. + +For detail on individual endpoints, please see the _Detail_ section. 
+ +### Errors + +Actionable failure conditions, covered in detail in their relevant sections, +are reported as part of 4xx responses, in a json response body. One or more +errors will be returned in the following format: + + { + "errors": [{ + "code": , + "message": , + "detail": + }, + ... + ] + } + +The `code` field will be a unique identifier, all caps with underscores by +convention. The `message` field will be a human readable string. The optional +`detail` field may contain arbitrary json data providing information the +client can use to resolve the issue. + +While the client can take action on certain error codes, the registry may add +new error codes over time. All client implementations should treat unknown +error codes as `UNKNOWN`, allowing future error codes to be added without +breaking API compatibility. For the purposes of the specification error codes +will only be added and never removed. + +For a complete account of all error codes, please see the _Detail_ section. + +### API Version Check + +A minimal endpoint, mounted at `/v2/` will provide version support information +based on its response statuses. The request format is as follows: + + GET /v2/ + +If a `200 OK` response is returned, the registry implements the V2(.1) +registry API and the client may proceed safely with other V2 operations. +Optionally, the response may contain information about the supported paths in +the response body. The client should be prepared to ignore this data. + +If a `401 Unauthorized` response is returned, the client should take action +based on the contents of the "WWW-Authenticate" header and try the endpoint +again. Depending on access control setup, the client may still have to +authenticate against different resources, even if this check succeeds. + +If a `404 Not Found` response status, or other unexpected status, is returned, +the client should proceed with the assumption that the registry does not +implement V2 of the API. 
+ +### Pulling An Image + +An "image" is a combination of a JSON manifest and individual layer files. The +process of pulling an image centers around retrieving these two components. + +The first step in pulling an image is to retrieve the manifest. For reference, +the relevant manifest fields for the registry are the following: + + field | description | +----------|------------------------------------------------| +name | The name of the image. | +tag | The tag for this version of the image. | +fsLayers | A list of layer descriptors (including tarsum) | +signature | A JWS used to verify the manifest content | + +For more information about the manifest format, please see +[docker/docker#8093](https://github.com/docker/docker/issues/8093). + +When the manifest is in hand, the client must verify the signature to ensure +the names and layers are valid. Once confirmed, the client will then use the +tarsums to download the individual layers. Layers are stored as blobs in +the V2 registry API, keyed by their tarsum digest. + +#### Pulling an Image Manifest + +The image manifest can be fetched with the following url: + +``` +GET /v2//manifests/ +``` + +The "name" and "tag" parameters identify the image and are required. + +A `404 Not Found` response will be returned if the image is unknown to the +registry. If the image exists and the response is successful, the image +manifest will be returned, with the following format (see docker/docker#8093 +for details): + + { + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": + } + +The client should verify the returned manifest signature for authenticity +before fetching layers. + +#### Pulling a Layer + +Layers are stored in the blob portion of the registry, keyed by tarsum digest. +Pulling a layer is carried out by a standard http request. 
The URL is as +follows: + + GET /v2//blobs/ + +Access to a layer will be gated by the `name` of the repository but is +identified uniquely in the registry by `tarsum`. The `tarsum` parameter is an +opaque field, to be interpreted by the tarsum library. + +This endpoint may issue a 307 (302 for /blobs/uploads/ +``` + +The parameters of this request are the image namespace under which the layer +will be linked. Responses to this request are covered below. + +##### Existing Layers + +The existence of a layer can be checked via a `HEAD` request to the blob store +API. The request should be formatted as follows: + +``` +HEAD /v2//blobs/ +``` + +If the layer with the tarsum specified in `digest` is available, a 200 OK +response will be received, with no actual body content (this is according to +http specification). The response will look as follows: + +``` +200 OK +Content-Length: +``` + +When this response is received, the client can assume that the layer is +already available in the registry under the given name and should take no +further action to upload the layer. Note that the binary digests may differ +for the existing registry layer, but the tarsums will be guaranteed to match. + +##### Uploading the Layer + +If the POST request is successful, a `202 Accepted` response will be returned +with the upload URL in the `Location` header: + +``` +202 Accepted +Location: /v2//blobs/uploads/ +Range: bytes=0- +Content-Length: 0 +``` + +The rest of the upload process can be carried out with the returned url, +called the "Upload URL" from the `Location` header. All responses to the +upload url, whether sending data or getting status, will be in this format. +Though the URI format (`/v2//blobs/uploads/`) for the `Location` +header is specified, clients should treat it as an opaque url and should never +try to assemble the it. While the `uuid` parameter may be an actual UUID, this +proposal imposes no constraints on the format and clients should never impose +any. 
+ +##### Upload Progress + +The progress and chunk coordination of the upload process will be coordinated +through the `Range` header. While this is a non-standard use of the `Range` +header, there are examples of [similar +approaches](https://developers.google.com/youtube/v3/guides/using_resumable_upload_protocol) +in APIs with heavy use. +For an upload that just started, for an example with a 1000 byte layer file, +the `Range` header would be as follows: + +``` +Range: bytes=0-0 +``` + +To get the status of an upload, issue a GET request to the upload URL: + +``` +GET /v2//blobs/uploads/ +Host: +``` + +The response will be similar to the above, except will return 204 status: + +``` +204 No Content +Location: /v2//blobs/uploads/ +Range: bytes=0- +``` + +Note that the HTTP `Range` header byte ranges are inclusive and that will be +honored, even in non-standard use cases. + +##### Monolithic Upload + +A monolithic upload is simply a chunked upload with a single chunk and may be +favored by clients that would like to avoid the complexity of chunking. To +carry out a "monolithic" upload, one can simply put the entire content blob to +the provided URL: + +``` +PUT /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Type: application/octet-stream + + +``` + +The "digest" parameter must be included with the PUT request. Please see the +_Completed Upload_ section for details on the parameters and expected +responses. + +Additionally, the upload can be completed with a single `POST` request to +the uploads endpoint, including the "size" and "digest" parameters: + +``` +POST /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Type: application/octet-stream + + +``` + +On the registry service, this should allocate an upload, accept and verify +the data and return the same response as the final chunk of an upload. 
If the +POST request fails collecting the data in any way, the registry should attempt +to return an error response to the client with the `Location` header providing +a place to continue the upload. + +The single `POST` method is provided for convenience and most clients should +implement `POST` + `PUT` to support reliable resume of uploads. + +##### Chunked Upload + +To carry out an upload of a chunk, the client can specify a range header and +only include that part of the layer file: + +``` +PATCH /v2//blobs/uploads/ +Content-Length: +Content-Range: - +Content-Type: application/octet-stream + + +``` + +There is no enforcement on layer chunk splits other than that the server must +receive them in order. The server may enforce a minimum chunk size. If the +server cannot accept the chunk, a `416 Requested Range Not Satisfiable` +response will be returned and will include a `Range` header indicating the +current status: + +``` +416 Requested Range Not Satisfiable +Location: /v2//blobs/uploads/ +Range: 0- +Content-Length: 0 +``` + +If this response is received, the client should resume from the "last valid +range" and upload the subsequent chunk. A 416 will be returned under the +following conditions: + +- Invalid Content-Range header format +- Out of order chunk: the range of the next chunk must start immediately after + the "last valid range" from the previous response. + +When a chunk is accepted as part of the upload, a `202 Accepted` response will +be returned, including a `Range` header with the current upload status: + +``` +202 Accepted +Location: /v2//blobs/uploads/ +Range: bytes=0- +Content-Length: 0 +``` + +##### Completed Upload + +For an upload to be considered complete, the client must submit a `PUT` +request on the upload endpoint with a digest parameter. If it is not provided, +the upload will not be considered complete. 
The format for the final chunk +will be as follows: + +``` +PUT /v2//blobs/uploads/?digest=[&digest=sha256:] +Content-Length: +Content-Range: - +Content-Type: application/octet-stream + + +``` + +Optionally, if all chunks have already been uploaded, a `PUT` request with a +`digest` parameter and zero-length body may be sent to complete and validate +the upload. Multiple "digest" parameters may be provided with different +digests. The server may verify none or all of them but _must_ notify the +client if the content is rejected. + +When the last chunk is received and the layer has been validated, the client +will receive a `201 Created` response: + +``` +201 Created +Location: /v2//blobs/ +Content-Length: 0 +``` + +The `Location` header will contain the registry URL to access the accepted +layer file. + +###### Digest Parameter + +The "digest" parameter is designed as an opaque parameter to support +verification of a successful transfer. The initial version of the registry API +will support a tarsum digest, in the standard tarsum format. For example, an +HTTP URI parameter might be as follows: + +``` +tarsum.v1+sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b +``` + +Given this parameter, the registry will verify that the provided content does +result in this tarsum. Optionally, the registry can support other digest +parameters for non-tarfile content stored as a layer. A regular hash digest +might be specified as follows: + +``` +sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b +``` + +Such a parameter would be used to verify the binary content (as opposed +to the tar content) at the end of the upload process. + +For the initial version, registry servers are only required to support the +tarsum format. + +##### Canceling an Upload + +An upload can be cancelled by issuing a DELETE request to the upload endpoint. 
+The format will be as follows: + +``` +DELETE /v2//blobs/uploads/ +``` + +After this request is issued, the upload uuid will no longer be valid and the +registry server will dump all intermediate data. While uploads will time out +if not completed, clients should issue this request if they encounter a fatal +error but still have the ability to issue an http request. + +##### Errors + +If a 502, 503 or 504 error is received, the client should assume that the +upload can proceed due to a temporary condition, honoring the appropriate +retry mechanism. Other 5xx errors should be treated as terminal. + +If there is a problem with the upload, a 4xx error will be returned indicating +the problem. After receiving a 4xx response (except 416, as called out above), +the upload will be considered failed and the client should take appropriate +action. + +Note that the upload url will not be available forever. If the upload uuid is +unknown to the registry, a `404 Not Found` response will be returned and the +client must restart the upload process. + +#### Pushing an Image Manifest + +Once all of the layers for an image are uploaded, the client can upload the +image manifest. An image can be pushed using the following request format: + + PUT /v2//manifests/ + + { + "name": , + "tag": , + "fsLayers": [ + { + "blobSum": + }, + ... + ] + ], + "history": , + "signature": , + ... + } + +The `name` and `tag` fields of the request body must match those specified in +the URL. + +If there is a problem with pushing the manifest, a relevant 4xx response will +be returned with a JSON error message. Please see the _PUT Manifest_ section +for details on possible error codes that may be returned. + +If one or more layers are unknown to the registry, `BLOB_UNKNOWN` errors are +returned. The `detail` field of the error response will have a `digest` field +identifying the missing blob, which will be a tarsum. An error is returned for +each unknown blob. 
The response format is as follows: + + { + "errors": [{ + "code": "BLOB_UNKNOWN", + "message": "blob unknown to registry", + "detail": { + "digest": + } + }, + ... + ] + } + +#### Listing Image Tags + +It may be necessary to list all of the tags under a given repository. The tags +for an image repository can be retrieved with the following request: + + GET /v2//tags/list + +The response will be in the following format: + + 200 OK + Content-Type: application/json + + { + "name": , + "tags": [ + , + ... + ] + } + +For repositories with a large number of tags, this response may be quite +large, so care should be taken by the client when parsing the response to +reduce copying. + +### Deleting an Image + +An image may be deleted from the registry via its `name` and `tag`. A delete +may be issued with the following request format: + + DELETE /v2//manifests/ + +If the image exists and has been successfully deleted, the following response +will be issued: + + 202 Accepted + Content-Length: None + +If the image had already been deleted or did not exist, a `404 Not Found` +response will be issued instead. + +## Detail + +> **Note**: This section is still under construction. For the purposes of +> implementation, if any details below differ from the described request flows +> above, the section below should be corrected. When they match, this note +> should be removed. + +The behavior of the endpoints are covered in detail in this section, organized +by route and entity. All aspects of the request and responses are covered, +including headers, parameters and body formats. Examples of requests and their +corresponding responses, with success and failure, are enumerated. + +> **Note**: The sections on endpoint detail are arranged with an example +> request, a description of the request, followed by information about that +> request. 
+ +A list of methods and URIs is provided in the table below: + +|Method|Path|Entity|Description| +-------|----|------|------------ +{{range $route := .RouteDescriptors}}{{range $method := .Methods}}| {{$method.Method}} | `{{$route.Path|prettygorilla}}` | {{$route.Entity}} | {{$method.Description}} | +{{end}}{{end}} + +The detail for each endpoint is covered in the following sections. + +### Errors + +The error codes encountered via the API are enumerated in the following table: + +|Code|Message|Description| +|----|-------|-----------| +{{range $err := .ErrorDescriptors}} `{{$err.Value}}` | {{$err.Message}} | {{$err.Description|removenewlines}} +{{end}} + +{{range $route := .RouteDescriptors}} +### {{.Entity}} + +{{.Description}} + +{{range $method := $route.Methods}} + +#### {{.Method}} {{$route.Entity}} + +{{.Description}} + +{{if .Requests}}{{range .Requests}} +##### {{.Name}} + +``` +{{$method.Method}} {{$route.Path|prettygorilla}}{{if .QueryParameters}}?{{range .QueryParameters}}{{.Name}}={{.Format}}{{end}}{{end}}{{range .Headers}} +{{.Name}}: {{.Format}}{{end}}{{if .Body.ContentType}} +Content-Type: {{.Body.ContentType}}{{end}}{{if .Body.Format}} + +{{.Body.Format}}{{end}} +``` + +{{.Description}} + +{{if or .Headers .PathParameters .QueryParameters}} +The following parameters should be specified on the request: + +|Name|Kind|Description| +|----|----|-----------| +{{range .Headers}}|`{{.Name}}`|header|{{.Description}}| +{{end}}{{range .PathParameters}}|`{{.Name}}`|path|{{.Description}}| +{{end}}{{range .QueryParameters}}|`{{.Name}}`|query|{{.Description}}| +{{end}}{{end}} + +{{if .Successes}} +{{range .Successes}} +###### On Success: {{if .Name}}{{.Name}}{{else}}{{.StatusCode | statustext}}{{end}} + +``` +{{.StatusCode}} {{.StatusCode | statustext}}{{range .Headers}} +{{.Name}}: {{.Format}}{{end}}{{if .Body.ContentType}} +Content-Type: {{.Body.ContentType}}{{end}}{{if .Body.Format}} + +{{.Body.Format}}{{end}} +``` + +{{.Description}}{{if .Headers}} +The
following headers will be returned on the response: + +|Name|Description| +|----|-----------| +{{range .Headers}}|`{{.Name}}`|{{.Description}}| +{{end}}{{end}}{{end}}{{end}} + +{{if .Failures}} +{{range .Failures}} +###### On Failure: {{if .Name}}{{.Name}}{{else}}{{.StatusCode | statustext}}{{end}} + +``` +{{.StatusCode}} {{.StatusCode | statustext}}{{range .Headers}} +{{.Name}}: {{.Format}}{{end}}{{if .Body.ContentType}} +Content-Type: {{.Body.ContentType}}{{end}}{{if .Body.Format}} + +{{.Body.Format}}{{end}} +``` + +{{.Description}} +{{if .Headers}} +The following headers will be returned on the response: + +|Name|Description| +|----|-----------| +{{range .Headers}}|`{{.Name}}`|{{.Description}}| +{{end}}{{end}} + +{{if .ErrorCodes}} +The error codes that may be included in the response body are enumerated below: + +|Code|Message|Description| +|----|-------|-----------| +{{range $err := .ErrorCodes}}| `{{$err}}` | {{$err.Descriptor.Message}} | {{$err.Descriptor.Description|removenewlines}} | +{{end}} + +{{end}}{{end}}{{end}}{{end}}{{end}}{{end}} + +{{end}} diff --git a/storagedriver/s3/s3.go b/storagedriver/s3/s3.go index e26d3be2a..b8a905c34 100644 --- a/storagedriver/s3/s3.go +++ b/storagedriver/s3/s3.go @@ -1,13 +1,28 @@ -// +build ignore - +// Package s3 provides a storagedriver.StorageDriver implementation to +// store blobs in Amazon S3 cloud storage. +// +// This package leverages the crowdmob/goamz client library for interfacing with +// s3. +// +// Because s3 is a key-value store, the Stat call does not support last modification +// time for directories (directories are an abstraction in key-value stores). +// +// Keep in mind that s3 guarantees only eventual consistency, so do not assume +// that a successful write will mean immediate access to the data written (although +// in most regions a new object put has guaranteed read after write).
The only true +// guarantee is that once you call Stat and receive a certain file size, that much of +// the file is already accessible. package s3 import ( "bytes" "fmt" "io" + "io/ioutil" "net/http" "strconv" + "strings" + "time" "github.com/crowdmob/goamz/aws" "github.com/crowdmob/goamz/s3" @@ -19,10 +34,10 @@ const driverName = "s3" // minChunkSize defines the minimum multipart upload chunk size // S3 API requires multipart upload chunks to be at least 5MB -const minChunkSize = 5 * 1024 * 1024 +const chunkSize = 5 * 1024 * 1024 -// listPartsMax is the largest amount of parts you can request from S3 -const listPartsMax = 1000 +// listMax is the largest amount of objects you can request from S3 in a list call +const listMax = 1000 func init() { factory.Register(driverName, &s3DriverFactory{}) @@ -31,16 +46,17 @@ func init() { // s3DriverFactory implements the factory.StorageDriverFactory interface type s3DriverFactory struct{} -func (factory *s3DriverFactory) Create(parameters map[string]string) (storagedriver.StorageDriver, error) { +func (factory *s3DriverFactory) Create(parameters map[string]interface{}) (storagedriver.StorageDriver, error) { return FromParameters(parameters) } // Driver is a storagedriver.StorageDriver implementation backed by Amazon S3 // Objects are stored at absolute keys in the provided bucket type Driver struct { - S3 *s3.S3 - Bucket *s3.Bucket - Encrypt bool + S3 *s3.S3 + Bucket *s3.Bucket + Encrypt bool + rootDirectory string } // FromParameters constructs a new Driver with a given parameters map @@ -50,28 +66,24 @@ type Driver struct { // - region // - bucket // - encrypt -func FromParameters(parameters map[string]string) (*Driver, error) { - accessKey, ok := parameters["accesskey"] - if !ok || accessKey == "" { - return nil, fmt.Errorf("No accesskey parameter provided") - } - - secretKey, ok := parameters["secretkey"] - if !ok || secretKey == "" { - return nil, fmt.Errorf("No secretkey parameter provided") - } +func 
FromParameters(parameters map[string]interface{}) (*Driver, error) { + // Providing no values for these is valid in case the user is authenticating + // with an IAM on an ec2 instance (in which case the instance credentials will + // be summoned when GetAuth is called) + accessKey, _ := parameters["accesskey"] + secretKey, _ := parameters["secretkey"] regionName, ok := parameters["region"] - if !ok || regionName == "" { + if !ok || regionName.(string) == "" { return nil, fmt.Errorf("No region parameter provided") } - region := aws.GetRegion(regionName) + region := aws.GetRegion(fmt.Sprint(regionName)) if region.Name == "" { return nil, fmt.Errorf("Invalid region provided: %v", region) } bucket, ok := parameters["bucket"] - if !ok || bucket == "" { + if !ok || fmt.Sprint(bucket) == "" { return nil, fmt.Errorf("No bucket parameter provided") } @@ -80,136 +92,415 @@ func FromParameters(parameters map[string]string) (*Driver, error) { return nil, fmt.Errorf("No encrypt parameter provided") } - encryptBool, err := strconv.ParseBool(encrypt) - if err != nil { - return nil, fmt.Errorf("Unable to parse the encrypt parameter: %v", err) + encryptBool, ok := encrypt.(bool) + if !ok { + return nil, fmt.Errorf("The encrypt parameter should be a boolean") } - return New(accessKey, secretKey, region, encryptBool, bucket) + + rootDirectory, ok := parameters["rootdirectory"] + if !ok { + return nil, fmt.Errorf("No rootdirectory parameter provided") + } + + return New(fmt.Sprint(accessKey), fmt.Sprint(secretKey), fmt.Sprint(bucket), fmt.Sprint(rootDirectory), region, encryptBool) } // New constructs a new Driver with the given AWS credentials, region, encryption flag, and // bucketName -func New(accessKey string, secretKey string, region aws.Region, encrypt bool, bucketName string) (*Driver, error) { - auth := aws.Auth{AccessKey: accessKey, SecretKey: secretKey} +func New(accessKey, secretKey, bucketName, rootDirectory string, region aws.Region, encrypt bool) (*Driver, error) { + 
auth, err := aws.GetAuth(accessKey, secretKey, "", time.Time{}) + if err != nil { + return nil, err + } + s3obj := s3.New(auth, region) bucket := s3obj.Bucket(bucketName) - if err := bucket.PutBucket(getPermissions()); err != nil { - s3Err, ok := err.(*s3.Error) - if !(ok && s3Err.Code == "BucketAlreadyOwnedByYou") { - return nil, err - } + if _, err := bucket.List("", "", "", 1); err != nil { + return nil, err } - return &Driver{s3obj, bucket, encrypt}, nil + // TODO Currently multipart uploads have no timestamps, so this would be unwise + // if you initiated a new s3driver while another one is running on the same bucket. + // multis, _, err := bucket.ListMulti("", "") + // if err != nil { + // return nil, err + // } + + // for _, multi := range multis { + // err := multi.Abort() + // //TODO appropriate to do this error checking? + // if err != nil { + // return nil, err + // } + // } + + return &Driver{s3obj, bucket, encrypt, rootDirectory}, nil } // Implement the storagedriver.StorageDriver interface // GetContent retrieves the content stored at "path" as a []byte. func (d *Driver) GetContent(path string) ([]byte, error) { - content, err := d.Bucket.Get(path) + if !storagedriver.PathRegexp.MatchString(path) { + return nil, storagedriver.InvalidPathError{Path: path} + } + + content, err := d.Bucket.Get(d.s3Path(path)) if err != nil { - return nil, storagedriver.PathNotFoundError{Path: path} + return nil, parseError(path, err) } return content, nil } // PutContent stores the []byte content at a location designated by "path". 
func (d *Driver) PutContent(path string, contents []byte) error { - return d.Bucket.Put(path, contents, d.getContentType(), getPermissions(), d.getOptions()) + if !storagedriver.PathRegexp.MatchString(path) { + return storagedriver.InvalidPathError{Path: path} + } + + return parseError(path, d.Bucket.Put(d.s3Path(path), contents, d.getContentType(), getPermissions(), d.getOptions())) } // ReadStream retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. func (d *Driver) ReadStream(path string, offset int64) (io.ReadCloser, error) { + if !storagedriver.PathRegexp.MatchString(path) { + return nil, storagedriver.InvalidPathError{Path: path} + } + + if offset < 0 { + return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset} + } + headers := make(http.Header) headers.Add("Range", "bytes="+strconv.FormatInt(offset, 10)+"-") - resp, err := d.Bucket.GetResponseWithHeaders(path, headers) + resp, err := d.Bucket.GetResponseWithHeaders(d.s3Path(path), headers) if err != nil { - return nil, storagedriver.PathNotFoundError{Path: path} + if s3Err, ok := err.(*s3.Error); ok && s3Err.Code == "InvalidRange" { + return ioutil.NopCloser(bytes.NewReader(nil)), nil + } + + return nil, parseError(path, err) } return resp.Body, nil } -// WriteStream stores the contents of the provided io.ReadCloser at a location -// designated by the given path. -func (d *Driver) WriteStream(path string, offset, size int64, reader io.ReadCloser) error { - defer reader.Close() +// WriteStream stores the contents of the provided io.Reader at a +// location designated by the given path. The driver will know it has +// received the full contents when the reader returns io.EOF. The number +// of successfully READ bytes will be returned, even if an error is +// returned. May be used to resume writing a stream by providing a nonzero +// offset. Offsets past the current size will write from the position +// beyond the end of the file. 
+func (d *Driver) WriteStream(path string, offset int64, reader io.Reader) (totalRead int64, err error) { + if !storagedriver.PathRegexp.MatchString(path) { + return 0, storagedriver.InvalidPathError{Path: path} + } - chunkSize := int64(minChunkSize) - for size/chunkSize >= listPartsMax { - chunkSize *= 2 + if offset < 0 { + return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset} } partNumber := 1 - var totalRead int64 - multi, parts, err := d.getAllParts(path) + bytesRead := 0 + parts := []s3.Part{} + var part s3.Part + + multi, err := d.Bucket.InitMulti(d.s3Path(path), d.getContentType(), getPermissions(), d.getOptions()) if err != nil { - return err - } - - if (offset) > int64(len(parts))*chunkSize || (offset < size && offset%chunkSize != 0) { - return storagedriver.InvalidOffsetError{Path: path, Offset: offset} - } - - if len(parts) > 0 { - partNumber = int(offset/chunkSize) + 1 - totalRead = offset - parts = parts[0 : partNumber-1] + return 0, err } buf := make([]byte, chunkSize) - for { - bytesRead, err := io.ReadFull(reader, buf) - totalRead += int64(bytesRead) + zeroBuf := make([]byte, chunkSize) - if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { + // We never want to leave a dangling multipart upload, our only consistent state is + // when there is a whole object at path. This is in order to remain consistent with + // the stat call. + // + // Note that if the machine dies before executing the defer, we will be left with a dangling + // multipart upload, which will eventually be cleaned up, but we will lose all of the progress + // made prior to the machine crashing. 
+ defer func() { + if len(parts) > 0 { + if multi == nil { + // Parts should be empty if the multi is not initialized + panic("Unreachable") + } else { + if multi.Complete(parts) != nil { + multi.Abort() + } + } + } + }() + + // Fills from 0 to total from current + fromSmallCurrent := func(total int64) error { + current, err := d.ReadStream(path, 0) + if err != nil { return err - } else if (int64(bytesRead) < chunkSize) && totalRead != size { - break - } else { - part, err := multi.PutPart(int(partNumber), bytes.NewReader(buf[0:bytesRead])) + } + + bytesRead = 0 + for int64(bytesRead) < total { + //The loop should very rarely enter a second iteration + nn, err := current.Read(buf[bytesRead:total]) + bytesRead += nn + if err != nil { + if err != io.EOF { + return err + } + + break + } + + } + return nil + } + + // Fills from parameter to chunkSize from reader + fromReader := func(from int64) error { + bytesRead = 0 + for from+int64(bytesRead) < chunkSize { + nn, err := reader.Read(buf[from+int64(bytesRead):]) + totalRead += int64(nn) + bytesRead += nn + + if err != nil { + if err != io.EOF { + return err + } + + break + } + } + + if bytesRead > 0 { + part, err = multi.PutPart(int(partNumber), bytes.NewReader(buf[0:int64(bytesRead)+from])) if err != nil { return err } parts = append(parts, part) - if totalRead == size { - multi.Complete(parts) - break + partNumber++ + } + + return nil + } + + if offset > 0 { + resp, err := d.Bucket.Head(d.s3Path(path), nil) + if err != nil { + if s3Err, ok := err.(*s3.Error); !ok || s3Err.Code != "NoSuchKey" { + return 0, err + } + } + + currentLength := int64(0) + if err == nil { + currentLength = resp.ContentLength + } + + if currentLength >= offset { + if offset < chunkSize { + // chunkSize > currentLength >= offset + if err = fromSmallCurrent(offset); err != nil { + return totalRead, err + } + + if err = fromReader(offset); err != nil { + return totalRead, err + } + + if totalRead+offset < chunkSize { + return totalRead, nil + } 
+ } else { + // currentLength >= offset >= chunkSize + _, part, err = multi.PutPartCopy(partNumber, + s3.CopyOptions{CopySourceOptions: "bytes=0-" + strconv.FormatInt(offset-1, 10)}, + d.Bucket.Name+"/"+d.s3Path(path)) + if err != nil { + return 0, err + } + + parts = append(parts, part) + partNumber++ + } + } else { + // Fills between parameters with 0s but only when to - from <= chunkSize + fromZeroFillSmall := func(from, to int64) error { + bytesRead = 0 + for from+int64(bytesRead) < to { + nn, err := bytes.NewReader(zeroBuf).Read(buf[from+int64(bytesRead) : to]) + bytesRead += nn + if err != nil { + return err + } + } + + return nil + } + + // Fills between parameters with 0s, making new parts + fromZeroFillLarge := func(from, to int64) error { + bytesRead64 := int64(0) + for to-(from+bytesRead64) >= chunkSize { + part, err := multi.PutPart(int(partNumber), bytes.NewReader(zeroBuf)) + if err != nil { + return err + } + bytesRead64 += chunkSize + + parts = append(parts, part) + partNumber++ + } + + return fromZeroFillSmall(0, (to-from)%chunkSize) + } + + // currentLength < offset + if currentLength < chunkSize { + if offset < chunkSize { + // chunkSize > offset > currentLength + if err = fromSmallCurrent(currentLength); err != nil { + return totalRead, err + } + + if err = fromZeroFillSmall(currentLength, offset); err != nil { + return totalRead, err + } + + if err = fromReader(offset); err != nil { + return totalRead, err + } + + if totalRead+offset < chunkSize { + return totalRead, nil + } + } else { + // offset >= chunkSize > currentLength + if err = fromSmallCurrent(currentLength); err != nil { + return totalRead, err + } + + if err = fromZeroFillSmall(currentLength, chunkSize); err != nil { + return totalRead, err + } + + part, err = multi.PutPart(int(partNumber), bytes.NewReader(buf)) + if err != nil { + return totalRead, err + } + + parts = append(parts, part) + partNumber++ + + //Zero fill from chunkSize up to offset, then some reader + if err = 
fromZeroFillLarge(chunkSize, offset); err != nil { + return totalRead, err + } + + if err = fromReader(offset % chunkSize); err != nil { + return totalRead, err + } + + if totalRead+(offset%chunkSize) < chunkSize { + return totalRead, nil + } + } + } else { + // offset > currentLength >= chunkSize + _, part, err = multi.PutPartCopy(partNumber, + s3.CopyOptions{CopySourceOptions: "bytes=0-" + strconv.FormatInt(currentLength-1, 10)}, + d.Bucket.Name+"/"+d.s3Path(path)) + if err != nil { + return 0, err + } + + parts = append(parts, part) + partNumber++ + + //Zero fill from currentLength up to offset, then some reader + if err = fromZeroFillLarge(currentLength, offset); err != nil { + return totalRead, err + } + + if err = fromReader((offset - currentLength) % chunkSize); err != nil { + return totalRead, err + } + + if totalRead+((offset-currentLength)%chunkSize) < chunkSize { + return totalRead, nil + } } - partNumber++ } } - return nil + for { + if err = fromReader(0); err != nil { + return totalRead, err + } + + if int64(bytesRead) < chunkSize { + break + } + } + + return totalRead, nil } -// CurrentSize retrieves the curernt size in bytes of the object at the given -// path. -func (d *Driver) CurrentSize(path string) (uint64, error) { - _, parts, err := d.getAllParts(path) +// Stat retrieves the FileInfo for the given path, including the current size +// in bytes and the creation time. 
+func (d *Driver) Stat(path string) (storagedriver.FileInfo, error) { + if !storagedriver.PathRegexp.MatchString(path) { + return nil, storagedriver.InvalidPathError{Path: path} + } + + listResponse, err := d.Bucket.List(d.s3Path(path), "", "", 1) if err != nil { - return 0, err + return nil, err } - if len(parts) == 0 { - return 0, nil + fi := storagedriver.FileInfoFields{ + Path: path, } - return (((uint64(len(parts)) - 1) * uint64(parts[0].Size)) + uint64(parts[len(parts)-1].Size)), nil + if len(listResponse.Contents) == 1 { + if listResponse.Contents[0].Key != d.s3Path(path) { + fi.IsDir = true + } else { + fi.IsDir = false + fi.Size = listResponse.Contents[0].Size + + timestamp, err := time.Parse(time.RFC3339Nano, listResponse.Contents[0].LastModified) + if err != nil { + return nil, err + } + fi.ModTime = timestamp + } + } else if len(listResponse.CommonPrefixes) == 1 { + fi.IsDir = true + } else { + return nil, storagedriver.PathNotFoundError{Path: path} + } + + return storagedriver.FileInfoInternal{FileInfoFields: fi}, nil } -// List returns a list of the objects that are direct descendants of the given -// path. +// List returns a list of the objects that are direct descendants of the given path. 
func (d *Driver) List(path string) ([]string, error) { - if path[len(path)-1] != '/' { + if !storagedriver.PathRegexp.MatchString(path) && path != "/" { + return nil, storagedriver.InvalidPathError{Path: path} + } + + if path != "/" && path[len(path)-1] != '/' { path = path + "/" } - listResponse, err := d.Bucket.List(path, "/", "", listPartsMax) + listResponse, err := d.Bucket.List(d.s3Path(path), "/", "", listMax) if err != nil { return nil, err } @@ -219,15 +510,15 @@ func (d *Driver) List(path string) ([]string, error) { for { for _, key := range listResponse.Contents { - files = append(files, key.Key) + files = append(files, strings.Replace(key.Key, d.s3Path(""), "", 1)) } for _, commonPrefix := range listResponse.CommonPrefixes { - directories = append(directories, commonPrefix[0:len(commonPrefix)-1]) + directories = append(directories, strings.Replace(commonPrefix[0:len(commonPrefix)-1], d.s3Path(""), "", 1)) } if listResponse.IsTruncated { - listResponse, err = d.Bucket.List(path, "/", listResponse.NextMarker, listPartsMax) + listResponse, err = d.Bucket.List(d.s3Path(path), "/", listResponse.NextMarker, listMax) if err != nil { return nil, err } @@ -242,12 +533,17 @@ func (d *Driver) List(path string) ([]string, error) { // Move moves an object stored at sourcePath to destPath, removing the original // object. func (d *Driver) Move(sourcePath string, destPath string) error { + if !storagedriver.PathRegexp.MatchString(sourcePath) { + return storagedriver.InvalidPathError{Path: sourcePath} + } else if !storagedriver.PathRegexp.MatchString(destPath) { + return storagedriver.InvalidPathError{Path: destPath} + } + /* This is terrible, but aws doesn't have an actual move. 
*/ - _, err := d.Bucket.PutCopy(destPath, getPermissions(), - s3.CopyOptions{Options: d.getOptions(), MetadataDirective: "", ContentType: d.getContentType()}, - d.Bucket.Name+"/"+sourcePath) + _, err := d.Bucket.PutCopy(d.s3Path(destPath), getPermissions(), + s3.CopyOptions{Options: d.getOptions(), ContentType: d.getContentType()}, d.Bucket.Name+"/"+d.s3Path(sourcePath)) if err != nil { - return storagedriver.PathNotFoundError{Path: sourcePath} + return parseError(sourcePath, err) } return d.Delete(sourcePath) @@ -255,12 +551,16 @@ func (d *Driver) Move(sourcePath string, destPath string) error { // Delete recursively deletes all objects stored at "path" and its subpaths. func (d *Driver) Delete(path string) error { - listResponse, err := d.Bucket.List(path, "", "", listPartsMax) + if !storagedriver.PathRegexp.MatchString(path) { + return storagedriver.InvalidPathError{Path: path} + } + + listResponse, err := d.Bucket.List(d.s3Path(path), "", "", listMax) if err != nil || len(listResponse.Contents) == 0 { return storagedriver.PathNotFoundError{Path: path} } - s3Objects := make([]s3.Object, listPartsMax) + s3Objects := make([]s3.Object, listMax) for len(listResponse.Contents) > 0 { for index, key := range listResponse.Contents { @@ -272,7 +572,7 @@ func (d *Driver) Delete(path string) error { return nil } - listResponse, err = d.Bucket.List(path, "", "", listPartsMax) + listResponse, err = d.Bucket.List(d.s3Path(path), "", "", listMax) if err != nil { return err } @@ -281,35 +581,16 @@ func (d *Driver) Delete(path string) error { return nil } -func (d *Driver) getHighestIDMulti(path string) (multi *s3.Multi, err error) { - multis, _, err := d.Bucket.ListMulti(path, "") - if err != nil && !hasCode(err, "NoSuchUpload") { - return nil, err - } - - uploadID := "" - - if len(multis) > 0 { - for _, m := range multis { - if m.Key == path && m.UploadId >= uploadID { - uploadID = m.UploadId - multi = m - } - } - return multi, nil - } - multi, err = d.Bucket.InitMulti(path, 
d.getContentType(), getPermissions(), d.getOptions()) - return multi, err +func (d *Driver) s3Path(path string) string { + return strings.TrimLeft(strings.TrimRight(d.rootDirectory, "/")+path, "/") } -func (d *Driver) getAllParts(path string) (*s3.Multi, []s3.Part, error) { - multi, err := d.getHighestIDMulti(path) - if err != nil { - return nil, nil, err +func parseError(path string, err error) error { + if s3Err, ok := err.(*s3.Error); ok && s3Err.Code == "NoSuchKey" { + return storagedriver.PathNotFoundError{Path: path} } - parts, err := multi.ListParts() - return multi, parts, err + return err } func hasCode(err error, code string) bool { diff --git a/storagedriver/s3/s3_test.go b/storagedriver/s3/s3_test.go index fd17cd58a..32af24ab2 100644 --- a/storagedriver/s3/s3_test.go +++ b/storagedriver/s3/s3_test.go @@ -1,8 +1,7 @@ -// +build ignore - package s3 import ( + "io/ioutil" "os" "strconv" "testing" @@ -22,13 +21,18 @@ func init() { secretKey := os.Getenv("AWS_SECRET_KEY") bucket := os.Getenv("S3_BUCKET") encrypt := os.Getenv("S3_ENCRYPT") + region := os.Getenv("AWS_REGION") + root, err := ioutil.TempDir("", "driver-") + if err != nil { + panic(err) + } s3DriverConstructor := func(region aws.Region) (storagedriver.StorageDriver, error) { shouldEncrypt, err := strconv.ParseBool(encrypt) if err != nil { return nil, err } - return New(accessKey, secretKey, region, shouldEncrypt, bucket) + return New(accessKey, secretKey, bucket, root, region, shouldEncrypt) } // Skip S3 storage driver tests if environment variable parameters are not provided @@ -39,18 +43,20 @@ func init() { return "" } - for _, region := range aws.Regions { - if region == aws.USGovWest { - continue - } + // for _, region := range aws.Regions { + // if region == aws.USGovWest { + // continue + // } - testsuites.RegisterInProcessSuite(s3DriverConstructor(region), skipCheck) - testsuites.RegisterIPCSuite(driverName, map[string]string{ - "accesskey": accessKey, - "secretkey": secretKey, - "region": 
region.Name, - "bucket": bucket, - "encrypt": encrypt, - }, skipCheck) - } + testsuites.RegisterInProcessSuite(func() (storagedriver.StorageDriver, error) { + return s3DriverConstructor(aws.GetRegion(region)) + }, skipCheck) + // testsuites.RegisterIPCSuite(driverName, map[string]string{ + // "accesskey": accessKey, + // "secretkey": secretKey, + // "region": region.Name, + // "bucket": bucket, + // "encrypt": encrypt, + // }, skipCheck) + // } } diff --git a/storagedriver/storagedriver.go b/storagedriver/storagedriver.go index f86e3d1eb..6ec0d244f 100644 --- a/storagedriver/storagedriver.go +++ b/storagedriver/storagedriver.go @@ -49,8 +49,6 @@ type StorageDriver interface { // WriteStream stores the contents of the provided io.ReadCloser at a // location designated by the given path. - // The driver will know it has received the full contents when it has read - // "size" bytes. // May be used to resume writing a stream by providing a nonzero offset. // The offset must be no larger than the CurrentSize for this path. 
WriteStream(path string, offset int64, reader io.Reader) (nn int64, err error) diff --git a/storagedriver/testsuites/testsuites.go b/storagedriver/testsuites/testsuites.go index 64aa1e814..25a066f37 100644 --- a/storagedriver/testsuites/testsuites.go +++ b/storagedriver/testsuites/testsuites.go @@ -362,7 +362,7 @@ func (suite *DriverSuite) TestContinueStreamAppend(c *check.C) { filename := randomPath(32) defer suite.StorageDriver.Delete(firstPart(filename)) - chunkSize := int64(10 * 1024 * 1024) + chunkSize := int64(5 * 1024 * 1024) contentsChunk1 := randomContents(chunkSize) contentsChunk2 := randomContents(chunkSize) @@ -687,9 +687,11 @@ func (suite *DriverSuite) TestStatCall(c *check.C) { c.Assert(fi.Size(), check.Equals, int64(0)) c.Assert(fi.IsDir(), check.Equals, true) - if start.After(fi.ModTime()) { - c.Errorf("modtime %s before file created (%v)", fi.ModTime(), start) - } + // Directories do not need to support ModTime, since key-value stores + // cannot support it efficiently. 
+ // if start.After(fi.ModTime()) { + // c.Errorf("modtime %s before file created (%v)", fi.ModTime(), start) + // } if fi.ModTime().After(expectedModTime) { c.Errorf("modtime %s after file created (%v)", fi.ModTime(), expectedModTime) @@ -763,6 +765,54 @@ func (suite *DriverSuite) TestConcurrentFileStreams(c *check.C) { wg.Wait() } +// TestEventualConsistency checks that if stat says that a file is a certain size, then +// you can freely read from the file (this is the only guarantee that the driver needs to provide) +func (suite *DriverSuite) TestEventualConsistency(c *check.C) { + if testing.Short() { + c.Skip("Skipping test in short mode") + } + + filename := randomPath(32) + defer suite.StorageDriver.Delete(firstPart(filename)) + + var offset int64 + var misswrites int + var chunkSize int64 = 32 + + for i := 0; i < 1024; i++ { + contents := randomContents(chunkSize) + read, err := suite.StorageDriver.WriteStream(filename, offset, bytes.NewReader(contents)) + c.Assert(err, check.IsNil) + + fi, err := suite.StorageDriver.Stat(filename) + c.Assert(err, check.IsNil) + + // We are most concerned with being able to read data as soon as Stat declares + // it is uploaded. This is the strongest guarantee that some drivers (that guarantee + // at best eventual consistency) absolutely need to provide. 
+ if fi.Size() == offset+chunkSize { + reader, err := suite.StorageDriver.ReadStream(filename, offset) + c.Assert(err, check.IsNil) + + readContents, err := ioutil.ReadAll(reader) + c.Assert(err, check.IsNil) + + c.Assert(readContents, check.DeepEquals, contents) + + reader.Close() + offset += read + } else { + misswrites++ + } + } + + if misswrites > 0 { + c.Log("There were " + string(misswrites) + " occurences of a write not being instantly available.") + } + + c.Assert(misswrites, check.Not(check.Equals), 1024) +} + // BenchmarkPutGetEmptyFiles benchmarks PutContent/GetContent for 0B files func (suite *DriverSuite) BenchmarkPutGetEmptyFiles(c *check.C) { suite.benchmarkPutGetFiles(c, 0)