diff --git a/reference/regexp.go b/reference/regexp.go index 42f86b8be..2dfcd8f96 100644 --- a/reference/regexp.go +++ b/reference/regexp.go @@ -2,13 +2,13 @@ package reference import "regexp" -var ( - // alphaNumeric defines the alpha numeric atom, typically a +const ( + // alphanumeric defines the alphanumeric atom, typically a // component of names. This only allows lower case characters and digits. - alphaNumeric = `[a-z0-9]+` + alphanumeric = `[a-z0-9]+` // separator defines the separators allowed to be embedded in name - // components. This allow one period, one or two underscore and multiple + // components. This allows one period, one or two underscore and multiple // dashes. Repeated dashes and underscores are intentionally treated // differently. In order to support valid hostnames as name components, // supporting repeated dash was added. Additionally double underscore is @@ -16,33 +16,43 @@ var ( // supported names. separator = `(?:[._]|__|[-]*)` - // nameComponent restricts registry path component names to start - // with at least one letter or number, with following parts able to be - // separated by one period, one or two underscore and multiple dashes. - nameComponent = expression( - alphaNumeric, - optional(repeated(separator, alphaNumeric))) - // domainNameComponent restricts the registry domain component of a // repository name to start with a component as defined by DomainRegexp. domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])` + // tag matches valid tag names. From docker/docker:graph/tags.go. + tag = `[\w][\w.-]{0,127}` + + // digestPat matches well-formed digests, including algorithm (e.g. "sha256:"). + // + // TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/go-digest@v1.0.0#DigestRegexp + // so that go-digest defines the canonical format. Note that the go-digest is + // more relaxed: + // - it allows multiple algorithms (e.g. 
"sha256+b64:") to allow + // future expansion of supported algorithms. + // - it allows the "<encoded>" value to use urlsafe base64 encoding as defined + // in [rfc4648, section 5]. + // + // [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5. + digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` + + // identifier is the format for a content addressable identifier using sha256. + // These identifiers are like digests without the algorithm, since sha256 is used. + identifier = `([a-f0-9]{64})` + // ipv6address are enclosed between square brackets and may be represented // in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format // are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as // IPv4-Mapped are deliberately excluded. - ipv6address = expression( - literal(`[`), `(?:[a-fA-F0-9:]+)`, literal(`]`), - ) + ipv6address = `\[(?:[a-fA-F0-9:]+)\]` +) +var ( // domainName defines the structure of potential domain components // that may be part of image names. This is purposely a subset of what is // allowed by DNS to ensure backwards compatibility with Docker image // names. This includes IPv4 addresses on decimal format. - domainName = expression( - domainNameComponent, - optional(repeated(literal(`.`), domainNameComponent)), - ) + domainName = expression(domainNameComponent, optional(repeated(literal(`.`), domainNameComponent))) // host defines the structure of potential domains based on the URI // Host subcomponent on rfc3986. It may be a subset of DNS domain name, @@ -53,69 +63,63 @@ var ( // allowed by the URI Host subcomponent on rfc3986 to ensure backwards // compatibility with Docker image names. - domain = expression( - host, - optional(literal(`:`), `[0-9]+`)) + domain = expression(host, optional(literal(`:`), `[0-9]+`)) - // DomainRegexp defines the structure of potential domain components - // that may be part of image names. 
This is purposely a subset of what is - // allowed by DNS to ensure backwards compatibility with Docker image - // names. + // DomainRegexp matches hostname or IP-addresses, optionally including a port + // number. It defines the structure of potential domain components that may be + // part of image names. This is purposely a subset of what is allowed by DNS to + // ensure backwards compatibility with Docker image names. It may be a subset of + // DNS domain name, an IPv4 address in decimal format, or an IPv6 address between + // square brackets (excluding zone identifiers as defined by [rfc6874] or special + // addresses such as IPv4-Mapped). + // + // [rfc6874]: https://www.rfc-editor.org/rfc/rfc6874. DomainRegexp = regexp.MustCompile(domain) - tag = `[\w][\w.-]{0,127}` // TagRegexp matches valid tag names. From docker/docker:graph/tags.go. TagRegexp = regexp.MustCompile(tag) - anchoredTag = anchored(tag) // anchoredTagRegexp matches valid tag names, anchored at the start and // end of the matched string. - anchoredTagRegexp = regexp.MustCompile(anchoredTag) + anchoredTagRegexp = regexp.MustCompile(anchored(tag)) - digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` - // DigestRegexp matches valid digests. + // DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:"). DigestRegexp = regexp.MustCompile(digestPat) - anchoredDigest = anchored(digestPat) // anchoredDigestRegexp matches valid digests, anchored at the start and // end of the matched string. - anchoredDigestRegexp = regexp.MustCompile(anchoredDigest) + anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat)) + + // nameComponent restricts registry path component names to start + // with at least one letter or number, with following parts able to be + // separated by one period, one or two underscore and multiple dashes. 
+ nameComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric))) + namePat = expression(optional(domain, literal(`/`)), nameComponent, optional(repeated(literal(`/`), nameComponent))) - namePat = expression( - optional(domain, literal(`/`)), - nameComponent, - optional(repeated(literal(`/`), nameComponent))) // NameRegexp is the format for the name component of references. The // regexp has capturing groups for the domain and name part omitting // the separating forward slash from either. NameRegexp = regexp.MustCompile(namePat) - anchoredName = anchored( - optional(capture(domain), literal(`/`)), - capture(nameComponent, - optional(repeated(literal(`/`), nameComponent)))) // anchoredNameRegexp is used to parse a name value, capturing the // domain and trailing components. - anchoredNameRegexp = regexp.MustCompile(anchoredName) + anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), literal(`/`)), capture(nameComponent, optional(repeated(literal(`/`), nameComponent))))) + + referencePat = anchored(capture(namePat), optional(literal(":"), capture(tag)), optional(literal("@"), capture(digestPat))) - referencePat = anchored(capture(namePat), - optional(literal(":"), capture(tag)), - optional(literal("@"), capture(digestPat))) // ReferenceRegexp is the full supported format of a reference. The regexp // is anchored and has capturing groups for name, tag, and digest // components. ReferenceRegexp = regexp.MustCompile(referencePat) - identifier = `([a-f0-9]{64})` // IdentifierRegexp is the format for string identifier used as a // content addressable identifier using sha256. These identifiers // are like digests without the algorithm, since sha256 is used. IdentifierRegexp = regexp.MustCompile(identifier) - anchoredIdentifier = anchored(identifier) // anchoredIdentifierRegexp is used to check or match an // identifier value, anchored at start and end of string. 
- anchoredIdentifierRegexp = regexp.MustCompile(anchoredIdentifier) + anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier)) ) // literal compiles s into a literal regular expression, escaping any regexp