distribution/reference/regexp.go

package reference

import "regexp"

var (
	// alphaNumericRegexp defines the alpha numeric atom, typically a
	// component of names. This only allows lower case characters and digits,
	// and requires at least one of them.
	alphaNumericRegexp = match(`[a-z0-9]+`)

	// separatorRegexp defines the separators allowed to be embedded in name
	// components. This allows one period, one or two underscores, or any
	// number of dashes.
	//
	// NOTE: the dash alternative is written with `*`, so this expression can
	// also match the empty string.
	separatorRegexp = match(`(?:[._]|__|[-]*)`)

	// nameComponentRegexp restricts registry path component names to start
	// with at least one lower case letter or digit, optionally followed by
	// one or more further alpha numeric runs, each preceded by a separator
	// as defined by separatorRegexp.
	nameComponentRegexp = expression(
		alphaNumericRegexp,
		optional(repeated(separatorRegexp, alphaNumericRegexp)))

	// domainComponentRegexp matches a single component of a registry domain
	// (the text between two periods). A component consists of letters of
	// either case and digits; dashes are permitted only between the first
	// and last character. The port is not handled here, see domainRegexp.
	domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`)

	// domainRegexp defines the structure of potential domain components
	// that may be part of image names: one or more domain components
	// separated by periods, optionally followed by a colon and a numeric
	// port. This is purposely a subset of what is allowed by DNS to ensure
	// backwards compatibility with Docker image names.
	domainRegexp = expression(
		domainComponentRegexp,
		optional(repeated(literal(`.`), domainComponentRegexp)),
		optional(literal(`:`), match(`[0-9]+`)))

	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
	// A tag starts with a word character and continues with up to 127 word
	// characters, periods or dashes. The expression is not anchored.
	TagRegexp = match(`[\w][\w.-]{0,127}`)

	// anchoredTagRegexp matches valid tag names, anchored at the start and
	// end of the matched string.
	anchoredTagRegexp = anchored(TagRegexp)

	// DigestRegexp matches valid digests: an algorithm made of one or more
	// alpha numeric components (each starting with a letter) joined by one
	// of `-`, `_`, `+` or `.`, then a colon, then at least 32 hexadecimal
	// digits. The expression is not anchored.
	DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`)

	// anchoredDigestRegexp matches valid digests, anchored at the start and
	// end of the matched string.
	anchoredDigestRegexp = anchored(DigestRegexp)

	// NameRegexp is the format for the name component of references: an
	// optional domain followed by a forward slash, then one or more name
	// components separated by forward slashes. The expression is not
	// anchored and contains no capturing groups; anchoredNameRegexp is the
	// variant that captures the domain and the remaining path.
	NameRegexp = expression(
		optional(domainRegexp, literal(`/`)),
		nameComponentRegexp,
		optional(repeated(literal(`/`), nameComponentRegexp)))

	// anchoredNameRegexp is used to parse a name value, capturing the
	// domain and trailing components. Group 1 is the domain (without the
	// separating forward slash) and group 2 is the slash-separated name
	// components that follow it.
	anchoredNameRegexp = anchored(
		optional(capture(domainRegexp), literal(`/`)),
		capture(nameComponentRegexp,
			optional(repeated(literal(`/`), nameComponentRegexp))))

	// ReferenceRegexp is the full supported format of a reference. The regexp
	// is anchored and has capturing groups for name, tag, and digest
	// components, in that order. The tag is introduced by `:` and the digest
	// by `@`; both are optional and the delimiters are not captured.
	ReferenceRegexp = anchored(capture(NameRegexp),
		optional(literal(":"), capture(TagRegexp)),
		optional(literal("@"), capture(DigestRegexp)))

	// IdentifierRegexp is the format for string identifier used as a
	// content addressable identifier using sha256. These identifiers
	// are like digests without the algorithm, since sha256 is used.
	// It matches exactly 64 lower case hexadecimal characters inside a
	// capturing group and is not anchored.
	IdentifierRegexp = match(`([a-f0-9]{64})`)

	// ShortIdentifierRegexp is the format used to represent a prefix
	// of an identifier. A prefix may be used to match a sha256 identifier
	// within a list of trusted identifiers. It matches between 6 and 64
	// lower case hexadecimal characters inside a capturing group and is not
	// anchored.
	ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`)

	// anchoredIdentifierRegexp is used to check or match an
	// identifier value, anchored at start and end of string.
	anchoredIdentifierRegexp = anchored(IdentifierRegexp)

	// anchoredShortIdentifierRegexp is used to check if a value
	// is a possible identifier prefix, anchored at start and end
	// of string.
	anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp)
)

// match compiles the string to a regular expression. It is an alias for
// regexp.MustCompile, so it panics when the pattern cannot be parsed.
var match = regexp.MustCompile

// literal returns a regular expression that matches s verbatim. Any regexp
// metacharacters in s are escaped before compilation. It panics if the
// compiled expression is not reported as a complete literal.
func literal(s string) *regexp.Regexp {
	quoted := regexp.QuoteMeta(s)
	compiled := match(quoted)

	// Sanity check: after quoting, the literal prefix must cover the whole
	// expression.
	_, isLiteral := compiled.LiteralPrefix()
	if isLiteral {
		return compiled
	}

	panic("must be a literal")
}

// expression concatenates the source text of the given regular expressions,
// in order, and compiles the result as a single expression. With no
// arguments it compiles the empty pattern.
func expression(res ...*regexp.Regexp) *regexp.Regexp {
	var pattern []byte
	for i := range res {
		pattern = append(pattern, res[i].String()...)
	}

	return match(string(pattern))
}

// optional wraps the expression in a non-capturing group and makes the
// production optional.
func optional(res ...*regexp.Regexp) *regexp.Regexp {
	return match(group(expression(res...)).String() + `?`)
}

// repeated wraps the regexp in a non-capturing group to get one or more
// matches.
func repeated(res ...*regexp.Regexp) *regexp.Regexp {
	return match(group(expression(res...)).String() + `+`)
}

// group wraps the regexp in a non-capturing group.
func group(res ...*regexp.Regexp) *regexp.Regexp {
	return match(`(?:` + expression(res...).String() + `)`)
}

// capture wraps the expression in a capturing group.
func capture(res ...*regexp.Regexp) *regexp.Regexp {
	return match(`(` + expression(res...).String() + `)`)
}

// anchored anchors the regular expression by adding start and end delimiters.
func anchored(res ...*regexp.Regexp) *regexp.Regexp {
	return match(`^` + expression(res...).String() + `$`)
}
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00			`package reference`

			`import "regexp"`

			`var (`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// alphaNumericRegexp defines the alpha numeric atom, typically a`
			`// component of names. This only allows lower case characters and digits.`
			alphaNumericRegexp = match(`[a-z0-9]+`)
Update regexp to support repeated dash and double underscore In order to support valid hostnames as name components, supporting repeated dash was added. Additionally double underscore is now allowed as a separator to loosen the restriction for previously supported names. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-10-09 23:01:01 +00:00
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// separatorRegexp defines the separators allowed to be embedded in name`
			`// components. This allow one period, one or two underscore and multiple`
			`// dashes.`
			separatorRegexp = match(`(?:[._]|__|[-]*)`)
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// nameComponentRegexp restricts registry path component names to start`
			`// with at least one letter or number, with following parts able to be`
			`// separated by one period, one or two underscore and multiple dashes.`
			`nameComponentRegexp = expression(`
			`alphaNumericRegexp,`
			`optional(repeated(separatorRegexp, alphaNumericRegexp)))`
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			`// domainComponentRegexp restricts the registry domain component of a`
			`// repository name to start with a component as defined by domainRegexp`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// and followed by an optional port.`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`)
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			`// domainRegexp defines the structure of potential domain components`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// that may be part of image names. This is purposely a subset of what is`
			`// allowed by DNS to ensure backwards compatibility with Docker image`
			`// names.`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			`domainRegexp = expression(`
			`domainComponentRegexp,`
			optional(repeated(literal(`.`), domainComponentRegexp)),
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			optional(literal(`:`), match(`[0-9]+`)))
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
			`// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			TagRegexp = match(`[\w][\w.-]{0,127}`)
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
			`// anchoredTagRegexp matches valid tag names, anchored at the start and`
			`// end of the matched string.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`anchoredTagRegexp = anchored(TagRegexp)`
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
Add WithTag and WithDigest combinator functions These functions allow a Named type to be combined with a tag or a digest. WithTag will replace the ImageReference function in github.com/docker/docker/utils as the Docker Engine transitions to the reference package. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com> 2015-10-10 00:09:54 +00:00			`// DigestRegexp matches valid digests.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`)
Add WithTag and WithDigest combinator functions These functions allow a Named type to be combined with a tag or a digest. WithTag will replace the ImageReference function in github.com/docker/docker/utils as the Docker Engine transitions to the reference package. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com> 2015-10-10 00:09:54 +00:00
			`// anchoredDigestRegexp matches valid digests, anchored at the start and`
			`// end of the matched string.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`anchoredDigestRegexp = anchored(DigestRegexp)`
Add WithTag and WithDigest combinator functions These functions allow a Named type to be combined with a tag or a digest. WithTag will replace the ImageReference function in github.com/docker/docker/utils as the Docker Engine transitions to the reference package. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com> 2015-10-10 00:09:54 +00:00
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00			`// NameRegexp is the format for the name component of references. The`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			`// regexp has capturing groups for the domain and name part omitting`
Typo fixes in comments Correct spelling of words in source code comments. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com> 2016-02-11 00:26:29 +00:00			`// the separating forward slash from either.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`NameRegexp = expression(`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			optional(domainRegexp, literal(`/`)),
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`nameComponentRegexp,`
			optional(repeated(literal(`/`), nameComponentRegexp)))
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// anchoredNameRegexp is used to parse a name value, capturing the`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			`// domain and trailing components.`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`anchoredNameRegexp = anchored(`
Split apart repository reference into domain and path Allows having other parsers which are capable of unambiguously keeping domain and path separated in a Reference type. Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-09 18:32:23 +00:00			optional(capture(domainRegexp), literal(`/`)),
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`capture(nameComponentRegexp,`
			optional(repeated(literal(`/`), nameComponentRegexp))))
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00			`// ReferenceRegexp is the full supported format of a reference. The regexp`
			`// is anchored and has capturing groups for name, tag, and digest`
			`// components.`
			`ReferenceRegexp = anchored(capture(NameRegexp),`
			`optional(literal(":"), capture(TagRegexp)),`
			`optional(literal("@"), capture(DigestRegexp)))`
Add identifier grammar Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2016-06-15 21:04:21 +00:00
			`// IdentifierRegexp is the format for string identifier used as a`
			`// content addressable identifier using sha256. These identifiers`
			`// are like digests without the algorithm, since sha256 is used.`
			IdentifierRegexp = match(`([a-f0-9]{64})`)

			`// ShortIdentifierRegexp is the format used to represent a prefix`
			`// of an identifier. A prefix may be used to match a sha256 identifier`
			`// within a list of trusted identifiers.`
			ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`)

			`// anchoredIdentifierRegexp is used to check or match an`
			`// identifier value, anchored at start and end of string.`
			`anchoredIdentifierRegexp = anchored(IdentifierRegexp)`

			`// anchoredShortIdentifierRegexp is used to check if a value`
			`// is a possible identifier prefix, anchored at start and end`
			`// of string.`
			`anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp)`
Update to provide small and clear interfaces Signed-off-by: Derek McGowan <derek@mcgstyle.net> (github: dmcgowan) 2015-09-08 23:00:48 +00:00			`)`
reference: refactor grammar and regular expressions To resolve some inconsistencies between the grammar and the regular expressions, the grammar has been slightly refactored to match the intent. The regular expressions have been redefined with the same structure to make it easier to verify the grammar is correct. Signed-off-by: Stephen J Day <stephen.day@docker.com> 2015-12-04 22:40:09 +00:00
			`// match compiles the string to a regular expression.`
			`var match = regexp.MustCompile`

			`// literal compiles s into a literal regular expression, escaping any regexp`
			`// reserved characters.`
			`func literal(s string) *regexp.Regexp {`
			`re := match(regexp.QuoteMeta(s))`

			`if _, complete := re.LiteralPrefix(); !complete {`
			`panic("must be a literal")`
			`}`

			`return re`
			`}`

			`// expression defines a full expression, where each regular expression must`
			`// follow the previous.`
			`func expression(res ...*regexp.Regexp) *regexp.Regexp {`
			`var s string`
			`for _, re := range res {`
			`s += re.String()`
			`}`

			`return match(s)`
			`}`

			`// optional wraps the expression in a non-capturing group and makes the`
			`// production optional.`
			`func optional(res ...*regexp.Regexp) *regexp.Regexp {`
			return match(group(expression(res...)).String() + `?`)
			`}`

			`// repeated wraps the regexp in a non-capturing group to get one or more`
			`// matches.`
			`func repeated(res ...*regexp.Regexp) *regexp.Regexp {`
			return match(group(expression(res...)).String() + `+`)
			`}`

			`// group wraps the regexp in a non-capturing group.`
			`func group(res ...*regexp.Regexp) *regexp.Regexp {`
			return match(`(?:` + expression(res...).String() + `)`)
			`}`

			`// capture wraps the expression in a capturing group.`
			`func capture(res ...*regexp.Regexp) *regexp.Regexp {`
			return match(`(` + expression(res...).String() + `)`)
			`}`

			`// anchored anchors the regular expression by adding start and end delimiters.`
			`func anchored(res ...*regexp.Regexp) *regexp.Regexp {`
			return match(`^` + expression(res...).String() + `$`)
			`}`