From 8163e20c6fcbea2d5e3d621c3bbba8a80f32cf8b Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 13:08:45 +0100
Subject: [PATCH 01/16] reference: splitDockerDomain: remove incorrect "TODO"

My mistake; I added this TODO in 552b1526c6821a84daab48cbc7f5456ae215d6c4, but it
only applies to familiarizeName.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/normalize.go | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/reference/normalize.go b/reference/normalize.go
index 47de5850..4598bf53 100644
--- a/reference/normalize.go
+++ b/reference/normalize.go
@@ -96,11 +96,6 @@ func splitDockerDomain(name string) (domain, remainder string) {
 	if domain == legacyDefaultDomain {
 		domain = defaultDomain
 	}
-	// TODO(thaJeztah): this check may be too strict, as it assumes the
-	//  "library/" namespace does not have nested namespaces. While this
-	//  is true (currently), technically it would be possible for Docker
-	//  Hub to use those (e.g. "library/distros/ubuntu:latest").
-	//  See https://github.com/distribution/distribution/pull/3769#issuecomment-1302031785.
 	if domain == defaultDomain && !strings.ContainsRune(remainder, '/') {
 		remainder = officialRepoPrefix + remainder
 	}

From 2bf5e1879e9c6c702b8d509b2636a91f71465ee0 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 00:29:58 +0100
Subject: [PATCH 02/16] reference: remove left-over occurrences of
 "short-identifier"

This was removed in 6d4f62d7fdfa25bd4bb42a18995c50aeededc0d6, which forgot
to remove it from the grammar.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/reference.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/reference/reference.go b/reference/reference.go
index 3499fbf8..b6113642 100644
--- a/reference/reference.go
+++ b/reference/reference.go
@@ -23,7 +23,6 @@
 //	digest-hex                      := /[0-9a-fA-F]{32,}/ ; At least 128 bit digest value
 //
 //	identifier                      := /[a-f0-9]{64}/
-//	short-identifier                := /[a-f0-9]{6,64}/
 package reference
 
 import (
@@ -188,7 +187,6 @@ func SplitHostname(named Named) (string, string) {
 
 // Parse parses s and returns a syntactically valid Reference.
 // If an error was encountered it is returned, along with a nil Reference.
-// NOTE: Parse will not handle short digests.
 func Parse(s string) (Reference, error) {
 	matches := ReferenceRegexp.FindStringSubmatch(s)
 	if matches == nil {
@@ -240,7 +238,6 @@ func Parse(s string) (Reference, error) {
 // the Named interface. The reference must have a name and be in the canonical
 // form, otherwise an error is returned.
 // If an error was encountered it is returned, along with a nil Reference.
-// NOTE: ParseNamed will not handle short digests.
 func ParseNamed(s string) (Named, error) {
 	named, err := ParseNormalizedNamed(s)
 	if err != nil {

From 53757ea33788ec46df1ab8592d96afed63518c7b Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 15:02:10 +0100
Subject: [PATCH 03/16] reference: ParseDockerRef: slight refactor, and update
 docs

- improve documentation
- remove redundant error-check
- simplify interface matching, which slightly improves readability
- touch-up some docs

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/helpers.go   |  2 +-
 reference/normalize.go | 50 ++++++++++++++++++++++++++----------------
 reference/reference.go |  4 ++--
 reference/sort.go      | 16 ++++++++------
 4 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/reference/helpers.go b/reference/helpers.go
index 978df7ea..d10c7ef8 100644
--- a/reference/helpers.go
+++ b/reference/helpers.go
@@ -32,7 +32,7 @@ func FamiliarString(ref Reference) string {
 }
 
 // FamiliarMatch reports whether ref matches the specified pattern.
-// See https://godoc.org/path#Match for supported patterns.
+// See [path.Match] for supported patterns.
 func FamiliarMatch(pattern string, ref Reference) (bool, error) {
 	matched, err := path.Match(pattern, FamiliarString(ref))
 	if namedRef, isNamed := ref.(Named); isNamed && !matched {
diff --git a/reference/normalize.go b/reference/normalize.go
index 4598bf53..fb9172ce 100644
--- a/reference/normalize.go
+++ b/reference/normalize.go
@@ -54,31 +54,43 @@ func ParseNormalizedNamed(s string) (Named, error) {
 	return named, nil
 }
 
-// ParseDockerRef normalizes the image reference following the docker convention. This is added
-// mainly for backward compatibility.
-// The reference returned can only be either tagged or digested. For reference contains both tag
-// and digest, the function returns digested reference, e.g. docker.io/library/busybox:latest@
-// sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa will be returned as
-// docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa.
+// namedTaggedDigested is a reference that has both a tag and a digest.
+type namedTaggedDigested interface {
+	NamedTagged
+	Digested
+}
+
+// ParseDockerRef normalizes the image reference following the docker convention,
+// which allows for references to contain both a tag and a digest. It returns a
+// reference that is either tagged or digested. For references containing both
+// a tag and a digest, it returns a digested reference. For example, the following
+// reference:
+//
+//	docker.io/library/busybox:latest@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa
+//
+// Is returned as a digested reference (with the ":latest" tag removed):
+//
+//	docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa
+//
+// References that are already "tagged" or "digested" are returned unmodified:
+//
+//	// Already a digested reference
+//	docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa
+//
+//	// Already a tagged reference
+//	docker.io/library/busybox:latest
 func ParseDockerRef(ref string) (Named, error) {
 	named, err := ParseNormalizedNamed(ref)
 	if err != nil {
 		return nil, err
 	}
-	if _, ok := named.(NamedTagged); ok {
-		if canonical, ok := named.(Canonical); ok {
-			// The reference is both tagged and digested, only
-			// return digested.
-			newNamed, err := WithName(canonical.Name())
-			if err != nil {
-				return nil, err
-			}
-			newCanonical, err := WithDigest(newNamed, canonical.Digest())
-			if err != nil {
-				return nil, err
-			}
-			return newCanonical, nil
+	if canonical, ok := named.(namedTaggedDigested); ok {
+		// The reference is both tagged and digested; only return digested.
+		newNamed, err := WithName(canonical.Name())
+		if err != nil {
+			return nil, err
 		}
+		return WithDigest(newNamed, canonical.Digest())
 	}
 	return TagNameOnly(named), nil
 }
diff --git a/reference/reference.go b/reference/reference.go
index b6113642..e5d889a3 100644
--- a/reference/reference.go
+++ b/reference/reference.go
@@ -146,7 +146,7 @@ type namedRepository interface {
 	Path() string
 }
 
-// Domain returns the domain part of the Named reference
+// Domain returns the domain part of the [Named] reference.
 func Domain(named Named) string {
 	if r, ok := named.(namedRepository); ok {
 		return r.Domain()
@@ -155,7 +155,7 @@ func Domain(named Named) string {
 	return domain
 }
 
-// Path returns the name without the domain part of the Named reference
+// Path returns the name without the domain part of the [Named] reference.
 func Path(named Named) (name string) {
 	if r, ok := named.(namedRepository); ok {
 		return r.Path()
diff --git a/reference/sort.go b/reference/sort.go
index 2049c71a..416c37b0 100644
--- a/reference/sort.go
+++ b/reference/sort.go
@@ -20,14 +20,16 @@ import (
 	"sort"
 )
 
-// Sort sorts string references preferring higher information references
+// Sort sorts string references preferring higher information references.
+//
 // The precedence is as follows:
-// 1. Name + Tag + Digest
-// 2. Name + Tag
-// 3. Name + Digest
-// 4. Name
-// 5. Digest
-// 6. Parse error
+//
+//  1. [Named] + [Tagged] + [Digested] (e.g., "docker.io/library/busybox:latest@sha256:<digest>")
+//  2. [Named] + [Tagged]              (e.g., "docker.io/library/busybox:latest")
+//  3. [Named] + [Digested]            (e.g., "docker.io/library/busybox@sha256:<digest>")
+//  4. [Named]                         (e.g., "docker.io/library/busybox")
+//  5. [Digested]                      (e.g., "docker.io@sha256:<digest>")
+//  6. Parse error
 func Sort(references []string) []string {
 	var prefs []Reference
 	var bad []string

From 10eace9a5353989cb813d6a44ca75a2b888f0a56 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 12:50:58 +0100
Subject: [PATCH 04/16] reference: document consts for normalizing and legacy
 domain

These consts could use some documentation, as it won't be clear why
they exist.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/normalize.go | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/reference/normalize.go b/reference/normalize.go
index fb9172ce..5714ce89 100644
--- a/reference/normalize.go
+++ b/reference/normalize.go
@@ -8,10 +8,35 @@ import (
 )
 
 const (
+	// legacyDefaultDomain is the legacy domain for Docker Hub (which was
+	// originally named "the Docker Index"). This domain is still used for
+	// authentication and image search, which were part of the "v1" Docker
+	// registry specification.
+	//
+	// This domain will continue to be supported, but there are plans to consolidate
+	// legacy domains to new "canonical" domains. Once those domains are decided
+	// on, we must update the normalization functions, but preserve compatibility
+	// with existing installs, clients, and user configuration.
 	legacyDefaultDomain = "index.docker.io"
-	defaultDomain       = "docker.io"
-	officialRepoPrefix  = "library/"
-	defaultTag          = "latest"
+
+	// defaultDomain is the default domain used for images on Docker Hub.
+	// It is used to normalize "familiar" names to canonical names, for example,
+	// to convert "ubuntu" to "docker.io/library/ubuntu:latest".
+	//
+	// Note that the actual domain of Docker Hub's registry is registry-1.docker.io.
+	// This domain will continue to be supported, but there are plans to consolidate
+	// legacy domains to new "canonical" domains. Once those domains are decided
+	// on, we must update the normalization functions, but preserve compatibility
+	// with existing installs, clients, and user configuration.
+	defaultDomain = "docker.io"
+
+	// officialRepoPrefix is the namespace used for official images on Docker Hub.
+	// It is used to normalize "familiar" names to canonical names, for example,
+	// to convert "ubuntu" to "docker.io/library/ubuntu:latest".
+	officialRepoPrefix = "library/"
+
+	// defaultTag is the default tag if no tag is provided.
+	defaultTag = "latest"
 )
 
 // normalizedNamed represents a name which has been

From 226b21beb6ee7ca666d6118c6a28b01b292c695a Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Tue, 8 Nov 2022 16:52:02 +0100
Subject: [PATCH 05/16] reference: make some regexp vars a const, remove
 intermediate vars

This patch:

- makes regexp strings that are constant a const
- moves some variables closer to where they're used
- removes some intermediate vars
- un-wraps some lines; they're lengthy, but probably more readable than having
  them wrapped over multiple lines.
- touches-up some docs.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 98 +++++++++++++++++++++++----------------------
 1 file changed, 51 insertions(+), 47 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 42f86b8b..2dfcd8f9 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -2,13 +2,13 @@ package reference
 
 import "regexp"
 
-var (
-	// alphaNumeric defines the alpha numeric atom, typically a
+const (
+	// alphanumeric defines the alphanumeric atom, typically a
 	// component of names. This only allows lower case characters and digits.
-	alphaNumeric = `[a-z0-9]+`
+	alphanumeric = `[a-z0-9]+`
 
 	// separator defines the separators allowed to be embedded in name
-	// components. This allow one period, one or two underscore and multiple
+	// components. This allows one period, one or two underscore and multiple
 	// dashes. Repeated dashes and underscores are intentionally treated
 	// differently. In order to support valid hostnames as name components,
 	// supporting repeated dash was added. Additionally double underscore is
@@ -16,33 +16,43 @@ var (
 	// supported names.
 	separator = `(?:[._]|__|[-]*)`
 
-	// nameComponent restricts registry path component names to start
-	// with at least one letter or number, with following parts able to be
-	// separated by one period, one or two underscore and multiple dashes.
-	nameComponent = expression(
-		alphaNumeric,
-		optional(repeated(separator, alphaNumeric)))
-
 	// domainNameComponent restricts the registry domain component of a
 	// repository name to start with a component as defined by DomainRegexp.
 	domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`
 
+	// tag matches valid tag names. From docker/docker:graph/tags.go.
+	tag = `[\w][\w.-]{0,127}`
+
+	// digestPat matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
+	//
+	// TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/go-digest@v1.0.0#DigestRegexp
+	// so that go-digest defines the canonical format. Note that the go-digest is
+	// more relaxed:
+	//   - it allows multiple algorithms (e.g. "sha256+b64:<encoded>") to allow
+	//     future expansion of supported algorithms.
+	//   - it allows the "<encoded>" value to use urlsafe base64 encoding as defined
+	//     in [rfc4648, section 5].
+	//
+	// [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5
+	digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`
+
+	// identifier is the format for a content addressable identifier using sha256.
+	// These identifiers are like digests without the algorithm, since sha256 is used.
+	identifier = `([a-f0-9]{64})`
+
 	// ipv6address are enclosed between square brackets and may be represented
 	// in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format
 	// are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as
 	// IPv4-Mapped are deliberately excluded.
-	ipv6address = expression(
-		literal(`[`), `(?:[a-fA-F0-9:]+)`, literal(`]`),
-	)
+	ipv6address = `\[(?:[a-fA-F0-9:]+)\]`
+)
 
+var (
 	// domainName defines the structure of potential domain components
 	// that may be part of image names. This is purposely a subset of what is
 	// allowed by DNS to ensure backwards compatibility with Docker image
 	// names. This includes IPv4 addresses on decimal format.
-	domainName = expression(
-		domainNameComponent,
-		optional(repeated(literal(`.`), domainNameComponent)),
-	)
+	domainName = expression(domainNameComponent, optional(repeated(literal(`.`), domainNameComponent)))
 
 	// host defines the structure of potential domains based on the URI
 	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
@@ -53,69 +63,63 @@ var (
 
 	// allowed by the URI Host subcomponent on rfc3986 to ensure backwards
 	// compatibility with Docker image names.
-	domain = expression(
-		host,
-		optional(literal(`:`), `[0-9]+`))
+	domain = expression(host, optional(literal(`:`), `[0-9]+`))
 
-	// DomainRegexp defines the structure of potential domain components
-	// that may be part of image names. This is purposely a subset of what is
-	// allowed by DNS to ensure backwards compatibility with Docker image
-	// names.
+	// DomainRegexp matches hostname or IP-addresses, optionally including a port
+	// number. It defines the structure of potential domain components that may be
+	// part of image names. This is purposely a subset of what is allowed by DNS to
+	// ensure backwards compatibility with Docker image names. It may be a subset of
+	// DNS domain name, an IPv4 address in decimal format, or an IPv6 address between
+	// square brackets (excluding zone identifiers as defined by [rfc6874] or special
+	// addresses such as IPv4-Mapped).
+	//
+	// [rfc6874]: https://www.rfc-editor.org/rfc/rfc6874
 	DomainRegexp = regexp.MustCompile(domain)
 
-	tag = `[\w][\w.-]{0,127}`
 	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
 	TagRegexp = regexp.MustCompile(tag)
 
-	anchoredTag = anchored(tag)
 	// anchoredTagRegexp matches valid tag names, anchored at the start and
 	// end of the matched string.
-	anchoredTagRegexp = regexp.MustCompile(anchoredTag)
+	anchoredTagRegexp = regexp.MustCompile(anchored(tag))
 
-	digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`
-	// DigestRegexp matches valid digests.
+	// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
 	DigestRegexp = regexp.MustCompile(digestPat)
 
-	anchoredDigest = anchored(digestPat)
 	// anchoredDigestRegexp matches valid digests, anchored at the start and
 	// end of the matched string.
-	anchoredDigestRegexp = regexp.MustCompile(anchoredDigest)
+	anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
+
+	// nameComponent restricts registry path component names to start
+	// with at least one letter or number, with following parts able to be
+	// separated by one period, one or two underscore and multiple dashes.
+	nameComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
+	namePat       = expression(optional(domain, literal(`/`)), nameComponent, optional(repeated(literal(`/`), nameComponent)))
 
-	namePat = expression(
-		optional(domain, literal(`/`)),
-		nameComponent,
-		optional(repeated(literal(`/`), nameComponent)))
 	// NameRegexp is the format for the name component of references. The
 	// regexp has capturing groups for the domain and name part omitting
 	// the separating forward slash from either.
 	NameRegexp = regexp.MustCompile(namePat)
 
-	anchoredName = anchored(
-		optional(capture(domain), literal(`/`)),
-		capture(nameComponent,
-			optional(repeated(literal(`/`), nameComponent))))
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchoredName)
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), literal(`/`)), capture(nameComponent, optional(repeated(literal(`/`), nameComponent)))))
+
+	referencePat = anchored(capture(namePat), optional(literal(":"), capture(tag)), optional(literal("@"), capture(digestPat)))
 
-	referencePat = anchored(capture(namePat),
-		optional(literal(":"), capture(tag)),
-		optional(literal("@"), capture(digestPat)))
 	// ReferenceRegexp is the full supported format of a reference. The regexp
 	// is anchored and has capturing groups for name, tag, and digest
 	// components.
 	ReferenceRegexp = regexp.MustCompile(referencePat)
 
-	identifier = `([a-f0-9]{64})`
 	// IdentifierRegexp is the format for string identifier used as a
 	// content addressable identifier using sha256. These identifiers
 	// are like digests without the algorithm, since sha256 is used.
 	IdentifierRegexp = regexp.MustCompile(identifier)
 
-	anchoredIdentifier = anchored(identifier)
 	// anchoredIdentifierRegexp is used to check or match an
 	// identifier value, anchored at start and end of string.
-	anchoredIdentifierRegexp = regexp.MustCompile(anchoredIdentifier)
+	anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
 )
 
 // literal compiles s into a literal regular expression, escaping any regexp

From 32a4d8e39f41b225f1dbc59e0890ae29609ad4d8 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Thu, 10 Nov 2022 23:28:37 +0100
Subject: [PATCH 06/16] reference: fix docs for NameRegexp

NameRegexp does not have capturing groups, so updating the documentation
to reflect that.

To verify if this was an unintentional regression, I looked up the commit
that introduced this regex (31a448a628b61bc50d4790b49c489b5747ebeca5), and
it looks like it never had capturing groups, so this was just a mistake in
the docs.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 2dfcd8f9..dd9e06b6 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -96,9 +96,8 @@ var (
 	nameComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
 	namePat       = expression(optional(domain, literal(`/`)), nameComponent, optional(repeated(literal(`/`), nameComponent)))
 
-	// NameRegexp is the format for the name component of references. The
-	// regexp has capturing groups for the domain and name part omitting
-	// the separating forward slash from either.
+	// NameRegexp is the format for the name component of references, including
+	// an optional domain and port, but without tag or digest suffix.
 	NameRegexp = regexp.MustCompile(namePat)
 
 	// anchoredNameRegexp is used to parse a name value, capturing the

From a7e7ff933cd043e67344ca4f1893d97ec39131ee Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 11:19:43 +0100
Subject: [PATCH 07/16] reference: align docs and variables with grammar

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index dd9e06b6..bf8d340c 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -90,11 +90,11 @@ var (
 	// end of the matched string.
 	anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
 
-	// nameComponent restricts registry path component names to start
-	// with at least one letter or number, with following parts able to be
-	// separated by one period, one or two underscore and multiple dashes.
-	nameComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
-	namePat       = expression(optional(domain, literal(`/`)), nameComponent, optional(repeated(literal(`/`), nameComponent)))
+	// pathComponent restricts registry path-components to start with at least
+	// one letter or number, with following parts able to be separated by one
+	// period, one or two underscore and multiple dashes.
+	pathComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
+	namePat       = expression(optional(domain, literal(`/`)), pathComponent, optional(repeated(literal(`/`), pathComponent)))
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -102,7 +102,7 @@ var (
 
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), literal(`/`)), capture(nameComponent, optional(repeated(literal(`/`), nameComponent)))))
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), literal(`/`)), capture(pathComponent, optional(repeated(literal(`/`), pathComponent)))))
 
 	referencePat = anchored(capture(namePat), optional(literal(":"), capture(tag)), optional(literal("@"), capture(digestPat)))
 

From 1d4917d4fb8f4445232e05d58ae38e653b5509f5 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 18:59:49 +0100
Subject: [PATCH 08/16] reference: expression(): use strings.Join()

It's easier to read, and more performant:

    pkg: github.com/distribution/distribution/v3/reference
    BenchmarkExpression
    BenchmarkExpression-10    	10474380	        97.32 ns/op	      64 B/op	       4 allocs/op
    BenchmarkJoin
    BenchmarkJoin-10          	27722588	        42.71 ns/op	      24 B/op	       1 allocs/op
    PASS

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index bf8d340c..916c4e60 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -1,6 +1,9 @@
 package reference
 
-import "regexp"
+import (
+	"regexp"
+	"strings"
+)
 
 const (
 	// alphanumeric defines the alphanumeric atom, typically a
@@ -136,37 +139,32 @@ func literal(s string) string {
 // expression defines a full expression, where each regular expression must
 // follow the previous.
 func expression(res ...string) string {
-	var s string
-	for _, re := range res {
-		s += re
-	}
-
-	return s
+	return strings.Join(res, "")
 }
 
 // optional wraps the expression in a non-capturing group and makes the
 // production optional.
 func optional(res ...string) string {
-	return group(expression(res...)) + `?`
+	return group(strings.Join(res, "")) + `?`
 }
 
 // repeated wraps the regexp in a non-capturing group to get one or more
 // matches.
 func repeated(res ...string) string {
-	return group(expression(res...)) + `+`
+	return group(strings.Join(res, "")) + `+`
 }
 
 // group wraps the regexp in a non-capturing group.
 func group(res ...string) string {
-	return `(?:` + expression(res...) + `)`
+	return `(?:` + strings.Join(res, "") + `)`
 }
 
 // capture wraps the expression in a capturing group.
 func capture(res ...string) string {
-	return `(` + expression(res...) + `)`
+	return `(` + strings.Join(res, "") + `)`
 }
 
 // anchored anchors the regular expression by adding start and end delimiters.
 func anchored(res ...string) string {
-	return `^` + expression(res...) + `$`
+	return `^` + strings.Join(res, "") + `$`
 }

From c786a2bd3ec3b186f94f913bdd96db282141afc3 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 19:02:30 +0100
Subject: [PATCH 09/16] reference: inline "group()"

It was only used in a couple of places, and it is more transparent to just
inline it.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 916c4e60..9875d4ae 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -145,18 +145,13 @@ func expression(res ...string) string {
 // optional wraps the expression in a non-capturing group and makes the
 // production optional.
 func optional(res ...string) string {
-	return group(strings.Join(res, "")) + `?`
+	return `(?:` + strings.Join(res, "") + `)?`
 }
 
 // repeated wraps the regexp in a non-capturing group to get one or more
 // matches.
 func repeated(res ...string) string {
-	return group(strings.Join(res, "")) + `+`
-}
-
-// group wraps the regexp in a non-capturing group.
-func group(res ...string) string {
-	return `(?:` + strings.Join(res, "") + `)`
+	return `(?:` + strings.Join(res, "") + `)+`
 }
 
 // capture wraps the expression in a capturing group.

From 04d6592df1d2a8ceb98bfb8423b11294eb13855c Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 19:05:01 +0100
Subject: [PATCH 10/16] reference: remove "literal()" utility

With the exception of ".", none of the literals used required escaping, which made
the function rather redundant (and the extra abstraction made it harder to read).

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 9875d4ae..f6a3d877 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -55,7 +55,7 @@ var (
 	// that may be part of image names. This is purposely a subset of what is
 	// allowed by DNS to ensure backwards compatibility with Docker image
 	// names. This includes IPv4 addresses on decimal format.
-	domainName = expression(domainNameComponent, optional(repeated(literal(`.`), domainNameComponent)))
+	domainName = expression(domainNameComponent, optional(repeated(`\.`+domainNameComponent)))
 
 	// host defines the structure of potential domains based on the URI
 	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
@@ -66,7 +66,7 @@ var (
 
 	// allowed by the URI Host subcomponent on rfc3986 to ensure backwards
 	// compatibility with Docker image names.
-	domain = expression(host, optional(literal(`:`), `[0-9]+`))
+	domain = expression(host, optional(`:[0-9]+`))
 
 	// DomainRegexp matches hostname or IP-addresses, optionally including a port
 	// number. It defines the structure of potential domain components that may be
@@ -93,11 +93,11 @@ var (
 	// end of the matched string.
 	anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
 
-	// pathComponent restricts registry path-components to start with at least
-	// one letter or number, with following parts able to be separated by one
-	// period, one or two underscore and multiple dashes.
+	// pathComponent restricts path-components to start with an alphanumeric
+	// character, with following parts able to be separated by a separator
+	// (one period, one or two underscore and multiple dashes).
 	pathComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
-	namePat       = expression(optional(domain, literal(`/`)), pathComponent, optional(repeated(literal(`/`), pathComponent)))
+	namePat       = expression(optional(domain+`/`), pathComponent, optional(repeated(`/`+pathComponent)))
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -105,9 +105,9 @@ var (
 
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), literal(`/`)), capture(pathComponent, optional(repeated(literal(`/`), pathComponent)))))
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), `/`), capture(pathComponent, optional(repeated(`/`+pathComponent)))))
 
-	referencePat = anchored(capture(namePat), optional(literal(":"), capture(tag)), optional(literal("@"), capture(digestPat)))
+	referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
 
 	// ReferenceRegexp is the full supported format of a reference. The regexp
 	// is anchored and has capturing groups for name, tag, and digest
@@ -124,18 +124,6 @@ var (
 	anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
 )
 
-// literal compiles s into a literal regular expression, escaping any regexp
-// reserved characters.
-func literal(s string) string {
-	re := regexp.MustCompile(regexp.QuoteMeta(s))
-
-	if _, complete := re.LiteralPrefix(); !complete {
-		panic("must be a literal")
-	}
-
-	return re.String()
-}
-
 // expression defines a full expression, where each regular expression must
 // follow the previous.
 func expression(res ...string) string {

From f0c7c97e73f822dbb80719a82890555ce0a2a6aa Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 19:12:51 +0100
Subject: [PATCH 11/16] reference: remove remaining uses of "expression()"

The remaining uses of "expression()" were quite trivial; probably goes without
saying, but just using string-concatenating for these is more performant as well,
and removing the extra abstraction may make it easier to read;

    pkg: github.com/distribution/distribution/v3/reference
    BenchmarkExpression
    BenchmarkExpression-10    27260877        43.10 ns/op      24 B/op       1 allocs/op
    BenchmarkConcat
    BenchmarkConcat-10      1000000000         0.3154 ns/op     0 B/op       0 allocs/op
    PASS
    ok  	github.com/distribution/distribution/v3/reference	1.762s

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index f6a3d877..74862324 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -55,7 +55,7 @@ var (
 	// that may be part of image names. This is purposely a subset of what is
 	// allowed by DNS to ensure backwards compatibility with Docker image
 	// names. This includes IPv4 addresses on decimal format.
-	domainName = expression(domainNameComponent, optional(repeated(`\.`+domainNameComponent)))
+	domainName = domainNameComponent + optional(repeated(`\.`+domainNameComponent))
 
 	// host defines the structure of potential domains based on the URI
 	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
@@ -66,7 +66,7 @@ var (
 
 	// allowed by the URI Host subcomponent on rfc3986 to ensure backwards
 	// compatibility with Docker image names.
-	domain = expression(host, optional(`:[0-9]+`))
+	domain = host + optional(`:[0-9]+`)
 
 	// DomainRegexp matches hostname or IP-addresses, optionally including a port
 	// number. It defines the structure of potential domain components that may be
@@ -96,8 +96,8 @@ var (
 	// pathComponent restricts path-components to start with an alphanumeric
 	// character, with following parts able to be separated by a separator
 	// (one period, one or two underscore and multiple dashes).
-	pathComponent = expression(alphanumeric, optional(repeated(separator, alphanumeric)))
-	namePat       = expression(optional(domain+`/`), pathComponent, optional(repeated(`/`+pathComponent)))
+	pathComponent = alphanumeric + optional(repeated(separator, alphanumeric))
+	namePat       = optional(domain+`/`) + pathComponent + optional(repeated(`/`+pathComponent))
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -124,12 +124,6 @@ var (
 	anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
 )
 
-// expression defines a full expression, where each regular expression must
-// follow the previous.
-func expression(res ...string) string {
-	return strings.Join(res, "")
-}
-
 // optional wraps the expression in a non-capturing group and makes the
 // production optional.
 func optional(res ...string) string {

From 919bd8ab094dd05c78de6d21da8e73fd9caef425 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Thu, 10 Nov 2022 11:43:32 +0100
Subject: [PATCH 12/16] reference: add const for (optional) port, and rename
 "domain" variable

The `domain` variable didn't make it clear that this could include port-numbers
as well, so renaming it makes that more visible.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 74862324..7fb9ee9f 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -23,6 +23,10 @@ const (
 	// repository name to start with a component as defined by DomainRegexp.
 	domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`
 
+	// optionalPort matches an optional port-number including the port separator
+	// (e.g. ":80").
+	optionalPort = `(?::[0-9]+)?`
+
 	// tag matches valid tag names. From docker/docker:graph/tags.go.
 	tag = `[\w][\w.-]{0,127}`
 
@@ -66,7 +70,7 @@ var (
 
 	// allowed by the URI Host subcomponent on rfc3986 to ensure backwards
 	// compatibility with Docker image names.
-	domain = host + optional(`:[0-9]+`)
+	domainAndPort = host + optionalPort
 
 	// DomainRegexp matches hostname or IP-addresses, optionally including a port
 	// number. It defines the structure of potential domain components that may be
@@ -77,7 +81,7 @@ var (
 	// addresses such as IPv4-Mapped).
 	//
 	// [rfc6874]: https://www.rfc-editor.org/rfc/rfc6874.
-	DomainRegexp = regexp.MustCompile(domain)
+	DomainRegexp = regexp.MustCompile(domainAndPort)
 
 	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
 	TagRegexp = regexp.MustCompile(tag)
@@ -97,7 +101,7 @@ var (
 	// character, with following parts able to be separated by a separator
 	// (one period, one or two underscore and multiple dashes).
 	pathComponent = alphanumeric + optional(repeated(separator, alphanumeric))
-	namePat       = optional(domain+`/`) + pathComponent + optional(repeated(`/`+pathComponent))
+	namePat       = optional(domainAndPort+`/`) + pathComponent + optional(repeated(`/`+pathComponent))
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -105,7 +109,7 @@ var (
 
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domain), `/`), capture(pathComponent, optional(repeated(`/`+pathComponent)))))
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(pathComponent, optional(repeated(`/`+pathComponent)))))
 
 	referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
 

From 71a0666398d948f90b941f83899164a850aeedc1 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 19:46:45 +0100
Subject: [PATCH 13/16] reference: optional repeated == any number of times

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index 7fb9ee9f..9448938b 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -59,7 +59,7 @@ var (
 	// that may be part of image names. This is purposely a subset of what is
 	// allowed by DNS to ensure backwards compatibility with Docker image
 	// names. This includes IPv4 addresses on decimal format.
-	domainName = domainNameComponent + optional(repeated(`\.`+domainNameComponent))
+	domainName = domainNameComponent + anyTimes(`\.`+domainNameComponent)
 
 	// host defines the structure of potential domains based on the URI
 	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
@@ -100,8 +100,8 @@ var (
 	// pathComponent restricts path-components to start with an alphanumeric
 	// character, with following parts able to be separated by a separator
 	// (one period, one or two underscore and multiple dashes).
-	pathComponent = alphanumeric + optional(repeated(separator, alphanumeric))
-	namePat       = optional(domainAndPort+`/`) + pathComponent + optional(repeated(`/`+pathComponent))
+	pathComponent = alphanumeric + anyTimes(separator+alphanumeric)
+	namePat       = optional(domainAndPort+`/`) + pathComponent + anyTimes(`/`+pathComponent)
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -109,7 +109,7 @@ var (
 
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(pathComponent, optional(repeated(`/`+pathComponent)))))
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(pathComponent, anyTimes(`/`+pathComponent))))
 
 	referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
 
@@ -134,10 +134,10 @@ func optional(res ...string) string {
 	return `(?:` + strings.Join(res, "") + `)?`
 }
 
-// repeated wraps the regexp in a non-capturing group to get one or more
-// matches.
-func repeated(res ...string) string {
-	return `(?:` + strings.Join(res, "") + `)+`
+// anyTimes wraps the expression in a non-capturing group that can occur
+// any number of times.
+func anyTimes(res ...string) string {
+	return `(?:` + strings.Join(res, "") + `)*`
 }
 
 // capture wraps the expression in a capturing group.

From bbd41f40bba09ef34f532089aae8578f66130518 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Thu, 10 Nov 2022 13:25:03 +0100
Subject: [PATCH 14/16] reference: introduce remoteName variable

This pattern was used in two places, so adding an intermediate variable allows
documenting its purpose. The "remote-name" grammar (which is interchangeably
used with "path") also seemed to be missing from the grammar, so adding it.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/normalize.go | 12 ++++++------
 reference/reference.go |  3 ++-
 reference/regexp.go    | 10 ++++++++--
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/reference/normalize.go b/reference/normalize.go
index 5714ce89..43170e6f 100644
--- a/reference/normalize.go
+++ b/reference/normalize.go
@@ -58,14 +58,14 @@ func ParseNormalizedNamed(s string) (Named, error) {
 		return nil, fmt.Errorf("invalid repository name (%s), cannot specify 64-byte hexadecimal strings", s)
 	}
 	domain, remainder := splitDockerDomain(s)
-	var remoteName string
+	var remote string
 	if tagSep := strings.IndexRune(remainder, ':'); tagSep > -1 {
-		remoteName = remainder[:tagSep]
+		remote = remainder[:tagSep]
 	} else {
-		remoteName = remainder
+		remote = remainder
 	}
-	if strings.ToLower(remoteName) != remoteName {
-		return nil, fmt.Errorf("invalid reference format: repository name (%s) must be lowercase", remoteName)
+	if strings.ToLower(remote) != remote {
+		return nil, fmt.Errorf("invalid reference format: repository name (%s) must be lowercase", remote)
 	}
 
 	ref, err := Parse(domain + "/" + remainder)
@@ -120,7 +120,7 @@ func ParseDockerRef(ref string) (Named, error) {
 	return TagNameOnly(named), nil
 }
 
-// splitDockerDomain splits a repository name to domain and remotename string.
+// splitDockerDomain splits a repository name to domain and remote-name.
 // If no valid domain is found, the default domain is used. Repository name
 // needs to be already validated before.
 func splitDockerDomain(name string) (domain, remainder string) {
diff --git a/reference/reference.go b/reference/reference.go
index e5d889a3..e98c44da 100644
--- a/reference/reference.go
+++ b/reference/reference.go
@@ -4,13 +4,14 @@
 // Grammar
 //
 //	reference                       := name [ ":" tag ] [ "@" digest ]
-//	name                            := [domain '/'] path-component ['/' path-component]*
+//	name                            := [domain '/'] remote-name
 //	domain                          := host [':' port-number]
 //	host                            := domain-name | IPv4address | \[ IPv6address \]	; rfc3986 appendix-A
 //	domain-name                     := domain-component ['.' domain-component]*
 //	domain-component                := /([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])/
 //	port-number                     := /[0-9]+/
 //	path-component                  := alpha-numeric [separator alpha-numeric]*
+//	path (or "remote-name")         := path-component ['/' path-component]*
 //	alpha-numeric                   := /[a-z0-9]+/
 //	separator                       := /[_.]|__|[-]*/
 //
diff --git a/reference/regexp.go b/reference/regexp.go
index 9448938b..2974b1eb 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -101,7 +101,13 @@ var (
 	// character, with following parts able to be separated by a separator
 	// (one period, one or two underscore and multiple dashes).
 	pathComponent = alphanumeric + anyTimes(separator+alphanumeric)
-	namePat       = optional(domainAndPort+`/`) + pathComponent + anyTimes(`/`+pathComponent)
+
+	// remoteName matches the remote-name of a repository. It consists of one
+	// or more forward slash (/) delimited path-components:
+	//
+	//	pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
+	remoteName = pathComponent + anyTimes(`/`+pathComponent)
+	namePat    = optional(domainAndPort+`/`) + remoteName
 
 	// NameRegexp is the format for the name component of references, including
 	// an optional domain and port, but without tag or digest suffix.
@@ -109,7 +115,7 @@ var (
 
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
-	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(pathComponent, anyTimes(`/`+pathComponent))))
+	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
 
 	referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
 

From a4cec8ca82ae277aaa412f4e55bbc013f6d0d8e4 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Fri, 11 Nov 2022 13:00:28 +0100
Subject: [PATCH 15/16] reference: introduce const for "localhost"

Localhost is treated specially when parsing references, and always considered
to be a domain, despite not having a "." nor a ":port". Adding a const for
this, to allow documenting this special case (making it more visible).

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/normalize.go | 2 +-
 reference/regexp.go    | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/reference/normalize.go b/reference/normalize.go
index 43170e6f..f2ce13ad 100644
--- a/reference/normalize.go
+++ b/reference/normalize.go
@@ -125,7 +125,7 @@ func ParseDockerRef(ref string) (Named, error) {
 // needs to be already validated before.
 func splitDockerDomain(name string) (domain, remainder string) {
 	i := strings.IndexRune(name, '/')
-	if i == -1 || (!strings.ContainsAny(name[:i], ".:") && name[:i] != "localhost" && strings.ToLower(name[:i]) == name[:i]) {
+	if i == -1 || (!strings.ContainsAny(name[:i], ".:") && name[:i] != localhost && strings.ToLower(name[:i]) == name[:i]) {
 		domain, remainder = defaultDomain, name
 	} else {
 		domain, remainder = name[:i], name[i+1:]
diff --git a/reference/regexp.go b/reference/regexp.go
index 2974b1eb..cb8a048f 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -19,6 +19,10 @@ const (
 	// supported names.
 	separator = `(?:[._]|__|[-]*)`
 
+	// localhost is treated as a special value for domain-name. Any other
+	// domain-name without a "." or a ":port" are considered a path component.
+	localhost = `localhost`
+
 	// domainNameComponent restricts the registry domain component of a
 	// repository name to start with a component as defined by DomainRegexp.
 	domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`

From 02e88c0f156bf7aa0e0a7b2508b1d2f3b1122b22 Mon Sep 17 00:00:00 2001
From: Sebastiaan van Stijn <github@gone.nl>
Date: Wed, 9 Nov 2022 18:27:05 +0100
Subject: [PATCH 16/16] reference: move exported regexes to separate block

This makes them easier to find between the non-exported ones, and puts
them as separate sections in the generated docs. While updating, also
extended documentation for some to be more descriptive.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 reference/regexp.go | 64 +++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/reference/regexp.go b/reference/regexp.go
index cb8a048f..2af73840 100644
--- a/reference/regexp.go
+++ b/reference/regexp.go
@@ -5,6 +5,39 @@ import (
 	"strings"
 )
 
+// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
+var DigestRegexp = regexp.MustCompile(digestPat)
+
+// DomainRegexp matches hostname or IP-addresses, optionally including a port
+// number. It defines the structure of potential domain components that may be
+// part of image names. This is purposely a subset of what is allowed by DNS to
+// ensure backwards compatibility with Docker image names. It may be a subset of
+// DNS domain name, an IPv4 address in decimal format, or an IPv6 address between
+// square brackets (excluding zone identifiers as defined by [RFC 6874] or special
+// addresses such as IPv4-Mapped).
+//
+// [RFC 6874]: https://www.rfc-editor.org/rfc/rfc6874.
+var DomainRegexp = regexp.MustCompile(domainAndPort)
+
+// IdentifierRegexp is the format for string identifier used as a
+// content addressable identifier using sha256. These identifiers
+// are like digests without the algorithm, since sha256 is used.
+var IdentifierRegexp = regexp.MustCompile(identifier)
+
+// NameRegexp is the format for the name component of references, including
+// an optional domain and port, but without tag or digest suffix.
+var NameRegexp = regexp.MustCompile(namePat)
+
+// ReferenceRegexp is the full supported format of a reference. The regexp
+// is anchored and has capturing groups for name, tag, and digest
+// components.
+var ReferenceRegexp = regexp.MustCompile(referencePat)
+
+// TagRegexp matches valid tag names. From [docker/docker:graph/tags.go].
+//
+// [docker/docker:graph/tags.go]: https://github.com/moby/moby/blob/v1.6.0/graph/tags.go#L26-L28
+var TagRegexp = regexp.MustCompile(tag)
+
 const (
 	// alphanumeric defines the alphanumeric atom, typically a
 	// component of names. This only allows lower case characters and digits.
@@ -76,27 +109,10 @@ var (
 	// compatibility with Docker image names.
 	domainAndPort = host + optionalPort
 
-	// DomainRegexp matches hostname or IP-addresses, optionally including a port
-	// number. It defines the structure of potential domain components that may be
-	// part of image names. This is purposely a subset of what is allowed by DNS to
-	// ensure backwards compatibility with Docker image names. It may be a subset of
-	// DNS domain name, an IPv4 address in decimal format, or an IPv6 address between
-	// square brackets (excluding zone identifiers as defined by [rfc6874] or special
-	// addresses such as IPv4-Mapped).
-	//
-	// [rfc6874]: https://www.rfc-editor.org/rfc/rfc6874.
-	DomainRegexp = regexp.MustCompile(domainAndPort)
-
-	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
-	TagRegexp = regexp.MustCompile(tag)
-
 	// anchoredTagRegexp matches valid tag names, anchored at the start and
 	// end of the matched string.
 	anchoredTagRegexp = regexp.MustCompile(anchored(tag))
 
-	// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
-	DigestRegexp = regexp.MustCompile(digestPat)
-
 	// anchoredDigestRegexp matches valid digests, anchored at the start and
 	// end of the matched string.
 	anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
@@ -113,26 +129,12 @@ var (
 	remoteName = pathComponent + anyTimes(`/`+pathComponent)
 	namePat    = optional(domainAndPort+`/`) + remoteName
 
-	// NameRegexp is the format for the name component of references, including
-	// an optional domain and port, but without tag or digest suffix.
-	NameRegexp = regexp.MustCompile(namePat)
-
 	// anchoredNameRegexp is used to parse a name value, capturing the
 	// domain and trailing components.
 	anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
 
 	referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
 
-	// ReferenceRegexp is the full supported format of a reference. The regexp
-	// is anchored and has capturing groups for name, tag, and digest
-	// components.
-	ReferenceRegexp = regexp.MustCompile(referencePat)
-
-	// IdentifierRegexp is the format for string identifier used as a
-	// content addressable identifier using sha256. These identifiers
-	// are like digests without the algorithm, since sha256 is used.
-	IdentifierRegexp = regexp.MustCompile(identifier)
-
 	// anchoredIdentifierRegexp is used to check or match an
 	// identifier value, anchored at start and end of string.
 	anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))