Skip to content

Commit 1c89ce5

Browse files
committed
Improve how reference regexps are built
The previous implementation was doing a lot of string -> regexp -> string conversions. Signed-off-by: Paul Cacheux <[email protected]>
1 parent bb1fb61 commit 1c89ce5

1 file changed

Lines changed: 63 additions & 51 deletions

File tree

reference/regexp.go

Lines changed: 63 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -3,145 +3,157 @@ package reference
33
import "regexp"
44

55
var (
6-
// alphaNumericRegexp defines the alpha numeric atom, typically a
6+
// alphaNumeric defines the alpha numeric atom, typically a
77
// component of names. This only allows lower case characters and digits.
8-
alphaNumericRegexp = match(`[a-z0-9]+`)
8+
alphaNumeric = `[a-z0-9]+`
99

10-
// separatorRegexp defines the separators allowed to be embedded in name
10+
// separator defines the separators allowed to be embedded in name
1111
// components. This allows one period, one or two underscores and multiple
1212
// dashes. Repeated dashes and underscores are intentionally treated
1313
// differently. In order to support valid hostnames as name components,
1414
// supporting repeated dash was added. Additionally double underscore is
1515
// now allowed as a separator to loosen the restriction for previously
1616
// supported names.
17-
separatorRegexp = match(`(?:[._]|__|[-]*)`)
17+
separator = `(?:[._]|__|[-]*)`
1818

19-
// nameComponentRegexp restricts registry path component names to start
19+
// nameComponent restricts registry path component names to start
2020
// with at least one letter or number, with following parts able to be
2121
// separated by one period, one or two underscores and multiple dashes.
22-
nameComponentRegexp = expression(
23-
alphaNumericRegexp,
24-
optional(repeated(separatorRegexp, alphaNumericRegexp)))
22+
nameComponent = expression(
23+
alphaNumeric,
24+
optional(repeated(separator, alphaNumeric)))
2525

26-
// domainComponentRegexp restricts the registry domain component of a
26+
// domainComponent restricts the registry domain component of a
2727
// repository name to a single label of the domain matched by DomainRegexp;
2828
// the separating dots and the optional port are not part of this component.
29-
domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`)
29+
domainComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`
3030

31+
domain = expression(
32+
domainComponent,
33+
optional(repeated(literal(`.`), domainComponent)),
34+
optional(literal(`:`), `[0-9]+`))
3135
// DomainRegexp defines the structure of potential domain components
3236
// that may be part of image names. This is purposely a subset of what is
3337
// allowed by DNS to ensure backwards compatibility with Docker image
3438
// names.
35-
DomainRegexp = expression(
36-
domainComponentRegexp,
37-
optional(repeated(literal(`.`), domainComponentRegexp)),
38-
optional(literal(`:`), match(`[0-9]+`)))
39+
DomainRegexp = re(domain)
3940

41+
tag = `[\w][\w.-]{0,127}`
4042
// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
41-
TagRegexp = match(`[\w][\w.-]{0,127}`)
43+
TagRegexp = re(tag)
4244

45+
anchoredTag = anchored(tag)
4346
// anchoredTagRegexp matches valid tag names, anchored at the start and
4447
// end of the matched string.
45-
anchoredTagRegexp = anchored(TagRegexp)
48+
anchoredTagRegexp = re(anchoredTag)
4649

50+
digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`
4751
// DigestRegexp matches valid digests.
48-
DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`)
52+
DigestRegexp = re(digestPat)
4953

54+
anchoredDigest = anchored(digestPat)
5055
// anchoredDigestRegexp matches valid digests, anchored at the start and
5156
// end of the matched string.
52-
anchoredDigestRegexp = anchored(DigestRegexp)
57+
anchoredDigestRegexp = re(anchoredDigest)
5358

59+
namePat = expression(
60+
optional(domain, literal(`/`)),
61+
nameComponent,
62+
optional(repeated(literal(`/`), nameComponent)))
5463
// NameRegexp is the format for the name component of references. The
5564
// regexp has capturing groups for the domain and name part omitting
5665
// the separating forward slash from either.
57-
NameRegexp = expression(
58-
optional(DomainRegexp, literal(`/`)),
59-
nameComponentRegexp,
60-
optional(repeated(literal(`/`), nameComponentRegexp)))
66+
NameRegexp = re(namePat)
6167

68+
anchoredName = anchored(
69+
optional(capture(domain), literal(`/`)),
70+
capture(nameComponent,
71+
optional(repeated(literal(`/`), nameComponent))))
6272
// anchoredNameRegexp is used to parse a name value, capturing the
6373
// domain and trailing components.
64-
anchoredNameRegexp = anchored(
65-
optional(capture(DomainRegexp), literal(`/`)),
66-
capture(nameComponentRegexp,
67-
optional(repeated(literal(`/`), nameComponentRegexp))))
74+
anchoredNameRegexp = re(anchoredName)
6875

76+
referencePat = anchored(capture(namePat),
77+
optional(literal(":"), capture(tag)),
78+
optional(literal("@"), capture(digestPat)))
6979
// ReferenceRegexp is the full supported format of a reference. The regexp
7080
// is anchored and has capturing groups for name, tag, and digest
7181
// components.
72-
ReferenceRegexp = anchored(capture(NameRegexp),
73-
optional(literal(":"), capture(TagRegexp)),
74-
optional(literal("@"), capture(DigestRegexp)))
82+
ReferenceRegexp = re(referencePat)
7583

84+
identifier = `([a-f0-9]{64})`
7685
// IdentifierRegexp is the format for a string identifier used as a
7786
// content addressable identifier using sha256. These identifiers
7887
// are like digests without the algorithm, since sha256 is used.
79-
IdentifierRegexp = match(`([a-f0-9]{64})`)
88+
IdentifierRegexp = re(identifier)
8089

90+
shortIdentifier = `([a-f0-9]{6,64})`
8191
// ShortIdentifierRegexp is the format used to represent a prefix
8292
// of an identifier. A prefix may be used to match a sha256 identifier
8393
// within a list of trusted identifiers.
84-
ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`)
94+
ShortIdentifierRegexp = re(shortIdentifier)
8595

96+
anchoredIdentifier = anchored(identifier)
8697
// anchoredIdentifierRegexp is used to check or match an
8798
// identifier value, anchored at start and end of string.
88-
anchoredIdentifierRegexp = anchored(IdentifierRegexp)
99+
anchoredIdentifierRegexp = re(anchoredIdentifier)
89100

101+
anchoredShortIdentifier = anchored(shortIdentifier)
90102
// anchoredShortIdentifierRegexp is used to check if a value
91103
// is a possible identifier prefix, anchored at start and end
92104
// of string.
93-
anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp)
105+
anchoredShortIdentifierRegexp = re(anchoredShortIdentifier)
94106
)
95107

96-
// match compiles the string to a regular expression.
97-
var match = regexp.MustCompile
108+
// re compiles the string to a regular expression.
109+
var re = regexp.MustCompile
98110

99111
// literal compiles s into a literal regular expression, escaping any regexp
100112
// reserved characters.
101-
func literal(s string) *regexp.Regexp {
102-
re := match(regexp.QuoteMeta(s))
113+
func literal(s string) string {
114+
re := re(regexp.QuoteMeta(s))
103115

104116
if _, complete := re.LiteralPrefix(); !complete {
105117
panic("must be a literal")
106118
}
107119

108-
return re
120+
return re.String()
109121
}
110122

111123
// expression defines a full expression, where each regular expression must
112124
// follow the previous.
113-
func expression(res ...*regexp.Regexp) *regexp.Regexp {
125+
func expression(res ...string) string {
114126
var s string
115127
for _, re := range res {
116-
s += re.String()
128+
s += re
117129
}
118130

119-
return match(s)
131+
return s
120132
}
121133

122134
// optional wraps the expression in a non-capturing group and makes the
123135
// production optional.
124-
func optional(res ...*regexp.Regexp) *regexp.Regexp {
125-
return match(group(expression(res...)).String() + `?`)
136+
func optional(res ...string) string {
137+
return group(expression(res...)) + `?`
126138
}
127139

128140
// repeated wraps the regexp in a non-capturing group to get one or more
129141
// matches.
130-
func repeated(res ...*regexp.Regexp) *regexp.Regexp {
131-
return match(group(expression(res...)).String() + `+`)
142+
func repeated(res ...string) string {
143+
return group(expression(res...)) + `+`
132144
}
133145

134146
// group wraps the regexp in a non-capturing group.
135-
func group(res ...*regexp.Regexp) *regexp.Regexp {
136-
return match(`(?:` + expression(res...).String() + `)`)
147+
func group(res ...string) string {
148+
return `(?:` + expression(res...) + `)`
137149
}
138150

139151
// capture wraps the expression in a capturing group.
140-
func capture(res ...*regexp.Regexp) *regexp.Regexp {
141-
return match(`(` + expression(res...).String() + `)`)
152+
func capture(res ...string) string {
153+
return `(` + expression(res...) + `)`
142154
}
143155

144156
// anchored anchors the regular expression by adding start and end delimiters.
145-
func anchored(res ...*regexp.Regexp) *regexp.Regexp {
146-
return match(`^` + expression(res...).String() + `$`)
157+
func anchored(res ...string) string {
158+
return `^` + expression(res...) + `$`
147159
}

0 commit comments

Comments
 (0)