Skip to content

Commit 928e56d

Browse files
lukekarrysjoaomorenonicolo-ribaudo
authored
fix: better handling of whitespace (#591)
This is a backport of the following commits, squashed to land on `release/v6`:
- 717534e
- abdd93d
- cc6fde2
- 99d8287

Ref: #564

Co-authored-by: joaomoreno <[email protected]>
Co-authored-by: nicolo-ribaudo <[email protected]>
1 parent 39f6326 commit 928e56d

File tree

4 files changed

+158
-24
lines changed

4 files changed

+158
-24
lines changed

semver.js

+71-24
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER ||
2626
// Max safe segment length for coercion.
2727
var MAX_SAFE_COMPONENT_LENGTH = 16
2828

29+
var MAX_SAFE_BUILD_LENGTH = MAX_LENGTH - 6
30+
2931
// The actual regexps go on exports.re
3032
var re = exports.re = []
33+
var safeRe = exports.safeRe = []
3134
var src = exports.src = []
3235
var t = exports.tokens = {}
3336
var R = 0
@@ -36,6 +39,31 @@ function tok (n) {
3639
t[n] = R++
3740
}
3841

42+
var LETTERDASHNUMBER = '[a-zA-Z0-9-]'
43+
44+
// Replace some greedy regex tokens to prevent regex dos issues. These regexes are
45+
// used internally via the safeRe object since all inputs in this library get
46+
// normalized first to trim and collapse all extra whitespace. The original
47+
// regexes are exported for userland consumption and lower level usage. A
48+
// future breaking change could export the safer regex only with a note that
49+
// all input should have extra whitespace removed.
50+
var safeRegexReplacements = [
51+
['\\s', 1],
52+
['\\d', MAX_LENGTH],
53+
[LETTERDASHNUMBER, MAX_SAFE_BUILD_LENGTH],
54+
]
55+
56+
// Produce a bounded copy of a regex source string.
//
// Walks the `safeRegexReplacements` table in order. Each entry is a
// `[token, max]` pair: every literal `token*` in the source becomes
// `token{0,max}` and every literal `token+` becomes `token{1,max}`.
// Matching is plain substring matching (split/join), not regex matching,
// so only exact occurrences of the token text are rewritten.
//
// value:   regex source string to rewrite
// returns: the rewritten source string
function makeSafeRe (value) {
  return safeRegexReplacements.reduce(function (source, replacement) {
    var token = replacement[0]
    var max = replacement[1]
    // Bound the `*` form first, then the `+` form, for this token.
    var withoutStar = source.split(token + '*').join(token + '{0,' + max + '}')
    return withoutStar.split(token + '+').join(token + '{1,' + max + '}')
  }, value)
}
66+
3967
// The following Regular Expressions can be used for tokenizing,
4068
// validating, and parsing SemVer version strings.
4169

@@ -45,14 +73,14 @@ function tok (n) {
4573
tok('NUMERICIDENTIFIER')
4674
src[t.NUMERICIDENTIFIER] = '0|[1-9]\\d*'
4775
tok('NUMERICIDENTIFIERLOOSE')
48-
src[t.NUMERICIDENTIFIERLOOSE] = '[0-9]+'
76+
src[t.NUMERICIDENTIFIERLOOSE] = '\\d+'
4977

5078
// ## Non-numeric Identifier
5179
// Zero or more digits, followed by a letter or hyphen, and then zero or
5280
// more letters, digits, or hyphens.
5381

5482
tok('NONNUMERICIDENTIFIER')
55-
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-][a-zA-Z0-9-]*'
83+
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-]' + LETTERDASHNUMBER + '*'
5684

5785
// ## Main Version
5886
// Three dot-separated numeric identifiers.
@@ -94,7 +122,7 @@ src[t.PRERELEASELOOSE] = '(?:-?(' + src[t.PRERELEASEIDENTIFIERLOOSE] +
94122
// Any combination of digits, letters, or hyphens.
95123

96124
tok('BUILDIDENTIFIER')
97-
src[t.BUILDIDENTIFIER] = '[0-9A-Za-z-]+'
125+
src[t.BUILDIDENTIFIER] = LETTERDASHNUMBER + '+'
98126

99127
// ## Build Metadata
100128
// Plus sign, followed by one or more period-separated build metadata
@@ -174,6 +202,7 @@ src[t.COERCE] = '(^|[^\\d])' +
174202
'(?:$|[^\\d])'
175203
tok('COERCERTL')
176204
re[t.COERCERTL] = new RegExp(src[t.COERCE], 'g')
205+
safeRe[t.COERCERTL] = new RegExp(makeSafeRe(src[t.COERCE]), 'g')
177206

178207
// Tilde ranges.
179208
// Meaning is "reasonably at or greater than"
@@ -183,6 +212,7 @@ src[t.LONETILDE] = '(?:~>?)'
183212
tok('TILDETRIM')
184213
src[t.TILDETRIM] = '(\\s*)' + src[t.LONETILDE] + '\\s+'
185214
re[t.TILDETRIM] = new RegExp(src[t.TILDETRIM], 'g')
215+
safeRe[t.TILDETRIM] = new RegExp(makeSafeRe(src[t.TILDETRIM]), 'g')
186216
var tildeTrimReplace = '$1~'
187217

188218
tok('TILDE')
@@ -198,6 +228,7 @@ src[t.LONECARET] = '(?:\\^)'
198228
tok('CARETTRIM')
199229
src[t.CARETTRIM] = '(\\s*)' + src[t.LONECARET] + '\\s+'
200230
re[t.CARETTRIM] = new RegExp(src[t.CARETTRIM], 'g')
231+
safeRe[t.CARETTRIM] = new RegExp(makeSafeRe(src[t.CARETTRIM]), 'g')
201232
var caretTrimReplace = '$1^'
202233

203234
tok('CARET')
@@ -219,6 +250,7 @@ src[t.COMPARATORTRIM] = '(\\s*)' + src[t.GTLT] +
219250

220251
// this one has to use the /g flag
221252
re[t.COMPARATORTRIM] = new RegExp(src[t.COMPARATORTRIM], 'g')
253+
safeRe[t.COMPARATORTRIM] = new RegExp(makeSafeRe(src[t.COMPARATORTRIM]), 'g')
222254
var comparatorTrimReplace = '$1$2$3'
223255

224256
// Something like `1.2.3 - 1.2.4`
@@ -247,6 +279,14 @@ for (var i = 0; i < R; i++) {
247279
debug(i, src[i])
248280
if (!re[i]) {
249281
re[i] = new RegExp(src[i])
282+
283+
// Replace some greedy regex tokens to prevent regex dos issues. These regexes are
284+
// used internally via the safeRe object since all inputs in this library get
285+
// normalized first to trim and collapse all extra whitespace. The original
286+
// regexes are exported for userland consumption and lower level usage. A
287+
// future breaking change could export the safer regex only with a note that
288+
// all input should have extra whitespace removed.
289+
safeRe[i] = new RegExp(makeSafeRe(src[i]))
250290
}
251291
}
252292

@@ -271,7 +311,7 @@ function parse (version, options) {
271311
return null
272312
}
273313

274-
var r = options.loose ? re[t.LOOSE] : re[t.FULL]
314+
var r = options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL]
275315
if (!r.test(version)) {
276316
return null
277317
}
@@ -326,7 +366,7 @@ function SemVer (version, options) {
326366
this.options = options
327367
this.loose = !!options.loose
328368

329-
var m = version.trim().match(options.loose ? re[t.LOOSE] : re[t.FULL])
369+
var m = version.trim().match(options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL])
330370

331371
if (!m) {
332372
throw new TypeError('Invalid Version: ' + version)
@@ -771,6 +811,7 @@ function Comparator (comp, options) {
771811
return new Comparator(comp, options)
772812
}
773813

814+
comp = comp.trim().split(/\s+/).join(' ')
774815
debug('comparator', comp, options)
775816
this.options = options
776817
this.loose = !!options.loose
@@ -787,7 +828,7 @@ function Comparator (comp, options) {
787828

788829
var ANY = {}
789830
Comparator.prototype.parse = function (comp) {
790-
var r = this.options.loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
831+
var r = this.options.loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
791832
var m = comp.match(r)
792833

793834
if (!m) {
@@ -911,17 +952,24 @@ function Range (range, options) {
911952
this.loose = !!options.loose
912953
this.includePrerelease = !!options.includePrerelease
913954

914-
// First, split based on boolean or ||
955+
// First reduce all whitespace as much as possible so we do not have to rely
956+
// on potentially slow regexes like \s*. This is then stored and used for
957+
// future error messages as well.
915958
this.raw = range
916-
this.set = range.split(/\s*\|\|\s*/).map(function (range) {
959+
.trim()
960+
.split(/\s+/)
961+
.join(' ')
962+
963+
// Then split based on boolean or ||
964+
this.set = this.raw.split('||').map(function (range) {
917965
return this.parseRange(range.trim())
918966
}, this).filter(function (c) {
919967
// throw out any that are not relevant for whatever reason
920968
return c.length
921969
})
922970

923971
if (!this.set.length) {
924-
throw new TypeError('Invalid SemVer Range: ' + range)
972+
throw new TypeError('Invalid SemVer Range: ' + this.raw)
925973
}
926974

927975
this.format()
@@ -940,28 +988,27 @@ Range.prototype.toString = function () {
940988

941989
Range.prototype.parseRange = function (range) {
942990
var loose = this.options.loose
943-
range = range.trim()
944991
// `1.2.3 - 1.2.4` => `>=1.2.3 <=1.2.4`
945-
var hr = loose ? re[t.HYPHENRANGELOOSE] : re[t.HYPHENRANGE]
992+
var hr = loose ? safeRe[t.HYPHENRANGELOOSE] : safeRe[t.HYPHENRANGE]
946993
range = range.replace(hr, hyphenReplace)
947994
debug('hyphen replace', range)
948995
// `> 1.2.3 < 1.2.5` => `>1.2.3 <1.2.5`
949-
range = range.replace(re[t.COMPARATORTRIM], comparatorTrimReplace)
950-
debug('comparator trim', range, re[t.COMPARATORTRIM])
996+
range = range.replace(safeRe[t.COMPARATORTRIM], comparatorTrimReplace)
997+
debug('comparator trim', range, safeRe[t.COMPARATORTRIM])
951998

952999
// `~ 1.2.3` => `~1.2.3`
953-
range = range.replace(re[t.TILDETRIM], tildeTrimReplace)
1000+
range = range.replace(safeRe[t.TILDETRIM], tildeTrimReplace)
9541001

9551002
// `^ 1.2.3` => `^1.2.3`
956-
range = range.replace(re[t.CARETTRIM], caretTrimReplace)
1003+
range = range.replace(safeRe[t.CARETTRIM], caretTrimReplace)
9571004

9581005
// normalize spaces
9591006
range = range.split(/\s+/).join(' ')
9601007

9611008
// At this point, the range is completely trimmed and
9621009
// ready to be split into comparators.
9631010

964-
var compRe = loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
1011+
var compRe = loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
9651012
var set = range.split(' ').map(function (comp) {
9661013
return parseComparator(comp, this.options)
9671014
}, this).join(' ').split(/\s+/)
@@ -1061,7 +1108,7 @@ function replaceTildes (comp, options) {
10611108
}
10621109

10631110
function replaceTilde (comp, options) {
1064-
var r = options.loose ? re[t.TILDELOOSE] : re[t.TILDE]
1111+
var r = options.loose ? safeRe[t.TILDELOOSE] : safeRe[t.TILDE]
10651112
return comp.replace(r, function (_, M, m, p, pr) {
10661113
debug('tilde', comp, _, M, m, p, pr)
10671114
var ret
@@ -1102,7 +1149,7 @@ function replaceCarets (comp, options) {
11021149

11031150
function replaceCaret (comp, options) {
11041151
debug('caret', comp, options)
1105-
var r = options.loose ? re[t.CARETLOOSE] : re[t.CARET]
1152+
var r = options.loose ? safeRe[t.CARETLOOSE] : safeRe[t.CARET]
11061153
return comp.replace(r, function (_, M, m, p, pr) {
11071154
debug('caret', comp, _, M, m, p, pr)
11081155
var ret
@@ -1161,7 +1208,7 @@ function replaceXRanges (comp, options) {
11611208

11621209
function replaceXRange (comp, options) {
11631210
comp = comp.trim()
1164-
var r = options.loose ? re[t.XRANGELOOSE] : re[t.XRANGE]
1211+
var r = options.loose ? safeRe[t.XRANGELOOSE] : safeRe[t.XRANGE]
11651212
return comp.replace(r, function (ret, gtlt, M, m, p, pr) {
11661213
debug('xRange', comp, ret, gtlt, M, m, p, pr)
11671214
var xM = isX(M)
@@ -1236,7 +1283,7 @@ function replaceXRange (comp, options) {
12361283
function replaceStars (comp, options) {
12371284
debug('replaceStars', comp, options)
12381285
// Looseness is ignored here. star is always as loose as it gets!
1239-
return comp.trim().replace(re[t.STAR], '')
1286+
return comp.trim().replace(safeRe[t.STAR], '')
12401287
}
12411288

12421289
// This function is passed to string.replace(re[t.HYPHENRANGE])
@@ -1562,7 +1609,7 @@ function coerce (version, options) {
15621609

15631610
var match = null
15641611
if (!options.rtl) {
1565-
match = version.match(re[t.COERCE])
1612+
match = version.match(safeRe[t.COERCE])
15661613
} else {
15671614
// Find the right-most coercible string that does not share
15681615
// a terminus with a more left-ward coercible string.
@@ -1573,17 +1620,17 @@ function coerce (version, options) {
15731620
// Stop when we get a match that ends at the string end, since no
15741621
// coercible string can be more right-ward without the same terminus.
15751622
var next
1576-
while ((next = re[t.COERCERTL].exec(version)) &&
1623+
while ((next = safeRe[t.COERCERTL].exec(version)) &&
15771624
(!match || match.index + match[0].length !== version.length)
15781625
) {
15791626
if (!match ||
15801627
next.index + next[0].length !== match.index + match[0].length) {
15811628
match = next
15821629
}
1583-
re[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
1630+
safeRe[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
15841631
}
15851632
// leave it in a clean state
1586-
re[t.COERCERTL].lastIndex = -1
1633+
safeRe[t.COERCERTL].lastIndex = -1
15871634
}
15881635

15891636
if (match === null) {

test/index.js

+10
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ test('negative range tests', function (t) {
355355
['*', 'not a version'],
356356
['>=2', 'glorp'],
357357
['2.x', '3.0.0-pre.0', { includePrerelease: true }],
358+
['== 1.0.0 || foo', '2.0.0', { loose: true }]
358359
].forEach(function (v) {
359360
var range = v[0]
360361
var ver = v[1]
@@ -1092,3 +1093,12 @@ test('really big numeric prerelease value', function (t) {
10921093
t.strictSame(r.prerelease, [ 'beta', '90071992547409910' ])
10931094
t.end()
10941095
})
1096+
1097+
// A long hyphen-prefixed suffix must still validate, both after a short
// version and after a version built from the largest safe integers.
test('long build id', function (t) {
  var longBuild = '-928490632884417731e7af463c92b034d6a78268fc993bcb88a57944'
  var maxInt = Number.MAX_SAFE_INTEGER
  var versions = [
    '1.1.1',
    [maxInt, maxInt, maxInt].join('.')
  ]

  versions.forEach(function (version) {
    var full = version + longBuild
    t.equal(semver.valid(full), full)
  })
  t.end()
})

test/re.js

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
var test = require('tap').test
2+
var semver = require('../')
3+
4+
// Checks the exported regex tables: `re` holds RegExp objects, `src` holds
// strings, and no `safeRe` source contains an unbounded `\s+` or `\s*`.
test('has a list of src, re, and safeRe', function (t) {
  var greedyWhitespace = ['\\s+', '\\s*']

  semver.re.forEach(function (r) {
    t.match(r, RegExp, 'regexps are regexps')
  })
  semver.src.forEach(function (s) {
    t.match(s, String, 'src is strings')
  })

  semver.safeRe.forEach(function (r) {
    greedyWhitespace.forEach(function (pattern) {
      t.notMatch(r.source, pattern, 'safe regex do not contain greedy whitespace')
    })
  })

  t.end()
})

test/whitespace.js

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
var test = require('tap').test
2+
var semver = require('../')
3+
4+
var validRange = semver.validRange
5+
var SemVer = semver.SemVer
6+
var Range = semver.Range
7+
var Comparator = semver.Comparator
8+
var minVersion = semver.minVersion
9+
var minSatisfying = semver.minSatisfying
10+
var maxSatisfying = semver.maxSatisfying
11+
12+
// Build a string made of `n` copies of `char`.
// A falsy `n` falls back to 500000 and a falsy `char` to a single space.
function s(n, char) {
  var remaining = n || 500000
  var unit = char || ' '
  var out = ''
  while (remaining > 0) {
    out += unit
    remaining -= 1
  }
  return out
}
25+
26+
test('regex dos via range whitespace', function (t) {
  // A range padded with this much whitespace would take minutes to process
  // if any redos-susceptible regex were used. The per-file tap timeout set
  // in package.json fails the run if this test takes too long.
  var r = `1.2.3 ${s()} <1.3.0`
  var collapsed = '1.2.3 <1.3.0'
  var candidates = ['1.2.3']

  t.equal(new Range(r).range, collapsed)
  t.equal(validRange(r), collapsed)
  t.equal(minVersion(r).version, '1.2.3')
  t.equal(minSatisfying(candidates, r), '1.2.3')
  t.equal(maxSatisfying(candidates, r), '1.2.3')

  t.end()
})
40+
41+
// A range whose middle term is a very long run of '0' characters must be
// rejected: constructors throw and the helper functions return null.
test('range with 0', function (t) {
  var r = `1.2.3 ${s(null, '0')} <1.3.0`
  var candidates = ['1.2.3']

  t.throws(() => new Range(r).range)
  t.equal(validRange(r), null)
  t.throws(() => minVersion(r).version)
  t.equal(minSatisfying(candidates, r), null)
  t.equal(maxSatisfying(candidates, r), null)
  t.end()
})
50+
51+
// 125 spaces of padding on each side still parses to '1.2.3'; the default
// 500000 spaces on each side makes the SemVer constructor throw.
test('semver version', function (t) {
  var shortPad = s(125)
  var longPad = s()
  var v = `${shortPad}1.2.3${shortPad}`
  var tooLong = `${longPad}1.2.3${longPad}`

  t.equal(new SemVer(v).version, '1.2.3')
  t.throws(() => new SemVer(tooLong))
  t.end()
})
58+
59+
// Heavy whitespace around the operator and the version must collapse so the
// comparator value is '<1.2.3'.
test('comparator', function (t) {
  var pad = s()
  var c = `${pad}<${pad}1.2.3${pad}`

  t.equal(new Comparator(c).value, '<1.2.3')
  t.end()
})

0 commit comments

Comments
 (0)