Skip to content

Commit 2979b84

Browse files
committed
fix #4241: ts arrow function type backtrack (hack)
1 parent 1180410 commit 2979b84

4 files changed

Lines changed: 144 additions & 13 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@
3636
3737
Using absolute paths instead of relative paths is not the default behavior because it means that the build results are no longer machine-independent (which means builds are no longer reproducible). Absolute paths can be useful when used with certain terminal emulators that allow you to click on absolute paths in the terminal text and/or when esbuild is being automatically invoked from several different directories within the same script.
3838
39+
* Fix a TypeScript parsing edge case ([#4241](https://github.com/evanw/esbuild/issues/4241))
40+
41+
This release fixes an edge case with parsing an arrow function in TypeScript with a return type that's in the middle of a `?:` ternary operator. For example:
42+
43+
```ts
44+
x = a ? (b) : c => d;
45+
y = a ? (b) : c => d : e;
46+
```
47+
48+
The `:` token in the value assigned to `x` pairs with the `?` token, so it's not the start of a return type annotation. However, the first `:` token in the value assigned to `y` is the start of a return type annotation because after parsing the arrow function body, it turns out there's another `:` token that can be used to pair with the `?` token. This case is notable as it's the first TypeScript edge case that esbuild has needed a backtracking parser to parse. It has been addressed by a quick hack (cloning the whole parser) as it's a rare edge case and esbuild doesn't otherwise need a backtracking parser. Hopefully this is sufficient and doesn't cause any issues.
49+
3950
* Inline small constant strings when minifying
4051

4152
Previously esbuild's minifier didn't inline string constants because strings can be arbitrarily long, and this isn't necessarily a size win if the string is used more than once. Starting with this release, esbuild will now inline string constants when the length of the string is three code units or less. For example:

internal/js_parser/js_parser.go

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,8 +3118,9 @@ func (p *parser) parseFnExpr(loc logger.Loc, isAsync bool, asyncRange logger.Ran
31183118
}
31193119

31203120
type parenExprOpts struct {
3121-
asyncRange logger.Range
3122-
forceArrowFn bool
3121+
asyncRange logger.Range
3122+
forceArrowFn bool
3123+
isAfterQuestionAndBeforeColon bool
31233124
}
31243125

31253126
// This assumes that the open parenthesis has already been parsed by the caller
@@ -3208,7 +3209,8 @@ func (p *parser) parseParenExpr(loc logger.Loc, level js_ast.L, opts parenExprOp
32083209
p.fnOrArrowDataParse = oldFnOrArrowData
32093210

32103211
// Are these arguments to an arrow function?
3211-
if p.lexer.Token == js_lexer.TEqualsGreaterThan || opts.forceArrowFn || (p.options.ts.Parse && p.lexer.Token == js_lexer.TColon) {
3212+
isArrowFn := p.lexer.Token == js_lexer.TEqualsGreaterThan
3213+
if isArrowFn || opts.forceArrowFn || (p.options.ts.Parse && p.lexer.Token == js_lexer.TColon) {
32123214
// Arrow functions are not allowed inside certain expressions
32133215
if level > js_ast.LAssign {
32143216
p.lexer.Unexpected()
@@ -3233,13 +3235,34 @@ func (p *parser) parseParenExpr(loc logger.Loc, level js_ast.L, opts parenExprOp
32333235
args = append(args, js_ast.Arg{Binding: binding, DefaultOrNil: initializerOrNil})
32343236
}
32353237

3238+
await := allowIdent
3239+
if isAsync {
3240+
await = allowExpr
3241+
}
3242+
32363243
// Avoid parsing TypeScript code like "a ? (1 + 2) : (3 + 4)" as an arrow
32373244
// function. The ":" after the ")" may be a return type annotation, so we
32383245
// attempt to convert the expressions to bindings first before deciding
32393246
// whether this is an arrow function, and only pick an arrow function if
32403247
// there were no conversion errors.
3241-
if p.lexer.Token == js_lexer.TEqualsGreaterThan || (len(invalidLog.invalidTokens) == 0 &&
3242-
p.trySkipTypeScriptArrowReturnTypeWithBacktracking()) || opts.forceArrowFn {
3248+
if p.options.ts.Parse && p.lexer.Token == js_lexer.TColon && len(invalidLog.invalidTokens) == 0 {
3249+
if opts.isAfterQuestionAndBeforeColon {
3250+
// Only do this very expensive check if we must
3251+
isArrowFn = p.isTypeScriptArrowReturnTypeAfterQuestionAndBeforeColon(await)
3252+
if isArrowFn {
3253+
// We know this will succeed because we've already done it once above
3254+
p.lexer.Next()
3255+
p.skipTypeScriptReturnType()
3256+
}
3257+
} else {
3258+
// Otherwise, do the less expensive check
3259+
isArrowFn = p.trySkipTypeScriptArrowReturnTypeWithBacktracking()
3260+
}
3261+
}
3262+
3263+
// Arrow function parsing may be forced if this parenthesized expression
3264+
// was prefixed by a TypeScript type parameter list such as "<T,>()"
3265+
if isArrowFn || opts.forceArrowFn {
32433266
if commaAfterSpread.Start != 0 {
32443267
p.log.AddError(&p.tracker, logger.Range{Loc: commaAfterSpread, Len: 1}, "Unexpected \",\" after rest pattern")
32453268
}
@@ -3260,11 +3283,6 @@ func (p *parser) parseParenExpr(loc logger.Loc, level js_ast.L, opts parenExprOp
32603283
p.markSyntaxFeature(entry.feature, entry.token)
32613284
}
32623285

3263-
await := allowIdent
3264-
if isAsync {
3265-
await = allowExpr
3266-
}
3267-
32683286
arrow := p.parseArrowBody(args, fnOrArrowDataParse{
32693287
needsAsyncLoc: loc,
32703288
await: await,
@@ -3448,6 +3466,7 @@ const (
34483466
exprFlagDecorator exprFlag = 1 << iota
34493467
exprFlagForLoopInit
34503468
exprFlagForAwaitLoopInit
3469+
exprFlagAfterQuestionAndBeforeColon
34513470
)
34523471

34533472
func (p *parser) parsePrefix(level js_ast.L, errors *deferredErrors, flags exprFlag) js_ast.Expr {
@@ -3494,7 +3513,9 @@ func (p *parser) parsePrefix(level js_ast.L, errors *deferredErrors, flags exprF
34943513
return value
34953514
}
34963515

3497-
value := p.parseParenExpr(loc, level, parenExprOpts{})
3516+
value := p.parseParenExpr(loc, level, parenExprOpts{
3517+
isAfterQuestionAndBeforeColon: (flags & exprFlagAfterQuestionAndBeforeColon) != 0,
3518+
})
34983519
return value
34993520

35003521
case js_lexer.TFalse:
@@ -4487,12 +4508,12 @@ func (p *parser) parseSuffix(left js_ast.Expr, level js_ast.L, errors *deferredE
44874508
oldAllowIn := p.allowIn
44884509
p.allowIn = true
44894510

4490-
yes := p.parseExpr(js_ast.LComma)
4511+
yes := p.parseExprWithFlags(js_ast.LComma, exprFlagAfterQuestionAndBeforeColon)
44914512

44924513
p.allowIn = oldAllowIn
44934514

44944515
p.lexer.Expect(js_lexer.TColon)
4495-
no := p.parseExpr(js_ast.LComma)
4516+
no := p.parseExprWithFlags(js_ast.LComma, flags&exprFlagAfterQuestionAndBeforeColon)
44964517
left = js_ast.Expr{Loc: left.Loc, Data: &js_ast.EIf{Test: left, Yes: yes, No: no}}
44974518

44984519
case js_lexer.TExclamation:

internal/js_parser/ts_parser.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,88 @@ func (p *parser) trySkipTypeScriptArrowReturnTypeWithBacktracking() bool {
920920
return true
921921
}
922922

923+
// This is a very specific function that determines whether a colon token is a
924+
// TypeScript arrow function return type in the case where the arrow function
925+
// is the middle expression of a JavaScript ternary operator (i.e. is between
926+
// the "?" and ":" tokens). It's separate from the other function above called
927+
// "trySkipTypeScriptArrowReturnTypeWithBacktracking" because it's much more
928+
// expensive, and likely not as robust.
929+
func (originalParser *parser) isTypeScriptArrowReturnTypeAfterQuestionAndBeforeColon(await awaitOrYield) bool {
930+
// Implement "backtracking" by swallowing lexer errors on a temporary parser
931+
defer func() {
932+
r := recover()
933+
if _, isLexerPanic := r.(js_lexer.LexerPanic); isLexerPanic {
934+
return // Swallow this error
935+
} else if r != nil {
936+
panic(r)
937+
}
938+
}()
939+
940+
// THIS IS A GROSS HACK. Some context:
941+
//
942+
// JavaScript is designed to not require a backtracking parser. Generally a
943+
// backtracking parser is not regarded as a good thing and you try to avoid
944+
// having one if it's not necessary.
945+
//
946+
// However, TypeScript's parser does do backtracking in an (admittedly noble)
947+
// effort to retrofit nice type syntax onto JavaScript. Up until this edge
948+
// case was discovered, this backtracking was limited to type syntax so
949+
// esbuild could deal with it by using a backtracking lexer without needing a
950+
// backtracking parser.
951+
//
952+
// This edge case requires a backtracking parser. The TypeScript compiler's
953+
// algorithm for parsing this is to try to parse the entire arrow function
954+
// body and then reset all the way back to the colon for the arrow function
955+
// return type if the token following the arrow function body is not another
956+
// colon. For example:
957+
//
958+
// x = a ? (b) : c => d;
959+
// y = a ? (b) : c => d : e;
960+
//
961+
// The first colon of "x" pairs with the "?" because the arrow function
962+
// "(b) : c => d" is not followed by a colon. However, the first colon of "y"
963+
// starts a return type because the arrow function "(b) : c => d" is followed
964+
// by a colon. In other words, the first ":" before the arrow function body
965+
// must pair with the "?" unless there is another ":" to pair with it after
966+
// the function body.
967+
//
968+
// I'm not going to rewrite esbuild's parser to support backtracking for this
969+
// one edge case. So instead, esbuild tries to parse the arrow function body
970+
// using a rough copy of the parser and then always throws the result away.
971+
// So arrow function bodies will always be parsed twice for this edge case.
972+
//
973+
// This is a hack instead of a good solution because the parser isn't designed
974+
// for this, and doing this is not going to have good test coverage given that
975+
// it's an edge case. We can't prevent parser code (either currently or in the
976+
// future) from accidentally depending on some parser state that isn't cloned
977+
// here. That could result in a parser panic when parsing a more complex
978+
// version of this edge case.
979+
p := parser{
980+
// Clone all state that the parser needs to parse this arrow function body
981+
options: originalParser.options,
982+
source: originalParser.source,
983+
lexer: originalParser.lexer,
984+
currentScope: &js_ast.Scope{
985+
Kind: js_ast.ScopeFunctionArgs,
986+
Members: make(map[string]js_ast.ScopeMember),
987+
},
988+
}
989+
p.lexer.IsLogDisabled = true
990+
991+
// Parse the return type
992+
p.lexer.Expect(js_lexer.TColon)
993+
p.skipTypeScriptReturnType()
994+
995+
// Parse the body and throw it out (with the side effect of maybe throwing an error)
996+
_ = p.parseArrowBody([]js_ast.Arg{}, fnOrArrowDataParse{await: await})
997+
998+
// There must be a colon following the arrow function body to pair with the leading "?"
999+
p.lexer.Expect(js_lexer.TColon)
1000+
1001+
// Parsing was successful if we get here
1002+
return true
1003+
}
1004+
9231005
func (p *parser) trySkipTypeScriptArrowArgsWithBacktracking() bool {
9241006
oldLexer := p.lexer
9251007
p.lexer.IsLogDisabled = true

internal/js_parser/ts_parser_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2375,6 +2375,23 @@ func TestTSArrow(t *testing.T) {
23752375
expectPrintedTS(t, "function f(async?) { g(async in x) }", "function f(async) {\n g(async in x);\n}\n")
23762376
expectPrintedTS(t, "function f(async?) { g(async as boolean) }", "function f(async) {\n g(async);\n}\n")
23772377
expectPrintedTS(t, "function f() { g(async as => boolean) }", "function f() {\n g(async (as) => boolean);\n}\n")
2378+
2379+
// https://github.com/evanw/esbuild/issues/4241
2380+
expectPrintedTS(t, "x = a ? (b = c) : d", "x = a ? b = c : d;\n")
2381+
expectPrintedTS(t, "x = a ? (b = c) : d => e", "x = a ? b = c : (d) => e;\n")
2382+
expectPrintedTS(t, "x = a ? (b = c) : T => d : (e = f)", "x = a ? (b = c) => d : e = f;\n")
2383+
expectPrintedTS(t, "x = a ? (b = c) : T => d : (e = f) : T => g", "x = a ? (b = c) => d : (e = f) => g;\n")
2384+
expectPrintedTS(t, "x = a ? b ? c : (d = e) : f => g", "x = a ? b ? c : d = e : (f) => g;\n")
2385+
expectPrintedTS(t, "x = a ? b ? (c = d) => e : (f = g) : h => i", "x = a ? b ? (c = d) => e : f = g : (h) => i;\n")
2386+
expectPrintedTS(t, "x = a ? b ? (c = d) : T => e : (f = g) : h => i", "x = a ? b ? (c = d) => e : f = g : (h) => i;\n")
2387+
expectPrintedTS(t, "x = a ? b ? (c = d) : T => e : (f = g) : (h = i) : T => j", "x = a ? b ? (c = d) => e : f = g : (h = i) => j;\n")
2388+
expectPrintedTS(t, "x = a ? (b) : T => c : d", "x = a ? (b) => c : d;\n")
2389+
expectPrintedTS(t, "x = a ? b - (c) : d => e", "x = a ? b - c : (d) => e;\n")
2390+
expectPrintedTS(t, "x = a ? b = (c) : T => d : e", "x = a ? b = (c) => d : e;\n")
2391+
expectParseErrorTS(t, "x = a ? (b = c) : T => d : (e = f) : g", "<stdin>: ERROR: Expected \";\" but found \":\"\n")
2392+
expectParseErrorTS(t, "x = a ? b ? (c = d) : T => e : (f = g)", "<stdin>: ERROR: Expected \":\" but found end of file\n")
2393+
expectParseErrorTS(t, "x = a ? - (b) : c => d : e", "<stdin>: ERROR: Expected \";\" but found \":\"\n")
2394+
expectParseErrorTS(t, "x = a ? b - (c) : d => e : f", "<stdin>: ERROR: Expected \";\" but found \":\"\n")
23782395
}
23792396

23802397
func TestTSSuperCall(t *testing.T) {

0 commit comments

Comments
 (0)