Skip to content

Commit 5e371a7

Browse files
committed
2.6 Pratt Parser (prefix)
1 parent c44317d commit 5e371a7

File tree

4 files changed

+406
-2
lines changed

4 files changed

+406
-2
lines changed

ast/ast.go

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,19 @@ package ast
33
// Packge ast implement the Abstract Syntax Tree (AST) that represents the
44
// parsed source code before being passed on to the interpreter for evaluation.
55

6-
import "github.com/cedrickchee/hou/token"
6+
import (
7+
"bytes"
8+
9+
"github.com/cedrickchee/hou/token"
10+
)
711

812
// Node defines an interface for all nodes in the AST.
913
type Node interface {
1014
// Returns the literal value of the token it's associated with.
1115
// This method will be used only for debugging and testing.
1216
TokenLiteral() string
17+
// Returns a stringified version of the AST for debugging.
18+
String() string
1319
}
1420

1521
// Statement defines the interface for all statement nodes.
@@ -47,6 +53,20 @@ func (p *Program) TokenLiteral() string {
4753
}
4854
}
4955

56+
// String returns a stringified version of the AST for debugging.
57+
func (p *Program) String() string {
58+
// Creates a buffer and writes the return value of each statements String()
59+
// method to it.
60+
var out bytes.Buffer
61+
62+
for _, s := range p.Statements {
63+
// Delegates most of program work to the Statements of *ast.Program.
64+
out.WriteString(s.String())
65+
}
66+
67+
return out.String()
68+
}
69+
5070
// LetStatement the `let` statement represents the AST node that binds an
5171
// expression to an identifier
5272
type LetStatement struct {
@@ -62,6 +82,23 @@ func (ls *LetStatement) statementNode() {}
6282
// TokenLiteral prints the literal value of the token associated with this node.
6383
func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }
6484

85+
// String returns a stringified version of the `let` node.
86+
func (ls *LetStatement) String() string {
87+
var out bytes.Buffer
88+
89+
out.WriteString(ls.TokenLiteral() + " ")
90+
out.WriteString(ls.Name.String())
91+
out.WriteString(" = ")
92+
93+
if ls.Value != nil {
94+
out.WriteString(ls.Value.String())
95+
}
96+
97+
out.WriteString(";")
98+
99+
return out.String()
100+
}
101+
65102
// Identifier is a node that holds the literal value of an identifier
66103
type Identifier struct {
67104
Token token.Token // the token.IDENT token
@@ -75,6 +112,11 @@ func (i *Identifier) expressionNode() {}
75112
// TokenLiteral prints the literal value of the token associated with this node.
76113
func (i *Identifier) TokenLiteral() string { return i.Token.Literal }
77114

115+
// String returns a stringified version of the identifier node.
116+
func (i *Identifier) String() string {
117+
return i.Value
118+
}
119+
78120
// ReturnStatement the `return` statement that represents the AST node that
79121
// holds a return value to the outter stack in the call stack.
80122
type ReturnStatement struct {
@@ -86,3 +128,80 @@ func (rs *ReturnStatement) statementNode() {}
86128

87129
// TokenLiteral prints the literal value of the token associated with this node.
88130
func (rs *ReturnStatement) TokenLiteral() string { return rs.Token.Literal }
131+
132+
// String returns a stringified version of the `return` node.
133+
func (rs *ReturnStatement) String() string {
134+
var out bytes.Buffer
135+
136+
out.WriteString(rs.TokenLiteral() + " ")
137+
138+
if rs.ReturnValue != nil {
139+
out.WriteString(rs.ReturnValue.String())
140+
}
141+
142+
out.WriteString(";")
143+
144+
return out.String()
145+
}
146+
147+
// ExpressionStatement represents an expression node.
148+
type ExpressionStatement struct {
149+
Token token.Token // the first token of the expression
150+
Expression Expression
151+
}
152+
153+
func (es *ExpressionStatement) statementNode() {}
154+
155+
// TokenLiteral prints the literal value of the token associated with this node.
156+
func (es *ExpressionStatement) TokenLiteral() string { return es.Token.Literal }
157+
158+
// String returns a stringified version of the expression node
159+
func (es *ExpressionStatement) String() string {
160+
// The nil-checks will be taken out, later on, when we can fully build
161+
// expressions.
162+
if es.Expression != nil {
163+
return es.Expression.String()
164+
}
165+
return ""
166+
}
167+
168+
// IntegerLiteral represents a literal integer node.
169+
type IntegerLiteral struct {
170+
Token token.Token
171+
Value int64
172+
}
173+
174+
func (il *IntegerLiteral) expressionNode() {}
175+
176+
// TokenLiteral prints the literal value of the token associated with this node.
177+
func (il *IntegerLiteral) TokenLiteral() string { return il.Token.Literal }
178+
179+
// String returns a stringified version of the expression node.
180+
func (il *IntegerLiteral) String() string { return il.Token.Literal }
181+
182+
// PrefixExpression represents a prefix expression node.
183+
type PrefixExpression struct {
184+
Token token.Token // The prefix token, e.g. !
185+
Operator string
186+
Right Expression
187+
}
188+
189+
func (pe *PrefixExpression) expressionNode() {}
190+
191+
// TokenLiteral prints the literal value of the token associated with this node.
192+
func (pe *PrefixExpression) TokenLiteral() string { return pe.Token.Literal }
193+
194+
// String returns a stringified version of the expression node.
195+
func (pe *PrefixExpression) String() string {
196+
var out bytes.Buffer
197+
198+
// We deliberately add parentheses around the operator and its operand,
199+
// the expression in Right. That allows us to see which operands belong to
200+
// which operator.
201+
out.WriteString("(")
202+
out.WriteString(pe.Operator)
203+
out.WriteString(pe.Right.String())
204+
out.WriteString(")")
205+
206+
return out.String()
207+
}

ast/ast_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package ast
2+
3+
import (
4+
"testing"
5+
6+
"github.com/cedrickchee/hou/token"
7+
)
8+
9+
func TestString(t *testing.T) {
10+
program := &Program{
11+
Statements: []Statement{
12+
&LetStatement{
13+
Token: token.Token{Type: token.LET, Literal: "let"},
14+
Name: &Identifier{
15+
Token: token.Token{Type: token.IDENT, Literal: "myVar"},
16+
Value: "myVar",
17+
},
18+
Value: &Identifier{
19+
Token: token.Token{Type: token.IDENT, Literal: "anotherVar"},
20+
Value: "anotherVar",
21+
},
22+
},
23+
},
24+
}
25+
26+
if program.String() != "let myVar = anotherVar;" {
27+
t.Errorf("program.String() wrong. got=%q", program.String())
28+
}
29+
}

parser/parser.go

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,40 @@ package parser
55

66
import (
77
"fmt"
8+
"strconv"
89

910
"github.com/cedrickchee/hou/ast"
1011
"github.com/cedrickchee/hou/lexer"
1112
"github.com/cedrickchee/hou/token"
1213
)
1314

15+
// Define the precedences of the language.
16+
// These constants is able to answer: "does the * operator have a higher
17+
// precedence than the == operator? Does a prefix operator have a higher
18+
// preference than a call expression?"
19+
const (
20+
_ int = iota
21+
LOWEST // lowest possible precedence
22+
EQUALS // ==
23+
LESSGREATER // > or <
24+
SUM // +
25+
PRODUCT // *
26+
PREFIX // -X or !X
27+
CALL // myFunction(X)
28+
)
29+
30+
// Pratt parser's idea is the association of parsing functions with token types.
31+
// Whenever this token type is encountered, the parsing functions are called to
32+
// parse the appropriate expression and return an AST node that represents it.
33+
// Each token type can have up to two parsing functions associated with it,
34+
// depending on whether the token is found in a prefix or an infix position.
35+
type (
36+
prefixParseFn func() ast.Expression
37+
// This function argument is "left side" of the infix operator that’s being
38+
// parsed.
39+
infixParseFn func(ast.Expression) ast.Expression
40+
)
41+
1442
// Parser implements the parser.
1543
type Parser struct {
1644
l *lexer.Lexer
@@ -19,6 +47,11 @@ type Parser struct {
1947

2048
curToken token.Token
2149
peekToken token.Token
50+
51+
// maps to get the correct prefixParseFn or infixParseFn for the current
52+
// token type.
53+
prefixParseFns map[token.TokenType]prefixParseFn
54+
infixParseFns map[token.TokenType]infixParseFn
2255
}
2356

2457
// New constructs a new Parser with a Lexer as input.
@@ -28,6 +61,13 @@ func New(l *lexer.Lexer) *Parser {
2861
errors: []string{},
2962
}
3063

64+
// Initialize the prefixParseFns map.
65+
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
66+
p.registerPrefix(token.IDENT, p.parseIdentifier)
67+
p.registerPrefix(token.INT, p.parseIntegerLiteral)
68+
p.registerPrefix(token.BANG, p.parsePrefixExpression)
69+
p.registerPrefix(token.MINUS, p.parsePrefixExpression)
70+
3171
// Read two tokens, so curToken and peekToken are both set.
3272
p.nextToken()
3373
p.nextToken()
@@ -81,7 +121,7 @@ func (p *Parser) parseStatement() ast.Statement {
81121
case token.RETURN:
82122
return p.parseReturnStatement()
83123
default:
84-
return nil
124+
return p.parseExpressionStatement()
85125
}
86126
}
87127

@@ -126,6 +166,83 @@ func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
126166
return stmt
127167
}
128168

169+
// The top-level method that kicks off expression parsing.
170+
func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
171+
stmt := &ast.ExpressionStatement{Token: p.curToken}
172+
173+
stmt.Expression = p.parseExpression(LOWEST)
174+
175+
if p.peekTokenIs(token.SEMICOLON) {
176+
p.nextToken()
177+
}
178+
179+
return stmt
180+
}
181+
182+
// Check whether there's a parsing function associated with p.curToken.Type in
183+
// the prefix position.
184+
func (p *Parser) parseExpression(precedence int) ast.Expression {
185+
prefix := p.prefixParseFns[p.curToken.Type]
186+
if prefix == nil {
187+
// noPrefixParseFnError give us better error messages when
188+
// program.Statements does not contain one statement but simply one nil.
189+
p.noPrefixParseFnError(p.curToken.Type)
190+
return nil
191+
}
192+
193+
leftExp := prefix()
194+
195+
return leftExp
196+
}
197+
198+
func (p *Parser) parseIdentifier() ast.Expression {
199+
// This method doesn’t advance the tokens, it doesn’t call nextToken.
200+
// That’s important.
201+
// All of our parsing functions, prefixParseFn or infixParseFn, are going to
202+
// follow this protocol:
203+
// start with curToken being the type of token you’re associated with and
204+
// return with curToken being the last token that’s part of your expression
205+
// type. Never advance the tokens too far.
206+
return &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
207+
}
208+
209+
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
210+
msg := fmt.Sprintf("no prefix parse function for %s found", t)
211+
p.errors = append(p.errors, msg)
212+
}
213+
214+
func (p *Parser) parseIntegerLiteral() ast.Expression {
215+
lit := &ast.IntegerLiteral{Token: p.curToken}
216+
217+
value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
218+
if err != nil {
219+
msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal)
220+
p.errors = append(p.errors, msg)
221+
return nil
222+
}
223+
224+
lit.Value = value
225+
226+
return lit
227+
}
228+
229+
func (p *Parser) parsePrefixExpression() ast.Expression {
230+
expression := &ast.PrefixExpression{
231+
Token: p.curToken,
232+
Operator: p.curToken.Literal,
233+
}
234+
235+
// Advances our tokens in order to correctly parse a prefix expression
236+
// like `-5` more than one token has to be "consumed".
237+
p.nextToken()
238+
239+
// parseExpression() value changes depending on the caller's knowledge and
240+
// its context.
241+
expression.Right = p.parseExpression(PREFIX)
242+
243+
return expression
244+
}
245+
129246
// "assertion functions".
130247
// Enforce the correctness of the order of tokens by checking the type of the
131248
// next token.
@@ -145,3 +262,13 @@ func (p *Parser) peekTokenIs(t token.TokenType) bool {
145262
func (p *Parser) curTokenIs(t token.TokenType) bool {
146263
return p.curToken.Type == t
147264
}
265+
266+
// Helper method that add entries to the prefixParseFns map.
267+
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
268+
p.prefixParseFns[tokenType] = fn
269+
}
270+
271+
// Helper method that add entries to the infixParseFns map.
272+
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
273+
p.infixParseFns[tokenType] = fn
274+
}

0 commit comments

Comments
 (0)