2.6 Pratt Parser (prefix)

cedrickchee · cedrickchee · commit 5e371a793103 · 2020-03-27T17:45:48.000+08:00
diff --git a/ast/ast.go b/ast/ast.go
@@ -3,13 +3,19 @@ package ast
 // Packge ast implement the Abstract Syntax Tree (AST) that represents the
 // parsed source code before being passed on to the interpreter for evaluation.
 
-import "github.com/cedrickchee/hou/token"
+import (
+	"bytes"
+
+	"github.com/cedrickchee/hou/token"
+)
 
 // Node defines an interface for all nodes in the AST.
 type Node interface {
 	// Returns the literal value of the token it's associated with.
 	// This method will be used only for debugging and testing.
 	TokenLiteral() string
+	// Returns a stringified version of the node, for debugging and tests.
+	String() string
 }
 
 // Statement defines the interface for all statement nodes.
@@ -47,6 +53,20 @@ func (p *Program) TokenLiteral() string {
 	}
 }
 
+// String renders the program by concatenating the String() output of every
+// statement in order. It is meant for debugging and tests.
+func (p *Program) String() string {
+	var buf bytes.Buffer
+
+	// The program has no textual form of its own; each statement prints
+	// itself and the results are stitched together without separators.
+	for i := range p.Statements {
+		buf.WriteString(p.Statements[i].String())
+	}
+
+	return buf.String()
+}
+
 // LetStatement the `let` statement represents the AST node that binds an
 // expression to an identifier
 type LetStatement struct {
@@ -62,6 +82,23 @@ func (ls *LetStatement) statementNode() {}
 // TokenLiteral prints the literal value of the token associated with this node.
 func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }
 
+// String renders the statement as `<token literal> <name> = <value>;`.
+// The value part is left empty while Value is still nil.
+func (ls *LetStatement) String() string {
+	var buf bytes.Buffer
+
+	buf.WriteString(ls.TokenLiteral())
+	buf.WriteString(" ")
+	buf.WriteString(ls.Name.String())
+	buf.WriteString(" = ")
+	if value := ls.Value; value != nil {
+		buf.WriteString(value.String())
+	}
+	buf.WriteString(";")
+
+	return buf.String()
+}
+
 // Identifier is a node that holds the literal value of an identifier
 type Identifier struct {
 	Token token.Token // the token.IDENT token
@@ -75,6 +112,11 @@ func (i *Identifier) expressionNode() {}
 // TokenLiteral prints the literal value of the token associated with this node.
 func (i *Identifier) TokenLiteral() string { return i.Token.Literal }
 
+// String returns the identifier's Value field unchanged; it is used when the
+// AST is printed.
+func (i *Identifier) String() string { return i.Value }
+
 // ReturnStatement the `return` statement that represents the AST node that
 // holds a return value to the outter stack in the call stack.
 type ReturnStatement struct {
@@ -86,3 +128,80 @@ func (rs *ReturnStatement) statementNode() {}
 
 // TokenLiteral prints the literal value of the token associated with this node.
 func (rs *ReturnStatement) TokenLiteral() string { return rs.Token.Literal }
+
+// String renders the statement as `<token literal> <return value>;`. The
+// value part is left out while ReturnValue is still nil.
+func (rs *ReturnStatement) String() string {
+	var buf bytes.Buffer
+
+	buf.WriteString(rs.TokenLiteral())
+	buf.WriteString(" ")
+	if value := rs.ReturnValue; value != nil {
+		buf.WriteString(value.String())
+	}
+	buf.WriteString(";")
+
+	return buf.String()
+}
+
+// ExpressionStatement is a statement that consists solely of one expression.
+type ExpressionStatement struct {
+	Token      token.Token // the first token of the expression
+	Expression Expression
+}
+
+func (es *ExpressionStatement) statementNode() {}
+
+// TokenLiteral returns the literal of the token stored in this node.
+func (es *ExpressionStatement) TokenLiteral() string { return es.Token.Literal }
+
+// String returns the wrapped expression's String() output, or the empty
+// string when no expression is attached.
+func (es *ExpressionStatement) String() string {
+	// Expression can still be nil while expression parsing is incomplete; the
+	// guard is meant to go away once expressions can be fully built.
+	if es.Expression == nil {
+		return ""
+	}
+	return es.Expression.String()
+}
+
+// IntegerLiteral is the expression node for an integer literal.
+type IntegerLiteral struct {
+	Token token.Token // the token the literal was read from
+	Value int64       // the literal's numeric value
+}
+
+func (il *IntegerLiteral) expressionNode() {}
+
+// TokenLiteral returns the literal of the token stored in this node.
+func (il *IntegerLiteral) TokenLiteral() string { return il.Token.Literal }
+
+// String returns the literal's text as stored in its token; Value is not
+// re-formatted.
+func (il *IntegerLiteral) String() string {
+	return il.TokenLiteral()
+}
+
+// PrefixExpression is the expression node for a prefix operator applied to a
+// single operand, e.g. `!x` or `-5`.
+type PrefixExpression struct {
+	Token    token.Token // The prefix token, e.g. !
+	Operator string      // the operator's text, e.g. "!" or "-"
+	Right    Expression  // the operand the operator applies to
+}
+
+func (pe *PrefixExpression) expressionNode() {}
+
+// TokenLiteral returns the literal of the token stored in this node.
+func (pe *PrefixExpression) TokenLiteral() string {
+	return pe.Token.Literal
+}
+
+// String returns the expression as "(<operator><operand>)".
+//
+// The parentheses are deliberate: they make it visible which operand belongs
+// to which operator. Right is nil when the operand could not be parsed; in
+// that case only "(<operator>)" is written instead of calling String on a nil
+// Expression, matching the nil guards of the other String methods.
+func (pe *PrefixExpression) String() string {
+	var out bytes.Buffer
+
+	out.WriteString("(")
+	out.WriteString(pe.Operator)
+	if pe.Right != nil {
+		out.WriteString(pe.Right.String())
+	}
+	out.WriteString(")")
+
+	return out.String()
+}
diff --git a/ast/ast_test.go b/ast/ast_test.go
@@ -0,0 +1,29 @@
+package ast
+
+import (
+	"testing"
+
+	"github.com/cedrickchee/hou/token"
+)
+
+func TestString(t *testing.T) {
+	program := &Program{
+		Statements: []Statement{
+			&LetStatement{
+				Token: token.Token{Type: token.LET, Literal: "let"},
+				Name: &Identifier{
+					Token: token.Token{Type: token.IDENT, Literal: "myVar"},
+					Value: "myVar",
+				},
+				Value: &Identifier{
+					Token: token.Token{Type: token.IDENT, Literal: "anotherVar"},
+					Value: "anotherVar",
+				},
+			},
+		},
+	}
+
+	if program.String() != "let myVar = anotherVar;" {
+		t.Errorf("program.String() wrong. got=%q", program.String())
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
@@ -5,12 +5,40 @@ package parser
 
 import (
 	"fmt"
+	"strconv"
 
 	"github.com/cedrickchee/hou/ast"
 	"github.com/cedrickchee/hou/lexer"
 	"github.com/cedrickchee/hou/token"
 )
 
+// Operator precedences of the language, listed from lowest to highest.
+// These constants let the parser answer questions such as: "does the *
+// operator have a higher precedence than the == operator? Does a prefix
+// operator have a higher precedence than a call expression?"
+const (
+	_           int = iota
+	LOWEST          // lowest possible precedence
+	EQUALS          // ==
+	LESSGREATER     // > or <
+	SUM             // +
+	PRODUCT         // *
+	PREFIX          // -X or !X
+	CALL            // myFunction(X)
+)
+
+// prefixParseFn parses an expression whose first token is in prefix position.
+//
+// The idea of a Pratt parser is to associate parsing functions with token
+// types. Whenever a token of that type is encountered, the associated function
+// is called to parse the appropriate expression and return the AST node that
+// represents it. Each token type can have up to two such functions, depending
+// on whether the token is found in a prefix or an infix position.
+type prefixParseFn func() ast.Expression
+
+// infixParseFn parses an infix expression. Its argument is the "left side" of
+// the infix operator that is being parsed.
+type infixParseFn func(ast.Expression) ast.Expression
+
 // Parser implements the parser.
 type Parser struct {
 	l *lexer.Lexer
@@ -19,6 +47,11 @@ type Parser struct {
 
 	curToken  token.Token
 	peekToken token.Token
+
+	// maps to get the correct prefixParseFn or infixParseFn for the current
+	// token type.
+	prefixParseFns map[token.TokenType]prefixParseFn
+	infixParseFns  map[token.TokenType]infixParseFn
 }
 
 // New constructs a new Parser with a Lexer as input.
@@ -28,6 +61,13 @@ func New(l *lexer.Lexer) *Parser {
 		errors: []string{},
 	}
 
+	// Initialize the prefixParseFns map.
+	p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
+	p.registerPrefix(token.IDENT, p.parseIdentifier)
+	p.registerPrefix(token.INT, p.parseIntegerLiteral)
+	p.registerPrefix(token.BANG, p.parsePrefixExpression)
+	p.registerPrefix(token.MINUS, p.parsePrefixExpression)
+
 	// Read two tokens, so curToken and peekToken are both set.
 	p.nextToken()
 	p.nextToken()
@@ -81,7 +121,7 @@ func (p *Parser) parseStatement() ast.Statement {
 	case token.RETURN:
 		return p.parseReturnStatement()
 	default:
-		return nil
+		return p.parseExpressionStatement()
 	}
 }
 
@@ -126,6 +166,83 @@ func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
 	return stmt
 }
 
+// parseExpressionStatement is the entry point of expression parsing: it parses
+// one expression at the LOWEST precedence and wraps it in a statement whose
+// Token is the token the expression started on.
+func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
+	// Capture the starting token before parsing, because parsing may advance
+	// curToken.
+	first := p.curToken
+	expr := p.parseExpression(LOWEST)
+
+	// A terminating semicolon is optional; it is consumed only when present.
+	if p.peekTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+
+	return &ast.ExpressionStatement{Token: first, Expression: expr}
+}
+
+// parseExpression looks up the prefix parsing function registered for the
+// current token type and returns its result. When none is registered, a parser
+// error is recorded and nil is returned. The precedence argument is accepted
+// but not consulted yet.
+func (p *Parser) parseExpression(precedence int) ast.Expression {
+	tokType := p.curToken.Type
+
+	parse := p.prefixParseFns[tokType]
+	if parse == nil {
+		// Recording an error gives a descriptive message instead of leaving
+		// only an unexplained nil in program.Statements.
+		p.noPrefixParseFnError(tokType)
+		return nil
+	}
+
+	return parse()
+}
+
+// parseIdentifier builds an identifier node from the current token.
+//
+// It deliberately does not call nextToken. All parsing functions, prefix or
+// infix, follow the same protocol: they start with curToken being the token
+// type they are associated with and return with curToken being the last token
+// that is part of their expression, never advancing the tokens too far.
+func (p *Parser) parseIdentifier() ast.Expression {
+	tok := p.curToken
+	return &ast.Identifier{Token: tok, Value: tok.Literal}
+}
+
+// noPrefixParseFnError appends a parser error stating that no prefix parse
+// function was found for token type t.
+func (p *Parser) noPrefixParseFnError(t token.TokenType) {
+	p.errors = append(p.errors, fmt.Sprintf("no prefix parse function for %s found", t))
+}
+
+// parseIntegerLiteral converts the current token's literal into an integer
+// literal node. When the literal cannot be parsed as a 64-bit integer, a
+// parser error is recorded and nil is returned.
+func (p *Parser) parseIntegerLiteral() ast.Expression {
+	tok := p.curToken
+
+	// Base 0 makes ParseInt infer the base from the literal's prefix.
+	n, err := strconv.ParseInt(tok.Literal, 0, 64)
+	if err != nil {
+		p.errors = append(p.errors, fmt.Sprintf("could not parse %q as integer", tok.Literal))
+		return nil
+	}
+
+	return &ast.IntegerLiteral{Token: tok, Value: n}
+}
+
+// parsePrefixExpression parses a prefix operator together with its operand,
+// e.g. `-5` or `!x`. The current token supplies both the node's Token and its
+// Operator text.
+func (p *Parser) parsePrefixExpression() ast.Expression {
+	opTok := p.curToken
+
+	// A prefix expression spans more than one token, so step past the
+	// operator before parsing the operand.
+	p.nextToken()
+
+	// The operand is parsed with PREFIX as the precedence handed to
+	// parseExpression.
+	return &ast.PrefixExpression{
+		Token:    opTok,
+		Operator: opTok.Literal,
+		Right:    p.parseExpression(PREFIX),
+	}
+}
+
 // "assertion functions".
 // Enforce the correctness of the order of tokens by checking the type of the
 // next token.
@@ -145,3 +262,13 @@ func (p *Parser) peekTokenIs(t token.TokenType) bool {
 func (p *Parser) curTokenIs(t token.TokenType) bool {
 	return p.curToken.Type == t
 }
+
+// registerPrefix associates fn with tokenType in the prefixParseFns map,
+// replacing any earlier entry. The map is allocated on first use, so the
+// helper is safe to call even when the map has not been initialized.
+func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
+	if p.prefixParseFns == nil {
+		p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
+	}
+	p.prefixParseFns[tokenType] = fn
+}
+
+// registerInfix associates fn with tokenType in the infixParseFns map,
+// replacing any earlier entry. The map is allocated on first use: assigning
+// into a nil map panics, and infixParseFns may not have been initialized
+// before the first registration.
+func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
+	if p.infixParseFns == nil {
+		p.infixParseFns = make(map[token.TokenType]infixParseFn)
+	}
+	p.infixParseFns[tokenType] = fn
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go