Skip to content

Commit

Permalink
feat: implement AST and parser (#5)
Browse files Browse the repository at this point in the history
## What does this PR do?

implement AST and parser
  • Loading branch information
Zecheng authored May 2, 2024
1 parent 616f093 commit 804c29d
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 56 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ An interpreted language written in Go

+ Identifiers consist only of alphabetic letters and underscores

## Components

+ Token set
+ Lexer
+ Abstract Syntax Tree (AST)
+ Pratt parser

## TODOs

- [ ] feat: add Unicode support
Expand All @@ -32,6 +39,9 @@ An interpreted language written in Go
- [ ] refactor: unary operators, binary operators, ternary operators
- [ ] feat: use Cobra to enable multiple modes when launching the REPL
- [ ] feat: use quit(), exit(), or Ctrl-D to exit
- [ ] ci: fix the release GHA workflow
- [ ] feat: concurrency, Mutex, RWMutex
- [ ] feat: add support for comments



Expand Down
96 changes: 96 additions & 0 deletions internal/ast/ast.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package ast

import "github.com/Aden-Q/monkey/internal/token"

// compile-time interface compliance checks: fail the build if any AST
// node stops satisfying the interface it is meant to implement.
var (
	_ Node       = (*Program)(nil)
	_ Expression = (*Identifier)(nil)
	_ Statement  = (*LetStatement)(nil)
	_ Statement  = (*ReturnStatement)(nil)
)

// Node is a common interface for nodes in AST
type Node interface {
	// TokenLiteral returns the literal value of the token associated with this node.
	TokenLiteral() string
}

// Statement is a node that does not produce a value
type Statement interface {
	Node
	// statementNode is an unexported marker method that distinguishes
	// statements from expressions at compile time.
	statementNode()
}

// Expression is a node that produces a value
type Expression interface {
	Node
	// expressionNode is an unexported marker method that distinguishes
	// expressions from statements at compile time.
	expressionNode()
}

// Program is a representation of the AST
type Program struct {
	// Statements is the ordered list of top-level statements that make up the program.
	Statements []Statement
}

// TokenLiteral returns the token literal of the program's first statement,
// or the empty string when the program contains no statements.
func (p *Program) TokenLiteral() string {
	if len(p.Statements) > 0 {
		// the root of the AST is the first node
		return p.Statements[0].TokenLiteral()
	}

	return ""
}

// Identifier is an expression node holding a single identifier token.
type Identifier struct {
	// the identifier token
	Token token.Token
}

// TokenLiteral returns the literal text of the identifier token.
func (i *Identifier) TokenLiteral() string {
	return i.Token.Literal
}

// expressionNode marks Identifier as an Expression.
func (i *Identifier) expressionNode() {}

// LetStatement represents the let statement
type LetStatement struct {
	// the let token
	Token token.Token
	// the identifier being bound on the left side of '='
	Identifier *Identifier
	// the expression value on the right side of the statement
	Value Expression
}

// TokenLiteral returns the literal text of the let token.
func (ls *LetStatement) TokenLiteral() string {
	return ls.Token.Literal
}

// statementNode marks LetStatement as a Statement.
func (ls *LetStatement) statementNode() {}

// NewLetStatement builds a *LetStatement binding the given identifier to the
// given right-hand-side expression, attaching the canonical "let" token.
func NewLetStatement(identifier *Identifier, value Expression) *LetStatement {
	stmt := &LetStatement{
		Token:      token.New(token.LET, "let"),
		Identifier: identifier,
		Value:      value,
	}

	return stmt
}

// ReturnStatement represents the return statement
type ReturnStatement struct {
	// the return token
	Token token.Token
	// the expression value on the right of the return keyword
	Value Expression
}

// TokenLiteral returns the literal text of the return token.
func (rs *ReturnStatement) TokenLiteral() string {
	return rs.Token.Literal
}

// statementNode marks ReturnStatement as a Statement.
func (rs *ReturnStatement) statementNode() {}

// NewReturnStatement builds a *ReturnStatement wrapping the given expression,
// attaching the canonical "return" token.
func NewReturnStatement(value Expression) *ReturnStatement {
	stmt := &ReturnStatement{
		Token: token.New(token.RETURN, "return"),
		Value: value,
	}

	return stmt
}
13 changes: 13 additions & 0 deletions internal/ast/ast_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ast_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestAst bootstraps the Ginkgo test framework for this package: it registers
// Gomega's failure handler and runs every spec under the "Ast Suite" label.
func TestAst(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Ast Suite")
}
10 changes: 10 additions & 0 deletions internal/ast/ast_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package ast_test

import (
. "github.com/onsi/ginkgo/v2"
_ "github.com/onsi/gomega"
)

// Placeholder Ginkgo container for AST specs; no specs have been added yet.
var _ = Describe("Ast", func() {

})
76 changes: 43 additions & 33 deletions internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,43 @@ import (
"github.com/Aden-Q/monkey/internal/token"
)

type Lexer struct {
input string
var _ Lexer = (*lexer)(nil)

type Lexer interface {
// Read reads the input text and stores into the buffer
Read(text string) int
// NextToken reads the next token starting at the current offset and move the ptr forward
NextToken() token.Token
}

type lexer struct {
buf string
position uint32 // current position index in input
}

func New(input string) *Lexer {
l := &Lexer{
input: input,
}
func New() Lexer {
return &lexer{}
}

func (l *lexer) Read(text string) int {
l.buf = text
l.position = 0

return l
return len(text)
}

func (l *Lexer) NextToken() (token.Token, bool) {
func (l *lexer) NextToken() token.Token {
l.skipWhiteSpaces()

if !l.hasNext() {
return token.Token{}, false
return token.Token{
Type: token.EOF,
Literal: "eof",
}
}

l.skipWhiteSpaces()

var tok token.Token
ok := true

ch := l.input[l.position]
ch := l.buf[l.position]

switch ch {
// operators with two characters
Expand All @@ -51,43 +64,40 @@ func (l *Lexer) NextToken() (token.Token, bool) {
if isLetter(ch) {
literal := l.readWord()
tok = token.New(token.LookupTokenType(literal), literal)
ok = true
} else if isDigit(ch) {
literal := l.readInt()
tok = token.New(token.LookupTokenType(literal), literal)
ok = true
} else {
tok = token.New(token.ILLEGAL, string(ch))
ok = false
}
}

return tok, ok
return tok
}

// hasNext checks whether there are characters remaining
func (l *Lexer) hasNext() bool {
return l.position < uint32(len(l.input))
func (l *lexer) hasNext() bool {
return l.position < uint32(len(l.buf))
}

// peekNextNextChar looks at the next character after the next character
func (l *Lexer) peekNextNextChar() byte {
if l.position+1 > uint32(len(l.input))-1 {
func (l *lexer) peekNextNextChar() byte {
if l.position+1 > uint32(len(l.buf))-1 {
return 0
}

return l.input[l.position+1]
return l.buf[l.position+1]
}

// readChar reads a single char at the current offset and move the ptr forward by 1
func (l *Lexer) readChar() string {
func (l *lexer) readChar() string {
if !l.hasNext() {
return ""
}

l.position++

return l.input[l.position-1 : l.position]
return l.buf[l.position-1 : l.position]
}

// isLetter checks whether a character is allowed to appear in an identifier
Expand All @@ -96,57 +106,57 @@ func isLetter(ch byte) bool {
}

// read a word starting from the current position, and move the offset forward
func (l *Lexer) readWord() string {
func (l *lexer) readWord() string {
startPos := l.position

for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if !isLetter(ch) {
break
}

l.position++
}

return l.input[startPos:l.position]
return l.buf[startPos:l.position]
}

// isDigit checks whether a character is a digit
func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9'
}

func (l *Lexer) readInt() string {
func (l *lexer) readInt() string {
startPos := l.position

for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if !isDigit(ch) {
break
}

l.position++
}

return l.input[startPos:l.position]
return l.buf[startPos:l.position]
}

// skipWhiteSpaces skips all white spaces starting at the current position, including newline characters
func (l *Lexer) skipWhiteSpaces() {
func (l *lexer) skipWhiteSpaces() {
for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
l.position += 1
} else {
Expand Down
Loading

0 comments on commit 804c29d

Please sign in to comment.