Skip to content

Commit

Permalink
feat: implement AST and parser (#5)
Browse files Browse the repository at this point in the history
## What does this PR do?

implement AST and parser
  • Loading branch information
Zecheng authored May 2, 2024
1 parent 616f093 commit 804c29d
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 56 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ An interpreted language written in Go

+ Identifiers consist only of alphabetic letters and underscores

## Components

+ Token set
+ Lexer
+ Abstract Syntax Tree (AST)
+ Pratt parser

## TODOs

- [ ] feat: add Unicode support
Expand All @@ -32,6 +39,9 @@ An interpreted language written in Go
- [ ] refactor: unary operators, binary operators, ternary operators
- [ ] feat: use Cobra to enable multiple modes when launching the REPL
- [ ] feat: use quit(), exit(), or Ctrl-D to exit
- [ ] ci: fix the release GHA workflow
- [ ] feat: concurrency, Mutex, RWMutex
- [ ] feat: add support for comments



Expand Down
96 changes: 96 additions & 0 deletions internal/ast/ast.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package ast

import "github.com/Aden-Q/monkey/internal/token"

// compile-time interface compliance checks: fail the build if any AST
// node stops satisfying the interface it is meant to implement.
var (
	_ Node       = (*Program)(nil)
	_ Expression = (*Identifier)(nil)
	_ Statement  = (*LetStatement)(nil)
	_ Statement  = (*ReturnStatement)(nil)
)

// Node is a common interface for nodes in AST
type Node interface {
	// TokenLiteral returns the literal value of the token associated with this node.
	TokenLiteral() string
}

// Statement is a node that does not produce a value
type Statement interface {
	Node
	// statementNode is an unexported marker method that distinguishes
	// statements from expressions at compile time.
	statementNode()
}

// Expression is a node that produces a value
type Expression interface {
	Node
	// expressionNode is an unexported marker method that distinguishes
	// expressions from statements at compile time.
	expressionNode()
}

// Program is a representation of the AST
type Program struct {
	// Statements is the ordered list of top-level statements that make up the program.
	Statements []Statement
}

// TokenLiteral returns the token literal of the program's first statement,
// or the empty string when the program contains no statements.
func (p *Program) TokenLiteral() string {
	if len(p.Statements) > 0 {
		// the root of the AST is the first node
		return p.Statements[0].TokenLiteral()
	}

	return ""
}

// Identifier is an expression node holding a single identifier token.
type Identifier struct {
	// the identifier token
	Token token.Token
}

// TokenLiteral returns the literal text of the identifier token.
func (i *Identifier) TokenLiteral() string {
	return i.Token.Literal
}

// expressionNode marks Identifier as an Expression.
func (i *Identifier) expressionNode() {}

// LetStatement represents the let statement
type LetStatement struct {
	// the let token
	Token token.Token
	// the identifier being bound on the left side of '='
	Identifier *Identifier
	// the expression value on the right side of the statement
	Value Expression
}

// TokenLiteral returns the literal text of the let token.
func (ls *LetStatement) TokenLiteral() string {
	return ls.Token.Literal
}

// statementNode marks LetStatement as a Statement.
func (ls *LetStatement) statementNode() {}

// NewLetStatement builds a *LetStatement binding the given identifier to the
// given right-hand-side expression, attaching the canonical "let" token.
func NewLetStatement(identifier *Identifier, value Expression) *LetStatement {
	stmt := &LetStatement{
		Token:      token.New(token.LET, "let"),
		Identifier: identifier,
		Value:      value,
	}

	return stmt
}

// ReturnStatement represents the return statement
type ReturnStatement struct {
	// the return token
	Token token.Token
	// the expression value on the right of the return keyword
	Value Expression
}

// TokenLiteral returns the literal text of the return token.
func (rs *ReturnStatement) TokenLiteral() string {
	return rs.Token.Literal
}

// statementNode marks ReturnStatement as a Statement.
func (rs *ReturnStatement) statementNode() {}

// NewReturnStatement builds a *ReturnStatement wrapping the given expression,
// attaching the canonical "return" token.
func NewReturnStatement(value Expression) *ReturnStatement {
	stmt := &ReturnStatement{
		Token: token.New(token.RETURN, "return"),
		Value: value,
	}

	return stmt
}
13 changes: 13 additions & 0 deletions internal/ast/ast_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ast_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestAst bootstraps the Ginkgo test framework for this package: it registers
// Gomega's failure handler and runs every spec under the "Ast Suite" label.
func TestAst(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Ast Suite")
}
10 changes: 10 additions & 0 deletions internal/ast/ast_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package ast_test

import (
. "github.com/onsi/ginkgo/v2"
_ "github.com/onsi/gomega"
)

// Placeholder Ginkgo container for AST specs; no specs have been added yet.
var _ = Describe("Ast", func() {

})
76 changes: 43 additions & 33 deletions internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,43 @@ import (
"github.com/Aden-Q/monkey/internal/token"
)

type Lexer struct {
input string
var _ Lexer = (*lexer)(nil)

type Lexer interface {
// Read reads the input text and stores into the buffer
Read(text string) int
// NextToken reads the next token starting at the current offset and move the ptr forward
NextToken() token.Token
}

type lexer struct {
buf string
position uint32 // current position index in input
}

func New(input string) *Lexer {
l := &Lexer{
input: input,
}
func New() Lexer {
return &lexer{}
}

func (l *lexer) Read(text string) int {
l.buf = text
l.position = 0

return l
return len(text)
}

func (l *Lexer) NextToken() (token.Token, bool) {
func (l *lexer) NextToken() token.Token {
l.skipWhiteSpaces()

if !l.hasNext() {
return token.Token{}, false
return token.Token{
Type: token.EOF,
Literal: "eof",
}
}

l.skipWhiteSpaces()

var tok token.Token
ok := true

ch := l.input[l.position]
ch := l.buf[l.position]

switch ch {
// operators with two characters
Expand All @@ -51,43 +64,40 @@ func (l *Lexer) NextToken() (token.Token, bool) {
if isLetter(ch) {
literal := l.readWord()
tok = token.New(token.LookupTokenType(literal), literal)
ok = true
} else if isDigit(ch) {
literal := l.readInt()
tok = token.New(token.LookupTokenType(literal), literal)
ok = true
} else {
tok = token.New(token.ILLEGAL, string(ch))
ok = false
}
}

return tok, ok
return tok
}

// hasNext checks whether there are characters remaining
func (l *Lexer) hasNext() bool {
return l.position < uint32(len(l.input))
func (l *lexer) hasNext() bool {
return l.position < uint32(len(l.buf))
}

// peekNextNextChar looks at the next character after the next character
func (l *Lexer) peekNextNextChar() byte {
if l.position+1 > uint32(len(l.input))-1 {
func (l *lexer) peekNextNextChar() byte {
if l.position+1 > uint32(len(l.buf))-1 {
return 0
}

return l.input[l.position+1]
return l.buf[l.position+1]
}

// readChar reads a single char at the current offset and move the ptr forward by 1
func (l *Lexer) readChar() string {
func (l *lexer) readChar() string {
if !l.hasNext() {
return ""
}

l.position++

return l.input[l.position-1 : l.position]
return l.buf[l.position-1 : l.position]
}

// isLetter checks whether a character is allowed to appear in an identifier
Expand All @@ -96,57 +106,57 @@ func isLetter(ch byte) bool {
}

// read a word starting from the current position, and move the offset forward
func (l *Lexer) readWord() string {
func (l *lexer) readWord() string {
startPos := l.position

for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if !isLetter(ch) {
break
}

l.position++
}

return l.input[startPos:l.position]
return l.buf[startPos:l.position]
}

// isDigit checks whether a character is a digit
func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9'
}

func (l *Lexer) readInt() string {
func (l *lexer) readInt() string {
startPos := l.position

for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if !isDigit(ch) {
break
}

l.position++
}

return l.input[startPos:l.position]
return l.buf[startPos:l.position]
}

// skipWhiteSpaces skips all white spaces starting at the current position, including newline characters
func (l *Lexer) skipWhiteSpaces() {
func (l *lexer) skipWhiteSpaces() {
for {
if !l.hasNext() {
break
}

ch := l.input[l.position]
ch := l.buf[l.position]
if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
l.position += 1
} else {
Expand Down
Loading

0 comments on commit 804c29d

Please sign in to comment.