diff --git a/.gitignore b/.gitignore index d03be5f..2d8adde 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ package-lock.json a.out .idea 1.cc -.vscode \ No newline at end of file +.vscode +tmp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1978792 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +DIR_SRC = ./src +DIR_TMP = ./tmp +CC = g++ + +SRC = $(wildcard ${DIR_SRC}/*.cpp) +TMP = $(wildcard $(DIR_TMP)/*.o) + +all: + ${CC} -c $(foreach i, $(SRC), $(i)) + +install: + $(shell mkdir ${DIR_TMP}) + $(shell mv *.o ${DIR_TMP}) + + ${CC} $(foreach i, $(TMP), $(i)) -o drift + + @echo "\n\t\033[41;37m: ./drift\033[0m 🐇🐰🍻 \n" + +run: + ./test/run.sh + +clean: + rm -f *.o + rm -rf ${DIR_TMP} + rm -f drift \ No newline at end of file diff --git a/drift b/drift new file mode 100644 index 0000000..d60533b Binary files /dev/null and b/drift differ diff --git a/src/ast.hpp b/src/ast.hpp index 82703a9..64b26bc 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -10,4 +10,1055 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_AST_H +#define DRIFT_AST_H + +#include + +#include "token.hpp" + +// abstract syntax tree +namespace ast { + // types for drift + enum TypeKind { + T_INT, // int + T_FLOAT, // float + T_STR, // str + T_CHAR, // char + T_BOOL, // bool + T_ARRAY, // [] + T_MAP, // + T_TUPLE, // (T) + T_USER, // user + }; + +// basic type for drift +// +#define S_INT "int" // 1 +#define S_FLOAT "float" // 2 +#define S_STR "str" // 3 +#define S_CHAR "char" // 4 +#define S_BOOL "bool" // 5 + + // TYPE + class Type { + public: + // stringer + virtual std::string stringer() = 0; + // kind of basic type + virtual TypeKind kind() = 0; + }; + + // + class Int : public Type { + public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_INT; } + }; + + // float + class Float : public Type { + public: + std::string stringer() override { return ""; } + + 
TypeKind kind() override { return T_FLOAT; } + }; + + // str + class Str : public Type { + public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_STR; } + }; + + // char + class Char : public Type { + public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_CHAR; } + }; + + // bool + class Bool : public Type { + public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_BOOL; } + }; + + // array (not keyword, for compiler analysis) + // [] + class Array : public Type { + public: + Type *T; // type for elements + + explicit Array(Type *T) : T(T) {} + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_ARRAY; } + }; + + // map (not keyword, for compiler analysis) + // + class Map : public Type { + public: + Type *T1; // K + Type *T2; // V + + explicit Map(Type *T1, Type *T2) : T1(T1), T2(T2) {} + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_MAP; } + }; + + // tuple (not keyword, for compiler analysis) + // (type) + class Tuple : public Type { + public: + Type *T; // type for elements + + explicit Tuple(Type *T) : T(T) {} + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_TUPLE; } + }; + + // user definition type + // `type` + class User : public Type { + public: + token::Token name; + + explicit User(token::Token name) { this->name = std::move(name); } + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_USER; } + }; + + // ast types + enum Kind { + // expression + EXPR_LITERAL, // literal + EXPR_BINARY, // T1 T2 + EXPR_GROUP, // (EXPR) + EXPR_UNARY, // EXPR + EXPR_NAME, // IDENT + EXPR_CALL, // EXPR(..) + EXPR_GET, // EXPR.NAME + EXPR_SET, // EXPR.NAME = EXPR + EXPR_ASSIGN, // EXPR = EXPR + EXPR_ARRAY, // [..] + EXPR_MAP, // {K1: V1, K2: V2} + EXPR_TUPLE, // (..) 
+ EXPR_INDEX, // EXPR[EXPR] + EXPR_NEW, + // statement + STMT_EXPR, // EXPR + STMT_VAR, // VAR + STMT_BLOCK, // BLOCK + STMT_IF, // IF + STMT_FOR, // FOR + STMT_DO, // DO + STMT_OUT, // OUT + STMT_TIN, // TIN + STMT_FUNC, // FUNC + STMT_WHOLE, // CLASS | ENUM + STMT_AND, // AND + STMT_MOD, // MOD + STMT_USE, // USE + STMT_RET, // RET + STMT_ENUM, // ENUM + STMT_INHERIT, // <- + .. + STMT_CALLINHERIT, // <~ expr + STMT_INTERFACE, // INTERFACE + STMT_PUB, // PUB + }; + + // K1: V1 | K1 + K2: V2 + using Arg = std::map; + + // abstract expr + class Expr { + public: + // return string of expr + virtual std::string stringer() = 0; + // return kind of expr + virtual Kind kind() = 0; + }; + + // number | string | char + class LiteralExpr : public Expr { + public: + // literal + token::Token token; + + explicit LiteralExpr(token::Token tok) { this->token = std::move(tok); } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_LITERAL; } + }; + + // T1 T2 + // + | - | * | / | += | -= | *= | /= | > | >= | < | <= | != | == | & | | + class BinaryExpr : public Expr { + public: + Expr *left; // left + token::Token op; // operator + Expr *right; // right + + explicit BinaryExpr(Expr *l, token::Token op, Expr *r) : left(l), right(r) { + this->op = std::move(op); + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + + return str.str(); + } + + Kind kind() override { return EXPR_BINARY; } + }; + + //(EXPR) + class GroupExpr : public Expr { + public: + Expr *expr; + + explicit GroupExpr(Expr *expr) : expr(expr) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_GROUP; } + }; + + //EXPR + class UnaryExpr : public Expr { + public: + token::Token token; + Expr *expr; + + explicit UnaryExpr(token::Token tok, Expr *expr) : expr(expr) { + this->token = std::move(tok); + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + + return str.str(); + } + 
+ Kind kind() override { return EXPR_UNARY; } + }; + + // IDENT + class NameExpr : public Expr { + public: + token::Token token; + + bool selfIncrement; // ++ + bool selfDecrement; // -- + + bool prefix; // prefix, calc it first + + explicit NameExpr(token::Token tok, + bool x = false, // increment + bool y = false, // decrement + bool z = false // prefix + ) { + this->token = std::move(tok); + + this->selfIncrement = x; + this->selfDecrement = y; + + this->prefix = z; + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + + return str.str(); + } + + Kind kind() override { return EXPR_NAME; } + }; + + // EXPR(..) + class CallExpr : public Expr { + public: + Expr *callee; + std::vector arguments; + + explicit CallExpr(Expr *expr, std::vector args) : callee(expr) { + this->arguments = std::move(args); + } + + std::string stringer() override { + std::stringstream str; + + str << "stringer() << " "; + str << ")"; + } else + str << " Args=()"; + + str << " }>"; + return str.str(); + } + + Kind kind() override { return EXPR_CALL; } + }; + + // EXPR.NAME + class GetExpr : public Expr { + public: + token::Token name; + + Expr *expr; + + explicit GetExpr(Expr *expr, token::Token name) : expr(expr) { + this->name = std::move(name); + } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_GET; } + }; + + // EXPR.NAME = EXPR + class SetExpr : public Expr { + public: + Expr *expr; + token::Token name; + Expr *value; + + explicit SetExpr(Expr *e, token::Token name, Expr *v) : expr(e), value(v) { + this->name = std::move(name); + } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_SET; } + }; + + // EXPR = EXPR + class AssignExpr : public Expr { + public: + Expr *expr; + Expr *value; + + explicit AssignExpr(Expr *e, Expr *v) : expr(e), value(v) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_ASSIGN; } + }; + + //[..] 
+ class ArrayExpr : public Expr { + public: + std::vector elements; + + explicit ArrayExpr(std::vector e) { this->elements = std::move(e); } + + std::string stringer() override { + std::stringstream str; + + str << "stringer() << " "; + } + + str << "] }>"; + return str.str(); + } + + Kind kind() override { return EXPR_ARRAY; } + }; + + //{K1: V1, K2: V2} + class MapExpr : public Expr { + public: + std::map elements; + + explicit MapExpr(std::map e) { + this->elements = std::move(e); + } + + std::string stringer() override { + std::stringstream str; + + str << "stringer() + << ", V : " << i.second->stringer() << " "; + } + + str << "} }>"; + return str.str(); + } + + Kind kind() override { return EXPR_MAP; } + }; + + //(..) + class TupleExpr : public Expr { + public: + std::vector elements; + + explicit TupleExpr(std::vector e) { this->elements = std::move(e); } + + std::string stringer() override { + std::stringstream str; + + str << "stringer() << " "; + } + + str << ") }>"; + return str.str(); + } + + Kind kind() override { return EXPR_TUPLE; } + }; + + // EXPR[EXPR] + class IndexExpr : public Expr { + public: + Expr *left; + Expr *right; + + explicit IndexExpr(Expr *l, Expr *r) : left(l), right(r) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return EXPR_INDEX; } + }; + + // new {K1: V1, K2: V2} + class NewExpr : public Expr { + public: + token::Token name; + std::map builder; + + explicit NewExpr(token::Token name, + std::map builder) { + this->name = std::move(name); + this->builder = builder; + } + + std::string stringer() override { + if (builder.empty()) { + return ""; + } + std::stringstream str; + + str << "literal + << "' V : " << i.second->stringer(); + } + str << ")"; + } + return str.str(); + } + + Kind kind() override { return EXPR_NEW; } + }; + + // abstract stmt + class Stmt { + public: + // return string of stmt + virtual std::string stringer() = 0; + // return kind of stmt + virtual Kind kind() = 0; + }; + 
+ // + class ExprStmt : public Stmt { + public: + Expr *expr; + + explicit ExprStmt(Expr *expr) : expr(expr) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_EXPR; } + }; + + // def : = + class VarStmt : public Stmt { + public: + token::Token name; + + // type define + Type *T; + + // default is not init + Expr *expr = nullptr; + + // has expr + explicit VarStmt(token::Token name, Type *T, Expr *e) : T(T), expr(e) { + this->name = std::move(name); + } + + // not init expr + explicit VarStmt(token::Token name, Type *T) : T(T) { + this->name = std::move(name); + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + else + str << " }>"; + + return str.str(); + } + + Kind kind() override { return STMT_VAR; } + }; + + // .. end + class BlockStmt : public Stmt { + public: + std::vector block; + + explicit BlockStmt(std::vector block) : block(block) {} + + std::string stringer() override { + std::stringstream str; + + str << ""; + return str.str(); + } + + Kind kind() override { return STMT_BLOCK; } + }; + + /** + * if + * + * ef + * + * ef + * + * nf + * + */ + class IfStmt : public Stmt { + public: + Expr *condition; // main condition + BlockStmt *ifBranch; // main condition branch + + std::map efBranch; // ef cond and branch + + BlockStmt *nfBranch; // nf branch; + + explicit IfStmt(Expr *cond, BlockStmt *then, + std::map ef, BlockStmt *nf) { + this->condition = cond; + this->ifBranch = then; + this->efBranch = std::move(ef); + this->nfBranch = nf; + } + + explicit IfStmt(Expr *cond, BlockStmt *then) { + this->condition = cond; + this->ifBranch = then; + } + + std::string stringer() override { + std::stringstream str; + + str << "stringer() + << ", V : " << i.second->stringer() << " "; + } + if (nfBranch != nullptr) str << " NfBranch=" << nfBranch->stringer(); + + str << " }>"; + return str.str(); + } + + Kind kind() override { return STMT_IF; } + }; + + /** + * for + * + * end + */ + class 
ForStmt : public Stmt { + public: + Expr *condition; // cond + BlockStmt *block; // stmt + + explicit ForStmt(Expr *cond, BlockStmt *block) { + this->condition = cond; + this->block = block; + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + return str.str(); + } + + Kind kind() override { return STMT_FOR; } + }; + + /** + * do + * + * for + * + * end + */ + class DoStmt : public Stmt { + public: + BlockStmt *block; // first do block + Stmt *stmt; // for statement + + explicit DoStmt(BlockStmt *block, Stmt *stmt) { + this->block = block; + this->stmt = stmt; + } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_DO; } + }; + + // out + class OutStmt : public Stmt { + public: + Expr *expr; + + OutStmt() { this->expr = nullptr; } + + explicit OutStmt(Expr *e) : expr(e) {} + + std::string stringer() override { + if (expr == nullptr) { + return ""; + } + return ""; + } + + Kind kind() override { return STMT_OUT; } + }; + + // tin + class TinStmt : public Stmt { + public: + Expr *expr; + + TinStmt() { this->expr = nullptr; } + + explicit TinStmt(Expr *e) : expr(e) {} + + std::string stringer() override { + if (expr == nullptr) { + return ""; + } + return ""; + } + + Kind kind() override { return STMT_TIN; } + }; + + // mod + class ModStmt : public Stmt { + public: + token::Token name; + + explicit ModStmt(token::Token name) { this->name = std::move(name); } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_MOD; } + }; + + // use | as + class UseStmt : public Stmt { + public: + token::Token name; + token::Token *as = nullptr; + + // use + explicit UseStmt(token::Token name) { this->name = std::move(name); } + + // use as + explicit UseStmt(token::Token name, token::Token *as) { + this->name = std::move(name); + this->as = as; + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + return str.str(); + } + + Kind kind() 
override { return STMT_USE; } + }; + + // ret + // ret -> + class RetStmt : public Stmt { + public: + Stmt *stmt = nullptr; + + explicit RetStmt() {} + + explicit RetStmt(Stmt *s) : stmt(s) {} + + std::string stringer() override { + if (stmt == nullptr) { + return ""; + } + return ""; + } + + Kind kind() override { return STMT_RET; } + }; + + // and -> end + class AndStmt : public Stmt { + public: + token::Token name; // alias name + BlockStmt *block; // block + + explicit AndStmt(token::Token name, BlockStmt *block) : block(block) { + this->name = std::move(name); + } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_AND; } + }; + + // def (..) -> + // + // end + class FuncStmt : public Stmt { + public: + Arg arguments; // args + + token::Token name; // name + Type *ret; // return + BlockStmt *block; // body + + explicit FuncStmt(Arg args, token::Token name, Type *ret, + BlockStmt *block) { + this->arguments = args; + this->name = std::move(name); + this->ret = ret; + this->block = block; + } + + std::string stringer() override { + std::stringstream str; + + str << "literal << "' T : " << i.second->stringer() + << " "; + } + str << ")"; + } else + str << "()"; + + if (ret == nullptr) + str << " Ret=NONE"; + else + str << " Ret=" << ret->stringer(); + + str << " Block=" << block->stringer(); + + str << " }>"; + return str.str(); + } + + Kind kind() override { return STMT_FUNC; } + }; + + // <- + .. 
+ class InheritStmt : public Stmt { + public: + std::vector names; + + explicit InheritStmt(std::vector names) { + this->names = std::move(names); + } + + std::string stringer() override { + std::stringstream str; + + str << "literal << "' "; + } + str << ")"; + } else + str << "()"; + + str << " }>"; + return str.str(); + } + + Kind kind() override { return STMT_INHERIT; } + }; + + // <~ expr + class CallInheritStmt : public Stmt { + public: + Expr *expr; + + int line; // line of call inherit statement + + explicit CallInheritStmt(int line, Expr *e) : expr(e) { this->line = line; } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_CALLINHERIT; } + }; + + // def (..) * -> + class InterfaceStmt : public Stmt { + public: + Arg arguments; // arguments + + token::Token name; // name + Type *ret; // return + + explicit InterfaceStmt(Arg args, token::Token name, Type *ret) { + this->arguments = args; + this->name = std::move(name); + this->ret = ret; + } + + std::string stringer() override { + std::stringstream str; + + str << "literal << "' T : " << i.second->stringer() + << " "; + } + str << ")"; + } else + str << "()"; + + if (ret == nullptr) + str << " Ret=NONE"; + else + str << " Ret=" << ret->stringer(); + + str << " }>"; + return str.str(); + } + + Kind kind() override { return STMT_INTERFACE; } + }; + + // class + class WholeStmt : public Stmt { + public: + Stmt *inherit = nullptr; // inherit within class + BlockStmt *body; + + token::Token name; + + explicit WholeStmt(token::Token name, Stmt *inherit, BlockStmt *body) + : body(body), inherit(inherit) { + this->name = std::move(name); + } + + std::string stringer() override { + std::stringstream str; + + str << ""; + return str.str(); + } + + Kind kind() override { return STMT_WHOLE; } + }; + + // enum + // + // TODO: semantic analysis + // + class EnumStmt : public Stmt { + public: + std::vector field; + + token::Token name; + + explicit EnumStmt(token::Token name, 
std::vector f) + : field(f) { + this->name = std::move(name); + } + + std::string stringer() override { + std::stringstream str; + + str << "literal; + if (++iter != field.end()) { + str << " "; + } + } + str << ")"; + } + + str << " }>"; + return str.str(); + } + + Kind kind() override { return STMT_ENUM; } + }; + + // pub + class PubStmt : public Stmt { + public: + int line; + Stmt *stmt; + + PubStmt(int line, Stmt *stmt) { + this->line = line; + this->stmt = stmt; + } + + std::string stringer() override { + return ""; + } + + Kind kind() override { return STMT_PUB; } + }; +}; // namespace ast + +#endif \ No newline at end of file diff --git a/src/compiler.cpp b/src/compiler.cpp index 82703a9..6c7704e 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -10,4 +10,581 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#include "compiler.hpp" + +// return the current statement +ast::Stmt *Compiler::look() { return this->statements.at(this->position); } + +// compile statements to entities +void Compiler::compile() { + while (this->position < this->statements.size()) { + this->stmt(look()); // START + this->position++; + } + this->emitCode(byte::RET); // RET +} + +// push bytecode to entity +void Compiler::emitCode(byte::Code co) { this->now->codes.push_back(co); } + +// push offset to entity +void Compiler::emitOffset(int off) { this->now->offsets.push_back(off); } + +// push constant to entity +void Compiler::emitConstant(object::Object *obj) { + this->now->constants.push_back(obj); + this->emitOffset(this->icf++); +} + +// push name to entity +void Compiler::emitName(std::string v) { + std::vector::iterator iter = + std::find(now->names.begin(), now->names.end(), v); + if (iter != now->names.end()) { + // found + this->emitOffset( + std::distance(now->names.begin(), iter)); // only push offset + } else { + // not found + this->now->names.push_back(v); // push new name + this->emitOffset(this->inf++); 
// push new offset + } +} + +// push names type to entity +void Compiler::emitType(ast::Type *t) { + this->now->types.push_back(t); + this->emitOffset(this->itf++); +} + +// insert position with current counts of bytecode +void Compiler::insertPosOffset(int pos) { + this->now->offsets.insert(now->offsets.begin() + pos, now->codes.size()); +} + +// with custom value +void Compiler::insertPosOffset(int pos, int val) { + this->now->offsets.insert(now->offsets.begin() + pos, val); +} + +// expression +void Compiler::expr(ast::Expr *expr) { + switch (expr->kind()) { + case ast::EXPR_LITERAL: { + ast::LiteralExpr *l = static_cast(expr); + token::Token tok = l->token; + + if (tok.kind == token::NUM) { + this->emitConstant(new object::Int(std::stoi(tok.literal))); + } + if (tok.kind == token::FLOAT) { + this->emitConstant(new object::Float(std::stof(tok.literal))); + } + if (tok.kind == token::STR) { + this->emitConstant( + // judge long characters at here + new object::Str(tok.literal, tok.literal.back() == '`')); + } + if (tok.kind == token::CHAR) { + this->emitConstant(new object::Char(tok.literal.at(0))); + } + this->emitCode(byte::CONST); + } break; + case ast::EXPR_BINARY: { + ast::BinaryExpr *b = static_cast(expr); + + this->expr(b->left); + this->expr(b->right); + + switch (b->op.kind) { + case token::ADD: this->emitCode(byte::ADD); break; + case token::SUB: this->emitCode(byte::SUB); break; + case token::MUL: this->emitCode(byte::MUL); break; + case token::DIV: this->emitCode(byte::DIV); break; + + case token::AS_ADD: this->emitCode(byte::A_ADD); break; + case token::AS_SUB: this->emitCode(byte::A_SUB); break; + case token::AS_MUL: this->emitCode(byte::A_MUL); break; + case token::AS_DIV: this->emitCode(byte::A_DIV); break; + + case token::GREATER: this->emitCode(byte::GR); break; + case token::LESS: this->emitCode(byte::LE); break; + case token::GR_EQ: this->emitCode(byte::GR_E); break; + case token::LE_EQ: this->emitCode(byte::LE_E); break; + case token::EQ_EQ: 
this->emitCode(byte::E_E); break; + case token::BANG_EQ: this->emitCode(byte::N_E); break; + case token::ADDR: this->emitCode(byte::AND); break; + case token::OR: this->emitCode(byte::OR); break; + } + + if (b->op.kind == token::AS_ADD || b->op.kind == token::AS_SUB || + b->op.kind == token::AS_MUL || b->op.kind == token::AS_DIV) { + ast::NameExpr *n = static_cast(b->left); + + this->emitName(n->token.literal); + this->emitCode(byte::ASSIGN); + } + } break; + // + case ast::EXPR_GROUP: { + ast::GroupExpr *g = static_cast(expr); + + this->expr(g->expr); + } break; + // + case ast::EXPR_UNARY: { + ast::UnaryExpr *u = static_cast(expr); + + this->expr(u->expr); + + if (u->token.kind == token::BANG) { + this->emitCode(byte::BANG); + } + if (u->token.kind == token::SUB) { + this->emitCode(byte::NOT); + } + } break; + // + case ast::EXPR_NAME: { + ast::NameExpr *n = static_cast(expr); + + this->emitCode(byte::LOAD); + this->emitName(n->token.literal); // new name + + // increment and prefix + if (n->selfIncrement && n->prefix) this->emitCode(byte::P_INCR); + if (n->selfIncrement) this->emitCode(byte::INCR); // suffix + + // decrement and prefix + if (n->selfDecrement && n->prefix) this->emitCode(byte::P_DECR); + if (n->selfDecrement) this->emitCode(byte::DECR); // suffix + } break; + // + case ast::EXPR_CALL: { + ast::CallExpr *c = static_cast(expr); + + this->expr(c->callee); + + for (int i = c->arguments.size(); i > 0; i--) + this->expr(c->arguments.at(i - 1)); // arguments + + this->emitCode(byte::CALL); + this->emitOffset(c->arguments.size()); + } break; + // + case ast::EXPR_GET: { + ast::GetExpr *g = static_cast(expr); + + this->expr(g->expr); + + this->emitCode(byte::GET); + this->emitName(g->name.literal); // name + } break; + // + case ast::EXPR_SET: { + ast::SetExpr *s = static_cast(expr); + + this->expr(s->value); // right expression + this->expr(s->expr); // left expression + + this->emitCode(byte::SET); + this->emitName(s->name.literal); // name + } break; + 
// + case ast::EXPR_ASSIGN: { + ast::AssignExpr *a = static_cast(expr); + + this->expr(a->value); // right expression + + this->emitCode(byte::ASSIGN); + this->emitName(static_cast(a->expr)->token.literal); + } break; + // + case ast::EXPR_ARRAY: { + ast::ArrayExpr *a = static_cast(expr); + + // push elements from right to left + for (int i = a->elements.size(); i > 0; i--) + this->expr(a->elements.at(i - 1)); + + this->emitCode(byte::B_ARR); + this->emitOffset(a->elements.size()); // length + } break; + // + case ast::EXPR_TUPLE: { + ast::TupleExpr *t = static_cast(expr); + + for (int i = t->elements.size(); i > 0; i--) + this->expr(t->elements.at(i - 1)); + + this->emitCode(byte::B_TUP); + this->emitOffset(t->elements.size()); // length + } break; + // + case ast::EXPR_MAP: { + ast::MapExpr *m = static_cast(expr); + + for (std::map::reverse_iterator iter = + m->elements.rbegin(); + iter != m->elements.rend(); iter++) { + // from right to left by iterator + this->expr(iter->first); + this->expr(iter->second); + } + + this->emitCode(byte::B_MAP); + this->emitOffset(m->elements.size() * 2); // length + } break; + // + case ast::EXPR_INDEX: { + ast::IndexExpr *i = static_cast(expr); + + this->expr(i->right); + this->expr(i->left); + + this->emitCode(byte::INDEX); + } break; + // + case ast::EXPR_NEW: { + ast::NewExpr *n = static_cast(expr); + + for (auto i : n->builder) { + this->emitCode(byte::NAME); + this->emitName(i.first->literal); // K + + this->expr(i.second); // V + } + + this->emitCode(byte::NEW); + this->emitName(n->name.literal); // name + this->emitOffset(n->builder.size() * 2); // fields + } break; + } +} + +// statements +void Compiler::stmt(ast::Stmt *stmt) { + switch (stmt->kind()) { + case ast::STMT_EXPR: + this->expr(static_cast(stmt)->expr); // expression + break; + // + case ast::STMT_VAR: { + ast::VarStmt *v = static_cast(stmt); + + if (v->expr != nullptr) + this->expr(v->expr); // initial value + else + this->emitCode(byte::ORIG); // original 
value + + this->emitCode(byte::STORE); + this->emitName(v->name.literal); + + this->emitType(v->T); // type + } break; + // + case ast::STMT_BLOCK: { + ast::BlockStmt *b = static_cast(stmt); + + for (auto i : b->block) this->stmt(i); + } break; + // + case ast::STMT_IF: { + ast::IfStmt *i = static_cast(stmt); + /** + * Moisture regain algorithm + */ + this->expr(i->condition); + this->emitCode(byte::F_JUMP); + int ifPos = now->offsets.size(); + + this->stmt(i->ifBranch); + this->emitCode(byte::JUMP); // jump out after + + int ifOff = now->offsets.size(); // jump after execution if branch + std::vector tempEfOffs; // ef condition offsets + + // ef branch + if (!i->efBranch.empty()) { + bool firstStmt = true; + + for (auto i : i->efBranch) { + // if jump to the first ef + if (firstStmt) { + this->insertPosOffset(ifPos); // TO: if (F_JUMP) + firstStmt = false; + } + + this->expr(i.first); // condition + this->emitCode(byte::F_JUMP); + int efPos = now->offsets.size(); + + this->stmt(i.second); // block + this->insertPosOffset(efPos, + now->codes.size() + 1); // TO: ef (F_JUMP) + + this->emitCode(byte::JUMP); // jump out after + tempEfOffs.push_back(now->offsets.size()); + } + // nf branch + if (i->nfBranch != nullptr) this->stmt(i->nfBranch); + } + // nf branch + else { + if (i->nfBranch != nullptr) { + this->insertPosOffset(ifPos); // TO: if (F_JUMP) + this->stmt(i->nfBranch); + } else { + // no ef and nf statement + this->insertPosOffset(ifPos); // TO: if (F_JUMP) + } + } + + // for (auto i : tempEfOffs) std::cout << i << std::endl; + for (int i = 0; i < tempEfOffs.size(); i++) { + // insertion increment successively + this->insertPosOffset(tempEfOffs.at(i) + i); + } + + this->insertPosOffset(ifOff + 1); // TO: if (JUMP) + } break; + // + case ast::STMT_FOR: { + ast::ForStmt *f = static_cast(stmt); + + int original = now->codes.size(); // original state: for callback loops + + // DEAD LOOP + if (f->condition == nullptr) this->stmt(f->block); + // condition and block 
+ else { + this->expr(f->condition); + this->emitCode(byte::F_JUMP); + int ePos = now->offsets.size(); // skip loop for false + + this->stmt(f->block); // block + + // jump to next bytecode + this->insertPosOffset(ePos, + now->codes.size() + 1); // TO: (F_JUMP) + } + this->emitCode(byte::JUMP); // back to original state + this->emitOffset(original); + // replace placeholder + for (std::vector::iterator iter = now->offsets.begin(); + iter != now->offsets.end(); iter++) { + // out statement + if (*iter == -1) { + *iter = now->codes.size(); + } + // tin statement + if (*iter == -2) { + *iter = original; + } + } + } break; + // + case ast::STMT_DO: { + ast::DoStmt *d = static_cast(stmt); + + this->stmt(d->block); // execute the do block first + this->stmt(d->stmt); // then execute loop + } break; + // + case ast::STMT_OUT: { + ast::OutStmt *o = static_cast(stmt); + + if (o->expr != nullptr) this->expr(o->expr); + + // jump straight out + this->emitCode(o->expr == nullptr ? byte::JUMP : byte::T_JUMP); + // place holder + this->emitOffset(-1); + } break; + // + case ast::STMT_TIN: { + ast::TinStmt *t = static_cast(stmt); + + if (t->expr != nullptr) this->expr(t->expr); + + // jump straight out + this->emitCode(t->expr == nullptr ? 
byte::JUMP : byte::T_JUMP); + // place holder + this->emitOffset(-2); + } break; + // + case ast::STMT_FUNC: { + ast::FuncStmt *f = static_cast(stmt); + + int entitiesSize = this->entities.size() - 1; // original + + this->entities.push_back( + new Entity(f->name.literal)); // new entity for function statement + this->now = this->entities.back(); + + object::Func *obj = new object::Func; + + obj->name = f->name.literal; // function name + obj->arguments = f->arguments; // function arguments + obj->ret = f->ret; // function return + + int x = this->icf; + int y = this->inf; + int z = this->itf; + + this->icf = 0; // x + this->inf = 0; // y + this->itf = 0; // z + + this->stmt(f->block); + + this->icf = x; + this->inf = y; + this->itf = z; + + obj->entity = this->now; // function entity + + // if more than one it points to the last one + this->now = this->entities.at(entitiesSize); // restore to main entity + + // TO main ENTITY + this->emitCode(byte::FUNC); + this->emitConstant(obj); // push to constant object + } break; + // + case ast::STMT_WHOLE: { + ast::WholeStmt *w = static_cast(stmt); + + int entitiesSize = this->entities.size() - 1; // original + + this->entities.push_back( + new Entity(w->name.literal)); // new entity for whole statement + this->now = this->entities.back(); + + object::Whole *obj = new object::Whole; + + obj->name = w->name.literal; // whole name + + // whole inherit + if (w->inherit != nullptr) { + ast::InheritStmt *i = static_cast(w->inherit); + for (auto iter : i->names) { + obj->inherit.push_back(iter->literal); + } + } + + int x = this->icf; + int y = this->inf; + int z = this->itf; + + this->icf = 0; // x + this->inf = 0; // y + this->itf = 0; // z + + // block statement + for (auto i : w->body->block) { + // interface definition + if (i->kind() == ast::STMT_INTERFACE) { + ast::InterfaceStmt *inter = static_cast(i); + obj->interface.push_back(std::make_tuple( + inter->name.literal, inter->arguments, inter->ret)); + continue; + } + 
this->stmt(i); + } + + this->icf = x; + this->inf = y; + this->itf = z; + + obj->entity = this->now; // whole entity + + // if more than one it points to the last one + this->now = this->entities.at(entitiesSize); // restore to main entity + + // TO main ENTITY + this->emitCode(byte::WHOLE); + this->emitConstant(obj); // push to constant object + } break; + // + case ast::STMT_AND: { + ast::AndStmt *a = static_cast(stmt); + + this->emitCode(byte::CHA); + this->emitName(a->name.literal); // STORE + + this->stmt(a->block); + + this->emitCode(byte::END); + this->emitName(a->name.literal); // END + } break; + // + case ast::STMT_MOD: { + ast::ModStmt *m = static_cast(stmt); + + this->emitCode(byte::MOD); + this->emitName(m->name.literal); + } break; + // + case ast::STMT_USE: { + ast::UseStmt *u = static_cast(stmt); + + if (u->as != nullptr) { + this->emitCode(byte::UAS); + + this->emitName(u->name.literal); // name + this->emitName(u->as->literal); // alias + } else { + this->emitCode(byte::USE); + + this->emitName(u->name.literal); + } + } break; + // + case ast::STMT_RET: { + ast::RetStmt *r = static_cast(stmt); + + if (r->stmt != nullptr) this->stmt(r->stmt); + + this->emitCode(byte::RET); + } break; + // + case ast::STMT_ENUM: { + ast::EnumStmt *e = static_cast(stmt); + + object::Enum *obj = new object::Enum; + obj->name = e->name.literal; + + for (int i = 0; i < e->field.size(); i++) { + obj->elements.insert(std::make_pair(i, e->field.at(i)->literal)); + } + + this->emitCode(byte::ENUM); + this->emitConstant(obj); // push to constant object + } break; + // + case ast::STMT_CALLINHERIT: { + ast::CallInheritStmt *c = static_cast(stmt); + ast::CallExpr *e = static_cast(c->expr); + + this->expr(e->callee); + + for (int i = e->arguments.size(); i > 0; i--) + this->expr(e->arguments.at(i - 1)); + + this->emitCode(byte::CALL_I); + this->emitOffset(e->arguments.size()); // length + } break; + // + case ast::STMT_PUB: { + ast::PubStmt *p = static_cast(stmt); + + 
this->stmt(p->stmt); + this->emitCode(byte::PUB); + } break; + // + default: break; + } +} \ No newline at end of file diff --git a/src/compiler.hpp b/src/compiler.hpp index 82703a9..4702dbc 100644 --- a/src/compiler.hpp +++ b/src/compiler.hpp @@ -10,4 +10,52 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_COMPILER_H +#define DRIFT_COMPILER_H + +#include + +#include "ast.hpp" +#include "opcode.hpp" +#include "object.hpp" +#include "entity.hpp" + +// compiler structure +class Compiler { +private: + int position = 0; + // after semantic analysis + std::vector statements; + // return the current statement + ast::Stmt *look(); + // offset of constant, offset of name, offset of type + int icf = 0, inf = 0, itf = 0; + + void emitCode(byte::Code); // push bytecode to entity + void emitOffset(int); // push offset to entity + void emitConstant(object::Object *); // push constant to entity + void emitName(std::string); // push name to entity + void emitType(ast::Type *); // push names type to entity + + // insert position with current counts of bytecode + void insertPosOffset(int); + void insertPosOffset(int, int); // with custom value + + void expr(ast::Expr *); // expression + void stmt(ast::Stmt *); // statements + +public: + Compiler(std::vector statements) : statements(statements) {} + + // entities of compiled + std::vector entities = {new Entity("main")}; + // compile statements to entities + void compile(); + + // currently compile entity + Entity *now = entities.at(0); +}; + +#endif \ No newline at end of file diff --git a/src/drift.cpp b/src/drift.cpp index f2f4473..dfee191 100644 --- a/src/drift.cpp +++ b/src/drift.cpp @@ -12,4269 +12,41 @@ // https://www.drift-lang.fun/ // -#include -#include +#include #include -#include -#include -#include -#include -#include -#include +#include "compiler.hpp" +#include "lexer.hpp" +#include "parser.hpp" +#include "semantic.hpp" +#include "vm.hpp" // 
DEBUG to output tokens and statements bool DEBUG = false; // repl mode bool REPL = false; -// tokens -namespace token { - // total number of token for drift - constexpr int len = 56; - // token type - enum Kind { - IDENT, // identifier literal - NUM, // number literal - STR, // string literal - CHAR, // char literal - FLOAT, // float literal - - ADD, // + - SUB, // - - MUL, // * - DIV, // / - - AS_ADD, // += - AS_SUB, // -= - AS_MUL, // *= - AS_DIV, // /= - - PLUS, // ++ - MINUS, // -- - - R_ARROW, // -> - L_ARROW, // <- - L_CURVED_ARROW, // <~ - - DOLLAR, // $ - DOT, // . - COMMA, // , - COLON, // : - EQ, // = - - GREATER, // > - LESS, // < - GR_EQ, // >= - LE_EQ, // <= - - ADDR, // & - OR, // | - BANG, // ! - BANG_EQ, // != - EQ_EQ, // == - // SINGLE_MARK, // ' - // DOUBLE_MARk, // " - // OBLIQUE_MARK, // ` - - L_BRACE, // { - R_BRACE, // } - L_PAREN, // ( - R_PAREN, // ) - L_BRACKET, // [ - R_BRACKET, // ] - - UNDERLINE, // _ - - EFF, // end of file - - // keywords - USE, - DEF, - PUB, - RET, - AND, - END, - IF, - EF, - NF, - FOR, - DO, - OUT, - TIN, - NEW, - MOD, - AS - }; - - // returns a string of each type - std::string kindString[len] = { - "IDENT", "NUM", "STR", - "CHAR", "FLOAT", "ADD", - "SUB", "MUL", "DIV", - "AS_ADD", "AS_SUB", "AS_MUL", - "AS_DIV", "PLUS", "MINUS", - "R_ARROW", "L_ARROW", "L_CURVED_ARROW", - "DOLLAR", "DOT", "COMMA", - "COLON", "EQ", "GREATER", - "LESS", "GR_EQ", "LE_EQ", - "ADDR", "OR", "BANG", - "BANG_EQ", "EQ_EQ", "L_BRACE", - "R_BRACE", "L_PAREN", "R_PAREN", - "L_BRACKET", "R_BRACKET", "UNDERLINE", - "EFF", "USE", "DEF", - "PUB", "RET", "AND", - "END", "IF", "EF", - "NF", "FOR", "DO", - "OUT", "TIN", "NEW", - "MOD", "AS", - }; - - // token structure - struct Token { - // token type - Kind kind = EFF; - // token literal - std::string literal; - // line of source code - int line; - }; - - // format return token structure - std::string toString(const Token &token) { - std::stringstream str; - - str << ""; - - return str.str(); - } - 
- // keywords for drift - static std::map keyword; - - // 16 keywords - // initialize it when tokenizer - void initializeKeywords() { - keyword["use"] = USE; // 1 - keyword["def"] = DEF; // 2 - keyword["pub"] = PUB; // 3 - keyword["ret"] = RET; // 4 - keyword["and"] = AND; // 5 - keyword["end"] = END; // 6 - keyword["if"] = IF; // 7 - keyword["ef"] = EF; // 8 - keyword["nf"] = NF; // 9 - keyword["for"] = FOR; // 10 - keyword["do"] = DO; // 11 - keyword["out"] = OUT; // 12 - keyword["tin"] = TIN; // 13 - keyword["new"] = NEW; // 14 - keyword["mod"] = MOD; // 15 - keyword["as"] = AS; // 16 - } - - // return the corresponding keyword type according to the literal amount - Kind getKeyword(const std::string &literal) { - auto i = keyword.find(literal); - // search map - if (i != keyword.end()) - return i->second; - else - return IDENT; - } -} // namespace token - -// exceptions -namespace exp { - // total number of exceptions - constexpr int len = 12; - // exception type - enum Kind { - // LEXER - UNKNOWN_SYMBOL, // unknown symbol - CHARACTER_EXP, // character is empty - STRING_EXP, // lost left or right mark - // PARSER - UNEXPECTED, // unexpected - INVALID_SYNTAX, // invalid syntax - INCREMENT_OP, // left value increment operand - // SEMANTIC - TYPE_ERROR, // type error - DIVISION_ZERO, // div zero - CANNOT_PUBLIC, // can not to public - ENUMERATION, // whole body not definition of enum - CALL_INHERIT, // can only be with call expr - // - RUNTIME_ERROR, - }; - - // return a string of exception type - std::string kindString[len] = { - "UNKNOWN_SYMBOL", "CHARACTER_EXP", "STRING_EXP", "UNEXPECTED", - "INVALID_SYNTAX", "INCREMENT_OP", "TYPE_ERROR", "DIVISION_ZERO", - "CANNOT_PUBLIC", "ENUMERATION", "CALL_INHERIT", "RUNTIME_ERROR"}; - - // exception structure - class Exp : public std::exception { - private: - // exception kind - Kind kind; - // exception message - std::string message; - // at exception line of source code - int line; - - public: - explicit Exp(Kind kind, 
std::string message, int line) { - this->kind = kind; - this->message = std::move(message); - this->line = line; - } - - // return a string of exception structure - std::string stringer(); - }; - - // return a string of exception structure - std::string Exp::stringer() { - std::stringstream str; - - str << "message << "\" Line="; - str << this->line << " }>"; - - return str.str(); - } -} // namespace exp - -// lexer -namespace lexer { - // lexer structure - class Lexer { - private: - // current character - int position = 0; - // current line - int line = 1; - // source code - std::string source; - - // resolve identifier - void lexIdent(); - // resolve digit - void lexDigit(); - // resolve "xxx" string literal - void lexString(bool longStr); - // resolve 'x' character literal - void lexChar(); - // resolve other symbol - void lexSymbol(); - // return current char of resolve - inline char now(); - // return next char of resolve - char peek(); - // judge the current character and process the token - bool peekEmit(token::Token *t, - char c, // current char - token::Kind k, // equal token kind - const std::string &l // equal token literal - ); - // return resolve is end - inline bool isEnd(); - // return current char is identifier - inline bool isIdent(); - // return current char is digit - inline bool isDigit(); - // return current char is whitespace - inline bool isSpace(); - // resolve to skip whitespace - inline void skipWhitespace(); - // resolve to skip line comment - inline void skipLineComment(); - // resolve to skip block comment - inline void skipBlockComment(); - - public: - explicit Lexer(std::string source) : source(std::move(source)) { - // initializer keywords map here - token::initializeKeywords(); - } - - // final token list - std::vector tokens; - // start - void tokenizer(); - // final to dissemble tokens list - void dissembleTokens(); - }; - - // start - void Lexer::tokenizer() { - while (!this->isEnd()) { - // first to skip whitespace - if 
(isSpace()) skipWhitespace(); - // identifier - else if (isIdent()) - this->lexIdent(); - // digit - else if (isDigit()) - this->lexDigit(); - // string - else if (now() == '"') - this->lexString(false); - // long strings - else if (now() == '`') - this->lexString(true); - // character - else if (now() == '\'') - this->lexChar(); - // symbol - else - this->lexSymbol(); - } - this->tokens.push_back( - // resolve end insert EFF for end of file - token::Token{token::EFF, "EFF", ++this->line}); - } - - // final to dissemble tokens list - void Lexer::dissembleTokens() { - int i = 1; - for (const auto &token : this->tokens) - std::cout << i++ << " " + token::toString(token) << std::endl; - } - - // return resolve is end - inline bool Lexer::isEnd() { return this->position >= this->source.length(); } - - // resolve to skip whitespace - inline void Lexer::skipWhitespace() { - while (!isEnd() && this->isSpace()) { - if (now() == '\n') this->line++; - this->position++; - } - } - - // resolve to skip line comment - inline void Lexer::skipLineComment() { - while (!isEnd() && now() != '\n') this->position++; - } - - // resolve to skip block comment - inline void Lexer::skipBlockComment() { - while (!isEnd()) { - if (now() == '*' && peek() == '/') { - this->position += 2; - break; - } - this->position++; - } - } - - // return current char is identifier - inline bool Lexer::isIdent() { - return now() >= 'a' && now() <= 'z' || now() >= 'A' && now() <= 'Z' || - now() == '_'; - } - - // return current char is digit - inline bool Lexer::isDigit() { return now() >= '0' && now() <= '9'; } - - // return current char is whitespace - inline bool Lexer::isSpace() { - if (now() == ' ' || now() == '\r' || now() == '\t' || now() == '\n') { - return true; - } - return false; - } - - // return current char of resolve - inline char Lexer::now() { return this->source.at(this->position); } - - // resolve identifier - void Lexer::lexIdent() { - std::stringstream literal; - - while (!isEnd()) { - if 
(isIdent()) - literal << now(); - else - break; - this->position++; - } - - this->tokens.push_back(token::Token{// keyword or IDENT - token::getKeyword(literal.str()), - literal.str(), this->line}); - } - - // resolve digit - void Lexer::lexDigit() { - std::stringstream literal; - - bool floating = false; - - while (!isEnd()) { - if (isDigit() || now() == '.') { - literal << now(); - - if (now() == '.') floating = true; - } else - break; - this->position++; - } - - this->tokens.push_back( - // number or float - token::Token{floating ? token::FLOAT : token::NUM, literal.str(), - this->line}); - } - - // resolve string literal - void Lexer::lexString(bool longStr) { - char cond = '"'; - // longer string - if (longStr) cond = '`'; - - std::stringstream literal; - bool isEndFile = false; - - // skip left double quotation mark - this->position++; - - while (!isEnd()) { - if (now() == cond) { - // end string - this->position++; - isEndFile = true; - break; - } - if (now() == '\n' && !longStr) { - throw exp::Exp(exp::STRING_EXP, - // long strings - "for long strings use the ` operator", this->line); - break; - } - literal << now(); - this->position++; - } - - // missing closing symbol - if (!isEndFile) - throw exp::Exp(exp::STRING_EXP, "missing closing symbol", this->line); - - // add judgment character - // used to judge long characters at compile time - literal << cond; - - this->tokens.push_back( - // string - token::Token{token::STR, literal.str(), this->line}); - } - - // resolve character - void Lexer::lexChar() { - std::stringstream literal; - - // skip left single quotation mark - this->position++; - if (isEnd()) - throw exp::Exp(exp::CHARACTER_EXP, "wrong character", this->line); - - literal << now(); - - if (peek() != '\'') - // this character is empty - throw exp::Exp(exp::CHARACTER_EXP, "wrong character", this->line); - else - // skip value and right single quotation mark - this->position += 2; - - this->tokens.push_back( - // character - 
token::Token{token::CHAR, literal.str(), this->line}); - } - - // resolve symbols - void Lexer::lexSymbol() { - token::Token tok; - - tok.literal = now(); - tok.line = this->line; - - switch (now()) { - case '(': tok.kind = token::L_PAREN; break; - case ')': tok.kind = token::R_PAREN; break; - case '{': tok.kind = token::L_BRACE; break; - case '}': tok.kind = token::R_BRACE; break; - case '[': tok.kind = token::L_BRACKET; break; - case ']': tok.kind = token::R_BRACKET; break; - case ':': tok.kind = token::COLON; break; - case '+': - if (peekEmit(&tok, '=', token::AS_ADD, "+=")) break; - if (peekEmit(&tok, '+', token::PLUS, "++")) - break; - else - tok.kind = token::ADD; - break; - case '-': - if (peekEmit(&tok, '>', token::R_ARROW, "->")) break; - if (peekEmit(&tok, '-', token::MINUS, "--")) break; - if (peekEmit(&tok, '=', token::AS_SUB, "-=")) - break; - else - tok.kind = token::SUB; - break; - case '*': - if (peekEmit(&tok, '=', token::AS_MUL, "*=")) - break; - else - tok.kind = token::MUL; - break; - case '/': - if (peekEmit(&tok, '=', token::AS_DIV, "/=")) break; - // to resolve skip comment - else if (peek() == '/') { - this->skipLineComment(); - // continue - return; - } - // block comment - else if (peek() == '*') { - this->skipBlockComment(); - return; - } else - tok.kind = token::DIV; - break; - case '$': tok.kind = token::DOLLAR; break; - case '.': tok.kind = token::DOT; break; - case ',': tok.kind = token::COMMA; break; - case '>': - if (peekEmit(&tok, '=', token::GR_EQ, ">=")) - break; - else - tok.kind = token::GREATER; - break; - case '<': - if (peekEmit(&tok, '=', token::LE_EQ, "<=")) break; - if (peekEmit(&tok, '-', token::L_ARROW, "<-")) break; - if (peekEmit(&tok, '~', token::L_CURVED_ARROW, "<~")) - break; - else - tok.kind = token::LESS; - break; - case '&': tok.kind = token::ADDR; break; - case '|': tok.kind = token::OR; break; - case '!': - if (peekEmit(&tok, '=', token::BANG_EQ, "!=")) - break; - else - tok.kind = token::BANG; - break; - 
case '=': - if (peekEmit(&tok, '=', token::EQ_EQ, "==")) - break; - else - tok.kind = token::EQ; - break; - case '_': - tok.kind = token::UNDERLINE; - break; - break; - default: - // what - throw exp::Exp(exp::UNKNOWN_SYMBOL, "unknown symbol", this->line); - } - // skip current single symbol - this->position++; - this->tokens.push_back(tok); - } - - // return next char of resolve - char Lexer::peek() { - if (position + 1 >= source.length()) - return -1; - else - return source.at(position + 1); - } - - // judge the current character and process the token - bool Lexer::peekEmit(token::Token *t, char c, token::Kind k, - const std::string &l) { - if (peek() == c) { - t->kind = k; - t->literal = l; - // advance - this->position++; - // - return true; - } else - return false; - } -} // namespace lexer - -// abstract syntax tree -namespace ast { - // types for drift - enum TypeKind { - T_INT, // int - T_FLOAT, // float - T_STR, // str - T_CHAR, // char - T_BOOL, // bool - T_ARRAY, // [] - T_MAP, // - T_TUPLE, // (T) - T_USER, // user - }; - -// basic type for drift -// -#define S_INT "int" // 1 -#define S_FLOAT "float" // 2 -#define S_STR "str" // 3 -#define S_CHAR "char" // 4 -#define S_BOOL "bool" // 5 - - // TYPE - class Type { - public: - // stringer - virtual std::string stringer() = 0; - // kind of basic type - virtual TypeKind kind() = 0; - }; - - // - class Int : public Type { - public: - std::string stringer() override { return ""; } - - TypeKind kind() override { return T_INT; } - }; - - // float - class Float : public Type { - public: - std::string stringer() override { return ""; } - - TypeKind kind() override { return T_FLOAT; } - }; - - // str - class Str : public Type { - public: - std::string stringer() override { return ""; } - - TypeKind kind() override { return T_STR; } - }; - - // char - class Char : public Type { - public: - std::string stringer() override { return ""; } - - TypeKind kind() override { return T_CHAR; } - }; - - // bool - class Bool : 
public Type { - public: - std::string stringer() override { return ""; } - - TypeKind kind() override { return T_BOOL; } - }; - - // array (not keyword, for compiler analysis) - // [] - class Array : public Type { - public: - Type *T; // type for elements - - explicit Array(Type *T) : T(T) {} - - std::string stringer() override { - return ""; - } - - TypeKind kind() override { return T_ARRAY; } - }; - - // map (not keyword, for compiler analysis) - // - class Map : public Type { - public: - Type *T1; // K - Type *T2; // V - - explicit Map(Type *T1, Type *T2) : T1(T1), T2(T2) {} - - std::string stringer() override { - return ""; - } - - TypeKind kind() override { return T_MAP; } - }; - - // tuple (not keyword, for compiler analysis) - // (type) - class Tuple : public Type { - public: - Type *T; // type for elements - - explicit Tuple(Type *T) : T(T) {} - - std::string stringer() override { - return ""; - } - - TypeKind kind() override { return T_TUPLE; } - }; - - // user definition type - // `type` - class User : public Type { - public: - token::Token name; - - explicit User(token::Token name) { this->name = std::move(name); } - - std::string stringer() override { - return ""; - } - - TypeKind kind() override { return T_USER; } - }; - - // ast types - enum Kind { - // expression - EXPR_LITERAL, // literal - EXPR_BINARY, // T1 T2 - EXPR_GROUP, // (EXPR) - EXPR_UNARY, // EXPR - EXPR_NAME, // IDENT - EXPR_CALL, // EXPR(..) - EXPR_GET, // EXPR.NAME - EXPR_SET, // EXPR.NAME = EXPR - EXPR_ASSIGN, // EXPR = EXPR - EXPR_ARRAY, // [..] - EXPR_MAP, // {K1: V1, K2: V2} - EXPR_TUPLE, // (..) - EXPR_INDEX, // EXPR[EXPR] - EXPR_NEW, - // statement - STMT_EXPR, // EXPR - STMT_VAR, // VAR - STMT_BLOCK, // BLOCK - STMT_IF, // IF - STMT_FOR, // FOR - STMT_DO, // DO - STMT_OUT, // OUT - STMT_TIN, // TIN - STMT_FUNC, // FUNC - STMT_WHOLE, // CLASS | ENUM - STMT_AND, // AND - STMT_MOD, // MOD - STMT_USE, // USE - STMT_RET, // RET - STMT_ENUM, // ENUM - STMT_INHERIT, // <- + .. 
- STMT_CALLINHERIT, // <~ expr - STMT_INTERFACE, // INTERFACE - STMT_PUB, // PUB - }; - - // K1: V1 | K1 + K2: V2 - using Arg = std::map; - - // abstract expr - class Expr { - public: - // return string of expr - virtual std::string stringer() = 0; - // return kind of expr - virtual Kind kind() = 0; - }; - - // number | string | char - class LiteralExpr : public Expr { - public: - // literal - token::Token token; - - explicit LiteralExpr(token::Token tok) { this->token = std::move(tok); } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_LITERAL; } - }; - - // T1 T2 - // + | - | * | / | += | -= | *= | /= | > | >= | < | <= | != | == | & | | - class BinaryExpr : public Expr { - public: - Expr *left; // left - token::Token op; // operator - Expr *right; // right - - explicit BinaryExpr(Expr *l, token::Token op, Expr *r) : left(l), right(r) { - this->op = std::move(op); - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - - return str.str(); - } - - Kind kind() override { return EXPR_BINARY; } - }; - - //(EXPR) - class GroupExpr : public Expr { - public: - Expr *expr; - - explicit GroupExpr(Expr *expr) : expr(expr) {} - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_GROUP; } - }; - - //EXPR - class UnaryExpr : public Expr { - public: - token::Token token; - Expr *expr; - - explicit UnaryExpr(token::Token tok, Expr *expr) : expr(expr) { - this->token = std::move(tok); - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - - return str.str(); - } - - Kind kind() override { return EXPR_UNARY; } - }; - - // IDENT - class NameExpr : public Expr { - public: - token::Token token; - - bool selfIncrement; // ++ - bool selfDecrement; // -- - - bool prefix; // prefix, calc it first - - explicit NameExpr(token::Token tok, - bool x = false, // increment - bool y = false, // decrement - bool z = false // prefix - ) { - this->token = 
std::move(tok); - - this->selfIncrement = x; - this->selfDecrement = y; - - this->prefix = z; - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - - return str.str(); - } - - Kind kind() override { return EXPR_NAME; } - }; - - // EXPR(..) - class CallExpr : public Expr { - public: - Expr *callee; - std::vector arguments; - - explicit CallExpr(Expr *expr, std::vector args) : callee(expr) { - this->arguments = std::move(args); - } - - std::string stringer() override { - std::stringstream str; - - str << "stringer() << " "; - str << ")"; - } else - str << " Args=()"; - - str << " }>"; - return str.str(); - } - - Kind kind() override { return EXPR_CALL; } - }; - - // EXPR.NAME - class GetExpr : public Expr { - public: - token::Token name; - - Expr *expr; - - explicit GetExpr(Expr *expr, token::Token name) : expr(expr) { - this->name = std::move(name); - } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_GET; } - }; - - // EXPR.NAME = EXPR - class SetExpr : public Expr { - public: - Expr *expr; - token::Token name; - Expr *value; - - explicit SetExpr(Expr *e, token::Token name, Expr *v) : expr(e), value(v) { - this->name = std::move(name); - } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_SET; } - }; - - // EXPR = EXPR - class AssignExpr : public Expr { - public: - Expr *expr; - Expr *value; - - explicit AssignExpr(Expr *e, Expr *v) : expr(e), value(v) {} - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_ASSIGN; } - }; - - //[..] 
- class ArrayExpr : public Expr { - public: - std::vector elements; - - explicit ArrayExpr(std::vector e) { this->elements = std::move(e); } - - std::string stringer() override { - std::stringstream str; - - str << "stringer() << " "; - } - - str << "] }>"; - return str.str(); - } - - Kind kind() override { return EXPR_ARRAY; } - }; - - //{K1: V1, K2: V2} - class MapExpr : public Expr { - public: - std::map elements; - - explicit MapExpr(std::map e) { - this->elements = std::move(e); - } - - std::string stringer() override { - std::stringstream str; - - str << "stringer() - << ", V : " << i.second->stringer() << " "; - } - - str << "} }>"; - return str.str(); - } - - Kind kind() override { return EXPR_MAP; } - }; - - //(..) - class TupleExpr : public Expr { - public: - std::vector elements; - - explicit TupleExpr(std::vector e) { this->elements = std::move(e); } - - std::string stringer() override { - std::stringstream str; - - str << "stringer() << " "; - } - - str << ") }>"; - return str.str(); - } - - Kind kind() override { return EXPR_TUPLE; } - }; - - // EXPR[EXPR] - class IndexExpr : public Expr { - public: - Expr *left; - Expr *right; - - explicit IndexExpr(Expr *l, Expr *r) : left(l), right(r) {} - - std::string stringer() override { - return ""; - } - - Kind kind() override { return EXPR_INDEX; } - }; - - // new {K1: V1, K2: V2} - class NewExpr : public Expr { - public: - token::Token name; - std::map builder; - - explicit NewExpr(token::Token name, - std::map builder) { - this->name = std::move(name); - this->builder = builder; - } - - std::string stringer() override { - if (builder.empty()) { - return ""; - } - std::stringstream str; - - str << "literal - << "' V : " << i.second->stringer(); - } - str << ")"; - } - return str.str(); - } - - Kind kind() override { return EXPR_NEW; } - }; - - // abstract stmt - class Stmt { - public: - // return string of stmt - virtual std::string stringer() = 0; - // return kind of stmt - virtual Kind kind() = 0; - }; - 
- // - class ExprStmt : public Stmt { - public: - Expr *expr; - - explicit ExprStmt(Expr *expr) : expr(expr) {} - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_EXPR; } - }; - - // def : = - class VarStmt : public Stmt { - public: - token::Token name; - - // type define - Type *T; - - // default is not init - Expr *expr = nullptr; - - // has expr - explicit VarStmt(token::Token name, Type *T, Expr *e) : T(T), expr(e) { - this->name = std::move(name); - } - - // not init expr - explicit VarStmt(token::Token name, Type *T) : T(T) { - this->name = std::move(name); - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - else - str << " }>"; - - return str.str(); - } - - Kind kind() override { return STMT_VAR; } - }; - - // .. end - class BlockStmt : public Stmt { - public: - std::vector block; - - explicit BlockStmt(std::vector block) : block(block) {} - - std::string stringer() override { - std::stringstream str; - - str << ""; - return str.str(); - } - - Kind kind() override { return STMT_BLOCK; } - }; - - /** - * if - * - * ef - * - * ef - * - * nf - * - */ - class IfStmt : public Stmt { - public: - Expr *condition; // main condition - BlockStmt *ifBranch; // main condition branch - - std::map efBranch; // ef cond and branch - - BlockStmt *nfBranch; // nf branch; - - explicit IfStmt(Expr *cond, BlockStmt *then, - std::map ef, BlockStmt *nf) { - this->condition = cond; - this->ifBranch = then; - this->efBranch = std::move(ef); - this->nfBranch = nf; - } - - explicit IfStmt(Expr *cond, BlockStmt *then) { - this->condition = cond; - this->ifBranch = then; - } - - std::string stringer() override { - std::stringstream str; - - str << "stringer() - << ", V : " << i.second->stringer() << " "; - } - if (nfBranch != nullptr) str << " NfBranch=" << nfBranch->stringer(); - - str << " }>"; - return str.str(); - } - - Kind kind() override { return STMT_IF; } - }; - - /** - * for - * - * end - */ - class 
ForStmt : public Stmt { - public: - Expr *condition; // cond - BlockStmt *block; // stmt - - explicit ForStmt(Expr *cond, BlockStmt *block) { - this->condition = cond; - this->block = block; - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - return str.str(); - } - - Kind kind() override { return STMT_FOR; } - }; - - /** - * do - * - * for - * - * end - */ - class DoStmt : public Stmt { - public: - BlockStmt *block; // first do block - Stmt *stmt; // for statement - - explicit DoStmt(BlockStmt *block, Stmt *stmt) { - this->block = block; - this->stmt = stmt; - } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_DO; } - }; - - // out - class OutStmt : public Stmt { - public: - Expr *expr; - - OutStmt() { this->expr = nullptr; } - - explicit OutStmt(Expr *e) : expr(e) {} - - std::string stringer() override { - if (expr == nullptr) { - return ""; - } - return ""; - } - - Kind kind() override { return STMT_OUT; } - }; - - // tin - class TinStmt : public Stmt { - public: - Expr *expr; - - TinStmt() { this->expr = nullptr; } - - explicit TinStmt(Expr *e) : expr(e) {} - - std::string stringer() override { - if (expr == nullptr) { - return ""; - } - return ""; - } - - Kind kind() override { return STMT_TIN; } - }; - - // mod - class ModStmt : public Stmt { - public: - token::Token name; - - explicit ModStmt(token::Token name) { this->name = std::move(name); } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_MOD; } - }; - - // use | as - class UseStmt : public Stmt { - public: - token::Token name; - token::Token *as = nullptr; - - // use - explicit UseStmt(token::Token name) { this->name = std::move(name); } - - // use as - explicit UseStmt(token::Token name, token::Token *as) { - this->name = std::move(name); - this->as = as; - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - return str.str(); - } - - Kind kind() 
override { return STMT_USE; } - }; - - // ret - // ret -> - class RetStmt : public Stmt { - public: - Stmt *stmt = nullptr; - - explicit RetStmt() {} - - explicit RetStmt(Stmt *s) : stmt(s) {} - - std::string stringer() override { - if (stmt == nullptr) { - return ""; - } - return ""; - } - - Kind kind() override { return STMT_RET; } - }; - - // and -> end - class AndStmt : public Stmt { - public: - token::Token name; // alias name - BlockStmt *block; // block - - explicit AndStmt(token::Token name, BlockStmt *block) : block(block) { - this->name = std::move(name); - } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_AND; } - }; - - // def (..) -> - // - // end - class FuncStmt : public Stmt { - public: - Arg arguments; // args - - token::Token name; // name - Type *ret; // return - BlockStmt *block; // body - - explicit FuncStmt(Arg args, token::Token name, Type *ret, - BlockStmt *block) { - this->arguments = args; - this->name = std::move(name); - this->ret = ret; - this->block = block; - } - - std::string stringer() override { - std::stringstream str; - - str << "literal << "' T : " << i.second->stringer() - << " "; - } - str << ")"; - } else - str << "()"; - - if (ret == nullptr) - str << " Ret=NONE"; - else - str << " Ret=" << ret->stringer(); - - str << " Block=" << block->stringer(); - - str << " }>"; - return str.str(); - } - - Kind kind() override { return STMT_FUNC; } - }; - - // <- + .. 
- class InheritStmt : public Stmt { - public: - std::vector names; - - explicit InheritStmt(std::vector names) { - this->names = std::move(names); - } - - std::string stringer() override { - std::stringstream str; - - str << "literal << "' "; - } - str << ")"; - } else - str << "()"; - - str << " }>"; - return str.str(); - } - - Kind kind() override { return STMT_INHERIT; } - }; - - // <~ expr - class CallInheritStmt : public Stmt { - public: - Expr *expr; - - int line; // line of call inherit statement - - explicit CallInheritStmt(int line, Expr *e) : expr(e) { this->line = line; } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_CALLINHERIT; } - }; - - // def (..) * -> - class InterfaceStmt : public Stmt { - public: - Arg arguments; // arguments - - token::Token name; // name - Type *ret; // return - - explicit InterfaceStmt(Arg args, token::Token name, Type *ret) { - this->arguments = args; - this->name = std::move(name); - this->ret = ret; - } - - std::string stringer() override { - std::stringstream str; - - str << "literal << "' T : " << i.second->stringer() - << " "; - } - str << ")"; - } else - str << "()"; - - if (ret == nullptr) - str << " Ret=NONE"; - else - str << " Ret=" << ret->stringer(); - - str << " }>"; - return str.str(); - } - - Kind kind() override { return STMT_INTERFACE; } - }; - - // class - class WholeStmt : public Stmt { - public: - Stmt *inherit = nullptr; // inherit within class - BlockStmt *body; - - token::Token name; - - explicit WholeStmt(token::Token name, Stmt *inherit, BlockStmt *body) - : body(body), inherit(inherit) { - this->name = std::move(name); - } - - std::string stringer() override { - std::stringstream str; - - str << ""; - return str.str(); - } - - Kind kind() override { return STMT_WHOLE; } - }; - - // enum - // - // TODO: semantic analysis - // - class EnumStmt : public Stmt { - public: - std::vector field; - - token::Token name; - - explicit EnumStmt(token::Token name, 
std::vector f) - : field(f) { - this->name = std::move(name); - } - - std::string stringer() override { - std::stringstream str; - - str << "literal; - if (++iter != field.end()) { - str << " "; - } - } - str << ")"; - } - - str << " }>"; - return str.str(); - } - - Kind kind() override { return STMT_ENUM; } - }; - - // pub - class PubStmt : public Stmt { - public: - int line; - Stmt *stmt; - - PubStmt(int line, Stmt *stmt) { - this->line = line; - this->stmt = stmt; - } - - std::string stringer() override { - return ""; - } - - Kind kind() override { return STMT_PUB; } - }; -} // namespace ast - -// parser -namespace parser { - // parser structure - class Parser { - private: - // current token - int position = 0; - // token list - std::vector tokens; - // return is end of token - // end of file - inline bool isEnd(); - // return the address of the token - inline token::Token *look(bool previous); - // look current token, if equal to peek next - bool look(token::Kind kind); - // look current token, do nothing - inline token::Token look(); - // look the appoint position of tokens - token::Token look(int i); - // look previous token - inline token::Token previous(); - // parsing expressions - ast::Expr *expr(); - ast::Expr *assignment(); - ast::Expr *logicalOr(); - ast::Expr *logicalAnd(); - ast::Expr *equality(); - ast::Expr *comparison(); - ast::Expr *addition(); - ast::Expr *multiplication(); - ast::Expr *unary(); - ast::Expr *call(); - ast::Expr *primary(); - // parsing statements - ast::Stmt *stmt(); - // determine where to stop the analysis - ast::BlockStmt *block(token::Kind x, token::Kind y = token::EFF, - token::Kind z = token::EFF); - // - ast::Type *type(); - // throw an exception - inline void error(exp::Kind kind, std::string message); - - public: - // parser constructor - explicit Parser(std::vector tokens) { - // tokens - this->tokens = std::move(tokens); - } - - // final stmts list - std::vector statements; - // do parsing - void parse(); - // final 
to dissemble statement list - void dissembleStmts(); - }; - - // do parsing - void Parser::parse() { - while (!this->isEnd()) { - // push to final list - this->statements.push_back(this->stmt()); - } - } - - // final to dissemble statement list - void Parser::dissembleStmts() { - if (this->statements.empty()) { - std::cout << "Empty Statements" << std::endl; - return; - } - int i = 1; - for (auto stmt : this->statements) - std::cout << i++ << " " + stmt->stringer() << std::endl; - } - - // if kind of current token is EFF, its end of file and end of tokens - inline bool Parser::isEnd() { - return look().kind == token::EFF || this->position >= this->tokens.size(); - } - - // return the address of the token - inline token::Token *Parser::look(bool previous) { - if (previous) { - return &this->tokens.at(this->position - 1); - } - return &this->tokens.at(this->position); - } - - // return the token of the current location - inline token::Token Parser::look() { return this->tokens.at(this->position); } - - // look the appoint position of tokens - token::Token Parser::look(int i) { - if (this->position + i >= this->tokens.size()) - return token::Token - // EFF token - {token::EFF, "EFF", -1}; - else - return this->tokens.at(this->position + i); - } - - // if argument is equal to current token - bool Parser::look(token::Kind kind) { - if (this->look().kind == kind) { - this->position++; - // - return true; - } - return false; - } - - // return the previous of tokens - inline token::Token Parser::previous() { - return this->tokens.at(this->position - 1); - } - - /** - * expression - * - * assignment -> logicalOr -> logicalAnd -> equality | - * v - * | unary <- multiplication <- addition <- comparison - * v - * primary -> call - * - * - top down operation precedence grammar analysis - - */ - ast::Expr *Parser::expr() { return assignment(); } - - // EXPR.NAME = EXPR : SET - // EXPR = EXPR : ASSIGN - ast::Expr *Parser::assignment() { - ast::Expr *expr = logicalOr(); - - if 
(look(token::EQ)) { - ast::Expr *value = assignment(); - - // EXPR = EXPR - if (expr->kind() == ast::EXPR_NAME || expr->kind() == ast::EXPR_INDEX) { - return new ast::AssignExpr(expr, value); - } - // EXPR.NAME = EXPR - if (expr->kind() == ast::EXPR_GET) { - ast::GetExpr *get = static_cast(expr); - return new ast::SetExpr(get->expr, get->name, value); - } - error(exp::INVALID_SYNTAX, "cannot assign value"); - } - return expr; - } - - // | - ast::Expr *Parser::logicalOr() { - ast::Expr *expr = logicalAnd(); - - while (look(token::OR)) { - token::Token op = this->previous(); - ast::Expr *right = logicalAnd(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // & - ast::Expr *Parser::logicalAnd() { - ast::Expr *expr = equality(); - - while (look(token::ADDR)) { - token::Token op = this->previous(); - ast::Expr *right = equality(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // == | != - ast::Expr *Parser::equality() { - ast::Expr *expr = comparison(); - - while (look(token::EQ_EQ) || look(token::BANG_EQ)) { - token::Token op = this->previous(); - ast::Expr *right = comparison(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // > | >= | < | <= - ast::Expr *Parser::comparison() { - ast::Expr *expr = addition(); - - while (look(token::GREATER) || look(token::GR_EQ) || look(token::LESS) || - look(token::LE_EQ)) { - token::Token op = this->previous(); - ast::Expr *right = addition(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // + | - | += | -= - ast::Expr *Parser::addition() { - ast::Expr *expr = multiplication(); - - while (look(token::ADD) || look(token::SUB) || look(token::AS_ADD) || - look(token::AS_SUB)) { - token::Token op = this->previous(); - ast::Expr *right = multiplication(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // * | / | *= | /= - ast::Expr *Parser::multiplication() { - ast::Expr *expr 
= unary(); - - while (look(token::MUL) || look(token::DIV) || look(token::AS_MUL) || - look(token::AS_DIV)) { - token::Token op = this->previous(); - ast::Expr *right = unary(); - // - expr = new ast::BinaryExpr(expr, op, right); - } - return expr; - } - - // ! | - - ast::Expr *Parser::unary() { - while (look(token::BANG) || look(token::SUB)) { - token::Token op = previous(); - ast::Expr *expr = unary(); - // - return new ast::UnaryExpr(op, expr); - } - return call(); - } - - // expr(..) | expr.name | expr[expr] - ast::Expr *Parser::call() { - ast::Expr *expr = primary(); - // stack up the expression!! - // - // LIKE: bar(foo(1, 2, 3)[x + 4]) - // - while (true) { - // call - if (look(token::L_PAREN)) { - // arguments - auto args = std::vector(); - // no argument - if (look(token::R_PAREN)) { - expr = new ast::CallExpr(expr, args); - // to next loop - continue; - // have arguments - } else { - do { - args.push_back(this->expr()); - // - } while (look(token::COMMA)); - } - if (!look(token::R_PAREN)) - error(exp::UNEXPECTED, "expect ')' after arguments"); - expr = new ast::CallExpr(expr, args); - // get - } else if (look(token::DOT)) { - token::Token name = look(); - - this->position++; // skip name token - expr = new ast::GetExpr(expr, name); - // index for array - } else if (look(token::L_BRACKET)) { - // empty index - if (look(token::R_BRACKET)) error(exp::UNEXPECTED, "null index"); - // index - auto index = this->expr(); - - if (!look(token::R_BRACKET)) - error(exp::UNEXPECTED, "expect ']' after index of array"); - expr = new ast::IndexExpr(expr, index); - } else { - break; - } - } - return expr; - } - - // primary - ast::Expr *Parser::primary() { - // literal expr - // number | float | string | char - if (look(token::NUM) || look(token::FLOAT) || look(token::STR) || - look(token::CHAR)) - return new ast::LiteralExpr(this->previous()); - // name expr - if (look(token::IDENT)) { - token::Token tok = previous(); - // ++ | -- - if (look(token::PLUS) || 
look(token::MINUS)) - // self increment - return new ast::NameExpr(tok, previous().kind == token::PLUS, - previous().kind == token::MINUS, false); - return new ast::NameExpr(tok); - } - // name expr of ++ or -- operators - if (look(token::PLUS) || look(token::MINUS)) { - token::Token op = previous(); - // - if (look(token::IDENT)) - return new ast::NameExpr(previous(), op.kind == token::PLUS, - op.kind == token::MINUS, true); - else - error(exp::INCREMENT_OP, - "increment operand can only be performed on name"); - } - // group expr - if (look(token::L_PAREN)) { - // vector for tuple and group expression - std::vector elem; - // empty tuple expr - if (look(token::R_PAREN)) return new ast::TupleExpr(elem); - // tuple or group ? - elem.push_back(this->expr()); - - // tuple expr - if (look(token::COMMA)) { - do { - elem.push_back(this->expr()); - // - } while (look(token::COMMA)); - // - if (!look(token::R_PAREN)) - error(exp::UNEXPECTED, "expect ')' after tuple expression"); - return new ast::TupleExpr(elem); - } - - if (look(token::R_PAREN) == false) - error(exp::UNEXPECTED, "expect ')' after group expression"); - // - return new ast::GroupExpr(elem.at(0)); - } - // array expr - if (look(token::L_BRACKET)) { - auto elem = std::vector(); - - if (look(token::R_BRACKET)) - return new ast::ArrayExpr(elem); - else { - do { - elem.push_back(this->expr()); - // - } while (look(token::COMMA)); - } - if (!look(token::R_BRACKET)) - error(exp::UNEXPECTED, "expect ']' after elements"); - return new ast::ArrayExpr(elem); - } - // map expr - if (look(token::L_BRACE)) { - std::map elem; - // empty map expr - if (look(token::R_BRACE)) return new ast::MapExpr(elem); - - while (true) { - ast::Expr *K = this->expr(); - - if (!look(token::COLON)) { - error(exp::UNEXPECTED, "expect ':' after key in map"); - } - ast::Expr *V = this->expr(); - - // push to map - elem.insert(std::make_pair(K, V)); - - if (look(token::COMMA)) { - continue; - } - if (look(token::R_BRACE)) { - break; - } - 
error(exp::UNEXPECTED, "expect ',' or '}' after value in map"); - } - return new ast::MapExpr(elem); - } - // new expr - if (look(token::NEW)) { - if (!look(token::IDENT)) { - error(exp::INVALID_SYNTAX, "name of new must be an identifier"); - } - token::Token name = previous(); // name of new - - std::map builder; // fields - - if (!look(token::L_BRACE)) return new ast::NewExpr(name, builder); - - while (true) { - if (!look(token::IDENT)) { - error(exp::INVALID_SYNTAX, - "key of name for new statement must be an identifier"); - } - int tempPos = this->position - 1; - - if (!look(token::COLON)) { - error(exp::INVALID_SYNTAX, "expect ':' after key"); - } - ast::Expr *V = this->expr(); // expr V - - builder.insert(std::make_pair(&this->tokens.at(tempPos), V)); - - if (look(token::COMMA)) { - continue; - } - if (look(token::R_BRACE)) { - break; - } - } - return new ast::NewExpr(name, builder); - } - // end - error(exp::INVALID_SYNTAX, "invalid expression: " + look().literal); - return nullptr; - } - - // statement - ast::Stmt *Parser::stmt() { - switch (this->look().kind) { - // definition statement - case token::DEF: - this->position++; - // variable - if (look(token::IDENT) && look().kind == token::COLON) { - token::Token name = previous(); - this->position++; // skip colon symbol - - ast::Type *T = this->type(); - - // value of variable - if (look(token::EQ)) - // there is an initial value - return new ast::VarStmt(name, T, this->expr()); - else - return new ast::VarStmt(name, T); - } - // function or interface - else if (look(token::L_PAREN)) { - // arguments - // - // <[ [token] ], Expr> - ast::Arg args; - // name - token::Token name; - // return - ast::Type *ret = nullptr; - // cache multiple parameters - std::vector temp; - // - bool interfaceStmt = false; - - while (!look(token::R_PAREN)) { - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "argument name muse be an identifier"); - } - // K - token::Token *K = look(true); // address of token - // handle 
multiparameter - while (look(token::ADD)) { - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "argument name muse be an identifier"); - } else { - temp.push_back(K); // previous, - // left of the - // plus sign - temp.push_back(look(true)); // address - // of token - } - } - if (!look(token::COLON)) { - error(exp::UNEXPECTED, "expect ':' after parameter name"); - } - // handle multiparameter - if (temp.empty()) { - // no - args.insert(std::make_pair(K, this->type())); - } else { - // multip - ast::Type *T = this->type(); - - for (auto i : temp) { - args.insert(std::make_pair(i, T)); - } - } - if (look(token::COMMA)) { - continue; - } - } - // function - if (look(token::IDENT)) { - name = previous(); - } - // interface - else if (look(token::MUL)) { - name = look(); // name of interface - // - this->position++; // skip name of - // interface - // - // current parsing interface statement - interfaceStmt = true; - } - // error - else { - error(exp::UNEXPECTED, - "expect '*' to interface or identifier to function"); - } - // return value - if (look(token::R_ARROW)) { - ret = this->type(); - } - - if (interfaceStmt) - return new ast::InterfaceStmt(args, name, ret); - else - return new ast::FuncStmt(args, name, ret, this->block(token::END)); - // - break; - // whole - } else { - ast::Stmt *inherit = nullptr; - - token::Token name = previous(); // name - - // inherit - if (look().kind == token::L_ARROW) { - inherit = this->stmt(); - } - return new ast::WholeStmt(name, inherit, this->block(token::END)); - } - break; - // if - case token::IF: { - this->position++; - // if condition - ast::Expr *condition = this->expr(); - // if then branch - ast::BlockStmt *thenBranch = - this->block(token::EF, token::END, token::NF); - - std::map elem; - - while (previous().kind == token::EF) { - ast::Expr *efCondition = this->expr(); - ast::BlockStmt *efBranch = - this->block(token::EF, token::END, token::NF); - // - elem.insert(std::make_pair(efCondition, efBranch)); - } - - 
ast::BlockStmt *nfBranch = nullptr; - - if (previous().kind == token::NF) { - nfBranch = this->block(token::END); - } - return new ast::IfStmt(condition, thenBranch, elem, nfBranch); - } break; - // loop - case token::FOR: { - this->position++; - // dead loop - if (look(token::R_ARROW)) - return new ast::ForStmt(nullptr, this->block(token::END)); - // for condition - ast::Expr *condition = this->expr(); - ast::BlockStmt *block = this->block(token::END); - // - return new ast::ForStmt(condition, block); - } break; - // do loop - case token::DO: { - this->position++; - // block - ast::BlockStmt *block = this->block(token::FOR); - // go back to the position of the `for` keyword - this->position--; - ast::Stmt *stmt = this->stmt(); - // - return new ast::DoStmt(block, stmt); - } break; - // out in loop - // out - case token::OUT: - this->position++; - // - if (look(token::R_ARROW)) { - // no condition - return new ast::OutStmt(); - } - return new ast::OutStmt(this->expr()); - break; - // tin in loop - // tin - case token::TIN: - this->position++; - // - if (look(token::R_ARROW)) { - // no condition - return new ast::TinStmt(); - } - return new ast::TinStmt(this->expr()); - // and - case token::AND: { - this->position++; - - if (!look(token::R_ARROW)) { - error(exp::UNEXPECTED, "expect '->' after and keyword"); - } - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "alias must be an identifier"); - } - // name - token::Token alias = previous(); - // block - ast::BlockStmt *stmt = this->block(token::END); - // - return new ast::AndStmt(alias, stmt); - } break; - // mod - case token::MOD: - this->position++; - // - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "module name must be an identifier"); - } - return new ast::ModStmt(previous()); - break; - // use - case token::USE: { - this->position++; - - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "use of module name must be an identifier"); - } - // name - token::Token name = previous(); - // no alias - if 
(!look(token::AS)) { - return new ast::UseStmt(name); - } - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "alias of module name must be an identifier"); - } - int previous = this->position - 1; - // - // [Q]: why can't variables on the stack be referenced - // - return new ast::UseStmt(name, &this->tokens.at(previous)); - } break; - // return - // ret - // ret -> - case token::RET: - this->position++; - // - if (look(token::R_ARROW)) { - // no return value - return new ast::RetStmt(); - } - return new ast::RetStmt(this->stmt()); - break; - // inherit for class - case token::L_ARROW: { - this->position++; - - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "inheritance name must be an indentifier"); - } - std::vector names; - // single - names.push_back(look(true)); // address of token - - while (look(token::ADD)) { - if (!look(token::IDENT)) { - error(exp::UNEXPECTED, "inheritance name must be an indentifier"); - } - names.push_back(look(true)); // address - } - - return new ast::InheritStmt(names); - } break; - // call of super class - // <~ expr - case token::L_CURVED_ARROW: // TODO: can only call method - this->position++; - // - return new ast::CallInheritStmt(look().line, this->expr()); - break; - // pub - case token::PUB: - this->position++; - // - return new ast::PubStmt(look().line, this->stmt()); - break; - default: - // expression statement - return new ast::ExprStmt(this->expr()); - } - // end - error(exp::INVALID_SYNTAX, "invalid statement"); - return nullptr; - } - - /** - * parse block statement - * - * the x parameter is required, and y and z have default value - * determine where to stop the analysis - */ - ast::BlockStmt *Parser::block(token::Kind x, token::Kind y, token::Kind z) { - std::vector body; - // until end token - while (true) { - if (look(x)) { - break; - } - // it is not the default value and holds - if (y != token::EFF && look(y)) { - break; - } - // it is not the default value and holds - if (z != token::EFF && look(z)) { - 
break; - } - body.push_back(this->stmt()); - } - return new ast::BlockStmt(body); - } - - // throw an exception - inline void Parser::error(exp::Kind kind, std::string message) { - throw exp::Exp(kind, std::move(message), look().line); - } - - // type analysis - ast::Type *Parser::type() { - token::Token now = this->look(); - // type - if (now.kind == token::IDENT) { - // skip type ident - this->position++; - // T1 - if (now.literal == S_INT) return new ast::Int(); - // T2 - if (now.literal == S_FLOAT) return new ast::Float(); - // T3 - if (now.literal == S_STR) return new ast::Str; - // T4 - if (now.literal == S_CHAR) return new ast::Char(); - // T5 - if (now.literal == S_BOOL) return new ast::Bool; - // user define type - return new ast::User(now); - } - // T6 - if (now.kind == token::L_BRACKET) { - this->position++; // skip left [ symbol - - if (!look(token::R_BRACKET)) { - error(exp::UNEXPECTED, "expect ']' after left square bracket"); - } - return new ast::Array(this->type()); - } - // T7 - if (now.kind == token::LESS) { - this->position++; // skip left < symbol - // key - ast::Type *T1 = this->type(); - - if (!look(token::COMMA)) { - error(exp::UNEXPECTED, "expect ',' after key of map"); - } - ast::Type *T2 = this->type(); - - if (!look(token::GREATER)) { - error(exp::UNEXPECTED, "expect '>' after value of map"); - } - return new ast::Map(T1, T2); - } - // T8 - if (now.kind == token::L_PAREN) { - this->position++; // skip left ( symbol - - ast::Type *T = this->type(); - - if (!look(token::R_PAREN)) { - error(exp::UNEXPECTED, "expect ')' after tuple define"); - } - return new ast::Tuple(T); - } - error(exp::INVALID_SYNTAX, "invalid type"); - // - return nullptr; - } -} // namespace parser - -// semantic -namespace semantic { - // analysis - class Analysis { - private: - int position = 0; - // stmts - std::vector *statements; - - // return the kind of current statement - inline ast::Kind look() { return statements->at(position)->kind(); } - - // return the 
current statement - inline ast::Stmt *now() { return statements->at(position); } - - // throw semantic analysis exception - void error(exp::Kind k, std::string message, int line) { - throw exp::Exp(k, message, line); - } - - public: - explicit Analysis(std::vector *stmts) { - this->statements = stmts; - - while (position < statements->size()) { - this->analysisStmt(now()); - this->position++; - } - } - - // statement - void analysisStmt(ast::Stmt *stmt) { - switch (stmt->kind()) { - case ast::STMT_EXPR: { - ast::ExprStmt *e = static_cast(stmt); - ast::Expr *expr = static_cast(e->expr); - // expression - this->analysisExpr(expr); - } break; - // - case ast::STMT_PUB: { - ast::PubStmt *p = static_cast(stmt); - - switch (p->stmt->kind()) { - case ast::STMT_VAR: // defintin - case ast::STMT_FUNC: // function - case ast::STMT_WHOLE: // whole - case ast::STMT_INTERFACE: // interface - break; - default: - error(exp::CANNOT_PUBLIC, "statement cannot be public", p->line); - } - - // if its whole statement and must to analysis body - // for example it contains a new whole statement inside - if (p->stmt->kind() == ast::STMT_WHOLE) this->analysisStmt(p->stmt); - } break; - // - case ast::STMT_WHOLE: { - ast::WholeStmt *w = static_cast(stmt); - - if (w->body->block.empty()) break; - - ast::Stmt *f = w->body->block.at(0); // first statement - - // just a ident of expression statement - // - // enumeration - // - if (f->kind() == ast::STMT_EXPR) { - // - if (w->inherit != nullptr) { - error(exp::ENUMERATION, "enumeration type cannot be inherited", - w->name.line); - } - - ast::ExprStmt *expr = static_cast(f); - std::vector fields; - - for (auto &i : w->body->block) { - // - if (i->kind() != ast::STMT_EXPR) { - error(exp::ENUMERATION, "whole is an enumeration type", - w->name.line); - } - ast::ExprStmt *pStmt = static_cast(i); - if (pStmt->expr->kind() != ast::EXPR_NAME) { - error(exp::ENUMERATION, "whole is an enumeration type", - w->name.line); - } - - ast::NameExpr *name = 
static_cast(pStmt->expr); - // push to enumeration - // structure - fields.push_back(&name->token); - } - // replace new statement into - ast::Stmt *n = new ast::EnumStmt(w->name, fields); - std::replace(std::begin(*statements), std::end(*statements), now(), - n); - std::cout << "\033[33m[Semantic analysis replace " << position + 1 - << "]\033[0m: WholeStmt -> " << n->stringer() - << std::endl; - } - // normal whole statement if hinder of statements include name expr - // to throw an error - else { - for (auto &i : w->body->block) { - if (i->kind() == ast::STMT_EXPR) { - error(exp::ENUMERATION, - "it an whole statement but contains some other value", - w->name.line); - } - } - } - } break; - // - case ast::STMT_CALLINHERIT: { - ast::CallInheritStmt *c = static_cast(stmt); - - if (c->expr->kind() != ast::EXPR_CALL) { - error(exp::CALL_INHERIT, - "only methods of the parent class can be called", 2); - } - } break; - // - default: break; - } - } - - // expression - void analysisExpr(ast::Expr *expr) { - using namespace token; - - switch (expr->kind()) { - case ast::EXPR_BINARY: { - ast::BinaryExpr *binary = static_cast(expr); - - if (binary->left->kind() != ast::EXPR_LITERAL) { - this->analysisExpr(binary->left); - break; - } - if (binary->right->kind() != ast::EXPR_LITERAL) { - this->analysisExpr(binary->right); - break; - } - - Token l = (static_cast(binary->left))->token; - Token r = (static_cast(binary->right))->token; - - switch (binary->op.kind) { - case ADD: // + - case SUB: // - - if (l.kind == NUM) { - // - if (r.kind == STR || r.kind == CHAR) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - } - if (l.kind == STR || l.kind == CHAR) { - // - if (r.kind == NUM) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - } - break; - case AS_ADD: // += - case AS_SUB: // -= - case AS_MUL: // *= - case AS_DIV: // /= - if (binary->left->kind() != ast::EXPR_NAME) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - break; - case DIV: 
// / - if (l.kind == STR || l.kind == CHAR || r.kind == STR || - r.kind == CHAR) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - if (r.kind == NUM) { - // convert, keep floating point - // numbers - if (std::stof(r.literal) == 0) { - error(exp::DIVISION_ZERO, "division by zero", l.line); - } - } - // array - if (binary->left->kind() == ast::EXPR_ARRAY || - binary->right->kind() == ast::EXPR_ARRAY) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - break; - case MUL: // * - if ((l.kind == CHAR || l.kind == STR) && - (r.kind == CHAR || r.kind == STR)) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - break; - case GR_EQ: // >= - case LE_EQ: // <= - case GREATER: // > - case LESS: { // < - if (l.kind == STR || r.kind == STR) { - error(exp::TYPE_ERROR, "unsupported operand", l.line); - } - } break; - default: break; - } - } break; - case ast::EXPR_GROUP: { - ast::GroupExpr *group = static_cast(expr); - this->analysisExpr(group->expr); - } break; - // - case ast::EXPR_UNARY: { - ast::UnaryExpr *unary = static_cast(expr); - this->analysisExpr(unary->expr); - } break; - // - case ast::EXPR_CALL: { - ast::CallExpr *call = static_cast(expr); - - for (auto i : call->arguments) { - this->analysisExpr(i); - } - } break; - // - case ast::EXPR_GET: { - ast::GetExpr *get = static_cast(expr); - this->analysisExpr(get->expr); - } break; - // - case ast::EXPR_SET: { - ast::SetExpr *set = static_cast(expr); - this->analysisExpr(set->expr); - this->analysisExpr(set->value); - } break; - // - case ast::EXPR_ASSIGN: { - ast::AssignExpr *assign = static_cast(expr); - this->analysisExpr(assign->expr); - this->analysisExpr(assign->value); - } break; - // - default: break; - } - } - }; // namespace semantic -}; // namespace semantic - -struct Entity; - -// bytecode -namespace byte { - // total number of bytecodes - constexpr int len = 50; - // bytecode type - enum Code { - CONST, // O - ASSIGN, // ASSIGN - STORE, // V - LOAD, // V - INDEX, // 
IDEX - GET, // GET - SET, // SET - CALL, // CALL - CALL_I, // CALL_I - ORIG, // ORIG - NAME, // NAME - NEW, // NEW - FUNC, // FUNC - CHA, // CHA - END, // END - WHOLE, // WHOLE - ENUM, // ENUM - - PUB, // PUB - MOD, // MOD - USE, // USE - UAS, // UAS - - B_ARR, // ARRAY - B_TUP, // TUPLE - B_MAP, // MAP - - INCR, // DECR - DECR, // INCR - P_INCR, // P_INCR - P_DECR, // P_DECR - - // INFIX - ADD, // + - SUB, // - - MUL, // * - DIV, // / - A_ADD, // += - A_SUB, // -= - A_MUL, // *= - A_DIV, // /= - GR, // > - LE, // < - GR_E, // >= - LE_E, // <= - E_E, // == - N_E, // != - AND, // & - OR, // | - - // PREFIX - BANG, // ! - NOT, // - - - JUMP, // JUMP - F_JUMP, // F_JUMP - T_JUMP, - - RET, - }; - - // return a string of bytecode - std::string codeString[len] = { - "CONST", "ASSIGN", "STORE", "LOAD", "INDEX", "GET", "SET", - "CALL", "CALL_I", "ORIG", "NAME", "NEW", "FUNC", "CHA", - "END", "WHOLE", "ENUM", "PUB", "MOD", "USE", "UAS", - "B_ARR", "B_TUP", "B_MAP", "INCR", "DECR", "P_INCR", "P_DECR", - "ADD", "SUB", "MUL", "DIV", "A_ADD", "A_SUB", "A_MUL", - "A_DIV", "GR", "LE", "GR_E", "LE_E", "E_E", "N_E", - "AND", "OR", "BANG", "NOT", "JUMP", "F_JUMP", "T_JUMP", - "RET", - }; - - // compare two bytecode - bool compare(Code a, Code b) { return a == b; } -}; // namespace byte - -// object -namespace object { - // kind - enum Kind { - INT, - FLOAT, - STR, - CHAR, - BOOL, - ARRAY, - TUPLE, - MAP, - ENUM, - FUNC, - WHOLE - }; - - // object abstract - class Object { - public: - // return a string of dis object - virtual std::string stringer() = 0; - // return the kind of object - virtual Kind kind() = 0; - }; - - // INT - class Int : public Object { - public: - int value; - - Int(int v) : value(v) {} - - std::string stringer() override { - return ""; - } - - Kind kind() override { return INT; } - }; - - // FLOAT - class Float : public Object { - public: - double value; - - Float(float v) : value(v) {} - - std::string stringer() override { - return ""; - } - - Kind kind() 
override { return FLOAT; } - }; - - // STR - class Str : public Object { - public: - std::string value; - bool longer = false; - - Str(std::string v) : value(v) {} - - Str(std::string v, bool longer) : value(v), longer(longer) { - value.pop_back(); // long character judgment end, delete judgment - // char - } - - std::string stringer() override { - if (longer) { - return ""; - } - return ""; - } - - Kind kind() override { return STR; } - }; - - // CHAR - class Char : public Object { - public: - char value; - - Char(char v) : value(v) {} - - std::string stringer() override { - std::stringstream str; - - str << ""; - - return str.str(); - } - - Kind kind() override { return CHAR; } - }; - - // BOOL - class Bool : public Object { - public: - bool value; - - Bool(bool v) : value(v) {} - - std::string stringer() override { return ""; } - - Kind kind() override { return BOOL; } - }; - - // ARRAY - class Array : public Object { - public: - std::vector elements; - - std::string stringer() override { - std::stringstream str; - - str << "stringer(); - if (++iter != elements.end()) { - str << ", "; - } - } - - str << "]>"; - return str.str(); - } - - Kind kind() override { return ARRAY; } - }; - - // TUPLE - class Tuple : public Object { - public: - std::vector elements; - - std::string stringer() override { - std::stringstream str; - - str << "stringer(); - if (++iter != elements.end()) { - str << ", "; - } - } - - str << ")>"; - return str.str(); - } - - Kind kind() override { return TUPLE; } - }; - - // MAP - class Map : public Object { - public: - std::map value; - - std::string stringer() override { - if (value.empty()) return ""; - - std::stringstream str; - str << "first->stringer() - << " V: " << iter->second->stringer(); - if (++iter != value.end()) { - str << ", "; - } - } - - str << "}>"; - return str.str(); - } - - Kind kind() override { return MAP; } - }; - - // ENUM - class Enum : public Object { - public: - std::string name; - std::map elements; - - std::string 
stringer() override { return ""; } - - Kind kind() override { return ENUM; } - }; - - // FUNC - class Func : public Object { - public: - std::string name; // function name - - ast::Arg arguments; // function args - ast::Type *ret; // function return - - Entity *entity; // function entity - - std::string stringer() override { return ""; } - - Kind kind() override { return FUNC; } - }; - - // WHOLE - class Whole : public Object { - public: - std::string name; // whole name - - Entity *entity; // whole entity - - // interface definition - std::vector> interface; - - // inherit definition - std::vector inherit; - - std::string stringer() override { return ""; } - - Kind kind() override { return WHOLE; } - }; -}; // namespace object - -// entity structure -struct Entity { - std::string title = ""; // TITLE FOR ENTITY - - explicit Entity() {} - explicit Entity(std::string title) : title(title) {} // TO title - - std::vector codes; // bytecodes - std::vector offsets; // offset of bytecode - std::vector constants; // constant - std::vector names; // names - std::vector types; // type of variables - // std::vector lineno; // lineno of each bytecode - - // output entity data - void dissemble() { - std::cout << "ENTITY '" << title << "': " << std::endl; - - for (int ip = 0, op = 0; ip < codes.size(); ip++) { - byte::Code co = codes.at(ip); - - switch (co) { - case byte::CONST: { - printf("%20d: %s %10d %s\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - constants.at(offsets.at(op))->stringer().c_str()); - } break; - case byte::ASSIGN: { - printf("%20d: %s %9d '%s'\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - names.at(offsets.at(op)).c_str()); - } break; - case byte::STORE: { - printf("%20d: %s %10d '%s' %d %s\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op), - names.at(offsets.at(op)).c_str(), offsets.at(op + 1), - types.at(offsets.at(op + 1))->stringer().c_str()); - op += 2; - } break; - case byte::LOAD: - case 
byte::NAME: { - printf("%20d: %s %11d '%s'\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - names.at(offsets.at(op)).c_str()); - } break; - case byte::FUNC: - case byte::ENUM: { - printf("%20d: %s %11d %s\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - constants.at(offsets.at(op))->stringer().c_str()); - } break; - case byte::WHOLE: { - printf("%20d: %s %10d %s\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - constants.at(offsets.at(op))->stringer().c_str()); - } break; - case byte::GET: - case byte::SET: - case byte::MOD: - case byte::USE: - case byte::CHA: { - printf("%20d: %s %12d '%s'\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), - names.at(offsets.at(op)).c_str()); - } break; - case byte::CALL: { - printf("%20d: %s %11d\n", ip, byte::codeString[codes.at(ip)].c_str(), - offsets.at(op++)); - } break; - case byte::CALL_I: { - printf("%20d: %s %9d\n", ip, byte::codeString[codes.at(ip)].c_str(), - offsets.at(op++)); - } break; - case byte::B_ARR: - case byte::B_TUP: - case byte::B_MAP: { - printf("%20d: %s %10d\n", ip, byte::codeString[codes.at(ip)].c_str(), - offsets.at(op++)); - } break; - case byte::F_JUMP: - case byte::T_JUMP: { - printf("%20d: %s %9d\n", ip, byte::codeString[codes.at(ip)].c_str(), - offsets.at(op++)); - } break; - case byte::JUMP: { - printf("%20d: %s %11d\n", ip, byte::codeString[codes.at(ip)].c_str(), - offsets.at(op++)); - } break; - case byte::NEW: { - printf("%20d: %s %12d '%s' %d\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op), - names.at(offsets.at(op)).c_str(), offsets.at(op + 1)); - op += 2; - } break; - case byte::UAS: { - printf("%20d: %s %12d '%s' %d '%s'\n", ip, - byte::codeString[codes.at(ip)].c_str(), offsets.at(op), - names.at(offsets.at(op)).c_str(), offsets.at(op + 1), - names.at(offsets.at(op + 1)).c_str()); - op += 2; - } break; - default: - printf("%20d: %s\n", ip, byte::codeString[codes.at(ip)].c_str()); - break; - } - } 
- - std::cout << "CONSTANT: " << std::endl; - if (constants.empty()) { - printf("%20s\n", "EMPTY"); - } else { - for (int i = 0; i < constants.size(); i++) { - printf("%20d: %s\n", i, constants.at(i)->stringer().c_str()); - } - } - - std::cout << "NAME: " << std::endl; - if (names.empty()) { - printf("%20s\n", "EMPTY"); - } else { - for (int i = 0; i < names.size(); i++) { - if (i % 4 == 0) { - printf("%20d: '%s'\t", i, names.at(i).c_str()); - } else { - printf("%5d: '%s' \t", i, names.at(i).c_str()); - } - if ((i + 1) % 4 == 0) { - printf("\n"); - } - } - printf("\n"); - } - - // std::cout << "LINENO: " << std::endl; - // if (lineno.empty()) { - // printf("%20s\n", "EMPTY"); - // } else { - // for (int i = 0; i < lineno.size(); i++) { - // if (i % 4 == 0) { - // printf("%20d: '%d'\t", i, lineno.at(i)); - // } else { - // printf("%5d: '%d' \t", i, lineno.at(i)); - // } - // if ((i + 1) % 4 == 0) { - // printf("\n"); - // } - // } - // printf("\n"); - // } - - std::cout << "OFFSET: " << std::endl; - if (offsets.empty()) { - printf("%20s\n", "EMPTY"); - } else { - for (int i = 0; i < offsets.size(); i++) { - if (i % 4 == 0) { - printf("%20d: %d \t", i, offsets.at(i)); - } else { - printf("%5d: %d \t", i, offsets.at(i)); - } - if ((i + 1) % 4 == 0) { - printf("\n"); - } - } - printf("\n"); - } - - std::cout << "TYPE: " << std::endl; - if (types.empty()) { - printf("%20s\n", "EMPTY"); - } else { - for (int i = 0; i < types.size(); i++) { - printf("%20d: %s\n", i, types.at(i)->stringer().c_str()); - } - } - } -}; // namespace entity - -// compiler -namespace compiler { - // compiler structure - class Compiler { - private: - int position = 0; - // after semantic analysis - std::vector statements; - // return the current statement - ast::Stmt *look(); - // offset of constant, offset of name, offset of type - int icf = 0, inf = 0, itf = 0; - - void emitCode(byte::Code); // push bytecode to entity - void emitOffset(int); // push offset to entity - void 
emitConstant(object::Object *); // push constant to entity - void emitName(std::string); // push name to entity - void emitType(ast::Type *); // push names type to entity - - // insert position with current counts of bytecode - void insertPosOffset(int); - void insertPosOffset(int, int); // with custom value - - void expr(ast::Expr *); // expression - void stmt(ast::Stmt *); // statements - - public: - Compiler(std::vector statements) : statements(statements) {} - - // entities of compiled - std::vector entities = {new Entity("main")}; - // compile statements to entities - void compile(); - - // currently compile entity - Entity *now = entities.at(0); - }; - - // return the current statement - ast::Stmt *Compiler::look() { return this->statements.at(this->position); } - - // compile statements to entities - void Compiler::compile() { - while (this->position < this->statements.size()) { - this->stmt(look()); // START - this->position++; - } - this->emitCode(byte::RET); // RET - } - - // push bytecode to entity - void Compiler::emitCode(byte::Code co) { this->now->codes.push_back(co); } - - // push offset to entity - void Compiler::emitOffset(int off) { this->now->offsets.push_back(off); } - - // push constant to entity - void Compiler::emitConstant(object::Object *obj) { - this->now->constants.push_back(obj); - this->emitOffset(this->icf++); - } - - // push name to entity - void Compiler::emitName(std::string v) { - std::vector::iterator iter = - std::find(now->names.begin(), now->names.end(), v); - if (iter != now->names.end()) { - // found - this->emitOffset( - std::distance(now->names.begin(), iter)); // only push offset - } else { - // not found - this->now->names.push_back(v); // push new name - this->emitOffset(this->inf++); // push new offset - } - } - - // push names type to entity - void Compiler::emitType(ast::Type *t) { - this->now->types.push_back(t); - this->emitOffset(this->itf++); - } - - // insert position with current counts of bytecode - void 
Compiler::insertPosOffset(int pos) { - this->now->offsets.insert(now->offsets.begin() + pos, now->codes.size()); - } - - // with custom value - void Compiler::insertPosOffset(int pos, int val) { - this->now->offsets.insert(now->offsets.begin() + pos, val); - } - - // expression - void Compiler::expr(ast::Expr *expr) { - switch (expr->kind()) { - case ast::EXPR_LITERAL: { - ast::LiteralExpr *l = static_cast(expr); - token::Token tok = l->token; - - if (tok.kind == token::NUM) { - this->emitConstant(new object::Int(std::stoi(tok.literal))); - } - if (tok.kind == token::FLOAT) { - this->emitConstant(new object::Float(std::stof(tok.literal))); - } - if (tok.kind == token::STR) { - this->emitConstant( - // judge long characters at here - new object::Str(tok.literal, tok.literal.back() == '`')); - } - if (tok.kind == token::CHAR) { - this->emitConstant(new object::Char(tok.literal.at(0))); - } - this->emitCode(byte::CONST); - } break; - case ast::EXPR_BINARY: { - ast::BinaryExpr *b = static_cast(expr); - - this->expr(b->left); - this->expr(b->right); - - switch (b->op.kind) { - case token::ADD: this->emitCode(byte::ADD); break; - case token::SUB: this->emitCode(byte::SUB); break; - case token::MUL: this->emitCode(byte::MUL); break; - case token::DIV: this->emitCode(byte::DIV); break; - - case token::AS_ADD: this->emitCode(byte::A_ADD); break; - case token::AS_SUB: this->emitCode(byte::A_SUB); break; - case token::AS_MUL: this->emitCode(byte::A_MUL); break; - case token::AS_DIV: this->emitCode(byte::A_DIV); break; - - case token::GREATER: this->emitCode(byte::GR); break; - case token::LESS: this->emitCode(byte::LE); break; - case token::GR_EQ: this->emitCode(byte::GR_E); break; - case token::LE_EQ: this->emitCode(byte::LE_E); break; - case token::EQ_EQ: this->emitCode(byte::E_E); break; - case token::BANG_EQ: this->emitCode(byte::N_E); break; - case token::ADDR: this->emitCode(byte::AND); break; - case token::OR: this->emitCode(byte::OR); break; - } - - if (b->op.kind == 
token::AS_ADD || b->op.kind == token::AS_SUB || - b->op.kind == token::AS_MUL || b->op.kind == token::AS_DIV) { - ast::NameExpr *n = static_cast(b->left); - - this->emitName(n->token.literal); - this->emitCode(byte::ASSIGN); - } - } break; - // - case ast::EXPR_GROUP: { - ast::GroupExpr *g = static_cast(expr); - - this->expr(g->expr); - } break; - // - case ast::EXPR_UNARY: { - ast::UnaryExpr *u = static_cast(expr); - - this->expr(u->expr); - - if (u->token.kind == token::BANG) { - this->emitCode(byte::BANG); - } - if (u->token.kind == token::SUB) { - this->emitCode(byte::NOT); - } - } break; - // - case ast::EXPR_NAME: { - ast::NameExpr *n = static_cast(expr); - - this->emitCode(byte::LOAD); - this->emitName(n->token.literal); // new name - - // increment and prefix - if (n->selfIncrement && n->prefix) this->emitCode(byte::P_INCR); - if (n->selfIncrement) this->emitCode(byte::INCR); // suffix - - // decrement and prefix - if (n->selfDecrement && n->prefix) this->emitCode(byte::P_DECR); - if (n->selfDecrement) this->emitCode(byte::DECR); // suffix - } break; - // - case ast::EXPR_CALL: { - ast::CallExpr *c = static_cast(expr); - - this->expr(c->callee); - - for (int i = c->arguments.size(); i > 0; i--) - this->expr(c->arguments.at(i - 1)); // arguments - - this->emitCode(byte::CALL); - this->emitOffset(c->arguments.size()); - } break; - // - case ast::EXPR_GET: { - ast::GetExpr *g = static_cast(expr); - - this->expr(g->expr); - - this->emitCode(byte::GET); - this->emitName(g->name.literal); // name - } break; - // - case ast::EXPR_SET: { - ast::SetExpr *s = static_cast(expr); - - this->expr(s->value); // right expression - this->expr(s->expr); // left expression - - this->emitCode(byte::SET); - this->emitName(s->name.literal); // name - } break; - // - case ast::EXPR_ASSIGN: { - ast::AssignExpr *a = static_cast(expr); - - this->expr(a->value); // right expression - - this->emitCode(byte::ASSIGN); - this->emitName(static_cast(a->expr)->token.literal); - } break; - 
// - case ast::EXPR_ARRAY: { - ast::ArrayExpr *a = static_cast(expr); - - // push elements from right to left - for (int i = a->elements.size(); i > 0; i--) - this->expr(a->elements.at(i - 1)); - - this->emitCode(byte::B_ARR); - this->emitOffset(a->elements.size()); // length - } break; - // - case ast::EXPR_TUPLE: { - ast::TupleExpr *t = static_cast(expr); - - for (int i = t->elements.size(); i > 0; i--) - this->expr(t->elements.at(i - 1)); - - this->emitCode(byte::B_TUP); - this->emitOffset(t->elements.size()); // length - } break; - // - case ast::EXPR_MAP: { - ast::MapExpr *m = static_cast(expr); - - for (std::map::reverse_iterator iter = - m->elements.rbegin(); - iter != m->elements.rend(); iter++) { - // from right to left by iterator - this->expr(iter->first); - this->expr(iter->second); - } - - this->emitCode(byte::B_MAP); - this->emitOffset(m->elements.size() * 2); // length - } break; - // - case ast::EXPR_INDEX: { - ast::IndexExpr *i = static_cast(expr); - - this->expr(i->right); - this->expr(i->left); - - this->emitCode(byte::INDEX); - } break; - // - case ast::EXPR_NEW: { - ast::NewExpr *n = static_cast(expr); - - for (auto i : n->builder) { - this->emitCode(byte::NAME); - this->emitName(i.first->literal); // K - - this->expr(i.second); // V - } - - this->emitCode(byte::NEW); - this->emitName(n->name.literal); // name - this->emitOffset(n->builder.size() * 2); // fields - } break; - } - } - - // statements - void Compiler::stmt(ast::Stmt *stmt) { - switch (stmt->kind()) { - case ast::STMT_EXPR: - this->expr(static_cast(stmt)->expr); // expression - break; - // - case ast::STMT_VAR: { - ast::VarStmt *v = static_cast(stmt); - - if (v->expr != nullptr) - this->expr(v->expr); // initial value - else - this->emitCode(byte::ORIG); // original value - - this->emitCode(byte::STORE); - this->emitName(v->name.literal); - - this->emitType(v->T); // type - } break; - // - case ast::STMT_BLOCK: { - ast::BlockStmt *b = static_cast(stmt); - - for (auto i : b->block) 
this->stmt(i); - } break; - // - case ast::STMT_IF: { - ast::IfStmt *i = static_cast(stmt); - /** - * Moisture regain algorithm - */ - this->expr(i->condition); - this->emitCode(byte::F_JUMP); - int ifPos = now->offsets.size(); - - this->stmt(i->ifBranch); - this->emitCode(byte::JUMP); // jump out after - - int ifOff = now->offsets.size(); // jump after execution if branch - std::vector tempEfOffs; // ef condition offsets - - // ef branch - if (!i->efBranch.empty()) { - bool firstStmt = true; - - for (auto i : i->efBranch) { - // if jump to the first ef - if (firstStmt) { - this->insertPosOffset(ifPos); // TO: if (F_JUMP) - firstStmt = false; - } - - this->expr(i.first); // condition - this->emitCode(byte::F_JUMP); - int efPos = now->offsets.size(); - - this->stmt(i.second); // block - this->insertPosOffset(efPos, - now->codes.size() + 1); // TO: ef (F_JUMP) - - this->emitCode(byte::JUMP); // jump out after - tempEfOffs.push_back(now->offsets.size()); - } - // nf branch - if (i->nfBranch != nullptr) this->stmt(i->nfBranch); - } - // nf branch - else { - if (i->nfBranch != nullptr) { - this->insertPosOffset(ifPos); // TO: if (F_JUMP) - this->stmt(i->nfBranch); - } else { - // no ef and nf statement - this->insertPosOffset(ifPos); // TO: if (F_JUMP) - } - } - - // for (auto i : tempEfOffs) std::cout << i << std::endl; - for (int i = 0; i < tempEfOffs.size(); i++) { - // insertion increment successively - this->insertPosOffset(tempEfOffs.at(i) + i); - } - - this->insertPosOffset(ifOff + 1); // TO: if (JUMP) - } break; - // - case ast::STMT_FOR: { - ast::ForStmt *f = static_cast(stmt); - - int original = now->codes.size(); // original state: for callback loops - - // DEAD LOOP - if (f->condition == nullptr) this->stmt(f->block); - // condition and block - else { - this->expr(f->condition); - this->emitCode(byte::F_JUMP); - int ePos = now->offsets.size(); // skip loop for false - - this->stmt(f->block); // block - - // jump to next bytecode - this->insertPosOffset(ePos, 
- now->codes.size() + 1); // TO: (F_JUMP) - } - this->emitCode(byte::JUMP); // back to original state - this->emitOffset(original); - // replace placeholder - for (std::vector::iterator iter = now->offsets.begin(); - iter != now->offsets.end(); iter++) { - // out statement - if (*iter == -1) { - *iter = now->codes.size(); - } - // tin statement - if (*iter == -2) { - *iter = original; - } - } - } break; - // - case ast::STMT_DO: { - ast::DoStmt *d = static_cast(stmt); - - this->stmt(d->block); // execute the do block first - this->stmt(d->stmt); // then execute loop - } break; - // - case ast::STMT_OUT: { - ast::OutStmt *o = static_cast(stmt); - - if (o->expr != nullptr) this->expr(o->expr); - - // jump straight out - this->emitCode(o->expr == nullptr ? byte::JUMP : byte::T_JUMP); - // place holder - this->emitOffset(-1); - } break; - // - case ast::STMT_TIN: { - ast::TinStmt *t = static_cast(stmt); - - if (t->expr != nullptr) this->expr(t->expr); - - // jump straight out - this->emitCode(t->expr == nullptr ? 
byte::JUMP : byte::T_JUMP); - // place holder - this->emitOffset(-2); - } break; - // - case ast::STMT_FUNC: { - ast::FuncStmt *f = static_cast(stmt); - - int entitiesSize = this->entities.size() - 1; // original - - this->entities.push_back( - new Entity(f->name.literal)); // new entity for function statement - this->now = this->entities.back(); - - object::Func *obj = new object::Func; - - obj->name = f->name.literal; // function name - obj->arguments = f->arguments; // function arguments - obj->ret = f->ret; // function return - - int x = this->icf; - int y = this->inf; - int z = this->itf; - - this->icf = 0; // x - this->inf = 0; // y - this->itf = 0; // z - - this->stmt(f->block); - - this->icf = x; - this->inf = y; - this->itf = z; - - obj->entity = this->now; // function entity - - // if more than one it points to the last one - this->now = this->entities.at(entitiesSize); // restore to main entity - - // TO main ENTITY - this->emitCode(byte::FUNC); - this->emitConstant(obj); // push to constant object - } break; - // - case ast::STMT_WHOLE: { - ast::WholeStmt *w = static_cast(stmt); - - int entitiesSize = this->entities.size() - 1; // original - - this->entities.push_back( - new Entity(w->name.literal)); // new entity for whole statement - this->now = this->entities.back(); - - object::Whole *obj = new object::Whole; - - obj->name = w->name.literal; // whole name - - // whole inherit - if (w->inherit != nullptr) { - ast::InheritStmt *i = static_cast(w->inherit); - for (auto iter : i->names) { - obj->inherit.push_back(iter->literal); - } - } - - int x = this->icf; - int y = this->inf; - int z = this->itf; - - this->icf = 0; // x - this->inf = 0; // y - this->itf = 0; // z - - // block statement - for (auto i : w->body->block) { - // interface definition - if (i->kind() == ast::STMT_INTERFACE) { - ast::InterfaceStmt *inter = static_cast(i); - obj->interface.push_back(std::make_tuple( - inter->name.literal, inter->arguments, inter->ret)); - continue; - } - 
this->stmt(i); - } - - this->icf = x; - this->inf = y; - this->itf = z; - - obj->entity = this->now; // whole entity - - // if more than one it points to the last one - this->now = this->entities.at(entitiesSize); // restore to main entity - - // TO main ENTITY - this->emitCode(byte::WHOLE); - this->emitConstant(obj); // push to constant object - } break; - // - case ast::STMT_AND: { - ast::AndStmt *a = static_cast(stmt); - - this->emitCode(byte::CHA); - this->emitName(a->name.literal); // STORE - - this->stmt(a->block); - - this->emitCode(byte::END); - this->emitName(a->name.literal); // END - } break; - // - case ast::STMT_MOD: { - ast::ModStmt *m = static_cast(stmt); - - this->emitCode(byte::MOD); - this->emitName(m->name.literal); - } break; - // - case ast::STMT_USE: { - ast::UseStmt *u = static_cast(stmt); - - if (u->as != nullptr) { - this->emitCode(byte::UAS); - - this->emitName(u->name.literal); // name - this->emitName(u->as->literal); // alias - } else { - this->emitCode(byte::USE); - - this->emitName(u->name.literal); - } - } break; - // - case ast::STMT_RET: { - ast::RetStmt *r = static_cast(stmt); - - if (r->stmt != nullptr) this->stmt(r->stmt); - - this->emitCode(byte::RET); - } break; - // - case ast::STMT_ENUM: { - ast::EnumStmt *e = static_cast(stmt); - - object::Enum *obj = new object::Enum; - obj->name = e->name.literal; - - for (int i = 0; i < e->field.size(); i++) { - obj->elements.insert(std::make_pair(i, e->field.at(i)->literal)); - } - - this->emitCode(byte::ENUM); - this->emitConstant(obj); // push to constant object - } break; - // - case ast::STMT_CALLINHERIT: { - ast::CallInheritStmt *c = static_cast(stmt); - ast::CallExpr *e = static_cast(c->expr); - - this->expr(e->callee); - - for (int i = e->arguments.size(); i > 0; i--) - this->expr(e->arguments.at(i - 1)); - - this->emitCode(byte::CALL_I); - this->emitOffset(e->arguments.size()); // length - } break; - // - case ast::STMT_PUB: { - ast::PubStmt *p = static_cast(stmt); - - 
this->stmt(p->stmt); - this->emitCode(byte::PUB); - } break; - // - default: break; - } - } -}; // namespace compiler - -template class Stack { -private: - int capacity = 4, count = 0; - - T *elements; - -public: - explicit Stack() { this->elements = new T[capacity]; } - - ~Stack() { delete[] elements; } - - void push(T t) { - if (count + 1 > capacity) { - this->capacity = capacity * 2; - this->elements = (T *)realloc(this->elements, sizeof(T) * capacity); - } - this->elements[count++] = t; - } - - T pop() { return this->elements[--count]; } - - T top() { return this->elements[count]; } - - int len() { return count; } - - bool empty() { return count == 0; } - - std::string stringer() { - return ""; - } -}; - -/** - * VIRTUAL MACHINE - * - * ENTITY: - * BYTE, CONSTANT, NAME, TYPE, OFFSET - * - * OBJECT: - * INT, FLOAT, STR, CHAR, BOOL, ARRAY, TUPLE, MAP, ENUM, FUNC, WHOLE - * - * FRAME: - * (ENTITY, TABLE(K TO OBJECT), DATA STACK) - */ -namespace vm { - // structure - class vm { - private: - std::vector frames; // execute frames - // push object to the current frame - void pushData(object::Object *); - // pop the top of data stack - object::Object *popData(); - // emit new name of table to the current frame - void emitTable(std::string, object::Object *); - // look up a name from current top frame - object::Object *lookUp(std::string); - // first to end iterator - object::Object *retConstant(); - // first to end iterator - ast::Type *retType(); - // first to end iterator - std::string retName(); - // first to end iterator - int retOffset(); - // are the comparison types the same - void typeChecker(ast::Type *, object::Object *); - - int op = 0; // offset pointer - - public: - explicit vm(Entity *main) { - // to main frame as main - this->frames.push_back(new Frame(main)); - } - - // top frame - Frame *top(); - - // repl mode to clean pointer for offset - void clean() { this->op = 0; } - - void evaluate(); // evaluate the top of frame - }; - - // top frame - Frame 
*vm::top() { return frames.back(); } - - // push object to the current frame - void vm::pushData(object::Object *obj) { top()->data.push(obj); } - - // pop the top of data stack - object::Object *vm::popData() { return top()->data.pop(); } - - // emit new name of table to the current frame - void vm::emitTable(std::string name, object::Object *obj) { - top()->local.symbols.insert(std::make_pair(name, obj)); - } - - // look up a name - object::Object *vm::lookUp(std::string n) { return top()->local.lookUp(n); } - - // first to end constant iterator for current frame's entity - object::Object *vm::retConstant() { - return top()->entity->constants.at(top()->entity->offsets.at(op)); - } - - // first to end - ast::Type *vm::retType() { - return top()->entity->types.at(top()->entity->offsets.at(op)); - } - - // first to end - std::string vm::retName() { - return top()->entity->names.at(top()->entity->offsets.at(op)); - } - - // first to end - int vm::retOffset() { return top()->entity->offsets.at(op); } - - // throw an exception - void error(std::string message) { - throw exp::Exp(exp::RUNTIME_ERROR, message, -1); - } - - // are the comparison types the same - void vm::typeChecker(ast::Type *x, object::Object *y) { - // base type error - if ( - // int - (x->kind() == ast::T_INT && y->kind() != object::INT) || - // float - (x->kind() == ast::T_FLOAT && y->kind() != object::FLOAT) || - // str - (x->kind() == ast::T_STR && y->kind() != object::STR) || - // char - (x->kind() == ast::T_CHAR && y->kind() != object::CHAR)) { - error("type error, require: " + x->stringer() + - ", found: " + y->stringer()); - } - // list - if (x->kind() == ast::T_ARRAY) { - ast::Array *T = static_cast(x); - object::Array *arr = static_cast(y); - - for (auto i : arr->elements) { - this->typeChecker(T->T, i); - } - } - // tuple - if (x->kind() == ast::T_TUPLE) { - ast::Tuple *T = static_cast(x); - object::Tuple *tup = static_cast(y); - - for (auto i : tup->elements) { - this->typeChecker(T->T, i); 
- } - } - // map - if (x->kind() == ast::T_MAP) { - ast::Map *T = static_cast(x); - object::Map *map = static_cast(y); - - for (auto &i : map->value) { - this->typeChecker(T->T1, i.first); // K - this->typeChecker(T->T2, i.second); // R - } - } - } - - void vm::evaluate() { - -#define BINARY_OP(T, L, OP, R) this->pushData(new T(L OP R)); - - for (int ip = 0; ip < top()->entity->codes.size(); ip++) { - - // bytecode - byte::Code co = top()->entity->codes.at(ip); - - switch (co) { - - case byte::CONST: - this->pushData(this->retConstant()); - this->op++; - break; - - case byte::ADD: - case byte::A_ADD: { - object::Object *y = this->popData(); - object::Object *x = this->popData(); - - if (x->kind() == object::INT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Int, static_cast(x)->value, +, - static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, static_cast(x)->value, - +, static_cast(y)->value); - break; - } - } - } - if (x->kind() == object::FLOAT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Float, static_cast(x)->value, - +, static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, static_cast(x)->value, - +, static_cast(y)->value); - break; - } - } - } - if (x->kind() == object::STR && y->kind() == object::STR) { - object::Str *l = static_cast(x); - object::Str *r = static_cast(y); - - if (l->longer || r->longer) { - error("cannot plus two long string literal"); - } - - this->pushData(new object::Str(l->value + r->value)); - } - break; - } - case byte::SUB: - case byte::A_SUB: { - object::Object *y = this->popData(); - object::Object *x = this->popData(); - - if (x->kind() == object::INT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Int, static_cast(x)->value, -, - static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, static_cast(x)->value, - -, static_cast(y)->value); - break; - } - } - } - if 
(x->kind() == object::FLOAT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Float, static_cast(x)->value, - -, static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, static_cast(x)->value, - -, static_cast(y)->value); - break; - } - } - } - break; - } - case byte::MUL: - case byte::A_MUL: { - object::Object *y = this->popData(); - object::Object *x = this->popData(); - - if (x->kind() == object::INT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Int, static_cast(x)->value, *, - static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, - static_cast(x)->value, *, - static_cast(y)->value); - break; - } - } - } - if (x->kind() == object::FLOAT) { - switch (y->kind()) { - case object::INT: { - BINARY_OP(object::Float, - static_cast(x)->value, *, - static_cast(y)->value); - break; - } - case object::FLOAT: { - BINARY_OP(object::Float, - static_cast(x)->value, *, - static_cast(y)->value); - break; - } - } - } - break; - } - case byte::DIV: - case byte::A_DIV: { - object::Object *y = this->popData(); - object::Object *x = this->popData(); - - if (x->kind() == object::INT) { - switch (y->kind()) { - case object::INT: { - if (static_cast(y)->value == 0) - error("division by zero"); - BINARY_OP(object::Int, static_cast(x)->value, /, - static_cast(y)->value); - break; - } - case object::FLOAT: { - if (static_cast(y)->value == 0) - error("division by zero"); - BINARY_OP(object::Float, static_cast(x)->value, - /, static_cast(y)->value); - break; - } - } - } - if (x->kind() == object::FLOAT) { - switch (y->kind()) { - case object::INT: { - if (static_cast(y)->value == 0) - error("division by zero"); - BINARY_OP(object::Float, static_cast(x)->value, - /, static_cast(y)->value); - break; - } - case object::FLOAT: { - if (static_cast(y)->value == 0) - error("division by zero"); - BINARY_OP(object::Float, static_cast(x)->value, - /, static_cast(y)->value); - break; - } - } - } - 
break; - } - - case byte::STORE: { - object::Object *obj = this->popData(); // OBJECT - ast::Type *type = this->retType(); // TO TYPE - - std::string name = this->retName(); // TO - - this->typeChecker(type, obj); - - if (top()->local.lookUp(name) != nullptr) { - error("redefining name '" + name + "'"); - } - - top()->local.symbols[name] = obj; // store to table - this->op += 2; - break; - } - - case byte::LOAD: { - std::string name = this->retName(); // NAME - object::Object *obj = top()->local.lookUp(name); // OBJECT - - if (obj == nullptr) error("not defined name '" + name + "'"); - - this->pushData(obj); - this->op++; - break; - } - - case byte::B_ARR: { - int count = this->retOffset(); // COUNT - - object::Array *arr = new object::Array; - // emit elements - for (int i = 0; i < count; i++) { - arr->elements.push_back(this->popData()); - } - - this->pushData(arr); - this->op++; - break; - } - - case byte::B_TUP: { - int count = this->retOffset(); // COUNT - - object::Tuple *tup = new object::Tuple; - // emit elements - for (int i = 0; i < count; i++) { - tup->elements.push_back(this->popData()); - } - - this->pushData(tup); - this->op++; - break; - } - - case byte::B_MAP: { - int count = this->retOffset(); // COUNT - - object::Map *map = new object::Map; - // emit elements - for (int i = 0; i < count - 2; i++) { - object::Object *y = this->popData(); - object::Object *x = this->popData(); - - map->value.insert(std::make_pair(x, y)); - } - - this->pushData(map); - this->op++; - break; - } - - case byte::RET: { - // std::cout << top()->data.stringer() << std::endl; - while (!top()->data.empty()) { - std::cout << top()->data.pop()->stringer() << std::endl; - } - } - } - } -#undef BINARY_OP - } -} // namespace vm - -vm::vm *mac; +vm *mac; // run source code void run(std::string source) { try { // lexer - auto lex = new lexer::Lexer(source); + auto lex = new Lexer(source); lex->tokenizer(); if (DEBUG) lex->dissembleTokens(); // parser - auto parser = new 
parser::Parser(lex->tokens); + auto parser = new Parser(lex->tokens); parser->parse(); if (DEBUG) parser->dissembleStmts(); // semantic - auto semantic = new semantic::Analysis(&parser->statements); + auto semantic = new Analysis(&parser->statements); // compiler - auto compiler = new compiler::Compiler(parser->statements); + auto compiler = new Compiler(parser->statements); compiler->compile(); for (auto i : compiler->entities) i->dissemble(); @@ -4286,7 +58,7 @@ void run(std::string source) { mac->clean(); } else { // new virtual machine - mac = new vm::vm(compiler->entities[0]); + mac = new vm(compiler->entities[0]); } mac->evaluate(); // diff --git a/src/entity.hpp b/src/entity.hpp index 82703a9..9a7e556 100644 --- a/src/entity.hpp +++ b/src/entity.hpp @@ -10,4 +10,176 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_ENTITY_H +#define DRIFT_ENTITY_H + +#include +#include + +#include "ast.hpp" +#include "object.hpp" +#include "opcode.hpp" + +// entity structure +struct Entity { + std::string title = ""; // TITLE FOR ENTITY + + explicit Entity() {} + explicit Entity(std::string title) : title(title) {} // TO title + + std::vector codes; // bytecodes + std::vector offsets; // offset of bytecode + std::vector constants; // constant + std::vector names; // names + std::vector types; // type of variables + + // output entity data + void dissemble() { + std::cout << "ENTITY '" << title << "': " << std::endl; + + for (int ip = 0, op = 0; ip < codes.size(); ip++) { + byte::Code co = codes.at(ip); + + switch (co) { + case byte::CONST: { + printf("%20d: %s %10d %s\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + constants.at(offsets.at(op))->stringer().c_str()); + } break; + case byte::ASSIGN: { + printf("%20d: %s %9d '%s'\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + names.at(offsets.at(op)).c_str()); + } break; + case byte::STORE: { + printf("%20d: 
%s %10d '%s' %d %s\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op), + names.at(offsets.at(op)).c_str(), offsets.at(op + 1), + types.at(offsets.at(op + 1))->stringer().c_str()); + op += 2; + } break; + case byte::LOAD: + case byte::NAME: { + printf("%20d: %s %11d '%s'\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + names.at(offsets.at(op)).c_str()); + } break; + case byte::FUNC: + case byte::ENUM: { + printf("%20d: %s %11d %s\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + constants.at(offsets.at(op))->stringer().c_str()); + } break; + case byte::WHOLE: { + printf("%20d: %s %10d %s\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + constants.at(offsets.at(op))->stringer().c_str()); + } break; + case byte::GET: + case byte::SET: + case byte::MOD: + case byte::USE: + case byte::CHA: { + printf("%20d: %s %12d '%s'\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op++), + names.at(offsets.at(op)).c_str()); + } break; + case byte::CALL: { + printf("%20d: %s %11d\n", ip, byte::codeString[codes.at(ip)].c_str(), + offsets.at(op++)); + } break; + case byte::CALL_I: { + printf("%20d: %s %9d\n", ip, byte::codeString[codes.at(ip)].c_str(), + offsets.at(op++)); + } break; + case byte::B_ARR: + case byte::B_TUP: + case byte::B_MAP: { + printf("%20d: %s %10d\n", ip, byte::codeString[codes.at(ip)].c_str(), + offsets.at(op++)); + } break; + case byte::F_JUMP: + case byte::T_JUMP: { + printf("%20d: %s %9d\n", ip, byte::codeString[codes.at(ip)].c_str(), + offsets.at(op++)); + } break; + case byte::JUMP: { + printf("%20d: %s %11d\n", ip, byte::codeString[codes.at(ip)].c_str(), + offsets.at(op++)); + } break; + case byte::NEW: { + printf("%20d: %s %12d '%s' %d\n", ip, + byte::codeString[codes.at(ip)].c_str(), offsets.at(op), + names.at(offsets.at(op)).c_str(), offsets.at(op + 1)); + op += 2; + } break; + case byte::UAS: { + printf("%20d: %s %12d '%s' %d '%s'\n", ip, + 
byte::codeString[codes.at(ip)].c_str(), offsets.at(op), + names.at(offsets.at(op)).c_str(), offsets.at(op + 1), + names.at(offsets.at(op + 1)).c_str()); + op += 2; + } break; + default: + printf("%20d: %s\n", ip, byte::codeString[codes.at(ip)].c_str()); + break; + } + } + + std::cout << "CONSTANT: " << std::endl; + if (constants.empty()) { + printf("%20s\n", "EMPTY"); + } else { + for (int i = 0; i < constants.size(); i++) { + printf("%20d: %s\n", i, constants.at(i)->stringer().c_str()); + } + } + + std::cout << "NAME: " << std::endl; + if (names.empty()) { + printf("%20s\n", "EMPTY"); + } else { + for (int i = 0; i < names.size(); i++) { + if (i % 4 == 0) { + printf("%20d: '%s'\t", i, names.at(i).c_str()); + } else { + printf("%5d: '%s' \t", i, names.at(i).c_str()); + } + if ((i + 1) % 4 == 0) { + printf("\n"); + } + } + printf("\n"); + } + + std::cout << "OFFSET: " << std::endl; + if (offsets.empty()) { + printf("%20s\n", "EMPTY"); + } else { + for (int i = 0; i < offsets.size(); i++) { + if (i % 4 == 0) { + printf("%20d: %d \t", i, offsets.at(i)); + } else { + printf("%5d: %d \t", i, offsets.at(i)); + } + if ((i + 1) % 4 == 0) { + printf("\n"); + } + } + printf("\n"); + } + + std::cout << "TYPE: " << std::endl; + if (types.empty()) { + printf("%20s\n", "EMPTY"); + } else { + for (int i = 0; i < types.size(); i++) { + printf("%20d: %s\n", i, types.at(i)->stringer().c_str()); + } + } + } +}; + +#endif \ No newline at end of file diff --git a/src/exp.cpp b/src/exp.cpp deleted file mode 100644 index 82703a9..0000000 --- a/src/exp.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// -// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. 
-// - -// GNU General Public License, more to see file: LICENSE -// https://www.gnu.org/licenses - -// THE DRIFT PROGRAMMING LANGUAGE -// -// https://github.com/bingxio/drift -// -// https://www.drift-lang.fun/ -// \ No newline at end of file diff --git a/src/exp.hpp b/src/exp.hpp index 82703a9..0deb7da 100644 --- a/src/exp.hpp +++ b/src/exp.hpp @@ -10,4 +10,75 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_EXP_H +#define DRIFT_EXP_H + +#include +#include +#include + +// exceptions +namespace exp { + // total number of exceptions + constexpr int len = 12; + // exception type + enum Kind { + // LEXER + UNKNOWN_SYMBOL, // unknown symbol + CHARACTER_EXP, // character is empty + STRING_EXP, // lost left or right mark + // PARSER + UNEXPECTED, // unexpected + INVALID_SYNTAX, // invalid syntax + INCREMENT_OP, // left value increment operand + // SEMANTIC + TYPE_ERROR, // type error + DIVISION_ZERO, // div zero + CANNOT_PUBLIC, // can not to public + ENUMERATION, // whole body not definition of enum + CALL_INHERIT, // can only be with call expr + // + RUNTIME_ERROR, + }; + + // return a string of exception type + static std::string kindString[len] = { + "UNKNOWN_SYMBOL", "CHARACTER_EXP", "STRING_EXP", "UNEXPECTED", + "INVALID_SYNTAX", "INCREMENT_OP", "TYPE_ERROR", "DIVISION_ZERO", + "CANNOT_PUBLIC", "ENUMERATION", "CALL_INHERIT", "RUNTIME_ERROR", + }; + + // exception structure + class Exp : public std::exception { + private: + // exception kind + Kind kind; + // exception message + std::string message; + // at exception line of source code + int line; + + public: + explicit Exp(Kind kind, std::string message, int line) { + this->kind = kind; + this->message = std::move(message); + this->line = line; + } + + // return a string of exception structure + std::string stringer() { + std::stringstream str; + + str << "message << "\" Line="; + str << this->line << " }>"; + + return str.str(); + } + }; 
+}; // namespace exp + +#endif \ No newline at end of file diff --git a/src/expr.hpp b/src/expr.hpp deleted file mode 100644 index 82703a9..0000000 --- a/src/expr.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// -// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. -// - -// GNU General Public License, more to see file: LICENSE -// https://www.gnu.org/licenses - -// THE DRIFT PROGRAMMING LANGUAGE -// -// https://github.com/bingxio/drift -// -// https://www.drift-lang.fun/ -// \ No newline at end of file diff --git a/src/frame.hpp b/src/frame.hpp index b01367b..684e04e 100644 --- a/src/frame.hpp +++ b/src/frame.hpp @@ -12,11 +12,12 @@ // https://www.drift-lang.fun/ // -#include - #ifndef DRIFT_FRAME_H #define DRIFT_FRAME_H +#include "stack.hpp" +#include "table.hpp" + // frame structure struct Frame { Entity *entity; // ENTITY diff --git a/src/lexer.cpp b/src/lexer.cpp index 82703a9..1263529 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -10,4 +10,312 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#include "lexer.hpp" + +// start +void Lexer::tokenizer() { + while (!this->isEnd()) { + // first to skip whitespace + if (isSpace()) skipWhitespace(); + // identifier + else if (isIdent()) + this->lexIdent(); + // digit + else if (isDigit()) + this->lexDigit(); + // string + else if (now() == '"') + this->lexString(false); + // long strings + else if (now() == '`') + this->lexString(true); + // character + else if (now() == '\'') + this->lexChar(); + // symbol + else + this->lexSymbol(); + } + this->tokens.push_back( + // resolve end insert EFF for end of file + token::Token{token::EFF, "EFF", ++this->line}); +} + +// final to dissemble tokens list +void Lexer::dissembleTokens() { + int i = 1; + for (const auto &token : this->tokens) + std::cout << i++ << " " + token::toString(token) << std::endl; +} + +// return resolve is end +inline bool Lexer::isEnd() { return this->position >= this->source.length(); } + 
+// resolve to skip whitespace +inline void Lexer::skipWhitespace() { + while (!isEnd() && this->isSpace()) { + if (now() == '\n') this->line++; + this->position++; + } +} + +// resolve to skip line comment +inline void Lexer::skipLineComment() { + while (!isEnd() && now() != '\n') this->position++; +} + +// resolve to skip block comment +inline void Lexer::skipBlockComment() { + while (!isEnd()) { + if (now() == '*' && peek() == '/') { + this->position += 2; + break; + } + this->position++; + } +} + +// return current char is identifier +inline bool Lexer::isIdent() { + return now() >= 'a' && now() <= 'z' || now() >= 'A' && now() <= 'Z' || + now() == '_'; +} + +// return current char is digit +inline bool Lexer::isDigit() { return now() >= '0' && now() <= '9'; } + +// return current char is whitespace +inline bool Lexer::isSpace() { + if (now() == ' ' || now() == '\r' || now() == '\t' || now() == '\n') { + return true; + } + return false; +} + +// return current char of resolve +inline char Lexer::now() { return this->source.at(this->position); } + +// resolve identifier +void Lexer::lexIdent() { + std::stringstream literal; + + while (!isEnd()) { + if (isIdent()) + literal << now(); + else + break; + this->position++; + } + + std::cout << "SIZE " << token::keyword.size() << std::endl; + + this->tokens.push_back(token::Token{// keyword or IDENT + token::getKeyword(literal.str()), + literal.str(), this->line}); +} + +// resolve digit +void Lexer::lexDigit() { + std::stringstream literal; + + bool floating = false; + + while (!isEnd()) { + if (isDigit() || now() == '.') { + literal << now(); + + if (now() == '.') floating = true; + } else + break; + this->position++; + } + + this->tokens.push_back( + // number or float + token::Token{floating ? 
token::FLOAT : token::NUM, literal.str(), + this->line}); +} + +// resolve string literal +void Lexer::lexString(bool longStr) { + char cond = '"'; + // longer string + if (longStr) cond = '`'; + + std::stringstream literal; + bool isEndFile = false; + + // skip left double quotation mark + this->position++; + + while (!isEnd()) { + if (now() == cond) { + // end string + this->position++; + isEndFile = true; + break; + } + if (now() == '\n' && !longStr) { + throw exp::Exp(exp::STRING_EXP, + // long strings + "for long strings use the ` operator", this->line); + break; + } + literal << now(); + this->position++; + } + + // missing closing symbol + if (!isEndFile) + throw exp::Exp(exp::STRING_EXP, "missing closing symbol", this->line); + + // add judgment character + // used to judge long characters at compile time + literal << cond; + + this->tokens.push_back( + // string + token::Token{token::STR, literal.str(), this->line}); +} + +// resolve character +void Lexer::lexChar() { + std::stringstream literal; + + // skip left single quotation mark + this->position++; + if (isEnd()) + throw exp::Exp(exp::CHARACTER_EXP, "wrong character", this->line); + + literal << now(); + + if (peek() != '\'') + // this character is empty + throw exp::Exp(exp::CHARACTER_EXP, "wrong character", this->line); + else + // skip value and right single quotation mark + this->position += 2; + + this->tokens.push_back( + // character + token::Token{token::CHAR, literal.str(), this->line}); +} + +// resolve symbols +void Lexer::lexSymbol() { + token::Token tok; + + tok.literal = now(); + tok.line = this->line; + + switch (now()) { + case '(': tok.kind = token::L_PAREN; break; + case ')': tok.kind = token::R_PAREN; break; + case '{': tok.kind = token::L_BRACE; break; + case '}': tok.kind = token::R_BRACE; break; + case '[': tok.kind = token::L_BRACKET; break; + case ']': tok.kind = token::R_BRACKET; break; + case ':': tok.kind = token::COLON; break; + case '+': + if (peekEmit(&tok, '=', 
token::AS_ADD, "+=")) break; + if (peekEmit(&tok, '+', token::PLUS, "++")) + break; + else + tok.kind = token::ADD; + break; + case '-': + if (peekEmit(&tok, '>', token::R_ARROW, "->")) break; + if (peekEmit(&tok, '-', token::MINUS, "--")) break; + if (peekEmit(&tok, '=', token::AS_SUB, "-=")) + break; + else + tok.kind = token::SUB; + break; + case '*': + if (peekEmit(&tok, '=', token::AS_MUL, "*=")) + break; + else + tok.kind = token::MUL; + break; + case '/': + if (peekEmit(&tok, '=', token::AS_DIV, "/=")) break; + // to resolve skip comment + else if (peek() == '/') { + this->skipLineComment(); + // continue + return; + } + // block comment + else if (peek() == '*') { + this->skipBlockComment(); + return; + } else + tok.kind = token::DIV; + break; + case '$': tok.kind = token::DOLLAR; break; + case '.': tok.kind = token::DOT; break; + case ',': tok.kind = token::COMMA; break; + case '>': + if (peekEmit(&tok, '=', token::GR_EQ, ">=")) + break; + else + tok.kind = token::GREATER; + break; + case '<': + if (peekEmit(&tok, '=', token::LE_EQ, "<=")) break; + if (peekEmit(&tok, '-', token::L_ARROW, "<-")) break; + if (peekEmit(&tok, '~', token::L_CURVED_ARROW, "<~")) + break; + else + tok.kind = token::LESS; + break; + case '&': tok.kind = token::ADDR; break; + case '|': tok.kind = token::OR; break; + case '!': + if (peekEmit(&tok, '=', token::BANG_EQ, "!=")) + break; + else + tok.kind = token::BANG; + break; + case '=': + if (peekEmit(&tok, '=', token::EQ_EQ, "==")) + break; + else + tok.kind = token::EQ; + break; + case '_': + tok.kind = token::UNDERLINE; + break; + break; + default: + // what + throw exp::Exp(exp::UNKNOWN_SYMBOL, "unknown symbol", this->line); + } + // skip current single symbol + this->position++; + this->tokens.push_back(tok); +} + +// return next char of resolve +char Lexer::peek() { + if (position + 1 >= source.length()) + return -1; + else + return source.at(position + 1); +} + +// judge the current character and process the token +bool 
Lexer::peekEmit(token::Token *t, char c, token::Kind k, + const std::string &l) { + if (peek() == c) { + t->kind = k; + t->literal = l; + // advance + this->position++; + // + return true; + } else + return false; +} \ No newline at end of file diff --git a/src/lexer.hpp b/src/lexer.hpp index 82703a9..00a8c27 100644 --- a/src/lexer.hpp +++ b/src/lexer.hpp @@ -10,4 +10,93 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_LEXER_H +#define DRIFT_LEXER_H + +#include +#include + +#include "exp.hpp" +#include "token.hpp" + +// lexer structure +class Lexer { +private: + // current character + int position = 0; + // current line + int line = 1; + // source code + std::string source; + + // resolve identifier + void lexIdent(); + // resolve digit + void lexDigit(); + // resolve "xxx" string literal + void lexString(bool longStr); + // resolve 'x' character literal + void lexChar(); + // resolve other symbol + void lexSymbol(); + // return current char of resolve + inline char now(); + // return next char of resolve + char peek(); + // judge the current character and process the token + bool peekEmit(token::Token *t, + char c, // current char + token::Kind k, // equal token kind + const std::string &l // equal token literal + ); + // return resolve is end + inline bool isEnd(); + // return current char is identifier + inline bool isIdent(); + // return current char is digit + inline bool isDigit(); + // return current char is whitespace + inline bool isSpace(); + // resolve to skip whitespace + inline void skipWhitespace(); + // resolve to skip line comment + inline void skipLineComment(); + // resolve to skip block comment + inline void skipBlockComment(); + +public: + explicit Lexer(std::string source) : source(std::move(source)) { + using namespace token; + // initializer keywords map here + // + keyword["use"] = USE; // 1 + keyword["def"] = DEF; // 2 + keyword["pub"] = PUB; // 3 + keyword["ret"] 
= RET; // 4 + keyword["and"] = AND; // 5 + keyword["end"] = END; // 6 + keyword["if"] = IF; // 7 + keyword["ef"] = EF; // 8 + keyword["nf"] = NF; // 9 + keyword["for"] = FOR; // 10 + keyword["do"] = DO; // 11 + keyword["out"] = OUT; // 12 + keyword["tin"] = TIN; // 13 + keyword["new"] = NEW; // 14 + keyword["mod"] = MOD; // 15 + keyword["as"] = AS; // 16 + + std::cout << "SIZE " << token::keyword.size() << std::endl; + } + + // final token list + std::vector tokens; + // start + void tokenizer(); + // final to dissemble tokens list + void dissembleTokens(); +}; + +#endif \ No newline at end of file diff --git a/src/object.cpp b/src/object.cpp deleted file mode 100644 index 82703a9..0000000 --- a/src/object.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// -// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. -// - -// GNU General Public License, more to see file: LICENSE -// https://www.gnu.org/licenses - -// THE DRIFT PROGRAMMING LANGUAGE -// -// https://github.com/bingxio/drift -// -// https://www.drift-lang.fun/ -// \ No newline at end of file diff --git a/src/object.hpp b/src/object.hpp index 82703a9..98b0094 100644 --- a/src/object.hpp +++ b/src/object.hpp @@ -10,4 +10,236 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_OBJECT_H +#define DRIFT_OBJECT_H + +#include "ast.hpp" + +struct Entity; + +// object +namespace object { + // kind + enum Kind { + INT, + FLOAT, + STR, + CHAR, + BOOL, + ARRAY, + TUPLE, + MAP, + ENUM, + FUNC, + WHOLE + }; + + // object abstract + class Object { + public: + // return a string of dis object + virtual std::string stringer() = 0; + // return the kind of object + virtual Kind kind() = 0; + }; + + // INT + class Int : public Object { + public: + int value; + + Int(int v) : value(v) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return INT; } + }; + + // FLOAT + class Float : public Object { + public: + double value; + + 
Float(float v) : value(v) {} + + std::string stringer() override { + return ""; + } + + Kind kind() override { return FLOAT; } + }; + + // STR + class Str : public Object { + public: + std::string value; + bool longer = false; + + Str(std::string v) : value(v) {} + + Str(std::string v, bool longer) : value(v), longer(longer) { + value.pop_back(); // long character judgment end, delete judgment + // char + } + + std::string stringer() override { + if (longer) { + return ""; + } + return ""; + } + + Kind kind() override { return STR; } + }; + + // CHAR + class Char : public Object { + public: + char value; + + Char(char v) : value(v) {} + + std::string stringer() override { + std::stringstream str; + + str << ""; + + return str.str(); + } + + Kind kind() override { return CHAR; } + }; + + // BOOL + class Bool : public Object { + public: + bool value; + + Bool(bool v) : value(v) {} + + std::string stringer() override { return ""; } + + Kind kind() override { return BOOL; } + }; + + // ARRAY + class Array : public Object { + public: + std::vector elements; + + std::string stringer() override { + std::stringstream str; + + str << "stringer(); + if (++iter != elements.end()) { + str << ", "; + } + } + + str << "]>"; + return str.str(); + } + + Kind kind() override { return ARRAY; } + }; + + // TUPLE + class Tuple : public Object { + public: + std::vector elements; + + std::string stringer() override { + std::stringstream str; + + str << "stringer(); + if (++iter != elements.end()) { + str << ", "; + } + } + + str << ")>"; + return str.str(); + } + + Kind kind() override { return TUPLE; } + }; + + // MAP + class Map : public Object { + public: + std::map value; + + std::string stringer() override { + if (value.empty()) return ""; + + std::stringstream str; + str << "first->stringer() + << " V: " << iter->second->stringer(); + if (++iter != value.end()) { + str << ", "; + } + } + + str << "}>"; + return str.str(); + } + + Kind kind() override { return MAP; } + }; + + // 
ENUM + class Enum : public Object { + public: + std::string name; + std::map elements; + + std::string stringer() override { return ""; } + + Kind kind() override { return ENUM; } + }; + + // FUNC + class Func : public Object { + public: + std::string name; // function name + + ast::Arg arguments; // function args + ast::Type *ret; // function return + + Entity *entity; // function entity + + std::string stringer() override { return ""; } + + Kind kind() override { return FUNC; } + }; + + // WHOLE + class Whole : public Object { + public: + std::string name; // whole name + + Entity *entity; // whole entity + + // interface definition + std::vector> interface; + + // inherit definition + std::vector inherit; + + std::string stringer() override { return ""; } + + Kind kind() override { return WHOLE; } + }; +}; // namespace object + +#endif \ No newline at end of file diff --git a/src/opcode.hpp b/src/opcode.hpp new file mode 100644 index 0000000..5131cc3 --- /dev/null +++ b/src/opcode.hpp @@ -0,0 +1,100 @@ +// +// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. 
// bytecode
namespace byte {
// Total number of bytecodes. Must equal the number of enumerators in Code;
// the static_assert below enforces this at compile time.
constexpr int len = 50;

// Bytecode instruction. The enumerator value is the index of the
// instruction's name in codeString.
enum Code {
  CONST,
  ASSIGN,
  STORE,
  LOAD,
  INDEX,
  GET,
  SET,
  CALL,
  CALL_I,
  ORIG,
  NAME,
  NEW,
  FUNC,
  CHA,
  END,
  WHOLE,
  ENUM,

  PUB,
  MOD,
  USE,
  UAS,

  B_ARR,  // ARRAY
  B_TUP,  // TUPLE
  B_MAP,  // MAP

  INCR,    // INCR
  DECR,    // DECR
  P_INCR,  // P_INCR
  P_DECR,  // P_DECR

  // INFIX
  ADD,    // +
  SUB,    // -
  MUL,    // *
  DIV,    // /
  A_ADD,  // +=
  A_SUB,  // -=
  A_MUL,  // *=
  A_DIV,  // /=
  GR,     // >
  LE,     // <
  GR_E,   // >=
  LE_E,   // <=
  E_E,    // ==
  N_E,    // !=
  AND,    // &
  OR,     // |

  // PREFIX
  BANG,  // !
  NOT,   // -

  JUMP,
  F_JUMP,
  T_JUMP,

  RET,  // keep last: used to validate `len`
};

// Adding an instruction without updating `len` (and the table below) would
// make codeString[code] read past the end of the array.
static_assert(RET + 1 == len,
              "byte::len must match the number of byte::Code enumerators");

// Printable name of each bytecode, indexed by Code.
static std::string codeString[len] = {
    "CONST", "ASSIGN", "STORE", "LOAD",  "INDEX",  "GET",    "SET",
    "CALL",  "CALL_I", "ORIG",  "NAME",  "NEW",    "FUNC",   "CHA",
    "END",   "WHOLE",  "ENUM",  "PUB",   "MOD",    "USE",    "UAS",
    "B_ARR", "B_TUP",  "B_MAP", "INCR",  "DECR",   "P_INCR", "P_DECR",
    "ADD",   "SUB",    "MUL",   "DIV",   "A_ADD",  "A_SUB",  "A_MUL",
    "A_DIV", "GR",     "LE",    "GR_E",  "LE_E",   "E_E",    "N_E",
    "AND",   "OR",     "BANG",  "NOT",   "JUMP",   "F_JUMP", "T_JUMP",
    "RET",
};
}  // namespace byte
token + {token::EFF, "EFF", -1}; + else + return this->tokens.at(this->position + i); +} + +// if argument is equal to current token +bool Parser::look(token::Kind kind) { + if (this->look().kind == kind) { + this->position++; + // + return true; + } + return false; +} + +// return the previous of tokens +inline token::Token Parser::previous() { + return this->tokens.at(this->position - 1); +} + +/** + * expression + * + * assignment -> logicalOr -> logicalAnd -> equality | + * v + * | unary <- multiplication <- addition <- comparison + * v + * primary -> call + * + * - top down operation precedence grammar analysis - + */ +ast::Expr *Parser::expr() { return assignment(); } + +// EXPR.NAME = EXPR : SET +// EXPR = EXPR : ASSIGN +ast::Expr *Parser::assignment() { + ast::Expr *expr = logicalOr(); + + if (look(token::EQ)) { + ast::Expr *value = assignment(); + + // EXPR = EXPR + if (expr->kind() == ast::EXPR_NAME || expr->kind() == ast::EXPR_INDEX) { + return new ast::AssignExpr(expr, value); + } + // EXPR.NAME = EXPR + if (expr->kind() == ast::EXPR_GET) { + ast::GetExpr *get = static_cast(expr); + return new ast::SetExpr(get->expr, get->name, value); + } + error(exp::INVALID_SYNTAX, "cannot assign value"); + } + return expr; +} + +// | +ast::Expr *Parser::logicalOr() { + ast::Expr *expr = logicalAnd(); + + while (look(token::OR)) { + token::Token op = this->previous(); + ast::Expr *right = logicalAnd(); + // + expr = new ast::BinaryExpr(expr, op, right); + } + return expr; +} + +// & +ast::Expr *Parser::logicalAnd() { + ast::Expr *expr = equality(); + + while (look(token::ADDR)) { + token::Token op = this->previous(); + ast::Expr *right = equality(); + // + expr = new ast::BinaryExpr(expr, op, right); + } + return expr; +} + +// == | != +ast::Expr *Parser::equality() { + ast::Expr *expr = comparison(); + + while (look(token::EQ_EQ) || look(token::BANG_EQ)) { + token::Token op = this->previous(); + ast::Expr *right = comparison(); + // + expr = new 
// ! | -
//
// A leading '!' or '-' is consumed and applied to the unary expression that
// follows (so prefixes nest); anything else is handed to call().
ast::Expr *Parser::unary() {
  if (!look(token::BANG) && !look(token::SUB)) {
    return call();
  }
  // the operator is the token that was just consumed
  token::Token op = previous();
  ast::Expr *operand = unary();
  return new ast::UnaryExpr(op, operand);
}
+ // + // LIKE: bar(foo(1, 2, 3)[x + 4]) + // + while (true) { + // call + if (look(token::L_PAREN)) { + // arguments + auto args = std::vector(); + // no argument + if (look(token::R_PAREN)) { + expr = new ast::CallExpr(expr, args); + // to next loop + continue; + // have arguments + } else { + do { + args.push_back(this->expr()); + // + } while (look(token::COMMA)); + } + if (!look(token::R_PAREN)) + error(exp::UNEXPECTED, "expect ')' after arguments"); + expr = new ast::CallExpr(expr, args); + // get + } else if (look(token::DOT)) { + token::Token name = look(); + + this->position++; // skip name token + expr = new ast::GetExpr(expr, name); + // index for array + } else if (look(token::L_BRACKET)) { + // empty index + if (look(token::R_BRACKET)) error(exp::UNEXPECTED, "null index"); + // index + auto index = this->expr(); + + if (!look(token::R_BRACKET)) + error(exp::UNEXPECTED, "expect ']' after index of array"); + expr = new ast::IndexExpr(expr, index); + } else { + break; + } + } + return expr; +} + +// primary +ast::Expr *Parser::primary() { + // literal expr + // number | float | string | char + if (look(token::NUM) || look(token::FLOAT) || look(token::STR) || + look(token::CHAR)) + return new ast::LiteralExpr(this->previous()); + // name expr + if (look(token::IDENT)) { + token::Token tok = previous(); + // ++ | -- + if (look(token::PLUS) || look(token::MINUS)) + // self increment + return new ast::NameExpr(tok, previous().kind == token::PLUS, + previous().kind == token::MINUS, false); + return new ast::NameExpr(tok); + } + // name expr of ++ or -- operators + if (look(token::PLUS) || look(token::MINUS)) { + token::Token op = previous(); + // + if (look(token::IDENT)) + return new ast::NameExpr(previous(), op.kind == token::PLUS, + op.kind == token::MINUS, true); + else + error(exp::INCREMENT_OP, + "increment operand can only be performed on name"); + } + // group expr + if (look(token::L_PAREN)) { + // vector for tuple and group expression + std::vector 
elem; + // empty tuple expr + if (look(token::R_PAREN)) return new ast::TupleExpr(elem); + // tuple or group ? + elem.push_back(this->expr()); + + // tuple expr + if (look(token::COMMA)) { + do { + elem.push_back(this->expr()); + // + } while (look(token::COMMA)); + // + if (!look(token::R_PAREN)) + error(exp::UNEXPECTED, "expect ')' after tuple expression"); + return new ast::TupleExpr(elem); + } + + if (look(token::R_PAREN) == false) + error(exp::UNEXPECTED, "expect ')' after group expression"); + // + return new ast::GroupExpr(elem.at(0)); + } + // array expr + if (look(token::L_BRACKET)) { + auto elem = std::vector(); + + if (look(token::R_BRACKET)) + return new ast::ArrayExpr(elem); + else { + do { + elem.push_back(this->expr()); + // + } while (look(token::COMMA)); + } + if (!look(token::R_BRACKET)) + error(exp::UNEXPECTED, "expect ']' after elements"); + return new ast::ArrayExpr(elem); + } + // map expr + if (look(token::L_BRACE)) { + std::map elem; + // empty map expr + if (look(token::R_BRACE)) return new ast::MapExpr(elem); + + while (true) { + ast::Expr *K = this->expr(); + + if (!look(token::COLON)) { + error(exp::UNEXPECTED, "expect ':' after key in map"); + } + ast::Expr *V = this->expr(); + + // push to map + elem.insert(std::make_pair(K, V)); + + if (look(token::COMMA)) { + continue; + } + if (look(token::R_BRACE)) { + break; + } + error(exp::UNEXPECTED, "expect ',' or '}' after value in map"); + } + return new ast::MapExpr(elem); + } + // new expr + if (look(token::NEW)) { + if (!look(token::IDENT)) { + error(exp::INVALID_SYNTAX, "name of new must be an identifier"); + } + token::Token name = previous(); // name of new + + std::map builder; // fields + + if (!look(token::L_BRACE)) return new ast::NewExpr(name, builder); + + while (true) { + if (!look(token::IDENT)) { + error(exp::INVALID_SYNTAX, + "key of name for new statement must be an identifier"); + } + int tempPos = this->position - 1; + + if (!look(token::COLON)) { + 
error(exp::INVALID_SYNTAX, "expect ':' after key"); + } + ast::Expr *V = this->expr(); // expr V + + builder.insert(std::make_pair(&this->tokens.at(tempPos), V)); + + if (look(token::COMMA)) { + continue; + } + if (look(token::R_BRACE)) { + break; + } + } + return new ast::NewExpr(name, builder); + } + // end + error(exp::INVALID_SYNTAX, "invalid expression: " + look().literal); + return nullptr; +} + +// statement +ast::Stmt *Parser::stmt() { + switch (this->look().kind) { + // definition statement + case token::DEF: + this->position++; + // variable + if (look(token::IDENT) && look().kind == token::COLON) { + token::Token name = previous(); + this->position++; // skip colon symbol + + ast::Type *T = this->type(); + + // value of variable + if (look(token::EQ)) + // there is an initial value + return new ast::VarStmt(name, T, this->expr()); + else + return new ast::VarStmt(name, T); + } + // function or interface + else if (look(token::L_PAREN)) { + // arguments + // + // <[ [token] ], Expr> + ast::Arg args; + // name + token::Token name; + // return + ast::Type *ret = nullptr; + // cache multiple parameters + std::vector temp; + // + bool interfaceStmt = false; + + while (!look(token::R_PAREN)) { + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "argument name muse be an identifier"); + } + // K + token::Token *K = look(true); // address of token + // handle multiparameter + while (look(token::ADD)) { + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "argument name muse be an identifier"); + } else { + temp.push_back(K); // previous, + // left of the + // plus sign + temp.push_back(look(true)); // address + // of token + } + } + if (!look(token::COLON)) { + error(exp::UNEXPECTED, "expect ':' after parameter name"); + } + // handle multiparameter + if (temp.empty()) { + // no + args.insert(std::make_pair(K, this->type())); + } else { + // multip + ast::Type *T = this->type(); + + for (auto i : temp) { + args.insert(std::make_pair(i, T)); + } + } + if 
(look(token::COMMA)) { + continue; + } + } + // function + if (look(token::IDENT)) { + name = previous(); + } + // interface + else if (look(token::MUL)) { + name = look(); // name of interface + // + this->position++; // skip name of + // interface + // + // current parsing interface statement + interfaceStmt = true; + } + // error + else { + error(exp::UNEXPECTED, + "expect '*' to interface or identifier to function"); + } + // return value + if (look(token::R_ARROW)) { + ret = this->type(); + } + + if (interfaceStmt) + return new ast::InterfaceStmt(args, name, ret); + else + return new ast::FuncStmt(args, name, ret, this->block(token::END)); + // + break; + // whole + } else { + ast::Stmt *inherit = nullptr; + + token::Token name = previous(); // name + + // inherit + if (look().kind == token::L_ARROW) { + inherit = this->stmt(); + } + return new ast::WholeStmt(name, inherit, this->block(token::END)); + } + break; + // if + case token::IF: { + this->position++; + // if condition + ast::Expr *condition = this->expr(); + // if then branch + ast::BlockStmt *thenBranch = + this->block(token::EF, token::END, token::NF); + + std::map elem; + + while (previous().kind == token::EF) { + ast::Expr *efCondition = this->expr(); + ast::BlockStmt *efBranch = + this->block(token::EF, token::END, token::NF); + // + elem.insert(std::make_pair(efCondition, efBranch)); + } + + ast::BlockStmt *nfBranch = nullptr; + + if (previous().kind == token::NF) { + nfBranch = this->block(token::END); + } + return new ast::IfStmt(condition, thenBranch, elem, nfBranch); + } break; + // loop + case token::FOR: { + this->position++; + // dead loop + if (look(token::R_ARROW)) + return new ast::ForStmt(nullptr, this->block(token::END)); + // for condition + ast::Expr *condition = this->expr(); + ast::BlockStmt *block = this->block(token::END); + // + return new ast::ForStmt(condition, block); + } break; + // do loop + case token::DO: { + this->position++; + // block + ast::BlockStmt *block = 
this->block(token::FOR); + // go back to the position of the `for` keyword + this->position--; + ast::Stmt *stmt = this->stmt(); + // + return new ast::DoStmt(block, stmt); + } break; + // out in loop + // out + case token::OUT: + this->position++; + // + if (look(token::R_ARROW)) { + // no condition + return new ast::OutStmt(); + } + return new ast::OutStmt(this->expr()); + break; + // tin in loop + // tin + case token::TIN: + this->position++; + // + if (look(token::R_ARROW)) { + // no condition + return new ast::TinStmt(); + } + return new ast::TinStmt(this->expr()); + // and + case token::AND: { + this->position++; + + if (!look(token::R_ARROW)) { + error(exp::UNEXPECTED, "expect '->' after and keyword"); + } + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "alias must be an identifier"); + } + // name + token::Token alias = previous(); + // block + ast::BlockStmt *stmt = this->block(token::END); + // + return new ast::AndStmt(alias, stmt); + } break; + // mod + case token::MOD: + this->position++; + // + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "module name must be an identifier"); + } + return new ast::ModStmt(previous()); + break; + // use + case token::USE: { + this->position++; + + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "use of module name must be an identifier"); + } + // name + token::Token name = previous(); + // no alias + if (!look(token::AS)) { + return new ast::UseStmt(name); + } + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "alias of module name must be an identifier"); + } + int previous = this->position - 1; + // + // [Q]: why can't variables on the stack be referenced + // + return new ast::UseStmt(name, &this->tokens.at(previous)); + } break; + // return + // ret + // ret -> + case token::RET: + this->position++; + // + if (look(token::R_ARROW)) { + // no return value + return new ast::RetStmt(); + } + return new ast::RetStmt(this->stmt()); + break; + // inherit for class + case token::L_ARROW: { + 
this->position++; + + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "inheritance name must be an indentifier"); + } + std::vector names; + // single + names.push_back(look(true)); // address of token + + while (look(token::ADD)) { + if (!look(token::IDENT)) { + error(exp::UNEXPECTED, "inheritance name must be an indentifier"); + } + names.push_back(look(true)); // address + } + + return new ast::InheritStmt(names); + } break; + // call of super class + // <~ expr + case token::L_CURVED_ARROW: // TODO: can only call method + this->position++; + // + return new ast::CallInheritStmt(look().line, this->expr()); + break; + // pub + case token::PUB: + this->position++; + // + return new ast::PubStmt(look().line, this->stmt()); + break; + default: + // expression statement + return new ast::ExprStmt(this->expr()); + } + // end + error(exp::INVALID_SYNTAX, "invalid statement"); + return nullptr; +} + +/** + * parse block statement + * + * the x parameter is required, and y and z have default value + * determine where to stop the analysis + */ +ast::BlockStmt *Parser::block(token::Kind x, token::Kind y, token::Kind z) { + std::vector body; + // until end token + while (true) { + if (look(x)) { + break; + } + // it is not the default value and holds + if (y != token::EFF && look(y)) { + break; + } + // it is not the default value and holds + if (z != token::EFF && look(z)) { + break; + } + body.push_back(this->stmt()); + } + return new ast::BlockStmt(body); +} + +// throw an exception +inline void Parser::error(exp::Kind kind, std::string message) { + throw exp::Exp(kind, std::move(message), look().line); +} + +// type analysis +ast::Type *Parser::type() { + token::Token now = this->look(); + // type + if (now.kind == token::IDENT) { + // skip type ident + this->position++; + // T1 + if (now.literal == S_INT) return new ast::Int(); + // T2 + if (now.literal == S_FLOAT) return new ast::Float(); + // T3 + if (now.literal == S_STR) return new ast::Str; + // T4 + if 
(now.literal == S_CHAR) return new ast::Char(); + // T5 + if (now.literal == S_BOOL) return new ast::Bool; + // user define type + return new ast::User(now); + } + // T6 + if (now.kind == token::L_BRACKET) { + this->position++; // skip left [ symbol + + if (!look(token::R_BRACKET)) { + error(exp::UNEXPECTED, "expect ']' after left square bracket"); + } + return new ast::Array(this->type()); + } + // T7 + if (now.kind == token::LESS) { + this->position++; // skip left < symbol + // key + ast::Type *T1 = this->type(); + + if (!look(token::COMMA)) { + error(exp::UNEXPECTED, "expect ',' after key of map"); + } + ast::Type *T2 = this->type(); + + if (!look(token::GREATER)) { + error(exp::UNEXPECTED, "expect '>' after value of map"); + } + return new ast::Map(T1, T2); + } + // T8 + if (now.kind == token::L_PAREN) { + this->position++; // skip left ( symbol + + ast::Type *T = this->type(); + + if (!look(token::R_PAREN)) { + error(exp::UNEXPECTED, "expect ')' after tuple define"); + } + return new ast::Tuple(T); + } + error(exp::INVALID_SYNTAX, "invalid type"); + // + return nullptr; +} \ No newline at end of file diff --git a/src/parser.hpp b/src/parser.hpp index 82703a9..e1fe180 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -10,4 +10,71 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_PARSER_H +#define DRIFT_PARSER_H + +#include + +#include "ast.hpp" +#include "exp.hpp" + +// parser structure +class Parser { +private: + // current token + int position = 0; + // token list + std::vector tokens; + // return is end of token + // end of file + inline bool isEnd(); + // return the address of the token + inline token::Token *look(bool previous); + // look current token, if equal to peek next + bool look(token::Kind kind); + // look current token, do nothing + inline token::Token look(); + // look the appoint position of tokens + token::Token look(int i); + // look previous token + inline 
token::Token previous(); + // parsing expressions + ast::Expr *expr(); + ast::Expr *assignment(); + ast::Expr *logicalOr(); + ast::Expr *logicalAnd(); + ast::Expr *equality(); + ast::Expr *comparison(); + ast::Expr *addition(); + ast::Expr *multiplication(); + ast::Expr *unary(); + ast::Expr *call(); + ast::Expr *primary(); + // parsing statements + ast::Stmt *stmt(); + // determine where to stop the analysis + ast::BlockStmt *block(token::Kind x, token::Kind y = token::EFF, + token::Kind z = token::EFF); + // + ast::Type *type(); + // throw an exception + inline void error(exp::Kind kind, std::string message); + +public: + // parser constructor + explicit Parser(std::vector tokens) { + // tokens + this->tokens = std::move(tokens); + } + + // final stmts list + std::vector statements; + // do parsing + void parse(); + // final to dissemble statement list + void dissembleStmts(); +}; + +#endif \ No newline at end of file diff --git a/src/semantic.cpp b/src/semantic.cpp new file mode 100644 index 0000000..1620e97 --- /dev/null +++ b/src/semantic.cpp @@ -0,0 +1,231 @@ +// +// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. 
+// + +// GNU General Public License, more to see file: LICENSE +// https://www.gnu.org/licenses + +// THE DRIFT PROGRAMMING LANGUAGE +// +// https://github.com/bingxio/drift +// +// https://www.drift-lang.fun/ +// + +#include "semantic.hpp" + +// statement +void Analysis::analysisStmt(ast::Stmt *stmt) { + switch (stmt->kind()) { + case ast::STMT_EXPR: { + ast::ExprStmt *e = static_cast(stmt); + ast::Expr *expr = static_cast(e->expr); + // expression + this->analysisExpr(expr); + } break; + // + case ast::STMT_PUB: { + ast::PubStmt *p = static_cast(stmt); + + switch (p->stmt->kind()) { + case ast::STMT_VAR: // definition + case ast::STMT_FUNC: // function + case ast::STMT_WHOLE: // whole + case ast::STMT_INTERFACE: // interface + break; + default: + error(exp::CANNOT_PUBLIC, "statement cannot be public", p->line); + } + + // if its whole statement and must to analysis body + // for example it contains a new whole statement inside + if (p->stmt->kind() == ast::STMT_WHOLE) this->analysisStmt(p->stmt); + } break; + // + case ast::STMT_WHOLE: { + ast::WholeStmt *w = static_cast(stmt); + + if (w->body->block.empty()) break; + + ast::Stmt *f = w->body->block.at(0); // first statement + + // just a ident of expression statement + // + // enumeration + // + if (f->kind() == ast::STMT_EXPR) { + // + if (w->inherit != nullptr) { + error(exp::ENUMERATION, "enumeration type cannot be inherited", + w->name.line); + } + + ast::ExprStmt *expr = static_cast(f); + std::vector fields; + + for (auto &i : w->body->block) { + // + if (i->kind() != ast::STMT_EXPR) { + error(exp::ENUMERATION, "whole is an enumeration type", + w->name.line); + } + ast::ExprStmt *pStmt = static_cast(i); + if (pStmt->expr->kind() != ast::EXPR_NAME) { + error(exp::ENUMERATION, "whole is an enumeration type", + w->name.line); + } + + ast::NameExpr *name = static_cast(pStmt->expr); + // push to enumeration + // structure + fields.push_back(&name->token); + } + // replace new statement into + ast::Stmt *n = new 
ast::EnumStmt(w->name, fields); + std::replace(std::begin(*statements), std::end(*statements), now(), n); + std::cout << "\033[33m[Semantic analysis replace " << position + 1 + << "]\033[0m: WholeStmt -> " << n->stringer() << std::endl; + } + // normal whole statement if hinder of statements include name expr + // to throw an error + else { + for (auto &i : w->body->block) { + if (i->kind() == ast::STMT_EXPR) { + error(exp::ENUMERATION, + "it an whole statement but contains some other value", + w->name.line); + } + } + } + } break; + // + case ast::STMT_CALLINHERIT: { + ast::CallInheritStmt *c = static_cast(stmt); + + if (c->expr->kind() != ast::EXPR_CALL) { + error(exp::CALL_INHERIT, + "only methods of the parent class can be called", 2); + } + } break; + // + default: break; + } +} + +// expression +void Analysis::analysisExpr(ast::Expr *expr) { + using namespace token; + + switch (expr->kind()) { + case ast::EXPR_BINARY: { + ast::BinaryExpr *binary = static_cast(expr); + + if (binary->left->kind() != ast::EXPR_LITERAL) { + this->analysisExpr(binary->left); + break; + } + if (binary->right->kind() != ast::EXPR_LITERAL) { + this->analysisExpr(binary->right); + break; + } + + Token l = (static_cast(binary->left))->token; + Token r = (static_cast(binary->right))->token; + + switch (binary->op.kind) { + case ADD: // + + case SUB: // - + if (l.kind == NUM) { + // + if (r.kind == STR || r.kind == CHAR) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + } + if (l.kind == STR || l.kind == CHAR) { + // + if (r.kind == NUM) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + } + break; + case AS_ADD: // += + case AS_SUB: // -= + case AS_MUL: // *= + case AS_DIV: // /= + if (binary->left->kind() != ast::EXPR_NAME) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + break; + case DIV: // / + if (l.kind == STR || l.kind == CHAR || r.kind == STR || + r.kind == CHAR) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + if 
(r.kind == NUM) { + // convert, keep floating point + // numbers + if (std::stof(r.literal) == 0) { + error(exp::DIVISION_ZERO, "division by zero", l.line); + } + } + // array + if (binary->left->kind() == ast::EXPR_ARRAY || + binary->right->kind() == ast::EXPR_ARRAY) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + break; + case MUL: // * + if ((l.kind == CHAR || l.kind == STR) && + (r.kind == CHAR || r.kind == STR)) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + break; + case GR_EQ: // >= + case LE_EQ: // <= + case GREATER: // > + case LESS: { // < + if (l.kind == STR || r.kind == STR) { + error(exp::TYPE_ERROR, "unsupported operand", l.line); + } + } break; + default: break; + } + } break; + case ast::EXPR_GROUP: { + ast::GroupExpr *group = static_cast(expr); + this->analysisExpr(group->expr); + } break; + // + case ast::EXPR_UNARY: { + ast::UnaryExpr *unary = static_cast(expr); + this->analysisExpr(unary->expr); + } break; + // + case ast::EXPR_CALL: { + ast::CallExpr *call = static_cast(expr); + + for (auto i : call->arguments) { + this->analysisExpr(i); + } + } break; + // + case ast::EXPR_GET: { + ast::GetExpr *get = static_cast(expr); + this->analysisExpr(get->expr); + } break; + // + case ast::EXPR_SET: { + ast::SetExpr *set = static_cast(expr); + this->analysisExpr(set->expr); + this->analysisExpr(set->value); + } break; + // + case ast::EXPR_ASSIGN: { + ast::AssignExpr *assign = static_cast(expr); + this->analysisExpr(assign->expr); + this->analysisExpr(assign->value); + } break; + // + default: break; + } +} \ No newline at end of file diff --git a/src/semantic.hpp b/src/semantic.hpp new file mode 100644 index 0000000..10f7653 --- /dev/null +++ b/src/semantic.hpp @@ -0,0 +1,57 @@ +// +// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. 
+// + +// GNU General Public License, more to see file: LICENSE +// https://www.gnu.org/licenses + +// THE DRIFT PROGRAMMING LANGUAGE +// +// https://github.com/bingxio/drift +// +// https://www.drift-lang.fun/ +// + +#ifndef DRIFT_SEMANTIC_H +#define DRIFT_SEMANTIC_H + +#include + +#include "ast.hpp" +#include "exp.hpp" + +// analysis +class Analysis { +private: + int position = 0; + // stmts + std::vector *statements; + + // return the kind of current statement + inline ast::Kind look() { return statements->at(position)->kind(); } + + // return the current statement + inline ast::Stmt *now() { return statements->at(position); } + + // throw semantic analysis exception + void error(exp::Kind k, std::string message, int line) { + throw exp::Exp(k, message, line); + } + +public: + explicit Analysis(std::vector *stmts) { + this->statements = stmts; + + while (position < statements->size()) { + this->analysisStmt(now()); + this->position++; + } + } + + // statement + void analysisStmt(ast::Stmt *stmt); + // expression + void analysisExpr(ast::Expr *expr); +}; + +#endif \ No newline at end of file diff --git a/src/stack.hpp b/src/stack.hpp index 82703a9..9220359 100644 --- a/src/stack.hpp +++ b/src/stack.hpp @@ -10,4 +10,43 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_STACK_H +#define DRIFT_STACK_H + +#include + +template class Stack { +private: + int capacity = 4, count = 0; + + T *elements; + +public: + explicit Stack() { this->elements = new T[capacity]; } + + ~Stack() { delete[] elements; } + + void push(T t) { + if (count + 1 > capacity) { + this->capacity = capacity * 2; + this->elements = (T *)realloc(this->elements, sizeof(T) * capacity); + } + this->elements[count++] = t; + } + + T pop() { return this->elements[--count]; } + + T top() { return this->elements[count]; } + + int len() { return count; } + + bool empty() { return count == 0; } + + std::string stringer() { + return ""; 
+ } +}; + +#endif \ No newline at end of file diff --git a/src/stmt.hpp b/src/stmt.hpp deleted file mode 100644 index 82703a9..0000000 --- a/src/stmt.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// -// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. -// - -// GNU General Public License, more to see file: LICENSE -// https://www.gnu.org/licenses - -// THE DRIFT PROGRAMMING LANGUAGE -// -// https://github.com/bingxio/drift -// -// https://www.drift-lang.fun/ -// \ No newline at end of file diff --git a/src/table.hpp b/src/table.hpp index a344482..cdc416e 100644 --- a/src/table.hpp +++ b/src/table.hpp @@ -12,22 +12,23 @@ // https://www.drift-lang.fun/ // -#include - #ifndef DRIFT_TABLE_H #define DRIFT_TABLE_H -// symbols table +#include + +#include "object.hpp" + struct Table { std::map symbols; - // lookup a name object::Object *lookUp(std::string name) { if (symbols.count(name) == 0) return nullptr; return symbols.at(name); } void clear() { symbols.clear(); } + bool empty() { return symbols.empty(); } }; #endif \ No newline at end of file diff --git a/src/token.cpp b/src/token.cpp deleted file mode 100644 index 82703a9..0000000 --- a/src/token.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// -// Copyright (c) 2021 bingxio(丙杺,黄菁). All rights reserved. 
-// - -// GNU General Public License, more to see file: LICENSE -// https://www.gnu.org/licenses - -// THE DRIFT PROGRAMMING LANGUAGE -// -// https://github.com/bingxio/drift -// -// https://www.drift-lang.fun/ -// \ No newline at end of file diff --git a/src/token.hpp b/src/token.hpp index 82703a9..f0ce8b1 100644 --- a/src/token.hpp +++ b/src/token.hpp @@ -10,4 +10,154 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_TOKEN_H +#define DRIFT_TOKEN_H + +#include +#include +#include + +// tokens +namespace token { + // total number of token for drift + constexpr int len = 56; + // token type + enum Kind { + IDENT, // identifier literal + NUM, // number literal + STR, // string literal + CHAR, // char literal + FLOAT, // float literal + + ADD, // + + SUB, // - + MUL, // * + DIV, // / + + AS_ADD, // += + AS_SUB, // -= + AS_MUL, // *= + AS_DIV, // /= + + PLUS, // ++ + MINUS, // -- + + R_ARROW, // -> + L_ARROW, // <- + L_CURVED_ARROW, // <~ + + DOLLAR, // $ + DOT, // . + COMMA, // , + COLON, // : + EQ, // = + + GREATER, // > + LESS, // < + GR_EQ, // >= + LE_EQ, // <= + + ADDR, // & + OR, // | + BANG, // ! 
+ BANG_EQ, // != + EQ_EQ, // == + // SINGLE_MARK, // ' + // DOUBLE_MARk, // " + // OBLIQUE_MARK, // ` + + L_BRACE, // { + R_BRACE, // } + L_PAREN, // ( + R_PAREN, // ) + L_BRACKET, // [ + R_BRACKET, // ] + + UNDERLINE, // _ + + EFF, // end of file + + // keywords + USE, + DEF, + PUB, + RET, + AND, + END, + IF, + EF, + NF, + FOR, + DO, + OUT, + TIN, + NEW, + MOD, + AS + }; + + // returns a string of each type + static std::string kindString[len] = { + "IDENT", "NUM", "STR", + "CHAR", "FLOAT", "ADD", + "SUB", "MUL", "DIV", + "AS_ADD", "AS_SUB", "AS_MUL", + "AS_DIV", "PLUS", "MINUS", + "R_ARROW", "L_ARROW", "L_CURVED_ARROW", + "DOLLAR", "DOT", "COMMA", + "COLON", "EQ", "GREATER", + "LESS", "GR_EQ", "LE_EQ", + "ADDR", "OR", "BANG", + "BANG_EQ", "EQ_EQ", "L_BRACE", + "R_BRACE", "L_PAREN", "R_PAREN", + "L_BRACKET", "R_BRACKET", "UNDERLINE", + "EFF", "USE", "DEF", + "PUB", "RET", "AND", + "END", "IF", "EF", + "NF", "FOR", "DO", + "OUT", "TIN", "NEW", + "MOD", "AS", + }; + + // token structure + struct Token { + // token type + Kind kind = EFF; + // token literal + std::string literal; + // line of source code + int line; + }; + + // keywords for drift + static std::map keyword; + + // 16 keywords + // initialize it when tokenizer + + // format return token structure + static std::string toString(const Token &token) { + std::stringstream str; + + str << ""; + + return str.str(); + } + + // return the corresponding keyword type according to the literal amount + static Kind getKeyword(const std::string &literal) { + auto i = keyword.find(literal); + // search map + if (i != keyword.end()) + return i->second; + else + return IDENT; + } +}; // namespace token + +#endif \ No newline at end of file diff --git a/src/type.hpp b/src/type.hpp index 82703a9..4cdfe2f 100644 --- a/src/type.hpp +++ b/src/type.hpp @@ -10,4 +10,140 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#ifndef DRIFT_TYPE_H +#define DRIFT_TYPE_H + 
+#include + +#include "token.hpp" + +// types for drift +enum TypeKind { + T_INT, // int + T_FLOAT, // float + T_STR, // str + T_CHAR, // char + T_BOOL, // bool + T_ARRAY, // [] + T_MAP, // + T_TUPLE, // (T) + T_USER, // user +}; + +// basic type for drift +// +#define S_INT "int" // 1 +#define S_FLOAT "float" // 2 +#define S_STR "str" // 3 +#define S_CHAR "char" // 4 +#define S_BOOL "bool" // 5 + +// TYPE +class Type { +public: + // stringer + virtual std::string stringer() = 0; + // kind of basic type + virtual TypeKind kind() = 0; +}; + +// +class Int : public Type { +public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_INT; } +}; + +// float +class Float : public Type { +public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_FLOAT; } +}; + +// str +class Str : public Type { +public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_STR; } +}; + +// char +class Char : public Type { +public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_CHAR; } +}; + +// bool +class Bool : public Type { +public: + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_BOOL; } +}; + +// array (not keyword, for compiler analysis) +// [] +class Array : public Type { +public: + Type *T; // type for elements + + explicit Array(Type *T) : T(T) {} + + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_ARRAY; } +}; + +// map (not keyword, for compiler analysis) +// +class Map : public Type { +public: + Type *T1; // K + Type *T2; // V + + explicit Map(Type *T1, Type *T2) : T1(T1), T2(T2) {} + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_MAP; } +}; + +// tuple (not keyword, for compiler analysis) +// (type) +class Tuple : public Type { +public: + Type *T; // type for elements + + explicit Tuple(Type *T) : T(T) 
{} + + std::string stringer() override { return ""; } + + TypeKind kind() override { return T_TUPLE; } +}; + +// user definition type +// `type` +class User : public Type { +public: + token::Token name; + + explicit User(token::Token name) { this->name = std::move(name); } + + std::string stringer() override { + return ""; + } + + TypeKind kind() override { return T_USER; } +}; + +#endif \ No newline at end of file diff --git a/src/vm.cpp b/src/vm.cpp index 82703a9..1aceaaa 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -10,4 +10,352 @@ // https://github.com/bingxio/drift // // https://www.drift-lang.fun/ -// \ No newline at end of file +// + +#include "vm.hpp" + +// top frame +Frame *vm::top() { return frames.back(); } + +// push object to the current frame +void vm::pushData(object::Object *obj) { top()->data.push(obj); } + +// pop the top of data stack +object::Object *vm::popData() { return top()->data.pop(); } + +// emit new name of table to the current frame +void vm::emitTable(std::string name, object::Object *obj) { + top()->local.symbols.insert(std::make_pair(name, obj)); +} + +// look up a name +object::Object *vm::lookUp(std::string n) { return top()->local.lookUp(n); } + +// first to end constant iterator for current frame's entity +object::Object *vm::retConstant() { + return top()->entity->constants.at(top()->entity->offsets.at(op)); +} + +// first to end +ast::Type *vm::retType() { + return top()->entity->types.at(top()->entity->offsets.at(op)); +} + +// first to end +std::string vm::retName() { + return top()->entity->names.at(top()->entity->offsets.at(op)); +} + +// first to end +int vm::retOffset() { return top()->entity->offsets.at(op); } + +// throw an exception +void error(std::string message) { + throw exp::Exp(exp::RUNTIME_ERROR, message, -1); +} + +// are the comparison types the same +void vm::typeChecker(ast::Type *x, object::Object *y) { + // base type error + if ( + // int + (x->kind() == ast::T_INT && y->kind() != object::INT) || + // 
float + (x->kind() == ast::T_FLOAT && y->kind() != object::FLOAT) || + // str + (x->kind() == ast::T_STR && y->kind() != object::STR) || + // char + (x->kind() == ast::T_CHAR && y->kind() != object::CHAR)) { + error("type error, require: " + x->stringer() + + ", found: " + y->stringer()); + } + // list + if (x->kind() == ast::T_ARRAY) { + ast::Array *T = static_cast(x); + object::Array *arr = static_cast(y); + + for (auto i : arr->elements) { + this->typeChecker(T->T, i); + } + } + // tuple + if (x->kind() == ast::T_TUPLE) { + ast::Tuple *T = static_cast(x); + object::Tuple *tup = static_cast(y); + + for (auto i : tup->elements) { + this->typeChecker(T->T, i); + } + } + // map + if (x->kind() == ast::T_MAP) { + ast::Map *T = static_cast(x); + object::Map *map = static_cast(y); + + for (auto &i : map->value) { + this->typeChecker(T->T1, i.first); // K + this->typeChecker(T->T2, i.second); // R + } + } +} + +void vm::evaluate() { + +#define BINARY_OP(T, L, OP, R) this->pushData(new T(L OP R)); + + for (int ip = 0; ip < top()->entity->codes.size(); ip++) { + + // bytecode + byte::Code co = top()->entity->codes.at(ip); + + switch (co) { + + case byte::CONST: + this->pushData(this->retConstant()); + this->op++; + break; + + case byte::ADD: + case byte::A_ADD: { + object::Object *y = this->popData(); + object::Object *x = this->popData(); + + if (x->kind() == object::INT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Int, static_cast(x)->value, +, + static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, static_cast(x)->value, +, + static_cast(y)->value); + break; + } + } + } + if (x->kind() == object::FLOAT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Float, static_cast(x)->value, + +, static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, static_cast(x)->value, + +, static_cast(y)->value); + break; + } + } + } + if (x->kind() == object::STR && y->kind() == 
object::STR) { + object::Str *l = static_cast(x); + object::Str *r = static_cast(y); + + if (l->longer || r->longer) { + error("cannot plus two long string literal"); + } + + this->pushData(new object::Str(l->value + r->value)); + } + break; + } + case byte::SUB: + case byte::A_SUB: { + object::Object *y = this->popData(); + object::Object *x = this->popData(); + + if (x->kind() == object::INT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Int, static_cast(x)->value, -, + static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, static_cast(x)->value, -, + static_cast(y)->value); + break; + } + } + } + if (x->kind() == object::FLOAT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Float, static_cast(x)->value, + -, static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, static_cast(x)->value, + -, static_cast(y)->value); + break; + } + } + } + break; + } + case byte::MUL: + case byte::A_MUL: { + object::Object *y = this->popData(); + object::Object *x = this->popData(); + + if (x->kind() == object::INT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Int, static_cast(x)->value, *, + static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, static_cast(x)->value, *, + static_cast(y)->value); + break; + } + } + } + if (x->kind() == object::FLOAT) { + switch (y->kind()) { + case object::INT: { + BINARY_OP(object::Float, + static_cast(x)->value, *, + static_cast(y)->value); + break; + } + case object::FLOAT: { + BINARY_OP(object::Float, + static_cast(x)->value, *, + static_cast(y)->value); + break; + } + } + } + break; + } + case byte::DIV: + case byte::A_DIV: { + object::Object *y = this->popData(); + object::Object *x = this->popData(); + + if (x->kind() == object::INT) { + switch (y->kind()) { + case object::INT: { + if (static_cast(y)->value == 0) + error("division by zero"); + BINARY_OP(object::Int, 
static_cast(x)->value, /, + static_cast(y)->value); + break; + } + case object::FLOAT: { + if (static_cast(y)->value == 0) + error("division by zero"); + BINARY_OP(object::Float, static_cast(x)->value, /, + static_cast(y)->value); + break; + } + } + } + if (x->kind() == object::FLOAT) { + switch (y->kind()) { + case object::INT: { + if (static_cast(y)->value == 0) + error("division by zero"); + BINARY_OP(object::Float, static_cast(x)->value, + /, static_cast(y)->value); + break; + } + case object::FLOAT: { + if (static_cast(y)->value == 0) + error("division by zero"); + BINARY_OP(object::Float, static_cast(x)->value, + /, static_cast(y)->value); + break; + } + } + } + break; + } + + case byte::STORE: { + object::Object *obj = this->popData(); // OBJECT + ast::Type *type = this->retType(); // TO TYPE + + std::string name = this->retName(); // TO + + this->typeChecker(type, obj); + + if (top()->local.lookUp(name) != nullptr) { + error("redefining name '" + name + "'"); + } + + top()->local.symbols[name] = obj; // store to table + this->op += 2; + break; + } + + case byte::LOAD: { + std::string name = this->retName(); // NAME + object::Object *obj = top()->local.lookUp(name); // OBJECT + + if (obj == nullptr) error("not defined name '" + name + "'"); + + this->pushData(obj); + this->op++; + break; + } + + case byte::B_ARR: { + int count = this->retOffset(); // COUNT + + object::Array *arr = new object::Array; + // emit elements + for (int i = 0; i < count; i++) { + arr->elements.push_back(this->popData()); + } + + this->pushData(arr); + this->op++; + break; + } + + case byte::B_TUP: { + int count = this->retOffset(); // COUNT + + object::Tuple *tup = new object::Tuple; + // emit elements + for (int i = 0; i < count; i++) { + tup->elements.push_back(this->popData()); + } + + this->pushData(tup); + this->op++; + break; + } + + case byte::B_MAP: { + int count = this->retOffset(); // COUNT + + object::Map *map = new object::Map; + // emit elements + for (int i = 0; i < 
count - 2; i++) { + object::Object *y = this->popData(); + object::Object *x = this->popData(); + + map->value.insert(std::make_pair(x, y)); + } + + this->pushData(map); + this->op++; + break; + } + + case byte::RET: { + // std::cout << top()->data.stringer() << std::endl; + while (!top()->data.empty()) { + std::cout << top()->data.pop()->stringer() << std::endl; + } + } + } + } +#undef BINARY_OP +} \ No newline at end of file diff --git a/src/vm.hpp b/src/vm.hpp index 4ca0546..59e05f5 100644 --- a/src/vm.hpp +++ b/src/vm.hpp @@ -15,4 +15,51 @@ #ifndef DRIFT_VM_H #define DRIFT_VM_H +#include "exp.hpp" +#include "frame.hpp" +#include "object.hpp" +#include "opcode.hpp" + +#include "entity.hpp" + +// structure +class vm { +private: + std::vector frames; // execute frames + // push object to the current frame + void pushData(object::Object *); + // pop the top of data stack + object::Object *popData(); + // emit new name of table to the current frame + void emitTable(std::string, object::Object *); + // look up a name from current top frame + object::Object *lookUp(std::string); + // first to end iterator + object::Object *retConstant(); + // first to end iterator + ast::Type *retType(); + // first to end iterator + std::string retName(); + // first to end iterator + int retOffset(); + // are the comparison types the same + void typeChecker(ast::Type *, object::Object *); + + int op = 0; // offset pointer + +public: + explicit vm(Entity *main) { + // to main frame as main + this->frames.push_back(new Frame(main)); + } + + // top frame + Frame *top(); + + // repl mode to clean pointer for offset + void clean() { this->op = 0; } + + void evaluate(); // evaluate the top of frame +}; + #endif \ No newline at end of file diff --git a/test/run.sh b/test/run.sh index 158ccbd..d12bf8a 100755 --- a/test/run.sh +++ b/test/run.sh @@ -1,7 +1,7 @@ -for f in $(ls $pwd) +for f in $(ls test) do if [ $f == 'run.sh' ]; then - continue + continue fi - time ../a.out $f -done \ No newline 
at end of file + time ./drift ./test/$f +done