diff --git a/grammar.txt b/grammar.txt index a63685796d1a81c205da6638141c56cf328e505f..6ffd388a6e406c4479577321bc725b29daa4e8e7 100644 --- a/grammar.txt +++ b/grammar.txt @@ -1,14 +1,18 @@ Program -> eof // eof | epsilon -Program -> GDecl ';' Program // 'const', 'var' +Program -> Decl ';' Program // 'const', 'var' +Program -> Expr ';' Program // identifier, number_literal, '(' Program -> Func Program // 'func' +Program -> Stmt Program // 'for', 'while', 'if' -GDecl -> 'const' identifier '=' GDecl_ // 'const' -GDecl -> 'var' identifier '=' GDecl_ // 'var' -GDecl_ -> identifier // 'identifier' -GDecl_ -> number_literal // 'number_literal' +Decl -> 'var' identifier DeclVal ';' // 'var' +Decl -> 'const' identifier DeclVal ';' // 'const' +DeclVal -> '=' Expr // '=' -Func -> 'func' identifier '(' Args ')' Block // 'func' +// Decl_ -> '[' DeclArray // '[' +// DeclArray -> Expr // '(', identifier, number_literal, '-' +// DeclArray -> ']' '=' ArrayLiteral // ']' +Func -> 'func' identifier '(' Args ')' Block // 'func' Block -> '{' Block_ '}' // '{' Block_ -> epsilon // _ | '}' @@ -16,8 +20,6 @@ Block_ -> Decl ';' Block_ // 'var' | '}' Block_ -> Expr ';' Block_ // identifier, number_literal, '(' | '}' Block_ -> Stmt Block_ // 'for', 'while', 'if' | '}' -Decl -> 'var' identifier '=' Expr // 'var' - Expr -> AssignExpr // '(', identifier, number_literal, '-' AssignExpr -> LogExpr AssignExpr_ // '(', identifier, number_literal, '-' @@ -42,7 +44,7 @@ PrimaryExpr -> number_literal // number_literal PrimaryExpr -> '-' PrimaryExpr // '-' PrimaryExpr -> '!' PrimaryExpr // '!' 
-IdExpr -> epsilon // _ | '*', '/', '+', '-', '==', '!=', '<', '>', '<=', '>=', '=', '..', '...', ')', '{', ']', ',', ';' +IdExpr -> epsilon // _ | '*', '/', '%', '+', '-', '==', '!=', '<', '>', '<=', '>=', '=', '..', '...', ')', '{', ']', ',', ';' IdExpr -> '(' Args ')' // '(' IdExpr -> '[' Expr ']' // '[' @@ -79,6 +81,10 @@ Args -> identifier Args_ // identifier Args_ -> epsilon // _ | ')' Args_ -> ',' identifier Args_ // ',' +ArrayLiteral -> '[' ArrayBody ']' // '[' +ArrayBody -> epsilon // _ | ']' +ArrayBody -> Expr ',' ArrayBody // '(', identifier, number_literal, '-' + Range -> Expr Range_ // identifier, number_literal, '(' Range_ -> '..' Expr // '..' Range_ -> '...' Expr // '...' diff --git a/include/dusk/AST/Expr.h b/include/dusk/AST/Expr.h index ee583f58837228219068b18e53a8bbb07d1f618b..3c2308cd5528be9e4f18803a65d957123dca637c 100644 --- a/include/dusk/AST/Expr.h +++ b/include/dusk/AST/Expr.h @@ -16,6 +16,7 @@ namespace dusk { class NumberLiteralExpr; +class ArrayLiteralExpr; class IdentifierExpr; class ParenExpr; class InfixExpr; @@ -50,8 +51,7 @@ public: ExprKind getKind() const { return Kind; } }; - - + /// Number literal expression encalsulation. 
class NumberLiteralExpr: public Expr { int Value; diff --git a/include/dusk/Parse/Parser.h b/include/dusk/Parse/Parser.h index 61ee964416ca1bbaa6ab2619d5ff03034a473ea2..4012911eadc465d388dc1b44b4e98e34f421ed1a 100644 --- a/include/dusk/Parse/Parser.h +++ b/include/dusk/Parse/Parser.h @@ -69,22 +69,21 @@ public: private: ASTNode *parseGlobal(); - // MARK: - Global variable declarations + // MARK: - Declarations + + VarDecl *parseVarDecl(); - ValDecl *parseGlobalValDecl(); - VarDecl *parseGlobalVarDecl(); - ConstDecl *parseGlobalConstDecl(); - Expr *parseGlobalDeclValue(); + ConstDecl *parseConstDecl(); - // MARK: - Declarations + Expr *parseDeclValue(); FuncDecl *parseFuncDecl(); BlockStmt *parseBlock(); + ASTNode *parseBlockBody(); - ParamDecl *parseParamDecl(); - VarDecl *parseVarDecl(); + ParamDecl *parseParamDecl(); // MARK: - Expressions diff --git a/include/dusk/Parse/Token.h b/include/dusk/Parse/Token.h index 4056e28155808872eda712d21f90c18ed4b8da28..f5155087ae63c9b9140a93c47dd403938e3b6929 100644 --- a/include/dusk/Parse/Token.h +++ b/include/dusk/Parse/Token.h @@ -87,6 +87,7 @@ public: case tok::elipsis_incl: case tok::plus: case tok::minus: + case tok::mod: case tok::divide: case tok::multipy: case tok::equals: @@ -128,7 +129,8 @@ public: /// \brief Determins, if current token is a number literal. /// - /// \note In \c dusk language, the only valid literals are integers. + /// \note In \c dusk language, the only valid literals are integers + /// and arrays. /// /// \return \c true, if token is a number literal, \c false otherwise. 
bool isLiteral() const { diff --git a/include/dusk/Parse/TokenDefinition.h b/include/dusk/Parse/TokenDefinition.h index 7d8aadf28415e2b4de3a994658c2eb76e6a5eae9..5668e00ac515b969cf8cbfe257d80abb42acea93 100644 --- a/include/dusk/Parse/TokenDefinition.h +++ b/include/dusk/Parse/TokenDefinition.h @@ -61,6 +61,7 @@ enum struct tok { // Arithmetic Operators plus, // + minus, // - + mod, // % divide, // / multipy, // * @@ -117,6 +118,7 @@ namespace llvm { // Arithmetic Operators case dusk::tok::plus: return OS << "+"; case dusk::tok::minus: return OS << "-"; + case dusk::tok::mod: return OS << "%"; case dusk::tok::divide: return OS << "/"; case dusk::tok::multipy: return OS << "*"; diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp index 14ef7e89c30ff200b4085ab71b6bba3310953a02..0c7a636889040868c6ad8b95366bc43bc6bed411 100644 --- a/lib/Parser/Lexer.cpp +++ b/lib/Parser/Lexer.cpp @@ -190,6 +190,8 @@ void Lexer::lexToken() { return formToken(tok::minus, TokStart); case '*': return formToken(tok::multipy, TokStart); + case '%': + return formToken(tok::mod, TokStart); // Logical operands case '!': diff --git a/lib/Parser/ParseDecl.cpp b/lib/Parser/ParseDecl.cpp index a7e18387707b84defaf27b3d03e586762ea2b595..7b66f38c11d28ea03f1c8338bec78e649558bc52 100644 --- a/lib/Parser/ParseDecl.cpp +++ b/lib/Parser/ParseDecl.cpp @@ -11,91 +11,48 @@ using namespace dusk; -/// Parsing top-level symbols +/// Const declaration /// -/// GlobalScope ::= -/// 'var' identifier '=' ( identifier | number_literal ) ';' -/// 'const' identifier '=' ( identifier | number_literal ) ';' -/// 'func' identifier '(' Args ')' CodeBlock -ValDecl *Parser::parseGlobalValDecl() { - switch (Tok.getKind()) { - case tok::kwVar: - return parseGlobalVarDecl(); - case tok::kwConst: - return parseGlobalConstDecl(); - default: - llvm_unreachable("Unexpected token."); - } - return nullptr; -} - -/// Variable declaration -/// -/// VarDecl ::= -/// 'var' identifier '=' ( identifier | number_literal ) ';' -VarDecl 
*Parser::parseGlobalVarDecl() { - // Ensure `var`keyword. - assert(Tok.is(tok::kwVar) && "Invalid parsing method."); +/// ConstDecl ::= +/// 'const' identifier '=' Expr ';' +ConstDecl *Parser::parseConstDecl() { + // Validate correct variable decl + assert(Tok.is(tok::kwConst) && "Invalid parsing method."); - // Capture `var` keyword location. - auto VL = consumeToken(); + auto L = consumeToken(); auto ID = Tok; - if (!consumeIf(tok::identifier)) assert("Expected identifier" && false); - if (!consumeIf(tok::assign)) - assert("Expected assign" && false); - - auto Value = parseGlobalDeclValue(); - - if (!consumeIf(tok::semicolon)) - assert("Extected semicolon" && false); - - return new VarDecl(ID.getText(), ID.getLoc(), VL, Value); + return new ConstDecl(ID.getText(), ID.getLoc(), L, parseDeclValue()); } -/// Constant declaration +/// Var declaration /// -/// ConstDecl ::= -/// 'const' identifier '=' ( identifier | number_literal ) ';' -ConstDecl *Parser::parseGlobalConstDecl() { - // Ensure `const` keyword - assert(Tok.is(tok::kwConst) && "Invalid parsing method.."); +/// VarDecl ::= +/// 'var' identifier '=' Expr ';' +VarDecl *Parser::parseVarDecl() { + // Validate correct variable decl + assert(Tok.is(tok::kwVar) && "Invalid parsing method."); - // Capture `const` keyword location. 
- auto CL = consumeToken(); + auto L = consumeToken(); auto ID = Tok; - if (!consumeIf(tok::identifier)) assert("Expected identifier" && false); - if (!consumeIf(tok::assign) && false) - assert("Expected assign" && false); - - auto Value = parseGlobalDeclValue(); - - if (!consumeIf(tok::semicolon)) - assert("Extected semicolon" && false); - - return new ConstDecl(ID.getText(), ID.getLoc(), CL, Value); + return new VarDecl(ID.getText(), ID.getLoc(), L, parseDeclValue()); } -/// Global value -/// -/// GlobValue ::= -/// identifier | number_literal -Expr *Parser::parseGlobalDeclValue() { - switch (Tok.getKind()) { - case tok::identifier: - return parseIdentifierExpr(); - - case tok::number_literal: - return parseNumberLiteralExpr(); - - default: - llvm_unreachable("Unexpected token."); - } +/// DeclVal ::= +/// '=' Expr ';' +Expr *Parser::parseDeclValue() { + if (!consumeIf(tok::assign)) + assert("Expected `=`" && false); + + auto E = parseExpr(); + if (!consumeIf(tok::semicolon)) + assert("Missing semicolon at the end of the line" && false); + return E; } @@ -126,24 +83,4 @@ ParamDecl *Parser::parseParamDecl() { return new ParamDecl(ID.getText(), ID.getLoc()); } -/// Var declaration -/// -/// VarDecl ::= -/// 'var' identifier '=' Expr ';' -VarDecl *Parser::parseVarDecl() { - // Validate correct variable decl - assert(Tok.is(tok::kwVar) && "Invalid parsing method."); - - auto L = consumeToken(); - auto ID = Tok; - if (!consumeIf(tok::identifier)) - assert("Expected identifier" && false); - if (!consumeIf(tok::assign)) - assert("Expected `=`" && false); - - auto V = parseExpr(); - if (!consumeIf(tok::semicolon)) - assert("Expected `;`."); - - return new VarDecl(ID.getText(), ID.getLoc(), L, V); -} + diff --git a/lib/Parser/ParseExpr.cpp b/lib/Parser/ParseExpr.cpp index 78ecef10c462eec722dd14db8ddf71d1cd7e7cc9..4eec1db5f36928ed15753b3aa2f44ae1a6f5d552 100644 --- a/lib/Parser/ParseExpr.cpp +++ b/lib/Parser/ParseExpr.cpp @@ -13,191 +13,195 @@ using namespace dusk; Expr 
*Parser::parseExpr() { switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseAssignExpr(); - - default: - llvm_unreachable("Unexpected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + case tok::minus: + return parseAssignExpr(); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseAssignExpr() { switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseAssignExprRHS(parseLogicalExpr()); - - default: - llvm_unreachable("Unexpected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + case tok::minus: + return parseAssignExprRHS(parseLogicalExpr()); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseAssignExprRHS(Expr *LHS) { switch (Tok.getKind()) { - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - - case tok::assign: - consumeToken(); - return new AssignExpr((IdentifierExpr *)LHS, parseExpr()); - - default: - llvm_unreachable("Unexpected token"); + case tok::elipsis_incl: + case tok::elipsis_excl: + case tok::r_paren: + case tok::r_bracket: + case tok::l_brace: + case tok::colon: + case tok::semicolon: + return LHS; + + case tok::assign: + consumeToken(); + return new AssignExpr((IdentifierExpr *)LHS, parseExpr()); + + default: + llvm_unreachable("Unexpected token"); } } Expr *Parser::parseLogicalExpr() { switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseLogicalExprRHS(parseArithExpr()); - - default: - llvm_unreachable("Unepected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + case tok::minus: + return parseLogicalExprRHS(parseArithExpr()); + + default: + 
llvm_unreachable("Unepected token."); } } Expr *Parser::parseLogicalExprRHS(Expr *LHS) { auto T = Tok; switch (Tok.getKind()) { - case tok::assign: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - consumeToken(); - return new InfixExpr(LHS, parseArithExpr(), T); - - default: - llvm_unreachable("Unexpected token."); + case tok::assign: + case tok::elipsis_incl: + case tok::elipsis_excl: + case tok::r_paren: + case tok::r_bracket: + case tok::l_brace: + case tok::colon: + case tok::semicolon: + return LHS; + + case tok::equals: + case tok::nequals: + case tok::less: + case tok::less_eq: + case tok::greater: + case tok::greater_eq: + consumeToken(); + return new InfixExpr(LHS, parseArithExpr(), T); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseArithExpr() { switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseArithExprRHS(parseMulExpr()); - - default: - llvm_unreachable("Unexpected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + case tok::minus: + return parseArithExprRHS(parseMulExpr()); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseArithExprRHS(Expr *LHS) { auto T = Tok; switch (Tok.getKind()) { - case tok::assign: - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; + case tok::assign: + case tok::equals: + case tok::nequals: + case tok::less: + case tok::less_eq: + case tok::greater: + case 
tok::greater_eq: + case tok::elipsis_incl: + case tok::elipsis_excl: + case tok::r_paren: + case tok::r_bracket: + case tok::l_brace: + case tok::colon: + case tok::semicolon: + return LHS; + + case tok::plus: + case tok::minus: + consumeToken(); + return new InfixExpr(LHS, parseExpr(), T); - case tok::plus: - case tok::minus: - consumeToken(); - return new InfixExpr(LHS, parseExpr(), T); - - default: - llvm_unreachable("Unexpected token."); + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseMulExpr() { switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseMulExprRHS(parsePrimaryExpr()); - - default: - llvm_unreachable("Unexpected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + case tok::minus: + return parseMulExprRHS(parsePrimaryExpr()); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parseMulExprRHS(Expr *LHS) { auto T = Tok; switch (Tok.getKind()) { - case tok::plus: - case tok::minus: - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - case tok::assign: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - - case tok::multipy: - case tok::divide: - consumeToken(); - return new InfixExpr(LHS, parseExpr(), T); - - default: - llvm_unreachable("Unexpected token."); + case tok::plus: + case tok::minus: + case tok::equals: + case tok::nequals: + case tok::less: + case tok::less_eq: + case tok::greater: + case tok::greater_eq: + case tok::assign: + case tok::elipsis_incl: + case tok::elipsis_excl: + case tok::r_paren: + case tok::r_bracket: + case tok::l_brace: + case tok::colon: + case tok::semicolon: + return LHS; + + case tok::mod: + case tok::multipy: + case tok::divide: + consumeToken(); + return new 
InfixExpr(LHS, parseExpr(), T); + + default: + llvm_unreachable("Unexpected token."); } } Expr *Parser::parsePrimaryExpr() { switch (Tok.getKind()) { - case tok::l_paren: - return parseParenExpr(); - case tok::identifier: - return parsePrimaryExprRHS(parseIdentifierExpr()); - case tok::number_literal: - return parseNumberLiteralExpr(); - case tok::minus: - case tok::neg: - return parseUnaryExpr(); - - default: - llvm_unreachable("Unexpected token."); + case tok::l_paren: + return parseParenExpr(); + + case tok::identifier: + return parsePrimaryExprRHS(parseIdentifierExpr()); + + case tok::number_literal: + return parseNumberLiteralExpr(); + + case tok::minus: + case tok::neg: + return parseUnaryExpr(); + + default: + llvm_unreachable("Unexpected token."); } } @@ -206,20 +210,23 @@ Expr *Parser::parsePrimaryExprRHS(IdentifierExpr *Dest) { return Dest; switch (Tok.getKind()) { - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return Dest; - case tok::l_paren: - return parseCallExpr(Dest); - case tok::l_bracket: - return parseSubscriptExpr(Dest); - default: - llvm_unreachable("Unexpected token."); + case tok::elipsis_incl: + case tok::elipsis_excl: + case tok::r_paren: + case tok::r_bracket: + case tok::l_brace: + case tok::colon: + case tok::semicolon: + return Dest; + + case tok::l_paren: + return parseCallExpr(Dest); + + case tok::l_bracket: + return parseSubscriptExpr(Dest); + + default: + llvm_unreachable("Unexpected token."); } } diff --git a/lib/Parser/ParseStmt.cpp b/lib/Parser/ParseStmt.cpp index f95f75fcecfdc7d6b1b3bee07e1dba3ecde0be9f..4fca21500f52fa7b3cc422c089f8550200cf16a2 100644 --- a/lib/Parser/ParseStmt.cpp +++ b/lib/Parser/ParseStmt.cpp @@ -22,13 +22,13 @@ ASTNode *Parser::parseStatement() { Expr *Parser::parseExprStmt() { Expr *E; switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - E = 
parseExpr(); - break; - default: - llvm_unreachable("Unexpected token."); + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + E = parseExpr(); + break; + default: + llvm_unreachable("Unexpected token."); } if (!consumeIf(tok::semicolon)) assert("Missing semicolon at the end of the line." && false); diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp index 14ee87a9f118be24f5deab1fe0fbeeb27405cae6..612825c3203e11fe4c1f2c0b52ed1505616a72cd 100644 --- a/lib/Parser/Parser.cpp +++ b/lib/Parser/Parser.cpp @@ -43,11 +43,22 @@ ASTNode *Parser::parse() { ASTNode *Parser::parseGlobal() { switch (Tok.getKind()) { case tok::kwVar: + return parseVarDecl(); case tok::kwConst: - return parseGlobalValDecl(); - + return parseConstDecl(); case tok::kwFunc: return parseFuncStmt(); + case tok::kwFor: + return parseForStmt(); + case tok::kwWhile: + return parseWhileStmt(); + case tok::kwIf: + return parseIfStmt(); + + case tok::identifier: + case tok::number_literal: + case tok::l_paren: + return parseExpr(); default: llvm_unreachable("Unexpected token.");