From 6f621e9298402b35065c69812516499b0cbf8627 Mon Sep 17 00:00:00 2001 From: Peter Matta <mattapet@fit.cvut.cz> Date: Thu, 26 Apr 2018 16:22:50 +0200 Subject: [PATCH] Simplify expression parsing --- examples/globExpr.dusk | 3 +- include/dusk/AST/Diagnostics.h | 2 +- include/dusk/AST/DiagnosticsParse.h | 5 +- include/dusk/Basic/TokenDefinition.h | 23 +-- include/dusk/Parse/Parser.h | 14 +- include/dusk/Parse/Token.h | 50 ++++++- lib/Parser/Lexer.cpp | 48 +++++-- lib/Parser/ParseExpr.cpp | 205 ++++----------------------- lib/Parser/ParsePattern.cpp | 4 +- 9 files changed, 133 insertions(+), 221 deletions(-) diff --git a/examples/globExpr.dusk b/examples/globExpr.dusk index bf70501..f5515b4 100644 --- a/examples/globExpr.dusk +++ b/examples/globExpr.dusk @@ -1,5 +1,6 @@ var a = 14; let d = a + 99; -a = 1 + 2 + 8; +a = (1 + 2) * 8 == 2; +a = (1 + 2 * 9) || (4 % 3); d; diff --git a/include/dusk/AST/Diagnostics.h b/include/dusk/AST/Diagnostics.h index f011490..a0c3e14 100644 --- a/include/dusk/AST/Diagnostics.h +++ b/include/dusk/AST/Diagnostics.h @@ -29,7 +29,7 @@ class DiagnosticEngine; namespace diag { -enum struct DiagID : unsigned; +enum DiagID : unsigned; } // namespace diag diff --git a/include/dusk/AST/DiagnosticsParse.h b/include/dusk/AST/DiagnosticsParse.h index 6260c39..1d33874 100644 --- a/include/dusk/AST/DiagnosticsParse.h +++ b/include/dusk/AST/DiagnosticsParse.h @@ -17,7 +17,7 @@ namespace dusk { namespace diag { -enum struct DiagID : unsigned { +enum DiagID : unsigned { // Lexer diagnostics lex_unexpected_symbol, lex_unterminated_multiline_comment, @@ -27,6 +27,7 @@ enum struct DiagID : unsigned { expected_semicolon, expected_identifier, + expected_expression, expected_variable_initialization, expected_l_paren, expected_l_brace, @@ -58,6 +59,8 @@ static StringRef getTextForID(DiagID ID) { case DiagID::expected_identifier: return "Expected identifier"; + case DiagID::expected_expression: + return "Expected expression"; case 
DiagID::expected_variable_initialization: return "Expected '=' initialization."; case DiagID::expected_l_paren: diff --git a/include/dusk/Basic/TokenDefinition.h b/include/dusk/Basic/TokenDefinition.h index 5263993..3ee5009 100644 --- a/include/dusk/Basic/TokenDefinition.h +++ b/include/dusk/Basic/TokenDefinition.h @@ -27,8 +27,6 @@ enum struct tok { kwFor, kwIn, kwFunc, - kwPrintln, - kwReadln, kwExtern, // Types @@ -48,12 +46,13 @@ enum struct tok { // Operators assign, // = arrow, // -> + colon, // : elipsis_excl, // .. elipsis_incl, // ... semicolon, // ; - colon, // , + comma, // , l_brace, // { r_brace, // } @@ -72,13 +71,17 @@ enum struct tok { multipy, // * // Logical Operators - lnot, // ! + land, // && + lor, // || + equals, // == nequals, // != less, // < less_eq, // <= greater, // > greater_eq, // >= + + lnot, // ! // End of file eof, @@ -113,10 +116,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) { return OS << "in"; case dusk::tok::kwFunc: return OS << "func"; - case dusk::tok::kwPrintln: - return OS << "writeln"; - case dusk::tok::kwReadln: - return OS << "readln"; case dusk::tok::kwExtern: return OS << "extern"; @@ -136,9 +135,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) { return OS << ".."; case dusk::tok::elipsis_incl: return OS << "..."; + case dusk::tok::colon: + return OS << ":"; case dusk::tok::semicolon: return OS << ";"; - case dusk::tok::colon: + case dusk::tok::comma: return OS << ","; case dusk::tok::l_brace: return OS << "{"; @@ -168,6 +169,10 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) { // Logical Operators case dusk::tok::lnot: return OS << "!"; + case dusk::tok::land: + return OS << "&&"; + case dusk::tok::lor: + return OS << "||"; case dusk::tok::equals: return OS << "=="; case dusk::tok::nequals: diff --git a/include/dusk/Parse/Parser.h b/include/dusk/Parse/Parser.h index 8e1399e..762d29b 100644 --- a/include/dusk/Parse/Parser.h +++ 
b/include/dusk/Parse/Parser.h @@ -119,18 +119,8 @@ private: // MARK: - Expressions Expr *parseExpr(); - Expr *parseAssignExpr(); - Expr *parseAssignExprRHS(Expr *LHS); - - Expr *parseLogicalExpr(); - Expr *parseLogicalExprRHS(Expr *LHS); - - Expr *parseArithExpr(); - Expr *parseArithExprRHS(Expr *LHS); - - Expr *parseMulExpr(); - Expr *parseMulExprRHS(Expr *LHS); - + Expr *parseBinExprRHS(Expr *LHS, unsigned P); + Expr *parsePrimaryExpr(); Expr *parsePrimaryExprRHS(Expr *Dest); diff --git a/include/dusk/Parse/Token.h b/include/dusk/Parse/Token.h index 1dbb0fc..1583085 100644 --- a/include/dusk/Parse/Token.h +++ b/include/dusk/Parse/Token.h @@ -86,17 +86,19 @@ public: case tok::assign: case tok::elipsis_excl: case tok::elipsis_incl: - case tok::plus: - case tok::minus: - case tok::mod: - case tok::divide: - case tok::multipy: + case tok::land: + case tok::lor: case tok::equals: case tok::nequals: case tok::less: case tok::less_eq: case tok::greater: case tok::greater_eq: + case tok::plus: + case tok::minus: + case tok::mod: + case tok::divide: + case tok::multipy: return true; default: return false; @@ -123,8 +125,6 @@ public: case tok::kwFor: case tok::kwIn: case tok::kwFunc: - case tok::kwPrintln: - case tok::kwReadln: case tok::kwExtern: case tok::kwVoid: @@ -143,6 +143,42 @@ public: /// /// \return \c true, if token is a number literal, \c false otherwise. bool isLiteral() const { return is(tok::number_literal); } + + /// Returns operator precedence, where 0 is the lowest one. 
+ unsigned getPrecedence() const { + switch (Kind) { + case tok::assign: + return 5; + + case tok::elipsis_excl: + case tok::elipsis_incl: + return 10; + + case tok::land: + case tok::lor: + return 20; + + case tok::equals: + case tok::nequals: + case tok::less: + case tok::less_eq: + case tok::greater: + case tok::greater_eq: + return 30; + + case tok::plus: + case tok::minus: + return 40; + + case tok::multipy: + case tok::divide: + case tok::mod: + return 50; // Max + + default: + return 0; + } + } }; } // namespace dusk diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp index 3041ff5..39c5882 100644 --- a/lib/Parser/Lexer.cpp +++ b/lib/Parser/Lexer.cpp @@ -124,12 +124,13 @@ void Lexer::lexToken() { const char *TokStart = CurPtr; switch (*CurPtr++) { - case 0: + case 0: { // Not ending null character. if (CurPtr - 1 != BufferEnd) break; CurPtr--; return formToken(tok::eof, TokStart); + } // Skip whitespace case ' ': @@ -138,20 +139,24 @@ void Lexer::lexToken() { case '\r': break; - case '=': + case '=': { if (*CurPtr == '=') { CurPtr++; return formToken(tok::equals, TokStart); } return formToken(tok::assign, TokStart); + } - case '.': + case '.': { if (*CurPtr == '.') return lexElipsis(); formToken(tok::unknown, TokStart); return diagnose(); + } case ',': + return formToken(tok::comma, TokStart); + case ':': return formToken(tok::colon, TokStart); case ';': return formToken(tok::semicolon, TokStart); @@ -170,7 +175,7 @@ void Lexer::lexToken() { return formToken(tok::r_paren, TokStart); // Divide or comment start - case '/': + case '/': { // Check if start of a comment if (*CurPtr == '/') { // `//` skipLineComment(true); @@ -185,14 +190,16 @@ void Lexer::lexToken() { break; // Ignore comment } return formToken(tok::divide, TokStart); + } // Minus or arrow operator - case '-': + case '-': { if (*CurPtr == '>') { CurPtr++; return formToken(tok::arrow, TokStart); } return formToken(tok::minus, TokStart); + } // Algebraic operands case '+': @@ -203,29 +210,46 
@@ void Lexer::lexToken() { return formToken(tok::mod, TokStart); // Logical operands - case '!': + case '!': { if (*CurPtr == '=') { CurPtr++; return formToken(tok::nequals, TokStart); } return formToken(tok::lnot, TokStart); + } + + case '&': { + if (*CurPtr == '&') { + CurPtr++; + return formToken(tok::land, TokStart); + } + return formToken(tok::unknown, TokStart); + } + + case '|': { + if (*CurPtr == '|') { + CurPtr++; + return formToken(tok::lor, TokStart); + } + return formToken(tok::unknown, TokStart); + } - case '<': + case '<': { if (*CurPtr == '=') { CurPtr++; return formToken(tok::less_eq, TokStart); } return formToken(tok::less, TokStart); + } - case '>': + case '>': { if (*CurPtr == '=') { CurPtr++; return formToken(tok::greater_eq, TokStart); } return formToken(tok::greater, TokStart); - - case ':': - return formToken(tok::unknown, TokStart); + } + // Numbers case '0': case '1': case '2': case '3': case '4': @@ -286,8 +310,6 @@ tok Lexer::kindOfIdentifier(StringRef Str) { .Case("for", tok::kwFor) .Case("in", tok::kwIn) .Case("func", tok::kwFunc) - .Case("println", tok::kwPrintln) - .Case("readln", tok::kwReadln) .Case("extern", tok::kwExtern) .Case("Void", tok::kwVoid) .Case("Int", tok::kwInt) diff --git a/lib/Parser/ParseExpr.cpp b/lib/Parser/ParseExpr.cpp index e4bc62c..2dab22b 100644 --- a/lib/Parser/ParseExpr.cpp +++ b/lib/Parser/ParseExpr.cpp @@ -12,184 +12,39 @@ using namespace dusk; Expr *Parser::parseExpr() { - switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseAssignExpr(); - - default: - diagnose(Tok.getLoc()); + auto Primary = parsePrimaryExpr(); + if (!Primary) return nullptr; - } -} - -Expr *Parser::parseAssignExpr() { - switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseAssignExprRHS(parseLogicalExpr()); - default: - diagnose(consumeToken()); - return nullptr; - } + return 
parseBinExprRHS(Primary, 0); } -Expr *Parser::parseAssignExprRHS(Expr *LHS) { - switch (Tok.getKind()) { - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; +Expr *Parser::parseBinExprRHS(Expr *LHS, unsigned P) { + while (true) { + auto Prec = Tok.getPrecedence(); - case tok::assign: + // If the precedence of current operator is lower or equal to previous one + // (accounting for invalid 0), return already parsed part of the expression. + if (Prec <= P) + return LHS; + auto Op = Tok; consumeToken(); - return makeNode<AssignExpr>((IdentifierExpr *)LHS, parseExpr()); - - default: - diagnose(Tok.getLoc()); + + // Return nullptr on error + auto RHS = parsePrimaryExpr(); + if (!RHS) return nullptr; - } -} - -Expr *Parser::parseLogicalExpr() { - switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseLogicalExprRHS(parseArithExpr()); - - default: - diagnose(Tok.getLoc()); - return nullptr; - } -} - -Expr *Parser::parseLogicalExprRHS(Expr *LHS) { - auto T = Tok; - switch (Tok.getKind()) { - case tok::assign: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - consumeToken(); - return makeNode<InfixExpr>(LHS, parseArithExpr(), T); + // If precedence of next operator is greater than the current one, parse + // expression in favor of the next operator.
+ if (Prec < Tok.getPrecedence()) { + RHS = parseBinExprRHS(RHS, Prec); + if (!RHS) + return nullptr; + } - default: - diagnose(Tok.getLoc()); - return nullptr; - } -} - -Expr *Parser::parseArithExpr() { - switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseArithExprRHS(parseMulExpr()); - - default: - diagnose(Tok.getLoc()); - return nullptr; - } -} - -Expr *Parser::parseArithExprRHS(Expr *LHS) { - auto T = Tok; - switch (Tok.getKind()) { - case tok::assign: - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - - case tok::plus: - case tok::minus: - consumeToken(); - return makeNode<InfixExpr>(LHS, parseExpr(), T); - - default: - diagnose(Tok.getLoc()); - return nullptr; - } -} - -Expr *Parser::parseMulExpr() { - switch (Tok.getKind()) { - case tok::identifier: - case tok::number_literal: - case tok::l_paren: - case tok::minus: - return parseMulExprRHS(parsePrimaryExpr()); - - default: - diagnose(Tok.getLoc()); - return nullptr; - } -} - -Expr *Parser::parseMulExprRHS(Expr *LHS) { - auto T = Tok; - switch (Tok.getKind()) { - case tok::plus: - case tok::minus: - case tok::equals: - case tok::nequals: - case tok::less: - case tok::less_eq: - case tok::greater: - case tok::greater_eq: - case tok::assign: - case tok::elipsis_incl: - case tok::elipsis_excl: - case tok::r_paren: - case tok::r_bracket: - case tok::l_brace: - case tok::colon: - case tok::semicolon: - return LHS; - - case tok::mod: - case tok::multipy: - case tok::divide: - consumeToken(); - return makeNode<InfixExpr>(LHS, parseExpr(), T); - - default: - diagnose(Tok.getLoc()); - return nullptr; + // Update the current expression. 
+ LHS = makeNode<InfixExpr>(LHS, RHS, Op); } } @@ -209,8 +64,8 @@ Expr *Parser::parsePrimaryExpr() { return parseUnaryExpr(); default: - diagnose(Tok.getLoc()); - return nullptr; + diagnose(Tok.getLoc(), diag::expected_expression); + return nullptr; } } @@ -224,7 +79,7 @@ Expr *Parser::parsePrimaryExprRHS(Expr *Dest) { case tok::r_paren: case tok::r_bracket: case tok::l_brace: - case tok::colon: + case tok::comma: case tok::semicolon: return Dest; @@ -235,8 +90,8 @@ Expr *Parser::parsePrimaryExprRHS(Expr *Dest) { return parseSubscriptExpr(Dest); default: - diagnose(Tok.getLoc()); - return nullptr; + diagnose(Tok.getLoc()); + return nullptr; } } @@ -271,7 +126,7 @@ Expr *Parser::parseParenExpr() { auto E = parseExpr(); if (!consumeIf(tok::r_paren)) { diagnose(Tok.getLoc(), diag::DiagID::expected_r_paren) - .fixItAfter(")", Tok.getLoc()); + .fixItAfter(")", Tok.getLoc()); return nullptr; } return makeNode<ParenExpr>(E, L, PreviousLoc); diff --git a/lib/Parser/ParsePattern.cpp b/lib/Parser/ParsePattern.cpp index d42e443..bc04b42 100644 --- a/lib/Parser/ParsePattern.cpp +++ b/lib/Parser/ParsePattern.cpp @@ -69,7 +69,7 @@ Expr *Parser::parseExprPatternItem() { // ExprPatternItem -> epsilon return nullptr; - case tok::colon: + case tok::comma: // ExprPatternItem -> ',' Expr ExprPatternItem consumeToken(); return parseExpr(); @@ -137,7 +137,7 @@ Decl *Parser::parseVarPatternItem() { // VarPattern__ -> epsilon return nullptr; - case tok::colon: + case tok::comma: // VarPattern__ -> ',' identifier VarPattern__ consumeToken(); return parseParamDecl(); -- GitLab