From 6f621e9298402b35065c69812516499b0cbf8627 Mon Sep 17 00:00:00 2001
From: Peter Matta <mattapet@fit.cvut.cz>
Date: Thu, 26 Apr 2018 16:22:50 +0200
Subject: [PATCH] Simplify expression parsing

---
 examples/globExpr.dusk               |   3 +-
 include/dusk/AST/Diagnostics.h       |   2 +-
 include/dusk/AST/DiagnosticsParse.h  |   5 +-
 include/dusk/Basic/TokenDefinition.h |  23 +--
 include/dusk/Parse/Parser.h          |  14 +-
 include/dusk/Parse/Token.h           |  50 ++++++-
 lib/Parser/Lexer.cpp                 |  48 +++++--
 lib/Parser/ParseExpr.cpp             | 205 ++++-----------------------
 lib/Parser/ParsePattern.cpp          |   4 +-
 9 files changed, 133 insertions(+), 221 deletions(-)

diff --git a/examples/globExpr.dusk b/examples/globExpr.dusk
index bf70501..f5515b4 100644
--- a/examples/globExpr.dusk
+++ b/examples/globExpr.dusk
@@ -1,5 +1,6 @@
 var a = 14;
 let d = a + 99;
-a = 1 + 2 + 8;
+a = (1 + 2) * 8 == 2;
+a = (1 + 2 * 9) || (4 % 3);
 d;
 
diff --git a/include/dusk/AST/Diagnostics.h b/include/dusk/AST/Diagnostics.h
index f011490..a0c3e14 100644
--- a/include/dusk/AST/Diagnostics.h
+++ b/include/dusk/AST/Diagnostics.h
@@ -29,7 +29,7 @@ class DiagnosticEngine;
 
 namespace diag {
 
-enum struct DiagID : unsigned;
+enum DiagID : unsigned;
 
 } // namespace diag
 
diff --git a/include/dusk/AST/DiagnosticsParse.h b/include/dusk/AST/DiagnosticsParse.h
index 6260c39..1d33874 100644
--- a/include/dusk/AST/DiagnosticsParse.h
+++ b/include/dusk/AST/DiagnosticsParse.h
@@ -17,7 +17,7 @@ namespace dusk {
 
 namespace diag {
 
-enum struct DiagID : unsigned {
+enum DiagID : unsigned {
   // Lexer diagnostics
   lex_unexpected_symbol,
   lex_unterminated_multiline_comment,
@@ -27,6 +27,7 @@ enum struct DiagID : unsigned {
   expected_semicolon,
 
   expected_identifier,
+  expected_expression,
   expected_variable_initialization,
   expected_l_paren,
   expected_l_brace,
@@ -58,6 +59,8 @@ static StringRef getTextForID(DiagID ID) {
 
   case DiagID::expected_identifier:
     return "Expected identifier";
+  case DiagID::expected_expression:
+    return "Expected expression";
   case DiagID::expected_variable_initialization:
     return "Expected '=' initialization.";
   case DiagID::expected_l_paren:
diff --git a/include/dusk/Basic/TokenDefinition.h b/include/dusk/Basic/TokenDefinition.h
index 5263993..3ee5009 100644
--- a/include/dusk/Basic/TokenDefinition.h
+++ b/include/dusk/Basic/TokenDefinition.h
@@ -27,8 +27,6 @@ enum struct tok {
   kwFor,
   kwIn,
   kwFunc,
-  kwPrintln,
-  kwReadln,
   kwExtern,
   
   // Types
@@ -48,12 +46,13 @@ enum struct tok {
                   // Operators
   assign,         // =
   arrow,          // ->
+  colon,          // :
 
   elipsis_excl,   // ..
   elipsis_incl,   // ...
 
   semicolon,      // ;
-  colon,          // ,
+  comma,          // ,
 
   l_brace,        // {
   r_brace,        // }
@@ -72,13 +71,17 @@ enum struct tok {
   multipy,        // *
 
   // Logical Operators
-  lnot,           // !
+  land,           // &&
+  lor,            // ||
+  
   equals,         // ==
   nequals,        // !=
   less,           // <
   less_eq,        // <=
   greater,        // >
   greater_eq,     // >=
+  
+  lnot,           // !
 
   // End of file
   eof,
@@ -113,10 +116,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) {
     return OS << "in";
   case dusk::tok::kwFunc:
     return OS << "func";
-  case dusk::tok::kwPrintln:
-    return OS << "writeln";
-  case dusk::tok::kwReadln:
-    return OS << "readln";
   case dusk::tok::kwExtern:
     return OS << "extern";
       
@@ -136,9 +135,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) {
     return OS << "..";
   case dusk::tok::elipsis_incl:
     return OS << "...";
+  case dusk::tok::colon:
+    return OS << ":";
   case dusk::tok::semicolon:
     return OS << ";";
-  case dusk::tok::colon:
+  case dusk::tok::comma:
     return OS << ",";
   case dusk::tok::l_brace:
     return OS << "{";
@@ -168,6 +169,10 @@ inline raw_ostream &operator<<(raw_ostream &OS, const dusk::tok &T) {
   // Logical Operators
   case dusk::tok::lnot:
     return OS << "!";
+  case dusk::tok::land:
+    return OS << "&&";
+  case dusk::tok::lor:
+    return OS << "||";
   case dusk::tok::equals:
     return OS << "==";
   case dusk::tok::nequals:
diff --git a/include/dusk/Parse/Parser.h b/include/dusk/Parse/Parser.h
index 8e1399e..762d29b 100644
--- a/include/dusk/Parse/Parser.h
+++ b/include/dusk/Parse/Parser.h
@@ -119,18 +119,8 @@ private:
   // MARK: - Expressions
 
   Expr *parseExpr();
-  Expr *parseAssignExpr();
-  Expr *parseAssignExprRHS(Expr *LHS);
-
-  Expr *parseLogicalExpr();
-  Expr *parseLogicalExprRHS(Expr *LHS);
-
-  Expr *parseArithExpr();
-  Expr *parseArithExprRHS(Expr *LHS);
-
-  Expr *parseMulExpr();
-  Expr *parseMulExprRHS(Expr *LHS);
-
+  Expr *parseBinExprRHS(Expr *LHS, unsigned P);
+  
   Expr *parsePrimaryExpr();
   Expr *parsePrimaryExprRHS(Expr *Dest);
 
diff --git a/include/dusk/Parse/Token.h b/include/dusk/Parse/Token.h
index 1dbb0fc..1583085 100644
--- a/include/dusk/Parse/Token.h
+++ b/include/dusk/Parse/Token.h
@@ -86,17 +86,19 @@ public:
     case tok::assign:
     case tok::elipsis_excl:
     case tok::elipsis_incl:
-    case tok::plus:
-    case tok::minus:
-    case tok::mod:
-    case tok::divide:
-    case tok::multipy:
+    case tok::land:
+    case tok::lor:
     case tok::equals:
     case tok::nequals:
     case tok::less:
     case tok::less_eq:
     case tok::greater:
     case tok::greater_eq:
+    case tok::plus:
+    case tok::minus:
+    case tok::mod:
+    case tok::divide:
+    case tok::multipy:
       return true;
     default:
       return false;
@@ -123,8 +125,6 @@ public:
     case tok::kwFor:
     case tok::kwIn:
     case tok::kwFunc:
-    case tok::kwPrintln:
-    case tok::kwReadln:
     case tok::kwExtern:
         
     case tok::kwVoid:
@@ -143,6 +143,42 @@ public:
   ///
   /// \return \c true, if token is a number literal, \c false otherwise.
   bool isLiteral() const { return is(tok::number_literal); }
+
+  /// Returns binary operator precedence (higher binds tighter); 0 means none.
+  unsigned getPrecedence() const {
+    switch (Kind) {
+    case tok::assign:
+      return 5; // Lowest non-zero level
+
+    case tok::elipsis_excl:
+    case tok::elipsis_incl:
+      return 10; // '..' and '...'
+
+    case tok::land:
+    case tok::lor:
+      return 20; // '&&' and '||' share a single level
+
+    case tok::equals:
+    case tok::nequals:
+    case tok::less:
+    case tok::less_eq:
+    case tok::greater:
+    case tok::greater_eq:
+      return 30; // Equality and relational comparisons
+
+    case tok::plus:
+    case tok::minus:
+      return 40; // Additive operators
+
+    case tok::multipy:
+    case tok::divide:
+    case tok::mod:
+      return 50; // Highest level: multiplicative operators
+
+    default:
+      return 0; // Any other token kind has no precedence
+    }
+  }
 };
 
 } // namespace dusk
diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp
index 3041ff5..39c5882 100644
--- a/lib/Parser/Lexer.cpp
+++ b/lib/Parser/Lexer.cpp
@@ -124,12 +124,13 @@ void Lexer::lexToken() {
     const char *TokStart = CurPtr;
 
     switch (*CurPtr++) {
-    case 0:
+    case 0: {
       // Not ending null character.
       if (CurPtr - 1 != BufferEnd)
         break;
       CurPtr--;
       return formToken(tok::eof, TokStart);
+    }
 
     // Skip whitespace
     case ' ':
@@ -138,20 +139,24 @@ void Lexer::lexToken() {
     case '\r':
       break;
 
-    case '=':
+    case '=': {
       if (*CurPtr == '=') {
         CurPtr++;
         return formToken(tok::equals, TokStart);
       }
       return formToken(tok::assign, TokStart);
+    }
 
-    case '.':
+    case '.': {
       if (*CurPtr == '.')
         return lexElipsis();
       formToken(tok::unknown, TokStart);
       return diagnose();
+    }
 
     case ',':
+      return formToken(tok::comma, TokStart);
+    case ':':
       return formToken(tok::colon, TokStart);
     case ';':
       return formToken(tok::semicolon, TokStart);
@@ -170,7 +175,7 @@ void Lexer::lexToken() {
       return formToken(tok::r_paren, TokStart);
 
     // Divide or comment start
-    case '/':
+    case '/': {
       // Check if start of a comment
       if (*CurPtr == '/') { // `//`
         skipLineComment(true);
@@ -185,14 +190,16 @@ void Lexer::lexToken() {
         break; // Ignore comment
       }
       return formToken(tok::divide, TokStart);
+    }
 
     // Minus or arrow operator
-    case '-':
+    case '-': {
       if (*CurPtr == '>') {
         CurPtr++;
         return formToken(tok::arrow, TokStart);
       }
       return formToken(tok::minus, TokStart);
+    }
         
     // Algebraic operands
     case '+':
@@ -203,29 +210,46 @@ void Lexer::lexToken() {
       return formToken(tok::mod, TokStart);
 
     // Logical operands
-    case '!':
+    case '!': {
       if (*CurPtr == '=') {
         CurPtr++;
         return formToken(tok::nequals, TokStart);
       }
       return formToken(tok::lnot, TokStart);
+    }
+        
+    case '&': {
+      if (*CurPtr == '&') {
+        CurPtr++;
+        return formToken(tok::land, TokStart);
+      }
+      return formToken(tok::unknown, TokStart);
+    }
+        
+    case '|': {
+      if (*CurPtr == '|') {
+        CurPtr++;
+        return formToken(tok::lor, TokStart);
+      }
+      return formToken(tok::unknown, TokStart);
+    }
 
-    case '<':
+    case '<': {
       if (*CurPtr == '=') {
         CurPtr++;
         return formToken(tok::less_eq, TokStart);
       }
       return formToken(tok::less, TokStart);
+    }
 
-    case '>':
+    case '>': {
       if (*CurPtr == '=') {
         CurPtr++;
         return formToken(tok::greater_eq, TokStart);
       }
       return formToken(tok::greater, TokStart);
-
-    case ':':
-      return formToken(tok::unknown, TokStart);
+    }
+      
 
     // Numbers
     case '0': case '1': case '2': case '3': case '4':
@@ -286,8 +310,6 @@ tok Lexer::kindOfIdentifier(StringRef Str) {
       .Case("for", tok::kwFor)
       .Case("in", tok::kwIn)
       .Case("func", tok::kwFunc)
-      .Case("println", tok::kwPrintln)
-      .Case("readln", tok::kwReadln)
       .Case("extern", tok::kwExtern)
       .Case("Void", tok::kwVoid)
       .Case("Int", tok::kwInt)
diff --git a/lib/Parser/ParseExpr.cpp b/lib/Parser/ParseExpr.cpp
index e4bc62c..2dab22b 100644
--- a/lib/Parser/ParseExpr.cpp
+++ b/lib/Parser/ParseExpr.cpp
@@ -12,184 +12,39 @@
 using namespace dusk;
 
 Expr *Parser::parseExpr() {
-  switch (Tok.getKind()) {
-  case tok::identifier:
-  case tok::number_literal:
-  case tok::l_paren:
-  case tok::minus:
-    return parseAssignExpr();
-
-  default:
-    diagnose(Tok.getLoc());
+  auto Primary = parsePrimaryExpr();
+  if (!Primary)
     return nullptr;
-  }
-}
-
-Expr *Parser::parseAssignExpr() {
-  switch (Tok.getKind()) {
-  case tok::identifier:
-  case tok::number_literal:
-  case tok::l_paren:
-  case tok::minus:
-    return parseAssignExprRHS(parseLogicalExpr());
 
-  default:
-    diagnose(consumeToken());
-    return nullptr;
-  }
+  return parseBinExprRHS(Primary, 0);
 }
 
-Expr *Parser::parseAssignExprRHS(Expr *LHS) {
-  switch (Tok.getKind()) {
-  case tok::elipsis_incl:
-  case tok::elipsis_excl:
-  case tok::r_paren:
-  case tok::r_bracket:
-  case tok::l_brace:
-  case tok::colon:
-  case tok::semicolon:
-    return LHS;
+Expr *Parser::parseBinExprRHS(Expr *LHS, unsigned P) {
+  while (true) {
+    auto Prec = Tok.getPrecedence();
 
-  case tok::assign:
+    // If the current operator's precedence is lower than or equal to the
+    // previous one (including invalid 0), return the expression parsed so far.
+    if (Prec <= P)
+      return LHS;
+    auto Op = Tok;
     consumeToken();
-    return makeNode<AssignExpr>((IdentifierExpr *)LHS, parseExpr());
-
-  default:
-      diagnose(Tok.getLoc());
+    
+    // Return nullptr on error
+    auto RHS = parsePrimaryExpr();
+    if (!RHS)
       return nullptr;
-  }
-}
-
-Expr *Parser::parseLogicalExpr() {
-  switch (Tok.getKind()) {
-  case tok::identifier:
-  case tok::number_literal:
-  case tok::l_paren:
-  case tok::minus:
-    return parseLogicalExprRHS(parseArithExpr());
-
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
-  }
-}
-
-Expr *Parser::parseLogicalExprRHS(Expr *LHS) {
-  auto T = Tok;
-  switch (Tok.getKind()) {
-  case tok::assign:
-  case tok::elipsis_incl:
-  case tok::elipsis_excl:
-  case tok::r_paren:
-  case tok::r_bracket:
-  case tok::l_brace:
-  case tok::colon:
-  case tok::semicolon:
-    return LHS;
 
-  case tok::equals:
-  case tok::nequals:
-  case tok::less:
-  case tok::less_eq:
-  case tok::greater:
-  case tok::greater_eq:
-    consumeToken();
-    return makeNode<InfixExpr>(LHS, parseArithExpr(), T);
+    // If the next operator has a higher precedence than the current one, let
+    // it bind RHS first by parsing the tighter sub-expression recursively.
+    if (Prec < Tok.getPrecedence()) {
+      RHS = parseBinExprRHS(RHS, Prec);
+      if (!RHS)
+        return nullptr;
+    }
 
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
-  }
-}
-
-Expr *Parser::parseArithExpr() {
-  switch (Tok.getKind()) {
-  case tok::identifier:
-  case tok::number_literal:
-  case tok::l_paren:
-  case tok::minus:
-    return parseArithExprRHS(parseMulExpr());
-
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
-  }
-}
-
-Expr *Parser::parseArithExprRHS(Expr *LHS) {
-  auto T = Tok;
-  switch (Tok.getKind()) {
-  case tok::assign:
-  case tok::equals:
-  case tok::nequals:
-  case tok::less:
-  case tok::less_eq:
-  case tok::greater:
-  case tok::greater_eq:
-  case tok::elipsis_incl:
-  case tok::elipsis_excl:
-  case tok::r_paren:
-  case tok::r_bracket:
-  case tok::l_brace:
-  case tok::colon:
-  case tok::semicolon:
-    return LHS;
-
-  case tok::plus:
-  case tok::minus:
-    consumeToken();
-    return makeNode<InfixExpr>(LHS, parseExpr(), T);
-
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
-  }
-}
-
-Expr *Parser::parseMulExpr() {
-  switch (Tok.getKind()) {
-  case tok::identifier:
-  case tok::number_literal:
-  case tok::l_paren:
-  case tok::minus:
-    return parseMulExprRHS(parsePrimaryExpr());
-
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
-  }
-}
-
-Expr *Parser::parseMulExprRHS(Expr *LHS) {
-  auto T = Tok;
-  switch (Tok.getKind()) {
-  case tok::plus:
-  case tok::minus:
-  case tok::equals:
-  case tok::nequals:
-  case tok::less:
-  case tok::less_eq:
-  case tok::greater:
-  case tok::greater_eq:
-  case tok::assign:
-  case tok::elipsis_incl:
-  case tok::elipsis_excl:
-  case tok::r_paren:
-  case tok::r_bracket:
-  case tok::l_brace:
-  case tok::colon:
-  case tok::semicolon:
-    return LHS;
-
-  case tok::mod:
-  case tok::multipy:
-  case tok::divide:
-    consumeToken();
-    return makeNode<InfixExpr>(LHS, parseExpr(), T);
-
-  default:
-      diagnose(Tok.getLoc());
-      return nullptr;
+    // Update the current expression.
+    LHS = makeNode<InfixExpr>(LHS, RHS, Op);
   }
 }
 
@@ -209,8 +64,8 @@ Expr *Parser::parsePrimaryExpr() {
     return parseUnaryExpr();
 
   default:
-      diagnose(Tok.getLoc());
-      return nullptr;
+    diagnose(Tok.getLoc(), diag::expected_expression);
+    return nullptr;
   }
 }
 
@@ -224,7 +79,7 @@ Expr *Parser::parsePrimaryExprRHS(Expr *Dest) {
   case tok::r_paren:
   case tok::r_bracket:
   case tok::l_brace:
-  case tok::colon:
+  case tok::comma:
   case tok::semicolon:
     return Dest;
 
@@ -235,8 +90,8 @@ Expr *Parser::parsePrimaryExprRHS(Expr *Dest) {
     return parseSubscriptExpr(Dest);
 
   default:
-      diagnose(Tok.getLoc());
-      return nullptr;
+    diagnose(Tok.getLoc());
+    return nullptr;
   }
 }
 
@@ -271,7 +126,7 @@ Expr *Parser::parseParenExpr() {
   auto E = parseExpr();
   if (!consumeIf(tok::r_paren)) {
     diagnose(Tok.getLoc(), diag::DiagID::expected_r_paren)
-      .fixItAfter(")", Tok.getLoc());
+        .fixItAfter(")", Tok.getLoc());
     return nullptr;
   }
   return makeNode<ParenExpr>(E, L, PreviousLoc);
diff --git a/lib/Parser/ParsePattern.cpp b/lib/Parser/ParsePattern.cpp
index d42e443..bc04b42 100644
--- a/lib/Parser/ParsePattern.cpp
+++ b/lib/Parser/ParsePattern.cpp
@@ -69,7 +69,7 @@ Expr *Parser::parseExprPatternItem() {
     // ExprPatternItem -> epsilon
     return nullptr;
 
-  case tok::colon:
+  case tok::comma:
     // ExprPatternItem -> ',' Expr ExprPatternItem
     consumeToken();
     return parseExpr();
@@ -137,7 +137,7 @@ Decl *Parser::parseVarPatternItem() {
     // VarPattern__ -> epsilon
     return nullptr;
 
-  case tok::colon:
+  case tok::comma:
     // VarPattern__ -> ',' identifier VarPattern__
     consumeToken();
     return parseParamDecl();
-- 
GitLab