From 7a121910f859c2e5242a38626138e97f9e661868 Mon Sep 17 00:00:00 2001 From: Peter Matta <mattapet@fit.cvut.cz> Date: Sat, 21 Apr 2018 12:15:10 +0200 Subject: [PATCH] Added better diagnostics --- include/dusk/AST/ASTNode.h | 2 +- include/dusk/AST/CMakeLists.txt | 1 + include/dusk/AST/Decl.h | 8 +- include/dusk/AST/Diagnostics.h | 174 +++++++++++++++++++---- include/dusk/AST/DiagnosticsParse.h | 83 +++++++++++ include/dusk/AST/Expr.h | 22 +-- include/dusk/AST/Pattern.h | 6 +- include/dusk/AST/Stmt.h | 45 +++--- include/dusk/Basic/CMakeLists.txt | 5 + include/dusk/Basic/SourceManager.h | 41 ++++++ include/dusk/Frontend/Compiler.h | 14 +- include/dusk/Parse/Lexer.h | 210 ++++++++++++++-------------- include/dusk/Parse/Parser.h | 75 +++++----- lib/AST/ASTWalker.cpp | 12 +- lib/AST/Decl.cpp | 11 +- lib/AST/Diagnostics.cpp | 74 ++++++++-- lib/AST/Expr.cpp | 24 +--- lib/AST/Pattern.cpp | 34 ++--- lib/AST/Stmt.cpp | 37 ++--- lib/Frontend/Compiler.cpp | 24 ++-- lib/Parser/Lexer.cpp | 192 +++++++------------------ lib/Parser/ParseDecl.cpp | 39 ++++-- lib/Parser/ParseExpr.cpp | 56 +++++--- lib/Parser/ParsePattern.cpp | 51 ++++--- lib/Parser/ParseStmt.cpp | 79 +++++++---- lib/Parser/Parser.cpp | 132 ++--------------- 26 files changed, 790 insertions(+), 661 deletions(-) create mode 100644 include/dusk/AST/DiagnosticsParse.h create mode 100644 include/dusk/Basic/SourceManager.h diff --git a/include/dusk/AST/ASTNode.h b/include/dusk/AST/ASTNode.h index 3261111..79f5392 100644 --- a/include/dusk/AST/ASTNode.h +++ b/include/dusk/AST/ASTNode.h @@ -39,7 +39,7 @@ public: /// Returns start of the text range represented by the node. llvm::SMLoc getLocStart() const { return getSourceRange().Start; } - /// Return end of the text range represented by the node. + /// Returns end of the text range represented by the node. llvm::SMLoc getLocEnd() const { return getSourceRange().End; } /// Walks AST node. 
diff --git a/include/dusk/AST/CMakeLists.txt b/include/dusk/AST/CMakeLists.txt index 77c531d..1b37153 100644 --- a/include/dusk/AST/CMakeLists.txt +++ b/include/dusk/AST/CMakeLists.txt @@ -5,6 +5,7 @@ set(HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/ASTWalker.h ${CMAKE_CURRENT_SOURCE_DIR}/Decl.h ${CMAKE_CURRENT_SOURCE_DIR}/Diagnostics.h + ${CMAKE_CURRENT_SOURCE_DIR}/DiagnosticsParse.h ${CMAKE_CURRENT_SOURCE_DIR}/Expr.h ${CMAKE_CURRENT_SOURCE_DIR}/Pattern.h ${CMAKE_CURRENT_SOURCE_DIR}/Stmt.h diff --git a/include/dusk/AST/Decl.h b/include/dusk/AST/Decl.h index d7a6684..3baeee0 100644 --- a/include/dusk/AST/Decl.h +++ b/include/dusk/AST/Decl.h @@ -24,6 +24,8 @@ class ConstDecl; class ParamDecl; class FuncDecl; class Expr; +class Stmt; +class Pattern; class VarPattern; class ASTWalker; @@ -111,13 +113,13 @@ class FuncDecl : public Decl { llvm::SMLoc FuncLoc; /// Function arguments - VarPattern *Params; + Pattern *Params; public: - FuncDecl(llvm::StringRef N, llvm::SMLoc NL, llvm::SMLoc FuncL, VarPattern *A); + FuncDecl(llvm::StringRef N, llvm::SMLoc NL, llvm::SMLoc FuncL, Pattern *A); llvm::SMLoc getFuncLoc() const { return FuncLoc; } - VarPattern *getArgs() const { return Params; } + Pattern *getArgs() const { return Params; } virtual llvm::SMRange getSourceRange() const override; }; diff --git a/include/dusk/AST/Diagnostics.h b/include/dusk/AST/Diagnostics.h index 1b515c7..7ec0d8a 100644 --- a/include/dusk/AST/Diagnostics.h +++ b/include/dusk/AST/Diagnostics.h @@ -10,62 +10,174 @@ #ifndef DUSK_DIAGNOSTICS_H #define DUSK_DIAGNOSTICS_H +#include "dusk/AST/DiagnosticsParse.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include <vector> namespace dusk { +class Decl; +class Diagnostic; +class DiagnosticRef; +class DiagnosticEngine; namespace diag { -enum 
struct LexerError { unexpected_symbol, missing_eol_multiline_comment }; +enum struct DiagID : unsigned; -enum struct ParserError { - // General - unexpected_token, - missing_semicolon, - missing_r_paren, +} // namespace diag + +/// Interface, which all diagnostic consumers must implement. +class DiagnosticConsumer { +public: + /// Consumes a single diagnotic. + virtual void consume(llvm::SMDiagnostic &Diagnostic) = 0; +}; + +/// Represents a single diagnostic. +/// +/// This is a container object holding all necessary information to create +/// a \c llvm::SMDiagnostic for a diagnostic consumer. +class Diagnostic { + diag::DiagID ID; + llvm::SmallVector<llvm::SMFixIt, 2> FixIts; + llvm::SMLoc SourceLoc; + + friend class DiagnosticEngine; + + /// Creates a diagnostic. This constructor is only available to \c + /// DiagnosticEngine. + Diagnostic(diag::DiagID ID) : ID(ID) {} + +public: + // Accessors - // Decl - missing_identfier, - missing_assign, + diag::DiagID getID() const { return ID; } + llvm::ArrayRef<llvm::SMFixIt> getFixIts() const { return FixIts; } + llvm::SMLoc getLoc() const { return SourceLoc; } - // Stmt - missing_in_kw, - missing_r_brace, - missing_ellipsis_op, + /// Sets the default location of the diagnostic. + void setLoc(llvm::SMLoc Loc) { SourceLoc = Loc; } - // Pattern - missing_colon, - missing_r_bracket + /// Adds a \c FixIt to the diagnostic. + void addFixIt(llvm::SMFixIt &&FixIt) { FixIts.push_back(std::move(FixIt)); } }; -/// Aggregates diagnostics. -class Diagnostics { - llvm::DenseMap<unsigned, llvm::SMDiagnostic> Diags; +/// Reference interface to a diagnostic, which is currently active within the +/// diagnostic engine. +/// +/// Only a single reference to a diagnostic can be active at a time. Diagnostic +/// can be emitted explicitly by calling a \c flush method or implicitly by +/// going out of scope. +class DiagnosticRef { + friend class DiagnosticEngine; - /// ID of next diagnostic. 
- unsigned NextID = 1; + DiagnosticEngine *Engine; + bool IsActive; + + /// Creates a single reference to an active diagnostic. + /// + /// This constructor is available only ot \c DiagnosticEngine. + DiagnosticRef(DiagnosticEngine &E) : Engine(&E), IsActive(true) {} + + DiagnosticRef(const DiagnosticRef &other) = delete; + DiagnosticRef &operator=(const DiagnosticRef &other) = delete; + DiagnosticRef &operator=(DiagnosticRef &&other) = delete; public: - Diagnostics() = default; - bool isEmpty() const { return Diags.size() == 0; } + /// Create an empty, but active reference. + /// + /// Empty reference is not attached to a diagnostic engine, therefore does not + /// emit any diagnostic, however it acts as a regular diagnostic reference. + DiagnosticRef() : Engine(nullptr), IsActive(true) {} + + /// Transfering refernce objects. + DiagnosticRef(DiagnosticRef &&Other) + : Engine(Other.Engine), IsActive(Other.IsActive) {} + + ~DiagnosticRef() { + if (IsActive) + flush(); + } + + /// Flushes referenced diagnotic. + void flush(); + + /// Adds a fixit. + /// + /// A fixit will reference a provided \c Loc. + DiagnosticRef &fixIt(llvm::StringRef FixIt, llvm::SMLoc Loc); + + /// Adds a fixit before a token located at provided location. + DiagnosticRef &fixItBefore(llvm::StringRef FixIt, llvm::SMLoc Loc); + + /// Adds a fixit after a token located at provided location. + DiagnosticRef &fixItAfter(llvm::StringRef FixIt, llvm::SMLoc Loc); +}; - /// Create a diagnosis. - unsigned diagnose(llvm::SMDiagnostic &&D); +/// This class is acts as a pipeline between custom diagnostic objects and +/// the diagnostic consumers which consume standart \c llvm::SMDiagnostic +/// objects. +class DiagnosticEngine { + llvm::SourceMgr &SourceManager; - /// Consumes a all diagnosis. - void consume(llvm::raw_ostream &OS); + /// Consumers of diagnostics + std::vector<DiagnosticConsumer *> Consumers; - /// Returns a diagnostics with given ID. 
- llvm::SMDiagnostic &operator[](unsigned ID); -}; + /// Currently active diagnostic + llvm::Optional<Diagnostic> ActiveDiag; -} // namespace diag + friend class Diagnostic; + friend class DiagnosticRef; + +public: + DiagnosticEngine(llvm::SourceMgr &SM) : SourceManager(SM), ActiveDiag() {} + + /// Adds another \c DiagnosticConsumer. + void addConsumer(DiagnosticConsumer *C) { Consumers.push_back(C); } + + /// Remove and return all \c DiagnosticConsumers. + std::vector<DiagnosticConsumer *> takeConsumers() { + auto Ret = + std::vector<DiagnosticConsumer *>(Consumers.begin(), Consumers.end()); + Consumers.clear(); + return Ret; + } + + /// \brief Create and emit a single diagnostic. + /// + /// \param SourceLoc Location to which the diagnostic referes in the source + /// code. + /// + /// \param ID The diagnostic ID. + /// + /// \return A \c DiagnosticRef object, which is an interface referencing + /// created diagnostic. User can add additional information via this + /// diagnostic reference. + DiagnosticRef diagnose(llvm::SMLoc SourceLoc, diag::DiagID ID) { + assert(!ActiveDiag && "Cannot have two active diagnostics at one."); + + ActiveDiag = Diagnostic(ID); + ActiveDiag->setLoc(SourceLoc); + return DiagnosticRef(*this); + } + +private: + /// Return active diagnostic. + Diagnostic &getActiveDiag() { return *ActiveDiag; } + + /// Flushes active diagnostic + void flushActiveDiag(); + + /// Emits given diagnostic to all registered consumers. + void emitDiagnostic(const Diagnostic &Diag); +}; } // namespace dusk diff --git a/include/dusk/AST/DiagnosticsParse.h b/include/dusk/AST/DiagnosticsParse.h new file mode 100644 index 0000000..4f40770 --- /dev/null +++ b/include/dusk/AST/DiagnosticsParse.h @@ -0,0 +1,83 @@ +//===--- DiagnosticsParse.h - Parser and Lexer diagnostics ------*- C++ -*-===// +// +// dusk-lang +// This source file is part of a dusk-lang project, which is a semestral +// assignement for BI-PJP course at Czech Technical University in Prague. 
+// The software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. +// +//===----------------------------------------------------------------------===// + +#ifndef DUSK_DIAGNOSTICS_PARSE_H +#define DUSK_DIAGNOSTICS_PARSE_H + +#include "llvm/ADT/StringRef.h" + +namespace dusk { + +namespace diag { + +enum struct DiagID : unsigned { + // Lexer diagnostics + lex_unexpected_symbol, + lex_unterminated_multiline_comment, + + // Parse diagnostics + unexpected_token, + expected_semicolon, + + expected_identifier, + expected_variable_initialization, + expected_l_paren, + expected_l_brace, + + expected_in_kw, + expected_ellipsis, + expected_colon_separator, + expected_r_paren, + expected_r_bracket, + expected_r_brace +}; + +static llvm::StringRef getTextForID(DiagID ID) { + switch (ID) { + // Lexer diagnostics + case DiagID::lex_unexpected_symbol: + return "Unexpected symbol"; + case DiagID::lex_unterminated_multiline_comment: + return "Unterminated '/*' comment"; + + // Parse diagnostics + case DiagID::unexpected_token: + return "Unexpected token"; + case DiagID::expected_semicolon: + return "Expected ';'"; + + case DiagID::expected_identifier: + return "Expected identifier"; + case DiagID::expected_variable_initialization: + return "Expected '=' initialization."; + case DiagID::expected_l_paren: + return "Expected '('"; + case DiagID::expected_l_brace: + return "Expected '{'"; + + case DiagID::expected_in_kw: + return "Expected 'in' keyword"; + case DiagID::expected_ellipsis: + return "Expected '..' 
or '...'"; + case DiagID::expected_colon_separator: + return "Expected ',' after a list item"; + case DiagID::expected_r_paren: + return "Expected ')' at the end of param list"; + case DiagID::expected_r_bracket: + return "Expected ']' at the end of subcript"; + case DiagID::expected_r_brace: + return "Expected '}' at the end of block"; + } +} + +} // namespace diag + +} // namespace dusk + +#endif /* DUSK_DIAGNOSTICS_PARSE_H */ diff --git a/include/dusk/AST/Expr.h b/include/dusk/AST/Expr.h index 00221df..7cbcb1a 100644 --- a/include/dusk/AST/Expr.h +++ b/include/dusk/AST/Expr.h @@ -25,6 +25,8 @@ class CallExpr; class SubscriptExpr; class BlockStmt; class ExprPattern; +class Stmt; +class Pattern; class SubscriptPattern; class ASTWalker; @@ -144,32 +146,32 @@ public: class CallExpr : public Expr { /// Function identifier - IdentifierExpr *Callee; + Expr *Callee; /// Function arguments - ExprPattern *Args; + Pattern *Args; public: - CallExpr(IdentifierExpr *C, ExprPattern *A); + CallExpr(Expr *C, Pattern *A); - IdentifierExpr *getCalle() const { return Callee; } - ExprPattern *getArgs() { return Args; } + Expr *getCalle() const { return Callee; } + Pattern *getArgs() { return Args; } virtual llvm::SMRange getSourceRange() const override; }; class SubscriptExpr : public Expr { /// Base identifier - IdentifierExpr *Base; + Expr *Base; /// Subscription pattern - SubscriptPattern *Subscript; + Pattern *Subscript; public: - SubscriptExpr(IdentifierExpr *B, SubscriptPattern *S); + SubscriptExpr(Expr *B, Pattern *S); - IdentifierExpr *getBase() { return Base; } - SubscriptPattern *getSubscript() { return Subscript; } + Expr *getBase() { return Base; } + Pattern *getSubscript() { return Subscript; } virtual llvm::SMRange getSourceRange() const override; }; diff --git a/include/dusk/AST/Pattern.h b/include/dusk/AST/Pattern.h index 1ccb18e..e5b86a3 100644 --- a/include/dusk/AST/Pattern.h +++ b/include/dusk/AST/Pattern.h @@ -65,7 +65,7 @@ public: /// declaration. 
class VarPattern : public Pattern { /// Variables of the expression - llvm::SmallVector<ParamDecl *, 128> Vars; + llvm::SmallVector<Decl *, 128> Vars; /// Location of left parenthesis llvm::SMLoc LPar; @@ -74,10 +74,10 @@ class VarPattern : public Pattern { llvm::SMLoc RPar; public: - VarPattern(llvm::SmallVector<ParamDecl *, 128> &&V, llvm::SMLoc L, + VarPattern(llvm::SmallVector<Decl *, 128> &&V, llvm::SMLoc L, llvm::SMLoc R); - llvm::ArrayRef<ParamDecl *> getVars() const { return Vars; } + llvm::ArrayRef<Decl *> getVars() const { return Vars; } llvm::SMLoc getLPar() const { return LPar; } llvm::SMLoc getRPar() const { return RPar; } diff --git a/include/dusk/AST/Stmt.h b/include/dusk/AST/Stmt.h index 7ef135a..0e04d89 100644 --- a/include/dusk/AST/Stmt.h +++ b/include/dusk/AST/Stmt.h @@ -19,10 +19,9 @@ #include <vector> namespace dusk { +class Decl; class Expr; -class BlockStmt; -class ParamDecl; -class FuncDecl; +class Stmt; class IdentifierExpr; class ASTWalker; @@ -110,14 +109,14 @@ public: /// Represents a Function statement a.k.a declaration and definition. class FuncStmt : public Stmt { - FuncDecl *Prototype; - BlockStmt *Body; + Decl *Prototype; + Stmt *Body; public: - FuncStmt(FuncDecl *FP, BlockStmt *B); + FuncStmt(Decl *FP, Stmt *B); - FuncDecl *getPrototype() { return Prototype; } - BlockStmt *getBody() { return Body; } + Decl *getPrototype() { return Prototype; } + Stmt *getBody() { return Body; } virtual llvm::SMRange getSourceRange() const override; }; @@ -128,20 +127,20 @@ class ForStmt : public Stmt { llvm::SMLoc ForLoc; /// Iterabling variable - IdentifierExpr *Var; + Expr *Var; /// For-in range statement - RangeStmt *Range; + Stmt *Range; /// For's block. 
- BlockStmt *Body; + Stmt *Body; public: - ForStmt(llvm::SMLoc FL, IdentifierExpr *V, RangeStmt *R, BlockStmt *C); + ForStmt(llvm::SMLoc FL, Expr *V, Stmt *R, Stmt *C); - IdentifierExpr *getVar() const { return Var; } - RangeStmt *getRange() const { return Range; } - BlockStmt *getBody() const { return Body; } + Expr *getVar() const { return Var; } + Stmt *getRange() const { return Range; } + Stmt *getBody() const { return Body; } virtual llvm::SMRange getSourceRange() const override; }; @@ -152,13 +151,13 @@ class WhileStmt : public Stmt { llvm::SMLoc WhileLoc; Expr *Cond; - BlockStmt *Body; + Stmt *Body; public: - WhileStmt(llvm::SMLoc WL, Expr *C, BlockStmt *B); + WhileStmt(llvm::SMLoc WL, Expr *C, Stmt *B); Expr *getCond() const { return Cond; } - BlockStmt *getBody() const { return Body; } + Stmt *getBody() const { return Body; } virtual llvm::SMRange getSourceRange() const override; }; @@ -169,17 +168,17 @@ class IfStmt : public Stmt { llvm::SMLoc IfLoc; Expr *Cond; - BlockStmt *Then; + Stmt *Then; /// An else code block, which may be \c nullptr. 
- BlockStmt *Else; + Stmt *Else; public: - IfStmt(llvm::SMLoc IL, Expr *C, BlockStmt *T, BlockStmt *E = nullptr); + IfStmt(llvm::SMLoc IL, Expr *C, Stmt *T, Stmt *E = nullptr); Expr *getCond() const { return Cond; } - BlockStmt *getThen() const { return Then; } - BlockStmt *getElse() const { return Else; } + Stmt *getThen() const { return Then; } + Stmt *getElse() const { return Else; } bool hasElseBlock() const { return Else != nullptr; } virtual llvm::SMRange getSourceRange() const override; diff --git a/include/dusk/Basic/CMakeLists.txt b/include/dusk/Basic/CMakeLists.txt index e69de29..6798479 100644 --- a/include/dusk/Basic/CMakeLists.txt +++ b/include/dusk/Basic/CMakeLists.txt @@ -0,0 +1,5 @@ +set(HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/SourceManager.h + ${HEADERS} + PARENT_SCOPE +) diff --git a/include/dusk/Basic/SourceManager.h b/include/dusk/Basic/SourceManager.h new file mode 100644 index 0000000..b4c395d --- /dev/null +++ b/include/dusk/Basic/SourceManager.h @@ -0,0 +1,41 @@ +//===--- SourceManager.h - Convenience functions ----------------*- C++ -*-===// +// +// dusk-lang +// This source file is part of a dusk-lang project, which is a semestral +// assignement for BI-PJP course at Czech Technical University in Prague. +// The software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. +// +//===----------------------------------------------------------------------===// + +#ifndef DUSK_SOURCE_MANAGER_H +#define DUSK_SOURCE_MANAGER_H + +#include "llvm/Support/SourceMgr.h" +#include <cassert> + +namespace dusk { + +/// \brief Returns an ID of the buffer containing provided \c Loc. +/// +/// \param SM Source manager providing buffers for context. +/// +/// \param Loc Location refering to a buffer. +/// +/// \return ID of buffer containing provided location. 
+static unsigned getBufferForLoc(const llvm::SourceMgr &SM, llvm::SMLoc Loc) { + // Validate location + assert(Loc.isValid()); + + auto ptr = Loc.getPointer(); + for (unsigned i = 1; i <= SM.getNumBuffers(); i++) { + auto Buff = SM.getMemoryBuffer(i); + if (ptr >= Buff->getBufferStart() && ptr <= Buff->getBufferEnd()) + return i; + } + llvm_unreachable("Location in non-existing buffer."); +} + +} // namespace dusk + +#endif /* DUSK_SOURCE_MANAGER_H */ + diff --git a/include/dusk/Frontend/Compiler.h b/include/dusk/Frontend/Compiler.h index ebf1dcb..7b1ec48 100644 --- a/include/dusk/Frontend/Compiler.h +++ b/include/dusk/Frontend/Compiler.h @@ -10,29 +10,35 @@ #ifndef DUSK_COMPILER_H #define DUSK_COMPILER_H +#include "dusk/AST/Diagnostics.h" #include "dusk/AST/Diagnostics.h" #include "dusk/Frontend/InputFile.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_os_ostream.h" #include <vector> #include <memory> namespace dusk { class ParserResult; -class Compiler { +class Compiler : public DiagnosticConsumer { llvm::SourceMgr SourceManager; - diag::Diagnostics Diag; + + DiagnosticEngine Engine; + std::vector<std::unique_ptr<InputFile>> InputFiles; + /// Diagnostic output stream + llvm::raw_os_ostream OS; + public: Compiler(std::vector<llvm::StringRef> Filenames); void Compile(); void Lex(); -private: - void consumeDiagnostics(); + void consume(llvm::SMDiagnostic &Diag); }; } // namesapce dusk diff --git a/include/dusk/Parse/Lexer.h b/include/dusk/Parse/Lexer.h index 0c5cec2..8b2dc6f 100644 --- a/include/dusk/Parse/Lexer.h +++ b/include/dusk/Parse/Lexer.h @@ -23,130 +23,124 @@ namespace dusk { /// Dusk language lexer. class Lexer { - const llvm::SourceMgr &SourceManager; + const llvm::SourceMgr &SourceManager; - diag::Diagnostics *Diag; + DiagnosticEngine *Engine; - /// Pointer to the start of the buffer. - const char *BufferStart; + /// Pointer to the start of the buffer. 
+ const char *BufferStart; - /// Pointer to one past character of the buffer. This character will always - /// be a NUL terminator, since the buffer is NUL-terminated. - const char *BufferEnd; + /// Pointer to one past character of the buffer. This character will always + /// be a NUL terminator, since the buffer is NUL-terminated. + const char *BufferEnd; - /// Pointer to the currently consumed character - const char *CurPtr; + /// Pointer to the currently consumed character + const char *CurPtr; - /// Next lexed token - Token NextToken; + /// Next lexed token + Token NextToken; - // Option to keep comments as tokens - bool KeepComments; + // Option to keep comments as tokens + bool KeepComments; private: - Lexer(const Lexer &other) = delete; - void operator=(const Lexer &other) = delete; + Lexer(const Lexer &other) = delete; + void operator=(const Lexer &other) = delete; public: - // MARK: - Public interface - - /// Create default lexer that scans the whole buffer. - Lexer(const llvm::SourceMgr &SM, - unsigned BufferID, - diag::Diagnostics *Diag = nullptr, - bool KeepComments = false); - - /// Returns a next token needed to parse a LL(1) grammar. - const Token &peekNextToken() const { return NextToken; } - - /// \brief Lex a token. - /// - /// \param Ret - A reference to a token, which should be set to the result - /// next lexed token. - void lex(Token &Ret); - - /// Sets the state (position) of the lexer. The position can be either - /// a location before or after the current location. - void setState(llvm::SMLoc Loc) { - assert(Loc.isValid()); - CurPtr = Loc.getPointer(); - lexToken(); - } - - // MARK: - Diagnostics methods. - - void diagnose(diag::LexerError E = diag::LexerError::unexpected_symbol); - void diagnose(Token T, diag::LexerError E); - - // MARK: - Static interface - - /// \brief Determins if the given string is a valid non-keyword identifier. - /// In case if match with a keyword, the appropriate token type - /// is returned. 
- /// - /// \return \c tok::identifier, if the string does not match any keyword, - /// otherwise approriate \c tok::... token type. - static tok kindOfIdentifier(llvm::StringRef Str); - - /// Returns a location for given \c Ptr. - static llvm::SMLoc getSourceLoc(const char *Ptr) { - return llvm::SMLoc::getFromPointer(Ptr); - } - - /// \brief Retrieve a Token, which starts at location \c Loc. - /// - /// \param SM A \c SourceMgr instance, which provides the buffer context. - /// - /// \param Loc The source location at which the token starts. The location - /// must be from provided source manager. - static Token - getTokenAtLocation(const llvm::SourceMgr &SM, llvm::SMLoc Loc); - - /// \brief Retrieve a location that points one character pass the end - /// of the Token referenced by the \c Loc. - /// - /// \param SM A \c SourceMgr instance, which provides the buffer context. - /// - /// \param Loc Location of the beginning of the token. - static llvm::SMLoc - getLocForEndOfToken(const llvm::SourceMgr &SM, llvm::SMLoc Loc); - - /// Retrieve a location for the start of the line referenced by the \c Loc. - static llvm::SMLoc - getLocForStartOfLine(const llvm::SourceMgr &SM, llvm::SMLoc Loc); - - /// Retrieve a location for end of line (start of next line) referenced - /// by the \c Loc. - static llvm::SMLoc - getLocForEndOfLine(const llvm::SourceMgr &SM, llvm::SMLoc Loc); - - /// Retrive a line in the source code referenced by the \c Loc. - static llvm::StringRef - getLineForLoc(const llvm::SourceMgr &SM, llvm::SMLoc Loc); - + // MARK: - Public interface + + /// Create default lexer that scans the whole buffer. + Lexer(const llvm::SourceMgr &SM, unsigned BufferID, + DiagnosticEngine *Engine = nullptr, bool KeepComments = false); + + /// Returns a next token needed to parse a LL(1) grammar. + const Token &peekNextToken() const { return NextToken; } + + /// \brief Lex a token. 
+ /// + /// \param Ret - A reference to a token, which should be set to the result + /// next lexed token. + void lex(Token &Ret); + + /// Sets the state (position) of the lexer. The position can be either + /// a location before or after the current location. + void setState(llvm::SMLoc Loc) { + assert(Loc.isValid()); + CurPtr = Loc.getPointer(); + lexToken(); + } + + // MARK: - Diagnostics methods. + + void diagnose(diag::DiagID ID = diag::DiagID::lex_unexpected_symbol); + void diagnose(Token T, diag::DiagID ID); + + // MARK: - Static interface + + /// \brief Determins if the given string is a valid non-keyword identifier. + /// In case if match with a keyword, the appropriate token type + /// is returned. + /// + /// \return \c tok::identifier, if the string does not match any keyword, + /// otherwise approriate \c tok::... token type. + static tok kindOfIdentifier(llvm::StringRef Str); + + /// Returns a location for given \c Ptr. + static llvm::SMLoc getSourceLoc(const char *Ptr) { + return llvm::SMLoc::getFromPointer(Ptr); + } + + /// \brief Retrieve a Token, which starts at location \c Loc. + /// + /// \param SM A \c SourceMgr instance, which provides the buffer context. + /// + /// \param Loc The source location at which the token starts. The location + /// must be from provided source manager. + static Token getTokenAtLocation(const llvm::SourceMgr &SM, llvm::SMLoc Loc); + + /// \brief Retrieve a location that points one character pass the end + /// of the Token referenced by the \c Loc. + /// + /// \param SM A \c SourceMgr instance, which provides the buffer context. + /// + /// \param Loc Location of the beginning of the token. + static llvm::SMLoc getLocForEndOfToken(const llvm::SourceMgr &SM, + llvm::SMLoc Loc); + + /// Retrieve a location for the start of the line referenced by the \c Loc. 
+ static llvm::SMLoc getLocForStartOfLine(const llvm::SourceMgr &SM, + llvm::SMLoc Loc); + + /// Retrieve a location for end of line (start of next line) referenced + /// by the \c Loc. + static llvm::SMLoc getLocForEndOfLine(const llvm::SourceMgr &SM, + llvm::SMLoc Loc); + + /// Retrive a line in the source code referenced by the \c Loc. + static llvm::StringRef getLineForLoc(const llvm::SourceMgr &SM, + llvm::SMLoc Loc); private: // MARK: - Private interface + void skipToEndOfLine(bool ConsumeNewline); + void skipLineComment(bool ConsumeNewLine); + void skipMultilineComment(); - void skipToEndOfLine(bool ConsumeNewline); - void skipLineComment(bool ConsumeNewLine); - void skipMultilineComment(); - - /// Updates \c NextToken property with the new token. - void formToken(tok Kind, const char *TokStart); - + /// Updates \c NextToken property with the new token. + void formToken(tok Kind, const char *TokStart); - /// Main lexing loop method. - void lexToken(); + /// Main lexing loop method. + void lexToken(); - // MARK: - Helper lexing methods + // MARK: - Helper lexing methods - void lexElipsis(); - void lexIdentifier(); - void lexNumber(); - void lexHexNumber(); - void lexBinNumber(); - void lexOctNumber(); - void lexDecNumber(); + void lexElipsis(); + void lexIdentifier(); + void lexNumber(); + void lexHexNumber(); + void lexBinNumber(); + void lexOctNumber(); + void lexDecNumber(); }; } // namespace dusk diff --git a/include/dusk/Parse/Parser.h b/include/dusk/Parse/Parser.h index 18e8777..bd008e6 100644 --- a/include/dusk/Parse/Parser.h +++ b/include/dusk/Parse/Parser.h @@ -6,6 +6,10 @@ // The software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. // //===----------------------------------------------------------------------===// +// +// This file defines a parser interface. 
+// +//===----------------------------------------------------------------------===// #ifndef DUSK_PARSER_H #define DUSK_PARSER_H @@ -22,28 +26,14 @@ #include "dusk/Frontend/InputFile.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" -#include <exception> namespace dusk { -/// Represents a single parse exception -/// -/// Each exception holds it's diagnostic type. -class ParseError : public std::runtime_error { - diag::ParserError Kind = diag::ParserError::unexpected_token; - -public: - ParseError() = default; - ParseError(diag::ParserError K); - - diag::ParserError getKind() const { return Kind; } -}; - /// The main class used for parsing a dusk-lang (.dusk) source file. class Parser { llvm::SourceMgr &SourceManager; InputFile &SourceFile; - diag::Diagnostics &Diag; + DiagnosticEngine &Engine; Lexer *L; /// Parsing result. @@ -56,7 +46,7 @@ class Parser { llvm::SMLoc PreviousLoc; public: - Parser(llvm::SourceMgr &SM, InputFile &SF, diag::Diagnostics &Diag, + Parser(llvm::SourceMgr &SM, InputFile &SF, DiagnosticEngine &Engine, unsigned BufferID); ~Parser(); @@ -84,9 +74,8 @@ public: /// Force immediate termination of parsing. void terminateParsing() { Tok.setKind(tok::eof); } - unsigned diagnose(); - unsigned diagnose(diag::ParserError E); - unsigned diagnoseUnexpectedToken(); + DiagnosticRef diagnose(llvm::SMLoc Loc, + diag::DiagID ID = diag::DiagID::unexpected_token); /// Main parsing method. 
/// @@ -105,19 +94,19 @@ private: // MARK: - Declarations - VarDecl *parseVarDecl(); + Decl *parseVarDecl(); - ConstDecl *parseConstDecl(); + Decl *parseConstDecl(); Expr *parseDeclValue(); - FuncDecl *parseFuncDecl(); + Decl *parseFuncDecl(); - BlockStmt *parseBlock(); + Stmt *parseBlock(); ASTNode *parseBlockBody(); - ParamDecl *parseParamDecl(); + Decl *parseParamDecl(); // MARK: - Expressions @@ -135,15 +124,15 @@ private: Expr *parseMulExprRHS(Expr *LHS); Expr *parsePrimaryExpr(); - Expr *parsePrimaryExprRHS(IdentifierExpr *Dest); + Expr *parsePrimaryExprRHS(Expr *Dest); - IdentifierExpr *parseIdentifierExpr(); - CallExpr *parseCallExpr(IdentifierExpr *Dest); - SubscriptExpr *parseSubscriptExpr(IdentifierExpr *Dest); + Expr *parseIdentifierExpr(); + Expr *parseCallExpr(Expr *Dest); + Expr *parseSubscriptExpr(Expr *Dest); Expr *parseParenExpr(); - NumberLiteralExpr *parseNumberLiteralExpr(); - PrefixExpr *parseUnaryExpr(); + Expr *parseNumberLiteralExpr(); + Expr *parseUnaryExpr(); // MARK: - Statements @@ -151,30 +140,30 @@ private: Expr *parseExprStmt(); - BreakStmt *parseBreakStmt(); - ReturnStmt *parseReturnStmt(); + Stmt *parseBreakStmt(); + Stmt *parseReturnStmt(); - FuncStmt *parseFuncStmt(); + Stmt *parseFuncStmt(); - ForStmt *parseForStmt(); - RangeStmt *parseRangeStmt(); + Stmt *parseForStmt(); + Stmt *parseRangeStmt(); - WhileStmt *parseWhileStmt(); + Stmt *parseWhileStmt(); - IfStmt *parseIfStmt(); - BlockStmt *parseElseStmt(); + Stmt *parseIfStmt(); + Stmt *parseElseStmt(); // MARK: - Patterns - ExprPattern *parseExprPattern(); + Pattern *parseExprPattern(); llvm::SmallVector<Expr *, 128> parseExprPatternBody(); Expr *parseExprPatternItem(); - VarPattern *parseVarPattern(); - llvm::SmallVector<ParamDecl *, 128> parseVarPatternBody(); - ParamDecl *parseVarPatternItem(); + Pattern *parseVarPattern(); + llvm::SmallVector<Decl *, 128> parseVarPatternBody(); + Decl *parseVarPatternItem(); - SubscriptPattern *parseSubscriptPattern(); + Pattern 
*parseSubscriptPattern(); /// Creates and adds a new instance of \c ASTNode to the parser result /// and returns a pointer to it. diff --git a/lib/AST/ASTWalker.cpp b/lib/AST/ASTWalker.cpp index cf89b35..d406656 100644 --- a/lib/AST/ASTWalker.cpp +++ b/lib/AST/ASTWalker.cpp @@ -62,9 +62,8 @@ public: if (!Walker.preWalk(D)) return true; - for (auto V : D->getArgs()->getVars()) - if (!super::visit(V)) - return false; + if (!super::visit(D->getArgs())) + return false; return Walker.postWalk(D); } @@ -162,9 +161,8 @@ public: if (!Walker.preWalk(E)) return true; - for (auto V : E->getArgs()->getValues()) - if (!super::visit(V)) - return false; + if (!super::visit(E->getArgs())) + return false; return Walker.postWalk(E); } @@ -175,7 +173,7 @@ public: if (!super::visit(E->getBase())) return false; - if (!super::visit(E->getSubscript()->getValue())) + if (!super::visit(E->getSubscript())) return false; return Walker.postWalk(E); } diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 5bb1d0b..68f3bb8 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -38,9 +38,7 @@ llvm::SMRange Decl::getSourceRange() const { // MARK: - ValDecl class ValDecl::ValDecl(DeclKind K, llvm::StringRef N, llvm::SMLoc NL, Expr *E) - : Decl(K, N, NL), Value(E) { - assert(Value && "Invalid `ValDecl` declaraion."); -} + : Decl(K, N, NL), Value(E) {} // MARK: - VarDecl class @@ -69,10 +67,8 @@ ParamDecl::ParamDecl(llvm::StringRef N, llvm::SMLoc NL) // MARK: - FuncDecl class FuncDecl::FuncDecl(llvm::StringRef N, llvm::SMLoc NL, llvm::SMLoc FuncL, - VarPattern *A) - : Decl(DeclKind::Func, N, NL), FuncLoc(FuncL), Params(A) { - assert(Params && "Invalid `FuncDecl` declaration."); -} + Pattern *A) + : Decl(DeclKind::Func, N, NL), FuncLoc(FuncL), Params(A) {} llvm::SMRange FuncDecl::getSourceRange() const { return {FuncLoc, Params->getLocEnd()}; @@ -89,3 +85,4 @@ llvm::SMRange ModuleDecl::getSourceRange() const { } return llvm::SMRange(); } + diff --git a/lib/AST/Diagnostics.cpp 
b/lib/AST/Diagnostics.cpp index 849d84d..02430fe 100644 --- a/lib/AST/Diagnostics.cpp +++ b/lib/AST/Diagnostics.cpp @@ -8,22 +8,74 @@ //===----------------------------------------------------------------------===// #include "dusk/AST/Diagnostics.h" +#include "dusk/Parse/Lexer.h" +#include "dusk/Basic/SourceManager.h" +#include <iostream> using namespace dusk; -using namespace diag; -unsigned Diagnostics::diagnose(llvm::SMDiagnostic &&D) { - auto ID = NextID++; - Diags.insert({ID, std::move(D)}); - return ID; +// MARK: - Diagnostic reference + +void DiagnosticRef::flush() { + // Ensure the current diagnostic is active. + if (!IsActive) + return; + + IsActive = false; + if (Engine) + Engine->flushActiveDiag(); +} + +DiagnosticRef &DiagnosticRef::fixIt(llvm::StringRef FixIt, llvm::SMLoc Loc) { + // Verify that the referenced diagnostic is still active. + assert(IsActive && "Cannot modify inactive diagnostic."); + // Add location iff there is an engine and the location is valid. + if (Engine && Loc.isValid()) + Engine->getActiveDiag().addFixIt({Loc, FixIt}); + return *this; +} + +DiagnosticRef &DiagnosticRef::fixItBefore(llvm::StringRef FixIt, + llvm::SMLoc Loc) { + // Add location iff there is an engine and the location is valid. + if (Engine && Loc.isValid()) { + auto T = Lexer::getTokenAtLocation(Engine->SourceManager, Loc); + auto L = T.getLoc(); + return fixIt(FixIt, L); + } + return *this; } -void Diagnostics::consume(llvm::raw_ostream &OS) { - for (unsigned i = 1; i <= Diags.size(); i++) - Diags[i].print("duskc", OS); +DiagnosticRef &DiagnosticRef::fixItAfter(llvm::StringRef FixIt, + llvm::SMLoc Loc) { + // Add location iff there is an engine and the location is valid. 
+ if (Engine && Loc.isValid()) { + auto L = Lexer::getLocForEndOfToken(Engine->SourceManager, Loc); + return fixIt(FixIt, L); + } + return *this; } -llvm::SMDiagnostic &Diagnostics::operator[](unsigned ID) { - assert(ID > 0 && ID < NextID && "Invalid diagnostic ID."); - return Diags[ID]; +// MARK: - Diagnostic engine + +void DiagnosticEngine::flushActiveDiag() { + assert(ActiveDiag && "No active diagnostic to flush."); + emitDiagnostic(*ActiveDiag); + ActiveDiag.reset(); +} + +void DiagnosticEngine::emitDiagnostic(const Diagnostic &Diag) { + auto Loc = Diag.getLoc(); + auto ID = getBufferForLoc(SourceManager, Loc); + auto FN = SourceManager.getMemoryBuffer(ID)->getBufferIdentifier(); + auto[L, C] = SourceManager.getLineAndColumn(Loc); + auto K = llvm::SourceMgr::DiagKind::DK_Error; + auto Line = Lexer::getLineForLoc(SourceManager, Loc); + auto Msg = diag::getTextForID(Diag.getID()); + auto D = llvm::SMDiagnostic(SourceManager, Loc, FN, L, C, K, Msg, Line, + llvm::None, Diag.getFixIts()); + + for (auto C : Consumers) { + C->consume(D); + } } diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index 09cba08..74c4f5f 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -34,18 +34,14 @@ llvm::SMRange IdentifierExpr::getSourceRange() const { // MARK: - Parenthesis expression ParenExpr::ParenExpr(Expr *E, llvm::SMLoc L, llvm::SMLoc R) - : Expr(ExprKind::Paren), Expression(E), LPar(L), RPar(R) { - assert(Expression && "Invalid `paren` expression"); -} + : Expr(ExprKind::Paren), Expression(E), LPar(L), RPar(R) {} llvm::SMRange ParenExpr::getSourceRange() const { return {LPar, RPar}; } // MARK: - Infix expression InfixExpr::InfixExpr(Expr *L, Expr *R, Token O) - : Expr(ExprKind::Infix), LHS(L), RHS(R), Op(O) { - assert(LHS && RHS && "Invalid `infix` expresssion."); -} + : Expr(ExprKind::Infix), LHS(L), RHS(R), Op(O) {} llvm::SMRange InfixExpr::getSourceRange() const { return {LHS->getLocStart(), RHS->getLocEnd()}; @@ -54,9 +50,7 @@ llvm::SMRange InfixExpr::getSourceRange() 
const { // MARK: - Infix expression AssignExpr::AssignExpr(Expr *L, Expr *R) - : Expr(ExprKind::Assign), Dest(L), Source(R) { - assert(Dest && Source && "Invalid `assign` expression."); -} + : Expr(ExprKind::Assign), Dest(L), Source(R) {} llvm::SMRange AssignExpr::getSourceRange() const { return {Dest->getLocStart(), Source->getLocEnd()}; @@ -65,9 +59,7 @@ llvm::SMRange AssignExpr::getSourceRange() const { // MARK: - Unary expresssion PrefixExpr::PrefixExpr(Expr *D, Token O) - : Expr(ExprKind::Prefix), Dest(D), Op(O) { - assert(Dest && "Invalid `unary` expression."); -} + : Expr(ExprKind::Prefix), Dest(D), Op(O) {} llvm::SMRange PrefixExpr::getSourceRange() const { return {Op.getLoc(), Dest->getLocEnd()}; @@ -75,10 +67,8 @@ llvm::SMRange PrefixExpr::getSourceRange() const { // MARK: - FuncCall expression -CallExpr::CallExpr(IdentifierExpr *C, ExprPattern *A) - : Expr(ExprKind::Call), Callee(C), Args(A) { - assert(C && Args && "Invalid `FuncCall` expression."); -} +CallExpr::CallExpr(Expr *C, Pattern *A) + : Expr(ExprKind::Call), Callee(C), Args(A) {} llvm::SMRange CallExpr::getSourceRange() const { return {Callee->getLocStart(), Args->getLocEnd()}; @@ -86,7 +76,7 @@ llvm::SMRange CallExpr::getSourceRange() const { // MARK: - Subscript expression -SubscriptExpr::SubscriptExpr(IdentifierExpr *B, SubscriptPattern *S) +SubscriptExpr::SubscriptExpr(Expr *B, Pattern *S) : Expr(ExprKind::Subscript), Base(B), Subscript(S) {} llvm::SMRange SubscriptExpr::getSourceRange() const { diff --git a/lib/AST/Pattern.cpp b/lib/AST/Pattern.cpp index 2446ca9..d924663 100644 --- a/lib/AST/Pattern.cpp +++ b/lib/AST/Pattern.cpp @@ -16,41 +16,29 @@ using namespace dusk; // MARK: - Pattern -Pattern::Pattern(PatternKind K) -: Kind(K) -{} +Pattern::Pattern(PatternKind K) : Kind(K) {} // MARK: - Expression pattern -ExprPattern::ExprPattern(llvm::SmallVector<Expr *, 128> &&V, - llvm::SMLoc L, llvm::SMLoc R) -: Pattern(PatternKind::Expr), Values(V), LPar(L), RPar(R) -{} - -llvm::SMRange 
ExprPattern::getSourceRange() const { - return { LPar, RPar }; -} +ExprPattern::ExprPattern(llvm::SmallVector<Expr *, 128> &&V, llvm::SMLoc L, + llvm::SMLoc R) + : Pattern(PatternKind::Expr), Values(V), LPar(L), RPar(R) {} +llvm::SMRange ExprPattern::getSourceRange() const { return {LPar, RPar}; } // MARK: - Variable pattern -VarPattern::VarPattern(llvm::SmallVector<ParamDecl *, 128> &&V, - llvm::SMLoc L, llvm::SMLoc R) -: Pattern(PatternKind::Variable), Vars(V), LPar(L), RPar(R) -{} +VarPattern::VarPattern(llvm::SmallVector<Decl *, 128> &&V, llvm::SMLoc L, + llvm::SMLoc R) + : Pattern(PatternKind::Variable), Vars(V), LPar(L), RPar(R) {} -llvm::SMRange VarPattern::getSourceRange() const { - return { LPar, RPar }; -} +llvm::SMRange VarPattern::getSourceRange() const { return {LPar, RPar}; } // MARK: - Subscript pattern SubscriptPattern::SubscriptPattern(Expr *V, llvm::SMLoc L, llvm::SMLoc R) -: Pattern(PatternKind::Subscript), Value(V), LBracet(L), RBracet(R) -{ - assert(Value && "Invalid pattern"); -} + : Pattern(PatternKind::Subscript), Value(V), LBracet(L), RBracet(R) {} llvm::SMRange SubscriptPattern::getSourceRange() const { - return { LBracet, RBracet }; + return {LBracet, RBracet}; } diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index 55cb4c9..bc2f740 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -23,9 +23,7 @@ llvm::SMRange BreakStmt::getSourceRange() const { return BreakLoc; } // MARK: - Return statement ReturnStmt::ReturnStmt(llvm::SMLoc RL, Expr *V) - : Stmt(StmtKind::Return), RetLoc(RL), Value(V) { - assert(V && "Invalid `return` statement."); -} + : Stmt(StmtKind::Return), RetLoc(RL), Value(V) {} llvm::SMRange ReturnStmt::getSourceRange() const { return {RetLoc, Value->getLocEnd()}; @@ -34,9 +32,7 @@ llvm::SMRange ReturnStmt::getSourceRange() const { // MARK: - Range statement RangeStmt::RangeStmt(Expr *S, Expr *E, Token O) - : Stmt(StmtKind::Range), Start(S), End(E), Op(O) { - assert(Start && End && "Invalid `range` expression."); -} + : 
Stmt(StmtKind::Range), Start(S), End(E), Op(O) {} bool RangeStmt::isInclusive() const { return Op.is(tok::elipsis_incl); } @@ -47,10 +43,7 @@ llvm::SMRange RangeStmt::getSourceRange() const { // MARK: - Block statement BlockStmt::BlockStmt(llvm::SMLoc S, llvm::SMLoc E, std::vector<ASTNode *> &&N) - : Stmt(StmtKind::Block), BlockStart(S), BlockEnd(E), Nodes(N) { - for (const auto Node : Nodes) - assert(Node && "Invalid `CodeBlock` statement."); -} + : Stmt(StmtKind::Block), BlockStart(S), BlockEnd(E), Nodes(N) {} llvm::SMRange BlockStmt::getSourceRange() const { return {BlockStart, BlockEnd}; @@ -58,10 +51,8 @@ llvm::SMRange BlockStmt::getSourceRange() const { // MARK: - Funcion statement -FuncStmt::FuncStmt(FuncDecl *FP, BlockStmt *B) - : Stmt(StmtKind::Func), Prototype(FP), Body(B) { - assert(Prototype && Body && "Invalid `func` statement"); -} +FuncStmt::FuncStmt(Decl *FP, Stmt *B) + : Stmt(StmtKind::Func), Prototype(FP), Body(B) {} llvm::SMRange FuncStmt::getSourceRange() const { return {Prototype->getLocStart(), Body->getLocEnd()}; @@ -69,10 +60,8 @@ llvm::SMRange FuncStmt::getSourceRange() const { // MARK: Fot-in statement -ForStmt::ForStmt(llvm::SMLoc FL, IdentifierExpr *V, RangeStmt *R, BlockStmt *B) - : Stmt(StmtKind::For), ForLoc(FL), Var(V), Range(R), Body(B) { - assert(Var && Range && Body && "Invalid `for-in` statement"); -} +ForStmt::ForStmt(llvm::SMLoc FL, Expr *V, Stmt *R, Stmt *B) + : Stmt(StmtKind::For), ForLoc(FL), Var(V), Range(R), Body(B) {} llvm::SMRange ForStmt::getSourceRange() const { return {ForLoc, Body->getLocEnd()}; @@ -80,10 +69,8 @@ llvm::SMRange ForStmt::getSourceRange() const { // MARK: - While statement -WhileStmt::WhileStmt(llvm::SMLoc WL, Expr *C, BlockStmt *B) - : Stmt(StmtKind::While), WhileLoc(WL), Cond(C), Body(B) { - assert(C && B && "Invalid `while` statement"); -} +WhileStmt::WhileStmt(llvm::SMLoc WL, Expr *C, Stmt *B) + : Stmt(StmtKind::While), WhileLoc(WL), Cond(C), Body(B) {} llvm::SMRange WhileStmt::getSourceRange() 
const { return {WhileLoc, Body->getLocEnd()}; @@ -91,10 +78,8 @@ llvm::SMRange WhileStmt::getSourceRange() const { // MARK: - If statement -IfStmt::IfStmt(llvm::SMLoc IL, Expr *C, BlockStmt *T, BlockStmt *E) - : Stmt(StmtKind::If), IfLoc(IL), Cond(C), Then(T), Else(E) { - assert(C && T && "Invalid `if` statement"); -} +IfStmt::IfStmt(llvm::SMLoc IL, Expr *C, Stmt *T, Stmt *E) + : Stmt(StmtKind::If), IfLoc(IL), Cond(C), Then(T), Else(E) {} llvm::SMRange IfStmt::getSourceRange() const { if (Else != nullptr) diff --git a/lib/Frontend/Compiler.cpp b/lib/Frontend/Compiler.cpp index abd87e1..ee01c79 100644 --- a/lib/Frontend/Compiler.cpp +++ b/lib/Frontend/Compiler.cpp @@ -10,7 +10,9 @@ using namespace dusk; -Compiler::Compiler(std::vector<llvm::StringRef> Filenames) { +Compiler::Compiler(std::vector<llvm::StringRef> Filenames) + : Engine(SourceManager), OS(llvm::raw_os_ostream(std::cerr)) { + Engine.addConsumer(this); for (auto &F : Filenames) { auto File = std::make_unique<InputFile>(SourceManager, F); InputFiles.push_back(std::move(File)); @@ -23,19 +25,21 @@ void Compiler::Compile() { std::vector<ParserResult> Results; for (auto &&File : InputFiles) { - Parser P(SourceManager, *File, Diag, File->bufferID()); - Results.push_back(P.parse()); - if (!Diag.isEmpty()) - return consumeDiagnostics(); + Parser P(SourceManager, *File, Engine, File->bufferID()); + auto R = P.parse(); + // Stop compilation after the first error + if (R.isError()) + return; + Results.push_back(std::move(R)); } - + for (auto &&R : Results) F.format(R.getRoot(), OS); } void Compiler::Lex() { for (auto &&File : InputFiles) { - Lexer L(SourceManager, File->bufferID(), &Diag, true); + Lexer L(SourceManager, File->bufferID(), &Engine, true); Token T; do { L.lex(T); @@ -46,9 +50,7 @@ void Compiler::Lex() { } } -void Compiler::consumeDiagnostics() { - llvm::raw_os_ostream OS(std::cerr); - Diag.consume(OS); +void Compiler::consume(llvm::SMDiagnostic &Diag) { + Diag.print("duskc", OS); } - diff --git 
a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp index 1b553f2..1c441ca 100644 --- a/lib/Parser/Lexer.cpp +++ b/lib/Parser/Lexer.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "dusk/Parse/Lexer.h" +#include "dusk/Basic/SourceManager.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/SmallVector.h" @@ -19,21 +20,8 @@ #include <functional> using namespace dusk; -using namespace diag; - -static unsigned getBufferForLoc(const llvm::SourceMgr &SM, llvm::SMLoc Loc) { - // Validate location - assert(Loc.isValid()); - - auto Ptr = Loc.getPointer(); - for (unsigned i = 1; i <= SM.getNumBuffers(); i++) { - auto Buff = SM.getMemoryBuffer(i); - if (Ptr >= Buff->getBufferStart() && Ptr <= Buff->getBufferEnd()) - return i; - } - llvm_unreachable("Location in non-existing buffer."); -} +/// Returns start of line containing a given \c currPtr. static const char *getStartOfLine(const char *buffStart, const char *currPtr) { while (buffStart != currPtr) { if (*currPtr == '\n' || *currPtr == '\r') { @@ -62,8 +50,8 @@ static bool isValidBinDigit(const char *c) { return *c == '0' || *c == '1'; } static bool isValidOctDigit(const char *c) { return *c >= '0' && *c <= '7'; } static bool isValidHexDigit(const char *c) { - return std::isdigit(*c) || (*c >= 'a' && *c <= 'f') || - (*c >= 'A' && *c <= 'F'); + return std::isdigit(*c) || + (*c >= 'a' && *c <= 'f') || (*c >= 'A' && *c <= 'F'); } // MARK: - Contitional character consumtion functions @@ -107,8 +95,8 @@ static bool consumeIfValidHexDigit(const char *&ptr) { // MARK: - Lexer Lexer::Lexer(const llvm::SourceMgr &SM, unsigned BufferID, - diag::Diagnostics *Diag, bool KeepComments) - : SourceManager(SM), Diag(Diag), KeepComments(KeepComments) { + DiagnosticEngine *Engine, bool KeepComments) + : SourceManager(SM), Engine(Engine), KeepComments(KeepComments) { // Extract buffer from source manager auto B = SourceManager.getMemoryBuffer(BufferID); 
@@ -126,6 +114,7 @@ void Lexer::lex(Token &Ret) { // Assign next token to the reference. Ret = NextToken; + // Never lex past the eof. if (Ret.isNot(tok::eof)) lexToken(); } @@ -139,6 +128,7 @@ void Lexer::lexToken() { // Not ending null character. if (CurPtr - 1 != BufferEnd) break; + CurPtr--; return formToken(tok::eof, TokStart); // Skip whitespace @@ -146,21 +136,20 @@ void Lexer::lexToken() { case '\t': case '\n': case '\r': - break; + break; case '=': if (*CurPtr == '=') { CurPtr++; return formToken(tok::equals, TokStart); - } else { - return formToken(tok::assign, TokStart); } + return formToken(tok::assign, TokStart); case '.': if (*CurPtr == '.') return lexElipsis(); - else - return formToken(tok::unknown, TokStart); + formToken(tok::unknown, TokStart); + return diagnose(); case ',': return formToken(tok::colon, TokStart); @@ -195,7 +184,6 @@ void Lexer::lexToken() { return formToken(tok::comment, TokStart); break; // Ignore comment } - return formToken(tok::divide, TokStart); // Algebraic operands @@ -213,94 +201,44 @@ void Lexer::lexToken() { if (*CurPtr == '=') { CurPtr++; return formToken(tok::nequals, TokStart); - } else - return formToken(tok::neg, TokStart); + } + return formToken(tok::neg, TokStart); case '<': if (*CurPtr == '=') { CurPtr++; return formToken(tok::less_eq, TokStart); - - } else - return formToken(tok::less, TokStart); + } + return formToken(tok::less, TokStart); case '>': if (*CurPtr == '=') { CurPtr++; return formToken(tok::greater_eq, TokStart); - } else - return formToken(tok::greater, TokStart); + } + return formToken(tok::greater, TokStart); case ':': return formToken(tok::unknown, TokStart); // Numbers - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': return lexNumber(); // Identifiers - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': 
- case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': - case 'r': - case 's': - case 't': - case 'u': - case 'v': - case 'w': - case 'x': - case 'y': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case '_': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': case '_': return lexIdentifier(); default: @@ -310,34 +248,22 @@ void Lexer::lexToken() { } } -void Lexer::diagnose(LexerError E) { diagnose(NextToken, E); } +void Lexer::diagnose(diag::DiagID ID) { diagnose(NextToken, ID); } -void Lexer::diagnose(Token T, LexerError E) { - if (Diag == nullptr) +void Lexer::diagnose(Token T, diag::DiagID ID) { + if (Engine == nullptr) return; - auto ID = getBufferForLoc(SourceManager, T.getLoc()); - auto FN = SourceManager.getMemoryBuffer(ID)->getBufferIdentifier(); - auto[L, C] = SourceManager.getLineAndColumn(getSourceLoc(CurPtr)); - auto K = llvm::SourceMgr::DiagKind::DK_Error; - llvm::StringRef Line; - llvm::StringRef MSG; - llvm::SmallVector<llvm::SMFixIt, 2> FixIts; - - switch (E) { - case LexerError::missing_eol_multiline_comment: - Line = T.getText(); - MSG = "Missing end of multiline comment."; - FixIts.push_back({getSourceLoc(CurPtr), "*/"}); + 
switch (ID) { + case diag::DiagID::lex_unexpected_symbol: + Engine->diagnose(T.getLoc(), ID); break; - case LexerError::unexpected_symbol: - Line = getLineForLoc(SourceManager, T.getLoc()); - MSG = "Unexpected symbol"; + case diag::DiagID::lex_unterminated_multiline_comment: + Engine->diagnose(T.getLoc(), ID).fixItAfter("*/", T.getLoc()); break; - } - auto D = llvm::SMDiagnostic(SourceManager, T.getLoc(), FN, L, C, K, MSG, Line, - llvm::None, FixIts); - Diag->diagnose(std::move(D)); + default: + llvm_unreachable("Invalid diagnostics"); + } } // MARK: - Static methods @@ -459,8 +385,8 @@ void Lexer::skipMultilineComment() { const char *TokStart = CurPtr - 1; // Validate start of line comment assert(CurPtr[-1] == '/' && CurPtr[0] == '*' && "Not a /* comment"); + CurPtr++; - while (true) { switch (*CurPtr++) { // Consume next character @@ -482,7 +408,7 @@ void Lexer::skipMultilineComment() { break; CurPtr--; formToken(tok::unknown, TokStart); - return diagnose(LexerError::missing_eol_multiline_comment); + return diagnose(diag::DiagID::lex_unterminated_multiline_comment); } } } @@ -501,7 +427,7 @@ void Lexer::lexElipsis() { // Validate start of elipsis assert(CurPtr[-1] == '.' && *CurPtr++ == '.' && "Invalid elipsis token"); - if (CurPtr[1] == '.') { + if (CurPtr[0] == '.') { // '...' token CurPtr++; return formToken(tok::elipsis_incl, TokStart); @@ -519,8 +445,7 @@ void Lexer::lexIdentifier() { assert(didStart && "Unexpected start of identifier"); // Continue moving until invalid character or buffer end found - while (consumeIfValidIdentifierCont(CurPtr)) - ; + while (consumeIfValidIdentifierCont(CurPtr)); // Construct token auto TokenText = llvm::StringRef{TokStart, (size_t)(CurPtr - TokStart)}; @@ -554,15 +479,13 @@ void Lexer::lexHexNumber() { // Consume [0-9][a-z][A-Z] character to get token string. // We'll validate it later. 
- while (consumeIfValidIdentifierCont(CurPtr)) - ; + while (consumeIfValidIdentifierCont(CurPtr)); const char *TokEnd = CurPtr; CurPtr = TokStart + 2; // skip `0x` prefix // Consume only valid [0-9][a-f][A-F] character. - while (consumeIfValidHexDigit(CurPtr)) - ; + while (consumeIfValidHexDigit(CurPtr)); // Validate number of consumed characters. if (TokEnd == CurPtr) @@ -582,15 +505,13 @@ void Lexer::lexBinNumber() { // Consume [0-9][a-z][A-Z] character to get token string. // We'll validate it later. - while (consumeIfValidIdentifierCont(CurPtr)) - ; + while (consumeIfValidIdentifierCont(CurPtr)); const char *TokEnd = CurPtr; CurPtr = TokStart + 2; // skip `0b` prefix // Consume only valid [0-7] character. - while (consumeIfValidBinDigit(CurPtr)) - ; + while (consumeIfValidBinDigit(CurPtr)); // Validate number of consumed characters. if (TokEnd == CurPtr) @@ -609,15 +530,13 @@ void Lexer::lexOctNumber() { // Consume [0-9][a-z][A-Z] character to get token string. // We'll validate it later. - while (consumeIfValidIdentifierCont(CurPtr)) - ; + while (consumeIfValidIdentifierCont(CurPtr)); const char *TokEnd = CurPtr; CurPtr = TokStart + 2; // skip `0o` prefix // Consume only valid [0-7] character. - while (consumeIfValidOctDigit(CurPtr)) - ; + while (consumeIfValidOctDigit(CurPtr)); // Validate number of consumed characters. if (TokEnd == CurPtr) @@ -636,15 +555,13 @@ void Lexer::lexDecNumber() { // Consume [0-9][a-z][A-Z] character to get token string. // We'll validate it later. - while (consumeIfValidIdentifierCont(CurPtr)) - ; + while (consumeIfValidIdentifierCont(CurPtr)); const char *TokEnd = CurPtr; CurPtr = TokEnd; // Consume only valid [0-9] character. - while (consumeIfValidDecDigit(CurPtr)) - ; + while (consumeIfValidDecDigit(CurPtr)); // Validate number of consumed characters. 
if (TokEnd == CurPtr) @@ -653,4 +570,3 @@ void Lexer::lexDecNumber() { formToken(tok::unknown, TokStart); return diagnose(); } - diff --git a/lib/Parser/ParseDecl.cpp b/lib/Parser/ParseDecl.cpp index f7f86a5..0bf0968 100644 --- a/lib/Parser/ParseDecl.cpp +++ b/lib/Parser/ParseDecl.cpp @@ -15,14 +15,16 @@ using namespace dusk; /// /// ConstDecl ::= /// 'const' identifier '=' Expr ';' -ConstDecl *Parser::parseConstDecl() { +Decl *Parser::parseConstDecl() { // Validate correct variable decl assert(Tok.is(tok::kwConst) && "Invalid parsing method."); auto L = consumeToken(); auto ID = Tok; - if (!consumeIf(tok::identifier)) - throw ParseError(diag::ParserError::missing_identfier); + if (!consumeIf(tok::identifier)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_identifier); + return nullptr; + } return make<ConstDecl>(ID.getText(), ID.getLoc(), L, parseDeclValue()); } @@ -31,14 +33,16 @@ ConstDecl *Parser::parseConstDecl() { /// /// VarDecl ::= /// 'var' identifier '=' Expr ';' -VarDecl *Parser::parseVarDecl() { +Decl *Parser::parseVarDecl() { // Validate correct variable decl assert(Tok.is(tok::kwVar) && "Invalid parsing method."); auto L = consumeToken(); auto ID = Tok; - if (!consumeIf(tok::identifier)) - throw ParseError(diag::ParserError::missing_identfier); + if (!consumeIf(tok::identifier)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_identifier); + return nullptr; + } return make<VarDecl>(ID.getText(), ID.getLoc(), L, parseDeclValue()); } @@ -46,12 +50,17 @@ VarDecl *Parser::parseVarDecl() { /// DeclVal ::= /// '=' Expr ';' Expr *Parser::parseDeclValue() { - if (!consumeIf(tok::assign)) - throw ParseError(diag::ParserError::missing_assign); + if (!consumeIf(tok::assign)) { + diagnose(Tok.getLoc(), diag::DiagID::unexpected_token); + return nullptr; + } auto E = parseExpr(); - if (!consumeIf(tok::semicolon)) - throw ParseError(diag::ParserError::missing_semicolon); + if (!consumeIf(tok::semicolon)) { + diagnose(Tok.getLoc(), 
diag::DiagID::expected_semicolon) + .fixItAfter(";", Tok.getLoc()); + return nullptr; + } return E; } @@ -59,21 +68,23 @@ Expr *Parser::parseDeclValue() { /// /// FuncDecl ::= /// 'func' identifier '(' Args ')' CodeBlock -FuncDecl *Parser::parseFuncDecl() { +Decl *Parser::parseFuncDecl() { // Ensure `func` keyword assert(Tok.is(tok::kwFunc) && "Invalid parsing method."); auto FL = consumeToken(); auto ID = Tok; - if (!consumeIf(tok::identifier)) - throw ParseError(diag::ParserError::missing_identfier); + if (!consumeIf(tok::identifier)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_identifier); + return nullptr; + } return make<FuncDecl>(ID.getText(), ID.getLoc(), FL, parseVarPattern()); } /// Param declaration -ParamDecl *Parser::parseParamDecl() { +Decl *Parser::parseParamDecl() { // Validate correct param declaration assert(Tok.is(tok::identifier) && "Invalid parsing method."); diff --git a/lib/Parser/ParseExpr.cpp b/lib/Parser/ParseExpr.cpp index 6605167..84c547d 100644 --- a/lib/Parser/ParseExpr.cpp +++ b/lib/Parser/ParseExpr.cpp @@ -20,7 +20,8 @@ Expr *Parser::parseExpr() { return parseAssignExpr(); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -33,7 +34,8 @@ Expr *Parser::parseAssignExpr() { return parseAssignExprRHS(parseLogicalExpr()); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(consumeToken()); + return nullptr; } } @@ -53,7 +55,8 @@ Expr *Parser::parseAssignExprRHS(Expr *LHS) { return make<AssignExpr>((IdentifierExpr *)LHS, parseExpr()); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -66,7 +69,8 @@ Expr *Parser::parseLogicalExpr() { return parseLogicalExprRHS(parseArithExpr()); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -93,7 +97,8 @@ Expr *Parser::parseLogicalExprRHS(Expr *LHS) { return 
make<InfixExpr>(LHS, parseArithExpr(), T); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -106,7 +111,8 @@ Expr *Parser::parseArithExpr() { return parseArithExprRHS(parseMulExpr()); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -135,7 +141,8 @@ Expr *Parser::parseArithExprRHS(Expr *LHS) { return make<InfixExpr>(LHS, parseExpr(), T); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -148,7 +155,8 @@ Expr *Parser::parseMulExpr() { return parseMulExprRHS(parsePrimaryExpr()); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -180,7 +188,8 @@ Expr *Parser::parseMulExprRHS(Expr *LHS) { return make<InfixExpr>(LHS, parseExpr(), T); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } @@ -200,11 +209,12 @@ Expr *Parser::parsePrimaryExpr() { return parseUnaryExpr(); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } -Expr *Parser::parsePrimaryExprRHS(IdentifierExpr *Dest) { +Expr *Parser::parsePrimaryExprRHS(Expr *Dest) { if (Tok.isOperator()) return Dest; @@ -225,11 +235,12 @@ Expr *Parser::parsePrimaryExprRHS(IdentifierExpr *Dest) { return parseSubscriptExpr(Dest); default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; } } -IdentifierExpr *Parser::parseIdentifierExpr() { +Expr *Parser::parseIdentifierExpr() { // Validate that we really have an identifier to parse assert(Tok.is(tok::identifier) && "Invalid parsing method."); @@ -239,14 +250,14 @@ IdentifierExpr *Parser::parseIdentifierExpr() { } /// CallExpr ::= idenifier '(' Args ')' -CallExpr *Parser::parseCallExpr(IdentifierExpr *Dest) { +Expr *Parser::parseCallExpr(Expr 
*Dest) { // Validate `(` assert(Tok.is(tok::l_paren) && "Invalid parse method."); return make<CallExpr>(Dest, parseExprPattern()); } /// SubscriptExpr ::= idenifier '[' Args ']' -SubscriptExpr *Parser::parseSubscriptExpr(IdentifierExpr *Dest) { +Expr *Parser::parseSubscriptExpr(Expr *Dest) { // Validate `[` assert(Tok.is(tok::l_bracket) && "Invalid parse method."); return make<SubscriptExpr>(Dest, parseSubscriptPattern()); @@ -258,12 +269,15 @@ Expr *Parser::parseParenExpr() { assert(Tok.is(tok::l_paren) && "Invalid parse method."); auto L = consumeToken(); auto E = parseExpr(); - if (!consumeIf(tok::r_paren)) - throw ParseError(diag::ParserError::missing_r_paren); + if (!consumeIf(tok::r_paren)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_r_paren) + .fixItAfter(")", Tok.getLoc()); + return nullptr; + } return make<ParenExpr>(E, L, PreviousLoc); } -PrefixExpr *Parser::parseUnaryExpr() { +Expr *Parser::parseUnaryExpr() { // Validate that we have a unary operand. assert(Tok.isAny(tok::neg, tok::minus) && "Invalid parse method."); @@ -273,11 +287,12 @@ PrefixExpr *Parser::parseUnaryExpr() { } /// Properly parse number literal -NumberLiteralExpr *Parser::parseNumberLiteralExpr() { +Expr *Parser::parseNumberLiteralExpr() { // Validate that we have a number literal assert(Tok.is(tok::number_literal) && "Invalid parsing method."); auto Str = Tok.getText(); + auto R = Tok.getRange(); int Value; if (Str.size() > 1) { llvm::StringRef B = Str.slice(2, Str.size() - 1); @@ -302,6 +317,5 @@ NumberLiteralExpr *Parser::parseNumberLiteralExpr() { } consumeToken(); - auto E = llvm::SMLoc::getFromPointer(Str.data() + Str.size()); - return make<NumberLiteralExpr>(Value, llvm::SMRange{PreviousLoc, E}); + return make<NumberLiteralExpr>(Value, R); } diff --git a/lib/Parser/ParsePattern.cpp b/lib/Parser/ParsePattern.cpp index 9239788..6cf4a71 100644 --- a/lib/Parser/ParsePattern.cpp +++ b/lib/Parser/ParsePattern.cpp @@ -14,7 +14,7 @@ using namespace dusk; /// ExprPattern ::= /// '(' 
ExprPattern_ ')' -ExprPattern *Parser::parseExprPattern() { +Pattern *Parser::parseExprPattern() { assert(Tok.is(tok::l_paren) && "Invalid parsing method."); // Consume '(' auto LP = consumeToken(); @@ -23,8 +23,12 @@ ExprPattern *Parser::parseExprPattern() { auto C = parseExprPatternBody(); // Consume ')' - auto RP = consumeToken(); - return make<ExprPattern>(std::move(C), LP, RP); + if (!consumeIf(tok::r_paren)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_r_paren) + .fixItBefore(")", Tok.getLoc()); + return nullptr; + } + return make<ExprPattern>(std::move(C), LP, PreviousLoc); } /// ExprPatternBody ::= @@ -50,7 +54,8 @@ llvm::SmallVector<Expr *, 128> Parser::parseExprPatternBody() { break; default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return llvm::SmallVector<Expr *, 128>(); } return C; } @@ -70,13 +75,15 @@ Expr *Parser::parseExprPatternItem() { return parseExpr(); default: - throw ParseError(diag::ParserError::missing_colon); + diagnose(Tok.getLoc(), diag::DiagID::expected_semicolon) + .fixItAfter(";", Tok.getLoc()); + return nullptr; } } /// VarPatter ::= /// '(' VarPatternBody ')' -VarPattern *Parser::parseVarPattern() { +Pattern *Parser::parseVarPattern() { assert(Tok.is(tok::l_paren) && "Invalid parse method."); // Consume '(' auto LP = consumeToken(); @@ -84,16 +91,19 @@ VarPattern *Parser::parseVarPattern() { // Consume VarPatterBody auto C = parseVarPatternBody(); - // Consume ')' - auto RP = consumeToken(); - return make<VarPattern>(std::move(C), LP, RP); + if (!consumeIf(tok::r_paren)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_r_paren) + .fixItBefore(")", Tok.getLoc()); + return nullptr; + } + return make<VarPattern>(std::move(C), LP, PreviousLoc); } /// VarPatternBody ::= /// epsilon /// identifier VarPatternItem -llvm::SmallVector<ParamDecl *, 128> Parser::parseVarPatternBody() { - llvm::SmallVector<ParamDecl *, 128> C; +llvm::SmallVector<Decl *, 128> Parser::parseVarPatternBody() { + 
llvm::SmallVector<Decl *, 128> C; switch (Tok.getKind()) { case tok::r_paren: // VarPattern -> epsilon @@ -111,7 +121,9 @@ llvm::SmallVector<ParamDecl *, 128> Parser::parseVarPatternBody() { break; default: - throw ParseError(diag::ParserError::missing_colon); + diagnose(Tok.getLoc(), diag::DiagID::expected_colon_separator) + .fixItAfter(",", PreviousLoc); + return llvm::SmallVector<Decl *, 128>(); } return C; } @@ -119,7 +131,7 @@ llvm::SmallVector<ParamDecl *, 128> Parser::parseVarPatternBody() { /// VarPatternItem ::= /// epsilon /// ',' identifier VarPatternItem -ParamDecl *Parser::parseVarPatternItem() { +Decl *Parser::parseVarPatternItem() { switch (Tok.getKind()) { case tok::r_paren: // VarPattern__ -> epsilon @@ -130,20 +142,25 @@ ParamDecl *Parser::parseVarPatternItem() { consumeToken(); return parseParamDecl(); default: - throw ParseError(diag::ParserError::missing_colon); + diagnose(Tok.getLoc(), diag::DiagID::expected_colon_separator) + .fixItAfter(",", PreviousLoc); + return nullptr; } } /// SubscriptionPattern ::= /// [ Expr ] -SubscriptPattern *Parser::parseSubscriptPattern() { +Pattern *Parser::parseSubscriptPattern() { // Validate `[` start. 
assert(Tok.is(tok::l_bracket) && "Invalid parse method."); auto L = consumeToken(); auto V = parseExpr(); - if (!consumeIf(tok::r_bracket)) - throw ParseError(diag::ParserError::missing_r_bracket); + if (!consumeIf(tok::r_bracket)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_r_bracket) + .fixItAfter("]", PreviousLoc); + return nullptr; + } return make<SubscriptPattern>(V, L, PreviousLoc); } diff --git a/lib/Parser/ParseStmt.cpp b/lib/Parser/ParseStmt.cpp index 4a0000d..d0720dd 100644 --- a/lib/Parser/ParseStmt.cpp +++ b/lib/Parser/ParseStmt.cpp @@ -26,43 +26,53 @@ Expr *Parser::parseExprStmt() { E = parseExpr(); break; default: - throw ParseError(diag::ParserError::unexpected_token); + diagnose(Tok.getLoc()); + return nullptr; + } + if (!consumeIf(tok::semicolon)) { + diagnose(PreviousLoc, diag::DiagID::expected_semicolon) + .fixItAfter(";", PreviousLoc); + return nullptr; } - if (!consumeIf(tok::semicolon)) - throw ParseError(diag::ParserError::missing_semicolon); return E; } /// BreakStmt ::= /// break ';' -BreakStmt *Parser::parseBreakStmt() { +Stmt *Parser::parseBreakStmt() { // Validate `break` keyword assert(Tok.is(tok::kwBreak) && "Invalid parse method."); auto T = Tok.getText(); auto S = consumeToken(); auto E = llvm::SMLoc::getFromPointer(T.data() + T.size()); - if (!consumeIf(tok::semicolon)) - throw ParseError(diag::ParserError::missing_semicolon); + if (!consumeIf(tok::semicolon)) { + diagnose(PreviousLoc, diag::DiagID::expected_semicolon) + .fixItAfter(";", PreviousLoc); + return nullptr; + } return make<BreakStmt>(llvm::SMRange{S, E}); } /// ReturnStmt ::= /// return Expr ';' -ReturnStmt *Parser::parseReturnStmt() { +Stmt *Parser::parseReturnStmt() { // Validate `return` keyword assert(Tok.is(tok::kwReturn) && "Invalid parse method."); auto RL = consumeToken(); auto E = parseExpr(); - if (!consumeIf(tok::semicolon)) - throw ParseError(diag::ParserError::missing_semicolon); + if (!consumeIf(tok::semicolon)) { + diagnose(PreviousLoc, 
diag::DiagID::expected_semicolon) + .fixItAfter(";", PreviousLoc); + return nullptr; + } return make<ReturnStmt>(RL, E); } /// Block ::= /// '{' BlockBody '}' -BlockStmt *Parser::parseBlock() { +Stmt *Parser::parseBlock() { // Validate `l_brace` token assert(Tok.is(tok::l_brace) && "Invalid parse method."); auto L = consumeToken(); @@ -72,8 +82,12 @@ BlockStmt *Parser::parseBlock() { while (auto Node = parseBlockBody()) Nodes.push_back(Node); - auto R = consumeToken(); - return make<BlockStmt>(L, R, std::move(Nodes)); + if (!consumeIf(tok::r_brace)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_r_brace) + .fixItBefore("}", Tok.getLoc()); + return nullptr; + } + return make<BlockStmt>(L, PreviousLoc, std::move(Nodes)); } ASTNode *Parser::parseBlockBody() { @@ -109,45 +123,58 @@ ASTNode *Parser::parseBlockBody() { return parseWhileStmt(); default: - throw ParseError(diag::ParserError::missing_r_brace); + return nullptr; } } } -FuncStmt *Parser::parseFuncStmt() { +Stmt *Parser::parseFuncStmt() { // Validate `func` keyword assert(Tok.is(tok::kwFunc) && "Invalid parse method"); return make<FuncStmt>(parseFuncDecl(), parseBlock()); } -ForStmt *Parser::parseForStmt() { +Stmt *Parser::parseForStmt() { // Validate `for` keyword. 
assert(Tok.is(tok::kwFor) && "Invalid parse method"); auto FLoc = consumeToken(); - if (!Tok.is(tok::identifier)) - throw ParseError(diag::ParserError::missing_identfier); + if (!Tok.is(tok::identifier)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_identifier); + return nullptr; + } auto Var = parseIdentifierExpr(); - if (!consumeIf(tok::kwIn)) - throw ParseError(diag::ParserError::missing_in_kw); - + if (!consumeIf(tok::kwIn)) { + diagnose(Tok.getLoc(), diag::DiagID::expected_in_kw) + .fixItBefore("in", Tok.getLoc()); + return nullptr; + } + auto Rng = parseRangeStmt(); + if (!Tok.is(tok::l_brace)) { + diagnose(Tok.getLoc()); + return nullptr; + } auto Body = parseBlock(); return make<ForStmt>(FLoc, Var, Rng, Body); } -RangeStmt *Parser::parseRangeStmt() { +Stmt *Parser::parseRangeStmt() { auto S = parseExpr(); auto Op = Tok; - if (!Tok.isAny(tok::elipsis_incl, tok::elipsis_excl)) - throw ParseError(diag::ParserError::missing_ellipsis_op); + if (!Tok.isAny(tok::elipsis_incl, tok::elipsis_excl)) { + diagnose(PreviousLoc, diag::DiagID::expected_ellipsis) + .fixItAfter("..", PreviousLoc) + .fixItAfter("...", PreviousLoc); + return nullptr; + } consumeToken(); auto E = parseExpr(); return make<RangeStmt>(S, E, Op); } -WhileStmt *Parser::parseWhileStmt() { +Stmt *Parser::parseWhileStmt() { assert(Tok.is(tok::kwWhile) && "Invalid parse method."); auto L = consumeToken(); @@ -156,7 +183,7 @@ WhileStmt *Parser::parseWhileStmt() { return make<WhileStmt>(L, Cond, Body); } -IfStmt *Parser::parseIfStmt() { +Stmt *Parser::parseIfStmt() { assert(Tok.is(tok::kwIf) && "Invalid parse method."); auto L = consumeToken(); auto Cond = parseExpr(); @@ -165,7 +192,7 @@ IfStmt *Parser::parseIfStmt() { return make<IfStmt>(L, Cond, Then, Else); } -BlockStmt *Parser::parseElseStmt() { +Stmt *Parser::parseElseStmt() { if (!consumeIf(tok::kwElse)) return nullptr; return parseBlock(); diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp index 3b6a854..f92dec6 100644 --- 
a/lib/Parser/Parser.cpp +++ b/lib/Parser/Parser.cpp @@ -9,23 +9,18 @@ #include "dusk/Parse/Parser.h" #include "dusk/Parse/ParserResult.h" +#include "dusk/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include <vector> using namespace dusk; -using namespace diag; - -// MARK: - Parsing error - -ParseError::ParseError(ParserError K) - : std::runtime_error("Parsing error"), Kind(K) {} // MARK: - Parser -Parser::Parser(llvm::SourceMgr &SM, InputFile &SF, diag::Diagnostics &Diag, +Parser::Parser(llvm::SourceMgr &SM, InputFile &SF, DiagnosticEngine &Engine, unsigned BufferID) - : SourceManager(SM), SourceFile(SF), Diag(Diag), - L(new Lexer(SM, BufferID, &Diag)) {} + : SourceManager(SM), SourceFile(SF), Engine(Engine), + L(new Lexer(SM, BufferID, &Engine)) {} Parser::~Parser() { delete L; } @@ -44,109 +39,15 @@ llvm::SMLoc Parser::consumeToken(tok T) { return consumeToken(); } -unsigned Parser::diagnose() { - // Terminate parsing. - terminateParsing(); - - if (Tok.is(tok::unknown)) - // Handeled by lexer. - return 0; - return diagnoseUnexpectedToken(); -} - -// TODO: Somehow remove. -static unsigned getBufferForLoc(const llvm::SourceMgr &SM, llvm::SMLoc Loc) { - // Validate location - assert(Loc.isValid()); - - auto Ptr = Loc.getPointer(); - for (unsigned i = 1; i <= SM.getNumBuffers(); i++) { - auto Buff = SM.getMemoryBuffer(i); - if (Ptr >= Buff->getBufferStart() && Ptr <= Buff->getBufferEnd()) - return i; - } - llvm_unreachable("Location in non-existing buffer."); -} - -unsigned Parser::diagnose(ParserError E) { +DiagnosticRef Parser::diagnose(llvm::SMLoc Loc, diag::DiagID ID) { + if (diag::DiagID::unexpected_token == ID && R.isError()) + // No better diagnostics than already given. + return DiagnosticRef(); + R.setError(); if (Tok.is(tok::unknown)) // Handeled by lexer. 
- return 0; - - auto ID = getBufferForLoc(SourceManager, PreviousLoc); - auto FN = SourceManager.getMemoryBuffer(ID)->getBufferIdentifier(); - auto Loc = Tok.getLoc(); - auto K = llvm::SourceMgr::DiagKind::DK_Error; - auto Line = Lexer::getLineForLoc(SourceManager, PreviousLoc); - llvm::StringRef MSG; - llvm::SmallVector<llvm::SMFixIt, 2> FixIts; - - switch (E) { - case ParserError::unexpected_token: - MSG = "Unexpected token"; - break; - case ParserError::missing_semicolon: - MSG = "Missing semicolon."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, ";"}); - break; - case ParserError::missing_r_paren: - MSG = "Missing closing parenthesis )."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, ")"}); - break; - case ParserError::missing_identfier: - MSG = "Expected identifier."; - break; - case ParserError::missing_assign: - MSG = "Expected assign operator."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, "="}); - break; - case ParserError::missing_in_kw: - MSG = "Expected `in` keyword."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, "in"}); - break; - case ParserError::missing_r_brace: - MSG = "Expected closing brace }."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, "}"}); - break; - case ParserError::missing_ellipsis_op: - MSG = "Expected range operator."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, ".."}); - FixIts.push_back({Loc, "..."}); - break; - case ParserError::missing_colon: - MSG = "Expected colon separator."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, ","}); - break; - case ParserError::missing_r_bracket: - MSG = "Expected closing bracket ]."; - Loc = Lexer::getLocForEndOfToken(SourceManager, PreviousLoc); - FixIts.push_back({Loc, "]"}); - break; - } - - 
auto[L, C] = SourceManager.getLineAndColumn(Loc); - auto D = llvm::SMDiagnostic(SourceManager, Loc, FN, L, C, K, MSG, Line, - llvm::None, FixIts); - return Diag.diagnose(std::move(D)); -} - -unsigned Parser::diagnoseUnexpectedToken() { - auto ID = getBufferForLoc(SourceManager, Tok.getLoc()); - auto FN = SourceManager.getMemoryBuffer(ID)->getBufferIdentifier(); - auto[L, C] = SourceManager.getLineAndColumn(Tok.getLoc()); - auto K = llvm::SourceMgr::DiagKind::DK_Error; - auto Line = Tok.getText(); - auto MSG = "Unexpexted token"; - auto D = llvm::SMDiagnostic(SourceManager, Tok.getLoc(), FN, L, C, K, MSG, - Line, llvm::None); - return Diag.diagnose(std::move(D)); + return DiagnosticRef(); + return Engine.diagnose(Loc, ID); } // MARK: - Main parsing loop @@ -154,12 +55,8 @@ unsigned Parser::diagnoseUnexpectedToken() { ParserResult &&Parser::parse() { std::vector<ASTNode *> Nodes; consumeToken(); - try { - while (Tok.isNot(tok::eof)) - Nodes.push_back(parseGlobal()); - } catch (ParseError &E) { - diagnose(E.getKind()); - } + while (Tok.isNot(tok::eof)) + Nodes.push_back(parseGlobal()); if (Nodes.size() != 0) R.setRoot(make<ModuleDecl>(SourceFile.file(), std::move(Nodes))); @@ -187,7 +84,8 @@ ASTNode *Parser::parseGlobal() { return parseExpr(); default: - throw ParserError(); + diagnose(consumeToken()); + return nullptr; } } -- GitLab