From f30dded731dba0ae2a4e5d515f3de55263b79cec Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 14 Nov 2019 08:12:48 +0100
Subject: [PATCH] improved parser error reporting and parser refactoring

---
Review notes (placed after the three-dash separator, so they are not part of
the commit message):

 * What the patch does: Parser gains m_checkedOptions, a set of every token
   probed through check ( ) / check_nonreserved_kw ( ) since the last
   clearCheckOptions ( ). Each grammar rule that ends in a "Mismatched set"
   error clears the set on entry and prints it in the exception text, and the
   match* helpers append it to their messages as well. in_redirect_file ( )
   and out_redirect_file ( ) are inlined into in_redirect ( ) and
   out_redirect ( ); out_redirect ( ) moves in front of common ( ).
 * Fixed here: check_then_match_nonreserved_kw ( ) called check ( ) and
   match ( ), which take token types, with keyword strings. It now calls
   check_nonreserved_kw ( ) / match_nonreserved_kw ( ).
 * getCheckOptions ( ) is const-qualified; it only returns a reference.
 * Line breaks and tab indentation of this file were reconstructed from a
   copy in which all whitespace had been collapsed. Hunk line counts match
   the @@ headers and the diffstat, but the placement of blank context lines
   and the indentation are assumptions - verify against the target tree, or
   apply with "git apply --ignore-whitespace".

 alib2cli/src/parser/Parser.cpp | 94 +++++++++++++++++-----------------
 alib2cli/src/parser/Parser.h   | 67 +++++++++++++++++-------
 2 files changed, 95 insertions(+), 66 deletions(-)

diff --git a/alib2cli/src/parser/Parser.cpp b/alib2cli/src/parser/Parser.cpp
index 19e8628aaf..6d6d77ba17 100644
--- a/alib2cli/src/parser/Parser.cpp
+++ b/alib2cli/src/parser/Parser.cpp
@@ -140,24 +140,6 @@ std::unique_ptr < Arg > Parser::optional_binding ( ) {
 	}
 }
 
-std::shared_ptr < Statement > Parser::in_redirect_file ( ) {
-	std::unique_ptr < Arg > fileType;
-
-	if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
-		match ( cli::Lexer::TokenType::LEFT_BRACKET );
-		fileType = arg ( );
-		match ( cli::Lexer::TokenType::RIGHT_BRACKET );
-	}
-
-	std::unique_ptr < TypeOption > type = optional_type_option ( );
-	ext::vector < std::unique_ptr < cli::Arg > > templateArgs;
-	while ( check ( cli::Lexer::TokenType::AT_SIGN ) ) {
-		templateArgs.emplace_back ( template_arg ( ) );
-	}
-
-	return std::make_shared < FileStatement > ( file ( ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) );
-}
-
 std::shared_ptr < Statement > Parser::in_redirect ( ) {
 	if ( check ( cli::Lexer::TokenType::LEFT_PAREN ) ) {
 		match ( cli::Lexer::TokenType::LEFT_PAREN );
@@ -165,11 +147,44 @@ std::shared_ptr < Statement > Parser::in_redirect ( ) {
 		match ( cli::Lexer::TokenType::RIGHT_PAREN );
 		return res;
 	} else {
-		return in_redirect_file ( );
+		std::unique_ptr < Arg > fileType;
+
+		if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
+			match ( cli::Lexer::TokenType::LEFT_BRACKET );
+			fileType = arg ( );
+			match ( cli::Lexer::TokenType::RIGHT_BRACKET );
+		}
+
+		std::unique_ptr < TypeOption > type = optional_type_option ( );
+		ext::vector < std::unique_ptr < cli::Arg > > templateArgs;
+		while ( check ( cli::Lexer::TokenType::AT_SIGN ) ) {
+			templateArgs.emplace_back ( template_arg ( ) );
+		}
+
+		return std::make_shared < FileStatement > ( file ( ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) );
+	}
+}
+
+std::unique_ptr < Statement > Parser::out_redirect ( ) {
+	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
+		match ( cli::Lexer::TokenType::DOLAR_SIGN );
+		std::unique_ptr < Arg > name = arg ( );
+		return std::make_unique < ResultVariableStatement > ( std::move ( name ) );
+	} else {
+		std::unique_ptr < Arg > fileType;
+
+		if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
+			match ( cli::Lexer::TokenType::LEFT_BRACKET );
+			fileType = arg ( );
+			match ( cli::Lexer::TokenType::RIGHT_BRACKET );
+		}
+
+		return std::make_unique < ResultFileStatement > ( file ( ), std::move ( fileType ) );
 	}
 }
 
 std::shared_ptr < Statement > Parser::common ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
 		match ( cli::Lexer::TokenType::DOLAR_SIGN );
 		std::unique_ptr < Arg > name = arg ( );
@@ -203,11 +218,12 @@ std::shared_ptr < Statement > Parser::common ( ) {
 		match ( cli::Lexer::TokenType::RIGHT_BRACE );
 		return res;
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding common rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding common rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
 std::shared_ptr < Statement > Parser::param ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN, cli::Lexer::TokenType::LESS_SIGN, cli::Lexer::TokenType::STRING, cli::Lexer::TokenType::UNSIGNED, cli::Lexer::TokenType::HASH_SIGN, cli::Lexer::TokenType::LEFT_BRACE ) ) {
 		return common ( );
 	} else if ( check ( cli::Lexer::TokenType::MINUS_SIGN ) ) {
@@ -224,11 +240,12 @@ std::shared_ptr < Statement > Parser::param ( ) {
 		std::shared_ptr < Statement > castedParam = param ( );
 		return std::make_shared < CastStatement > ( std::move ( result_type ), castedParam, move );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding param rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding param rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
 std::shared_ptr < Statement > Parser::statement ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN, cli::Lexer::TokenType::LESS_SIGN, cli::Lexer::TokenType::STRING, cli::Lexer::TokenType::UNSIGNED, cli::Lexer::TokenType::HASH_SIGN, cli::Lexer::TokenType::LEFT_BRACE ) ) {
 		return common ( );
 	} else if ( check ( cli::Lexer::TokenType::IDENTIFIER ) ) {
@@ -255,8 +272,7 @@ std::shared_ptr < Statement > Parser::statement ( ) {
 		std::shared_ptr < Statement > castedStatement = statement ( );
 		return std::make_shared < CastStatement > ( std::move ( result_type ), castedStatement, move );
 	} else {
-		// TODO builtin statement type to get string type
-		throw exception::CommonException ( "Mismatched set while expanding param statement. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding statement rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
@@ -275,33 +291,13 @@ std::shared_ptr < StatementList > Parser::statement_list ( ) {
 	return std::make_shared < StatementList > ( std::move ( list ) );
 }
 
-std::unique_ptr < Statement > Parser::out_redirect_file ( ) {
-	std::unique_ptr < Arg > fileType;
-
-	if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
-		match ( cli::Lexer::TokenType::LEFT_BRACKET );
-		fileType = arg ( );
-		match ( cli::Lexer::TokenType::RIGHT_BRACKET );
-	}
-
-	return std::make_unique < ResultFileStatement > ( file ( ), std::move ( fileType ) );
-}
-
-std::unique_ptr < Statement > Parser::out_redirect ( ) {
-	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
-		match ( cli::Lexer::TokenType::DOLAR_SIGN );
-		std::unique_ptr < Arg > name = arg ( );
-		return std::make_unique < ResultVariableStatement > ( std::move ( name ) );
-	} else {
-		return out_redirect_file ( );
-	}
-}
-
 std::pair < bool, bool > Parser::introspect_cast_from_to ( ) {
 	bool from = false;
 	bool to = false;
 	while ( check ( cli::Lexer::TokenType::COLON_SIGN ) ) {
 		match ( cli::Lexer::TokenType::COLON_SIGN );
+
+		clearCheckOptions ( );
 		if ( check_nonreserved_kw ( "from" ) ) {
 			match_nonreserved_kw ( "from" );
 			from = true;
@@ -309,13 +305,14 @@ std::pair < bool, bool > Parser::introspect_cast_from_to ( ) {
 			match_nonreserved_kw ( "to" );
 			to = true;
 		} else {
-			throw exception::CommonException ( "Mismatched set while expanding param introspect_cast_from_to. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+			throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding introspect_cast_type. Token is " + ( ( std::string ) m_current ) + "." );
 		}
 	}
 	return std::make_pair ( from, to );
 }
 
 std::unique_ptr < Command > Parser::introspect_command ( ) {
+	clearCheckOptions ( );
 	if ( check_nonreserved_kw ( "algorithms" ) ) {
 		match_nonreserved_kw ( "algorithms" );
 		std::unique_ptr < cli::Arg > param = optional_arg ( );
@@ -348,7 +345,7 @@ std::unique_ptr < Command > Parser::introspect_command ( ) {
 		std::unique_ptr < cli::Arg > param = optional_arg ( );
 		return std::make_unique < BindingsIntrospectionCommand > ( std::move ( param ) );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding param introspect_command. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding introspection_command rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
@@ -370,6 +367,7 @@ std::unique_ptr < CommandList > Parser::parse ( ) {
 }
 
 std::unique_ptr < Command > Parser::command ( ) {
+	clearCheckOptions ( );
 	if ( check_nonreserved_kw ( "execute" ) ) {
 		match_nonreserved_kw ( "execute" );
 		std::shared_ptr < StatementList > res = statement_list ( );
@@ -426,7 +424,7 @@ std::unique_ptr < Command > Parser::command ( ) {
 
 		return std::make_unique < UnloadCommand > ( std::move ( libraryName ) );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding parse rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding parse rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
diff --git a/alib2cli/src/parser/Parser.h b/alib2cli/src/parser/Parser.h
index 4582901935..78cdec00c8 100644
--- a/alib2cli/src/parser/Parser.h
+++ b/alib2cli/src/parser/Parser.h
@@ -24,6 +24,20 @@ class Parser {
 	cli::Lexer m_lexer;
 	cli::Lexer::Token m_current;
 
+	ext::set < cli::Lexer::Token > m_checkedOptions; // tokens probed by check ( ) / check_nonreserved_kw ( ) since the last clearCheckOptions ( ); printed in error messages
+
+	void clearCheckOptions ( ) {
+		m_checkedOptions.clear ( );
+	}
+
+	const ext::set < cli::Lexer::Token > & getCheckOptions ( ) const {
+		return m_checkedOptions;
+	}
+
+	void restoreCheckOptions ( ext::set < cli::Lexer::Token > checkOptions ) {
+		m_checkedOptions = std::move ( checkOptions );
+	}
+
 public:
 	Parser ( cli::Lexer lexer ) : m_lexer ( std::move ( lexer ) ), m_current ( m_lexer.nextToken ( true ) ) {
 	}
@@ -35,34 +49,55 @@ public:
 		m_lexer.setHint ( Lexer::Hint::NONE );
 	}
 
-	template < class ... Tokens >
-	bool check ( Tokens ... tokens ) const {
+	template < class ... TokenTypes >
+	bool check ( TokenTypes ... tokens ) {
+		m_checkedOptions.merge ( std::set < cli::Lexer::Token > { cli::Lexer::Token { "", "", tokens } ... } );
 		return ( ... || ( m_current.m_type == tokens ) );
 	}
 
 	template < class ... NonreservedTokens >
-	bool check_nonreserved_kw ( const NonreservedTokens & ... kw ) const {
+	bool check_nonreserved_kw ( const NonreservedTokens & ... kw ) {
+		m_checkedOptions.merge ( std::set < cli::Lexer::Token > { cli::Lexer::Token { kw, "", cli::Lexer::TokenType::IDENTIFIER } ... } );
 		return m_current.m_type == Lexer::TokenType::IDENTIFIER && ( ... || ( m_current.m_value == kw ) );
 	}
 
-	template < class Token, class ... Tokens >
-	bool match ( Token token, Tokens ... tokens ) {
+	template < class ... TokenTypes >
+	bool match ( cli::Lexer::TokenType token, TokenTypes ... tokens ) {
 		if ( ! check ( token, tokens ... ) )
-			throw exception::CommonException ( std::string ( "Mismatched token while matching a token " ) + ( Lexer::tokenTypeToString ( token ) + ... + ( ", " + Lexer::tokenTypeToString ( tokens ) ) ) + ". Actual was " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+			throw exception::CommonException ( std::string ( "Mismatched token while matching a token " ) + ( Lexer::tokenTypeToString ( token ) + ... + ( ", " + Lexer::tokenTypeToString ( tokens ) ) ) + ". Actual was " + Lexer::tokenTypeToString ( m_current.m_type ) + ". Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		m_current = m_lexer.nextToken ( false );
 		return true;
 	}
 
-	bool match_nonreserved_kw ( const std::string & kw ) {
-		if ( ! check_nonreserved_kw ( kw ) )
-			throw exception::CommonException ( "Mismatched token while matching a non reseved keyword: " + kw + "." );
+	template < class ... NonreservedTokens >
+	bool match_nonreserved_kw ( const std::string & kw, const NonreservedTokens & ... kws ) {
+		if ( ! check_nonreserved_kw ( kw, kws ... ) )
+			throw exception::CommonException ( "Mismatched token while matching a non reseved keyword: " + kw + ". Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		m_current = m_lexer.nextToken ( false );
 		return true;
 	}
 
+	template < class ... TokenTypes >
+	bool check_then_match ( cli::Lexer::TokenType token, TokenTypes ... tokens ) {
+		if ( ! check ( token, tokens ... ) )
+			return false;
+
+		match ( token, tokens ... );
+		return true;
+	}
+
+	template < class ... NonreservedTokens >
+	bool check_then_match_nonreserved_kw ( const std::string & kw, const NonreservedTokens & ... kws ) {
+		if ( ! check_nonreserved_kw ( kw, kws ... ) ) // keyword probe: check ( ) takes token types, not keyword strings
+			return false;
+
+		match_nonreserved_kw ( kw, kws ... ); // consume through the keyword matcher for the same reason
+		return true;
+	}
+
 	std::string matchIdentifier ( ) {
 		if ( ! check ( Lexer::TokenType::IDENTIFIER ) )
-			throw exception::CommonException ( "Mismatched token while matching an identifier." );
+			throw exception::CommonException ( "Mismatched token while matching an identifier. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -70,7 +105,7 @@ public:
 
 	std::string matchString ( ) {
 		if ( ! check ( Lexer::TokenType::STRING ) )
-			throw exception::CommonException ( "Mismatched token while matching a string." );
+			throw exception::CommonException ( "Mismatched token while matching a string. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -78,7 +113,7 @@ public:
 
 	std::string matchType ( ) {
 		if ( ! check ( Lexer::TokenType::TYPE ) )
-			throw exception::CommonException ( "Mismatched token while matching a type." );
+			throw exception::CommonException ( "Mismatched token while matching a type. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -86,7 +121,7 @@ public:
 
 	std::string matchFile ( ) {
 		if ( ! check ( Lexer::TokenType::FILE ) )
-			throw exception::CommonException ( "Mismatched token while matching a file." );
+			throw exception::CommonException ( "Mismatched token while matching a file. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -94,7 +129,7 @@ public:
 
 	int matchInteger ( ) {
 		if ( ! check ( Lexer::TokenType::UNSIGNED ) )
-			throw exception::CommonException ( "Mismatched token while matching an integer." );
+			throw exception::CommonException ( "Mismatched token while matching an integer. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		int res = ext::from_string < int > ( m_current.m_value );
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -126,8 +161,6 @@ public:
 
 	bool move_arg ( );
 
-	std::shared_ptr < Statement > in_redirect_file ( );
-
 	std::shared_ptr < Statement > in_redirect ( );
 
 	std::shared_ptr < Statement > common ( );
@@ -138,8 +171,6 @@ public:
 
 	std::shared_ptr < StatementList > statement_list ( );
 
-	std::unique_ptr < Statement > out_redirect_file ( );
-
 	std::unique_ptr < Statement > out_redirect ( );
 
 	std::pair < bool, bool > introspect_cast_from_to ( );
-- 
GitLab