From f30dded731dba0ae2a4e5d515f3de55263b79cec Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 14 Nov 2019 08:12:48 +0100
Subject: [PATCH] improved parser error reporting and parser refactoring

---
 alib2cli/src/parser/Parser.cpp | 94 +++++++++++++++++-----------------
 alib2cli/src/parser/Parser.h   | 67 +++++++++++++++++-------
 2 files changed, 95 insertions(+), 66 deletions(-)

diff --git a/alib2cli/src/parser/Parser.cpp b/alib2cli/src/parser/Parser.cpp
index 19e8628aaf..6d6d77ba17 100644
--- a/alib2cli/src/parser/Parser.cpp
+++ b/alib2cli/src/parser/Parser.cpp
@@ -140,24 +140,6 @@ std::unique_ptr < Arg > Parser::optional_binding ( ) {
 	}
 }
 
-std::shared_ptr < Statement > Parser::in_redirect_file ( ) {
-	std::unique_ptr < Arg > fileType;
-
-	if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
-		match ( cli::Lexer::TokenType::LEFT_BRACKET );
-		fileType = arg ( );
-		match ( cli::Lexer::TokenType::RIGHT_BRACKET );
-	}
-
-	std::unique_ptr < TypeOption > type = optional_type_option ( );
-	ext::vector < std::unique_ptr < cli::Arg > > templateArgs;
-	while ( check ( cli::Lexer::TokenType::AT_SIGN ) ) {
-		templateArgs.emplace_back ( template_arg ( ) );
-	}
-
-	return std::make_shared < FileStatement > ( file ( ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) );
-}
-
 std::shared_ptr < Statement > Parser::in_redirect ( ) {
 	if ( check ( cli::Lexer::TokenType::LEFT_PAREN ) ) {
 		match ( cli::Lexer::TokenType::LEFT_PAREN );
@@ -165,11 +147,44 @@ std::shared_ptr < Statement > Parser::in_redirect ( ) {
 		match ( cli::Lexer::TokenType::RIGHT_PAREN );
 		return res;
 	} else {
-		return in_redirect_file ( );
+		std::unique_ptr < Arg > fileType;
+
+		if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
+			match ( cli::Lexer::TokenType::LEFT_BRACKET );
+			fileType = arg ( );
+			match ( cli::Lexer::TokenType::RIGHT_BRACKET );
+		}
+
+		std::unique_ptr < TypeOption > type = optional_type_option ( );
+		ext::vector < std::unique_ptr < cli::Arg > > templateArgs;
+		while ( check ( cli::Lexer::TokenType::AT_SIGN ) ) {
+			templateArgs.emplace_back ( template_arg ( ) );
+		}
+
+		return std::make_shared < FileStatement > ( file ( ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) );
+	}
+}
+
+std::unique_ptr < Statement > Parser::out_redirect ( ) { // parses an output redirect target: a DOLAR_SIGN-prefixed variable or a file
+	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
+		match ( cli::Lexer::TokenType::DOLAR_SIGN );
+		std::unique_ptr < Arg > name = arg ( );
+		return std::make_unique < ResultVariableStatement > ( std::move ( name ) );
+	} else { // file target; this branch is the former out_redirect_file ( ) inlined
+		std::unique_ptr < Arg > fileType; // stays null when no bracketed file type precedes the file
+
+		if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
+			match ( cli::Lexer::TokenType::LEFT_BRACKET );
+			fileType = arg ( );
+			match ( cli::Lexer::TokenType::RIGHT_BRACKET );
+		}
+
+		return std::make_unique < ResultFileStatement > ( file ( ), std::move ( fileType ) );
+	}
+}
 
 std::shared_ptr < Statement > Parser::common ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
 		match ( cli::Lexer::TokenType::DOLAR_SIGN );
 		std::unique_ptr < Arg > name = arg ( );
@@ -203,11 +218,12 @@ std::shared_ptr < Statement > Parser::common ( ) {
 		match ( cli::Lexer::TokenType::RIGHT_BRACE );
 		return res;
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding common rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding common rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
 std::shared_ptr < Statement > Parser::param ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN, cli::Lexer::TokenType::LESS_SIGN, cli::Lexer::TokenType::STRING, cli::Lexer::TokenType::UNSIGNED, cli::Lexer::TokenType::HASH_SIGN, cli::Lexer::TokenType::LEFT_BRACE ) ) {
 		return common ( );
 	} else if ( check ( cli::Lexer::TokenType::MINUS_SIGN ) ) {
@@ -224,11 +240,12 @@ std::shared_ptr < Statement > Parser::param ( ) {
 		std::shared_ptr < Statement > castedParam = param ( );
 		return std::make_shared < CastStatement > ( std::move ( result_type ), castedParam, move );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding param rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding param rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
 std::shared_ptr < Statement > Parser::statement ( ) {
+	clearCheckOptions ( );
 	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN, cli::Lexer::TokenType::LESS_SIGN, cli::Lexer::TokenType::STRING, cli::Lexer::TokenType::UNSIGNED, cli::Lexer::TokenType::HASH_SIGN, cli::Lexer::TokenType::LEFT_BRACE ) ) {
 		return common ( );
 	} else if ( check ( cli::Lexer::TokenType::IDENTIFIER ) ) {
@@ -255,8 +272,7 @@ std::shared_ptr < Statement > Parser::statement ( ) {
 		std::shared_ptr < Statement > castedStatement = statement ( );
 		return std::make_shared < CastStatement > ( std::move ( result_type ), castedStatement, move );
 	} else {
-	// TODO builtin statement type to get string type
-		throw exception::CommonException ( "Mismatched set while expanding param statement. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding statement rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
@@ -275,33 +291,13 @@ std::shared_ptr < StatementList > Parser::statement_list ( ) {
 	return std::make_shared < StatementList > ( std::move ( list ) );
 }
 
-std::unique_ptr < Statement > Parser::out_redirect_file ( ) {
-	std::unique_ptr < Arg > fileType;
-
-	if ( check ( cli::Lexer::TokenType::LEFT_BRACKET ) ) {
-		match ( cli::Lexer::TokenType::LEFT_BRACKET );
-		fileType = arg ( );
-		match ( cli::Lexer::TokenType::RIGHT_BRACKET );
-	}
-
-	return std::make_unique < ResultFileStatement > ( file ( ), std::move ( fileType ) );
-}
-
-std::unique_ptr < Statement > Parser::out_redirect ( ) {
-	if ( check ( cli::Lexer::TokenType::DOLAR_SIGN ) ) {
-		match ( cli::Lexer::TokenType::DOLAR_SIGN );
-		std::unique_ptr < Arg > name = arg ( );
-		return std::make_unique < ResultVariableStatement > ( std::move ( name ) );
-	} else {
-		return out_redirect_file ( );
-	}
-}
-
 std::pair < bool, bool > Parser::introspect_cast_from_to ( ) {
 	bool from = false;
 	bool to = false;
 	while ( check ( cli::Lexer::TokenType::COLON_SIGN ) ) {
 		match ( cli::Lexer::TokenType::COLON_SIGN );
+
+		clearCheckOptions ( );
 		if ( check_nonreserved_kw ( "from" ) ) {
 			match_nonreserved_kw ( "from" );
 			from = true;
@@ -309,13 +305,14 @@ std::pair < bool, bool > Parser::introspect_cast_from_to ( ) {
 			match_nonreserved_kw ( "to" );
 			to = true;
 		} else {
-			throw exception::CommonException ( "Mismatched set while expanding param introspect_cast_from_to. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+			throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding introspect_cast_type. Token is " + ( ( std::string ) m_current ) + "." );
 		}
 	}
 	return std::make_pair ( from, to );
 }
 
 std::unique_ptr < Command > Parser::introspect_command ( ) {
+	clearCheckOptions ( );
 	if ( check_nonreserved_kw ( "algorithms" ) ) {
 		match_nonreserved_kw ( "algorithms" );
 		std::unique_ptr < cli::Arg > param = optional_arg ( );
@@ -348,7 +345,7 @@ std::unique_ptr < Command > Parser::introspect_command ( ) {
 		std::unique_ptr < cli::Arg > param = optional_arg ( );
 		return std::make_unique < BindingsIntrospectionCommand > ( std::move ( param ) );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding param introspect_command. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding introspection_command rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
@@ -370,6 +367,7 @@ std::unique_ptr < CommandList > Parser::parse ( ) {
 }
 
 std::unique_ptr < Command > Parser::command ( ) {
+	clearCheckOptions ( );
 	if ( check_nonreserved_kw ( "execute" ) ) {
 		match_nonreserved_kw ( "execute" );
 		std::shared_ptr < StatementList > res = statement_list ( );
@@ -426,7 +424,7 @@ std::unique_ptr < Command > Parser::command ( ) {
 
 		return std::make_unique < UnloadCommand > ( std::move ( libraryName ) );
 	} else {
-		throw exception::CommonException ( "Mismatched set while expanding parse rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+		throw exception::CommonException ( "Mismatched set " + ext::to_string ( getCheckOptions ( ) ) + " while expanding parse rule. Token is " + ( ( std::string ) m_current ) + "." );
 	}
 }
 
diff --git a/alib2cli/src/parser/Parser.h b/alib2cli/src/parser/Parser.h
index 4582901935..78cdec00c8 100644
--- a/alib2cli/src/parser/Parser.h
+++ b/alib2cli/src/parser/Parser.h
@@ -24,6 +24,20 @@ class Parser {
 	cli::Lexer m_lexer;
 	cli::Lexer::Token m_current;
 
+	ext::set < cli::Lexer::Token > m_checkedOptions; // alternatives tested by check ( ) / check_nonreserved_kw ( ) since the last clear; printed in mismatch errors
+
+	void clearCheckOptions ( ) { // forget all recorded alternatives
+		m_checkedOptions.clear ( );
+	}
+
+	const ext::set < cli::Lexer::Token > & getCheckOptions ( ) { // read-only view of the recorded alternatives
+		return m_checkedOptions;
+	}
+
+	void restoreCheckOptions ( ext::set < cli::Lexer::Token > checkOptions ) { // replace the recorded alternatives with the given set
+		m_checkedOptions = std::move ( checkOptions );
+	}
+
 public:
 	Parser ( cli::Lexer lexer ) : m_lexer ( std::move ( lexer ) ), m_current ( m_lexer.nextToken ( true ) ) {
 	}
@@ -35,34 +49,55 @@ public:
 		m_lexer.setHint ( Lexer::Hint::NONE );
 	}
 
-	template < class ... Tokens >
-	bool check ( Tokens ... tokens ) const {
+	template < class ... TokenTypes >
+	bool check ( TokenTypes ... tokens ) { // true when the current token's type equals any given type; the token is not consumed
+		m_checkedOptions.merge ( std::set < cli::Lexer::Token > { cli::Lexer::Token { "", "", tokens } ... } ); // record each tested type (hence no longer const) so mismatch errors can list it
 		return ( ... || ( m_current.m_type == tokens ) );
 	}
 
 	template < class ... NonreservedTokens >
-	bool check_nonreserved_kw ( const NonreservedTokens & ... kw ) const {
+	bool check_nonreserved_kw ( const NonreservedTokens & ... kw ) { // true when the current token is an IDENTIFIER whose value equals any given keyword; the token is not consumed
+		m_checkedOptions.merge ( std::set < cli::Lexer::Token > { cli::Lexer::Token { kw, "", cli::Lexer::TokenType::IDENTIFIER } ... } ); // record each keyword as an IDENTIFIER token so mismatch errors can list it
 		return m_current.m_type == Lexer::TokenType::IDENTIFIER && ( ... || ( m_current.m_value == kw ) );
 	}
 
-	template < class Token, class ... Tokens >
-	bool match ( Token token, Tokens ... tokens ) {
+	template < class ... TokenTypes >
+	bool match ( cli::Lexer::TokenType token, TokenTypes ... tokens ) { // consume the current token if its type is one of the given types; throws CommonException otherwise
 		if ( ! check ( token, tokens ... ) )
-			throw exception::CommonException ( std::string ( "Mismatched token while matching a token " ) + ( Lexer::tokenTypeToString ( token ) + ... + ( ", " + Lexer::tokenTypeToString ( tokens ) ) ) + ". Actual was " + Lexer::tokenTypeToString ( m_current.m_type ) + "." );
+			throw exception::CommonException ( std::string ( "Mismatched token while matching a token " ) + ( Lexer::tokenTypeToString ( token ) + ... + ( ", " + Lexer::tokenTypeToString ( tokens ) ) ) + ". Actual was " + Lexer::tokenTypeToString ( m_current.m_type ) + ". Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." ); // also lists every alternative recorded so far
 		m_current = m_lexer.nextToken ( false );
 		return true;
 	}
 
-	bool match_nonreserved_kw ( const std::string & kw ) {
-		if ( ! check_nonreserved_kw ( kw ) )
-			throw exception::CommonException ( "Mismatched token while matching a non reseved keyword: " + kw + "." );
+	template < class ... NonreservedTokens >
+	bool match_nonreserved_kw ( const std::string & kw, const NonreservedTokens & ... kws ) { // consume the current token if it is one of the given keywords; throws CommonException otherwise
+		if ( ! check_nonreserved_kw ( kw, kws ... ) )
+			throw exception::CommonException ( "Mismatched token while matching a non reseved keyword: " + kw + ". Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." ); // names only the first keyword; the remaining ones were recorded by the check above and show up in the active set
 		m_current = m_lexer.nextToken ( false );
 		return true;
 	}
 
+	template < class ... TokenTypes >
+	bool check_then_match ( cli::Lexer::TokenType token, TokenTypes ... tokens ) { // optional match: consume and return true when the type fits, return false without throwing otherwise
+		if ( ! check ( token, tokens ... ) )
+			return false;
+
+		match ( token, tokens ... ); // cannot throw here: the same check just succeeded
+		return true;
+	}
+
+	template < class ... NonreservedTokens > // optional keyword match: consume and return true when the current token is one of the keywords, return false without throwing otherwise
+	bool check_then_match_nonreserved_kw ( const std::string & kw, const NonreservedTokens & ... kws ) {
+		if ( ! check_nonreserved_kw ( kw, kws ... ) ) // keyword test; check ( ) compares token types and cannot take string keywords
+			return false;
+
+		match_nonreserved_kw ( kw, kws ... ); // match ( ) requires a cli::Lexer::TokenType first argument, so the keyword variant must be used
+		return true;
+	}
+
 	std::string matchIdentifier ( ) {
 		if ( ! check ( Lexer::TokenType::IDENTIFIER ) )
-			throw exception::CommonException ( "Mismatched token while matching an identifier." );
+			throw exception::CommonException ( "Mismatched token while matching an identifier. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -70,7 +105,7 @@ public:
 
 	std::string matchString ( ) {
 		if ( ! check ( Lexer::TokenType::STRING ) )
-			throw exception::CommonException ( "Mismatched token while matching a string." );
+			throw exception::CommonException ( "Mismatched token while matching a string. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -78,7 +113,7 @@ public:
 
 	std::string matchType ( ) {
 		if ( ! check ( Lexer::TokenType::TYPE ) )
-			throw exception::CommonException ( "Mismatched token while matching a type." );
+			throw exception::CommonException ( "Mismatched token while matching a type. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -86,7 +121,7 @@ public:
 
 	std::string matchFile ( ) {
 		if ( ! check ( Lexer::TokenType::FILE ) )
-			throw exception::CommonException ( "Mismatched token while matching a file." );
+			throw exception::CommonException ( "Mismatched token while matching a file. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "." );
 		std::string res = m_current.m_value;
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -94,7 +129,7 @@ public:
 
 	int matchInteger ( ) {
 		if ( ! check ( Lexer::TokenType::UNSIGNED ) )
-			throw exception::CommonException ( "Mismatched token while matching an integer."  );
+			throw exception::CommonException ( "Mismatched token while matching an integer. Tokens in active set " + ext::to_string ( getCheckOptions ( ) ) + "."  );
 		int res = ext::from_string < int > ( m_current.m_value );
 		m_current = m_lexer.nextToken ( false );
 		return res;
@@ -126,8 +161,6 @@ public:
 
 	bool move_arg ( );
 
-	std::shared_ptr < Statement > in_redirect_file ( );
-
 	std::shared_ptr < Statement > in_redirect ( );
 
 	std::shared_ptr < Statement > common ( );
@@ -138,8 +171,6 @@ public:
 
 	std::shared_ptr < StatementList > statement_list ( );
 
-	std::unique_ptr < Statement > out_redirect_file ( );
-
 	std::unique_ptr < Statement > out_redirect ( );
 
 	std::pair < bool, bool > introspect_cast_from_to ( );
-- 
GitLab