From 92da37c65997dd6daebda41629dbe17d5b85f337 Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Thu, 25 Oct 2018 12:29:50 +0200
Subject: [PATCH] fix recognition of automata from string input with leading newlines

---
 .../automaton/AutomatonFromStringLexer.cpp    |  8 +++---
 alib2str/src/automaton/string/FSM/DFA.h       |  5 ++++
 .../src/automaton/string/FSM/EpsilonNFA.h     |  5 ++++
 .../string/FSM/MultiInitialStateNFA.h         |  5 ++++
 alib2str/src/automaton/string/FSM/NFA.h       |  7 ++++-
 alib2str/src/core/stringApi.hpp               |  4 ++-
 .../src/grammar/GrammarFromStringLexer.cpp    | 26 +++++++++----------
 .../src/registry/StringReaderRegistry.cpp     |  7 +++++
 alib2str/src/tree/TreeFromStringLexer.cpp     | 12 ++++-----
 alib2str/test-src/automaton/AutomatonTest.cpp |  3 ++-
 10 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/alib2str/src/automaton/AutomatonFromStringLexer.cpp b/alib2str/src/automaton/AutomatonFromStringLexer.cpp
index 2ba51cc9cd..58bc59eed4 100644
--- a/alib2str/src/automaton/AutomatonFromStringLexer.cpp
+++ b/alib2str/src/automaton/AutomatonFromStringLexer.cpp
@@ -63,22 +63,22 @@ L0:
 	} else if(in.clear(), in.unget(), in >> ext::string ( "MISNFA" ) ) {
 		token.type = TokenType::MULTI_INITIAL_STATE_NFA;
 		token.value = "MISNFA";
-		token.raw = "MISNFA";
+		token.raw += "MISNFA";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "ENFA" ) ) {
 		token.type = TokenType::EPSILON_NFA;
 		token.value = "ENFA";
-		token.raw = "ENFA";
+		token.raw += "ENFA";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "NFA" ) ) {
 		token.type = TokenType::NFA;
 		token.value = "NFA";
-		token.raw = "NFA";
+		token.raw += "NFA";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "DFA" ) ) {
 		token.type = TokenType::DFA;
 		token.value = "DFA";
-		token.raw = "DFA";
+		token.raw += "DFA";
 		return token;
 	} else {
 		in.clear();
diff --git a/alib2str/src/automaton/string/FSM/DFA.h b/alib2str/src/automaton/string/FSM/DFA.h
index 88f4a60f5c..0a7ac71448 100644
--- a/alib2str/src/automaton/string/FSM/DFA.h
+++ b/alib2str/src/automaton/string/FSM/DFA.h
@@ -30,6 +30,11 @@ private:
 template<class SymbolType, class StateType >
 automaton::DFA < SymbolType, StateType > stringApi < automaton::DFA < SymbolType, StateType > >::parse ( std::istream & input ) {
 	automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input);
+
+	while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) {
+		token = automaton::AutomatonFromStringLexer::next(input);
+	}
+
 	if(token.type != automaton::AutomatonFromStringLexer::TokenType::DFA)
 		throw exception::CommonException("Unrecognised DFA token.");
 
diff --git a/alib2str/src/automaton/string/FSM/EpsilonNFA.h b/alib2str/src/automaton/string/FSM/EpsilonNFA.h
index 892e66cc28..1d0d066e98 100644
--- a/alib2str/src/automaton/string/FSM/EpsilonNFA.h
+++ b/alib2str/src/automaton/string/FSM/EpsilonNFA.h
@@ -30,6 +30,11 @@ private:
 template<class SymbolType, class EpsilonType, class StateType >
 automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > stringApi < automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > >::parse ( std::istream & input ) {
 	automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input);
+
+	while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) {
+		token = automaton::AutomatonFromStringLexer::next(input);
+	}
+
 	if(token.type != automaton::AutomatonFromStringLexer::TokenType::EPSILON_NFA) {
 		throw exception::CommonException("Unrecognised ENFA token.");
 	}
diff --git a/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h b/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h
index cdd808d977..c20df2e09b 100644
--- a/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h
+++ b/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h
@@ -32,6 +32,11 @@ automaton::MultiInitialStateNFA < SymbolType, StateType > stringApi < automaton:
 	automaton::MultiInitialStateNFA < > res;
 
 	automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input);
+
+	while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) {
+		token = automaton::AutomatonFromStringLexer::next(input);
+	}
+
 	if(token.type != automaton::AutomatonFromStringLexer::TokenType::MULTI_INITIAL_STATE_NFA) {
 		throw exception::CommonException("Unrecognised MISNFA token.");
 	}
diff --git a/alib2str/src/automaton/string/FSM/NFA.h b/alib2str/src/automaton/string/FSM/NFA.h
index 125bbc4769..c72ed22b0b 100644
--- a/alib2str/src/automaton/string/FSM/NFA.h
+++ b/alib2str/src/automaton/string/FSM/NFA.h
@@ -30,8 +30,13 @@ private:
 template<class SymbolType, class StateType >
 automaton::NFA < SymbolType, StateType > stringApi < automaton::NFA < SymbolType, StateType > >::parse ( std::istream & input ) {
 	automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input);
+
+	while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) {
+		token = automaton::AutomatonFromStringLexer::next(input);
+	}
+
 	if(token.type != automaton::AutomatonFromStringLexer::TokenType::NFA) {
-		throw exception::CommonException("Unrecognised NFA token.");
+		throw exception::CommonException("Unrecognised NFA token." + ext::to_string((int)token.type));
 	}
 	ext::vector < SymbolType > symbols;
 
diff --git a/alib2str/src/core/stringApi.hpp b/alib2str/src/core/stringApi.hpp
index fbe0c8e9ec..e3a74a949c 100644
--- a/alib2str/src/core/stringApi.hpp
+++ b/alib2str/src/core/stringApi.hpp
@@ -96,8 +96,10 @@ public:
 	}
 
 	static object::Object parse ( std::istream & input ) {
-		auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < GroupReader > > & entry ) {
+		while ( isspace ( input.peek ( ) ) )
+			input.get ( );
 
+		auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < GroupReader > > & entry ) {
 			return entry.first ( input );
 		};
 
diff --git a/alib2str/src/grammar/GrammarFromStringLexer.cpp b/alib2str/src/grammar/GrammarFromStringLexer.cpp
index dd8395f0c6..7b3a63cffc 100644
--- a/alib2str/src/grammar/GrammarFromStringLexer.cpp
+++ b/alib2str/src/grammar/GrammarFromStringLexer.cpp
@@ -65,67 +65,67 @@ L0:
 	} else if(in.clear (), in.unget(), in >> ext::string ( "RIGHT_RG" ) ) {
 		token.type = TokenType::RIGHT_RG;
 		token.value = "RIGHT_RG";
-		token.raw = "RIGHT_RG";
+		token.raw += "RIGHT_RG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "LEFT_RG" ) ) {
 		token.type = TokenType::LEFT_RG;
 		token.value = "LEFT_RG";
-		token.raw = "LEFT_RG";
+		token.raw += "LEFT_RG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "RIGHT_LG" ) ) {
 		token.type = TokenType::RIGHT_LG;
 		token.value = "RIGHT_LG";
-		token.raw = "RIGHT_LG";
+		token.raw += "RIGHT_LG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "LEFT_LG" ) ) {
 		token.type = TokenType::LEFT_LG;
 		token.value = "LEFT_LG";
-		token.raw = "LEFT_LG";
+		token.raw += "LEFT_LG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "LG" ) ) {
 		token.type = TokenType::LG;
 		token.value = "LG";
-		token.raw = "LG";
+		token.raw += "LG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "CFG" ) ) {
 		token.type = TokenType::CFG;
 		token.value = "CFG";
-		token.raw = "CFG";
+		token.raw += "CFG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "EPSILON_FREE_CFG" ) ) {
 		token.type = TokenType::EPSILON_FREE_CFG;
 		token.value = "EPSILON_FREE_CFG";
-		token.raw = "EPSILON_FREE_CFG";
+		token.raw += "EPSILON_FREE_CFG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "GNF" ) ) {
 		token.type = TokenType::GNF;
 		token.value = "GNF";
-		token.raw = "GNF";
+		token.raw += "GNF";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "CNF" ) ) {
 		token.type = TokenType::CNF;
 		token.value = "CNF";
-		token.raw = "CNF";
+		token.raw += "CNF";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "CSG" ) ) {
 		token.type = TokenType::CSG;
 		token.value = "CSG";
-		token.raw = "CSG";
+		token.raw += "CSG";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "NON_CONTRACTING_GRAMMAR" ) ) {
 		token.type = TokenType::NON_CONTRACTING_GRAMMAR;
 		token.value = "NON_CONTRACTING_GRAMMAR";
-		token.raw = "NON_CONTRACTING_GRAMMAR";
+		token.raw += "NON_CONTRACTING_GRAMMAR";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR" ) ) {
 		token.type = TokenType::CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR;
 		token.value = "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR";
-		token.raw = "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR";
+		token.raw += "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "UNRESTRICTED_GRAMMAR" ) ) {
 		token.type = TokenType::UNRESTRICTED_GRAMMAR;
 		token.value = "UNRESTRICTED_GRAMMAR";
-		token.raw = "UNRESTRICTED_GRAMMAR";
+		token.raw += "UNRESTRICTED_GRAMMAR";
 		return token;
 	} else {
 		in.clear();
diff --git a/alib2str/src/registry/StringReaderRegistry.cpp b/alib2str/src/registry/StringReaderRegistry.cpp
index 5946424596..79018fccb9 100644
--- a/alib2str/src/registry/StringReaderRegistry.cpp
+++ b/alib2str/src/registry/StringReaderRegistry.cpp
@@ -11,6 +11,8 @@ namespace abstraction {
 
 std::shared_ptr < abstraction::OperationAbstraction > StringReaderRegistry::getAbstraction ( const std::string & group, const std::string & str ) {
 	std::stringstream ss ( str );
+	while ( isspace ( ss.peek ( ) ) )
+		ss.get ( );
 
 	auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < Entry > > & entry ) {
 		return entry.first ( ss );
@@ -22,10 +24,15 @@ std::shared_ptr < abstraction::OperationAbstraction > StringReaderRegistry::getA
 
 	const auto & entries = entryIterator->second;
 
+	int pos = ss.tellg();
+
 	typename ext::deque < std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < Entry > > >::const_iterator callback = find_if ( entries.begin ( ), entries.end ( ), lambda );
 	if ( callback == entries.end ( ) )
 		throw exception::CommonException ( "No callback handling input found." );
 
+	if ( pos != ss.tellg ( ) )
+		throw exception::CommonException ( "First function of registered callback moved the stream." );
+
 	return callback->second->getAbstraction ( );
 }
 
diff --git a/alib2str/src/tree/TreeFromStringLexer.cpp b/alib2str/src/tree/TreeFromStringLexer.cpp
index 8cae1beeb6..90bb022d4d 100644
--- a/alib2str/src/tree/TreeFromStringLexer.cpp
+++ b/alib2str/src/tree/TreeFromStringLexer.cpp
@@ -45,32 +45,32 @@ L0:
 	} else if(in.clear (), in.unget(), in >> ext::string ( "RANKED_TREE" ) ) {
 		token.type = TokenType::RANKED_TREE;
 		token.value = "RANKED_TREE";
-		token.raw = "RANKED_TREE";
+		token.raw += "RANKED_TREE";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "RANKED_PATTERN" ) ) {
 		token.type = TokenType::RANKED_PATTERN;
 		token.value = "RANKED_PATTERN";
-		token.raw = "RANKED_PATTERN";
+		token.raw += "RANKED_PATTERN";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "RANKED_NONLINEAR_PATTERN" ) ) {
 		token.type = TokenType::RANKED_NONLINEAR_PATTERN;
 		token.value = "RANKED_NONLINEAR_PATTERN";
-		token.raw = "RANKED_NONLINEAR_PATTERN";
+		token.raw += "RANKED_NONLINEAR_PATTERN";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "UNRANKED_TREE" ) ) {
 		token.type = TokenType::UNRANKED_TREE;
 		token.value = "UNRANKED_TREE";
-		token.raw = "UNRANKED_TREE";
+		token.raw += "UNRANKED_TREE";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "UNRANKED_PATTERN" ) ) {
 		token.type = TokenType::UNRANKED_PATTERN;
 		token.value = "UNRANKED_PATTERN";
-		token.raw = "UNRANKED_PATTERN";
+		token.raw += "UNRANKED_PATTERN";
 		return token;
 	} else if(in.clear(), in >> ext::string ( "UNRANKED_NONLINEAR_PATTERN" ) ) {
 		token.type = TokenType::UNRANKED_NONLINEAR_PATTERN;
 		token.value = "UNRANKED_NONLINEAR_PATTERN";
-		token.raw = "UNRANKED_NONLINEAR_PATTERN";
+		token.raw += "UNRANKED_NONLINEAR_PATTERN";
 		return token;
 	} else {
 		in.clear ( );
diff --git a/alib2str/test-src/automaton/AutomatonTest.cpp b/alib2str/test-src/automaton/AutomatonTest.cpp
index 0510c251d8..20bc1216ed 100644
--- a/alib2str/test-src/automaton/AutomatonTest.cpp
+++ b/alib2str/test-src/automaton/AutomatonTest.cpp
@@ -26,7 +26,8 @@ void AutomatonTest::tearDown() {
 
 void AutomatonTest::FSMStringParserTest() {
 	{
-		std::string input = 	"ENFA a b c d #E\n"
+		std::string input = 	"\n"
+					"ENFA a b c d #E\n"
 					">0 3|4 5 1|3|4 - 2\n"
 					"1 2 - - - -\n"
 					"2 3 - - - -\n"
-- 
GitLab