From 92da37c65997dd6daebda41629dbe17d5b85f337 Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Thu, 25 Oct 2018 12:29:50 +0200 Subject: [PATCH] fix automata from string recognition with front newlines --- .../automaton/AutomatonFromStringLexer.cpp | 8 +++--- alib2str/src/automaton/string/FSM/DFA.h | 5 ++++ .../src/automaton/string/FSM/EpsilonNFA.h | 5 ++++ .../string/FSM/MultiInitialStateNFA.h | 5 ++++ alib2str/src/automaton/string/FSM/NFA.h | 7 ++++- alib2str/src/core/stringApi.hpp | 4 ++- .../src/grammar/GrammarFromStringLexer.cpp | 26 +++++++++---------- .../src/registry/StringReaderRegistry.cpp | 7 +++++ alib2str/src/tree/TreeFromStringLexer.cpp | 12 ++++----- alib2str/test-src/automaton/AutomatonTest.cpp | 3 ++- 10 files changed, 56 insertions(+), 26 deletions(-) diff --git a/alib2str/src/automaton/AutomatonFromStringLexer.cpp b/alib2str/src/automaton/AutomatonFromStringLexer.cpp index 2ba51cc9cd..58bc59eed4 100644 --- a/alib2str/src/automaton/AutomatonFromStringLexer.cpp +++ b/alib2str/src/automaton/AutomatonFromStringLexer.cpp @@ -63,22 +63,22 @@ L0: } else if(in.clear(), in.unget(), in >> ext::string ( "MISNFA" ) ) { token.type = TokenType::MULTI_INITIAL_STATE_NFA; token.value = "MISNFA"; - token.raw = "MISNFA"; + token.raw += "MISNFA"; return token; } else if(in.clear(), in >> ext::string ( "ENFA" ) ) { token.type = TokenType::EPSILON_NFA; token.value = "ENFA"; - token.raw = "ENFA"; + token.raw += "ENFA"; return token; } else if(in.clear(), in >> ext::string ( "NFA" ) ) { token.type = TokenType::NFA; token.value = "NFA"; - token.raw = "NFA"; + token.raw += "NFA"; return token; } else if(in.clear(), in >> ext::string ( "DFA" ) ) { token.type = TokenType::DFA; token.value = "DFA"; - token.raw = "DFA"; + token.raw += "DFA"; return token; } else { in.clear(); diff --git a/alib2str/src/automaton/string/FSM/DFA.h b/alib2str/src/automaton/string/FSM/DFA.h index 88f4a60f5c..0a7ac71448 100644 --- a/alib2str/src/automaton/string/FSM/DFA.h +++ b/alib2str/src/automaton/string/FSM/DFA.h @@ -30,6 +30,11 @@ private: template<class SymbolType, class StateType > automaton::DFA < SymbolType, StateType > stringApi < automaton::DFA < SymbolType, StateType > >::parse ( std::istream & input ) { automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input); + + while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) { + token = automaton::AutomatonFromStringLexer::next(input); + } + if(token.type != automaton::AutomatonFromStringLexer::TokenType::DFA) throw exception::CommonException("Unrecognised DFA token."); diff --git a/alib2str/src/automaton/string/FSM/EpsilonNFA.h b/alib2str/src/automaton/string/FSM/EpsilonNFA.h index 892e66cc28..1d0d066e98 100644 --- a/alib2str/src/automaton/string/FSM/EpsilonNFA.h +++ b/alib2str/src/automaton/string/FSM/EpsilonNFA.h @@ -30,6 +30,11 @@ private: template<class SymbolType, class EpsilonType, class StateType > automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > stringApi < automaton::EpsilonNFA < SymbolType, EpsilonType, StateType > >::parse ( std::istream & input ) { automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input); + + while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) { + token = automaton::AutomatonFromStringLexer::next(input); + } + if(token.type != automaton::AutomatonFromStringLexer::TokenType::EPSILON_NFA) { throw exception::CommonException("Unrecognised ENFA token."); } diff --git a/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h b/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h index cdd808d977..c20df2e09b 100644 --- a/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h +++ b/alib2str/src/automaton/string/FSM/MultiInitialStateNFA.h @@ -32,6 +32,11 @@ automaton::MultiInitialStateNFA < SymbolType, StateType > stringApi < automaton: automaton::MultiInitialStateNFA < > res; automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input); + + while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) { + token = automaton::AutomatonFromStringLexer::next(input); + } + if(token.type != automaton::AutomatonFromStringLexer::TokenType::MULTI_INITIAL_STATE_NFA) { throw exception::CommonException("Unrecognised MISNFA token."); } diff --git a/alib2str/src/automaton/string/FSM/NFA.h b/alib2str/src/automaton/string/FSM/NFA.h index 125bbc4769..c72ed22b0b 100644 --- a/alib2str/src/automaton/string/FSM/NFA.h +++ b/alib2str/src/automaton/string/FSM/NFA.h @@ -30,8 +30,13 @@ private: template<class SymbolType, class StateType > automaton::NFA < SymbolType, StateType > stringApi < automaton::NFA < SymbolType, StateType > >::parse ( std::istream & input ) { automaton::AutomatonFromStringLexer::Token token = automaton::AutomatonFromStringLexer::next(input); + + while(token.type == automaton::AutomatonFromStringLexer::TokenType::NEW_LINE) { + token = automaton::AutomatonFromStringLexer::next(input); + } + if(token.type != automaton::AutomatonFromStringLexer::TokenType::NFA) { - throw exception::CommonException("Unrecognised NFA token."); + throw exception::CommonException("Unrecognised NFA token." + ext::to_string((int)token.type)); } ext::vector < SymbolType > symbols; diff --git a/alib2str/src/core/stringApi.hpp b/alib2str/src/core/stringApi.hpp index fbe0c8e9ec..e3a74a949c 100644 --- a/alib2str/src/core/stringApi.hpp +++ b/alib2str/src/core/stringApi.hpp @@ -96,8 +96,10 @@ public: } static object::Object parse ( std::istream & input ) { - auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < GroupReader > > & entry ) { + while ( isspace ( input.peek ( ) ) ) + input.get ( ); + auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < GroupReader > > & entry ) { return entry.first ( input ); }; diff --git a/alib2str/src/grammar/GrammarFromStringLexer.cpp b/alib2str/src/grammar/GrammarFromStringLexer.cpp index dd8395f0c6..7b3a63cffc 100644 --- a/alib2str/src/grammar/GrammarFromStringLexer.cpp +++ b/alib2str/src/grammar/GrammarFromStringLexer.cpp @@ -65,67 +65,67 @@ L0: } else if(in.clear (), in.unget(), in >> ext::string ( "RIGHT_RG" ) ) { token.type = TokenType::RIGHT_RG; token.value = "RIGHT_RG"; - token.raw = "RIGHT_RG"; + token.raw += "RIGHT_RG"; return token; } else if(in.clear(), in >> ext::string ( "LEFT_RG" ) ) { token.type = TokenType::LEFT_RG; token.value = "LEFT_RG"; - token.raw = "LEFT_RG"; + token.raw += "LEFT_RG"; return token; } else if(in.clear(), in >> ext::string ( "RIGHT_LG" ) ) { token.type = TokenType::RIGHT_LG; token.value = "RIGHT_LG"; - token.raw = "RIGHT_LG"; + token.raw += "RIGHT_LG"; return token; } else if(in.clear(), in >> ext::string ( "LEFT_LG" ) ) { token.type = TokenType::LEFT_LG; token.value = "LEFT_LG"; - token.raw = "LEFT_LG"; + token.raw += "LEFT_LG"; return token; } else if(in.clear(), in >> ext::string ( "LG" ) ) { token.type = TokenType::LG; token.value = "LG"; - token.raw = "LG"; + token.raw += "LG"; return token; } else if(in.clear(), in >> ext::string ( "CFG" ) ) { token.type = TokenType::CFG; token.value = "CFG"; - token.raw = "CFG"; + token.raw += "CFG"; return token; } else if(in.clear(), in >> ext::string ( "EPSILON_FREE_CFG" ) ) { token.type = TokenType::EPSILON_FREE_CFG; token.value = "EPSILON_FREE_CFG"; - token.raw = "EPSILON_FREE_CFG"; + token.raw += "EPSILON_FREE_CFG"; return token; } else if(in.clear(), in >> ext::string ( "GNF" ) ) { token.type = TokenType::GNF; token.value = "GNF"; - token.raw = "GNF"; + token.raw += "GNF"; return token; } else if(in.clear(), in >> ext::string ( "CNF" ) ) { token.type = TokenType::CNF; token.value = "CNF"; - token.raw = "CNF"; + token.raw += "CNF"; return token; } else if(in.clear(), in >> ext::string ( "CSG" ) ) { token.type = TokenType::CSG; token.value = "CSG"; - token.raw = "CSG"; + token.raw += "CSG"; return token; } else if(in.clear(), in >> ext::string ( "NON_CONTRACTING_GRAMMAR" ) ) { token.type = TokenType::NON_CONTRACTING_GRAMMAR; token.value = "NON_CONTRACTING_GRAMMAR"; - token.raw = "NON_CONTRACTING_GRAMMAR"; + token.raw += "NON_CONTRACTING_GRAMMAR"; return token; } else if(in.clear(), in >> ext::string ( "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR" ) ) { token.type = TokenType::CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR; token.value = "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR"; - token.raw = "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR"; + token.raw += "CONTEXT_PRESERVING_UNRESTRICTED_GRAMMAR"; return token; } else if(in.clear(), in >> ext::string ( "UNRESTRICTED_GRAMMAR" ) ) { token.type = TokenType::UNRESTRICTED_GRAMMAR; token.value = "UNRESTRICTED_GRAMMAR"; - token.raw = "UNRESTRICTED_GRAMMAR"; + token.raw += "UNRESTRICTED_GRAMMAR"; return token; } else { in.clear(); diff --git a/alib2str/src/registry/StringReaderRegistry.cpp b/alib2str/src/registry/StringReaderRegistry.cpp index 5946424596..79018fccb9 100644 --- a/alib2str/src/registry/StringReaderRegistry.cpp +++ b/alib2str/src/registry/StringReaderRegistry.cpp @@ -11,6 +11,8 @@ namespace abstraction { std::shared_ptr < abstraction::OperationAbstraction > StringReaderRegistry::getAbstraction ( const std::string & group, const std::string & str ) { std::stringstream ss ( str ); + while ( isspace ( ss.peek ( ) ) ) + ss.get ( ); auto lambda = [ & ] ( const std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < Entry > > & entry ) { return entry.first ( ss ); @@ -22,10 +24,15 @@ std::shared_ptr < abstraction::OperationAbstraction > StringReaderRegistry::getA const auto & entries = entryIterator->second; + int pos = ss.tellg(); + typename ext::deque < std::pair < std::function < bool ( std::istream & ) >, std::unique_ptr < Entry > > >::const_iterator callback = find_if ( entries.begin ( ), entries.end ( ), lambda ); if ( callback == entries.end ( ) ) throw exception::CommonException ( "No callback handling input found." ); + if ( pos != ss.tellg ( ) ) + throw exception::CommonException ( "First function of registered callback moved the stream." ); + return callback->second->getAbstraction ( ); } diff --git a/alib2str/src/tree/TreeFromStringLexer.cpp b/alib2str/src/tree/TreeFromStringLexer.cpp index 8cae1beeb6..90bb022d4d 100644 --- a/alib2str/src/tree/TreeFromStringLexer.cpp +++ b/alib2str/src/tree/TreeFromStringLexer.cpp @@ -45,32 +45,32 @@ L0: } else if(in.clear (), in.unget(), in >> ext::string ( "RANKED_TREE" ) ) { token.type = TokenType::RANKED_TREE; token.value = "RANKED_TREE"; - token.raw = "RANKED_TREE"; + token.raw += "RANKED_TREE"; return token; } else if(in.clear(), in >> ext::string ( "RANKED_PATTERN" ) ) { token.type = TokenType::RANKED_PATTERN; token.value = "RANKED_PATTERN"; - token.raw = "RANKED_PATTERN"; + token.raw += "RANKED_PATTERN"; return token; } else if(in.clear(), in >> ext::string ( "RANKED_NONLINEAR_PATTERN" ) ) { token.type = TokenType::RANKED_NONLINEAR_PATTERN; token.value = "RANKED_NONLINEAR_PATTERN"; - token.raw = "RANKED_NONLINEAR_PATTERN"; + token.raw += "RANKED_NONLINEAR_PATTERN"; return token; } else if(in.clear(), in >> ext::string ( "UNRANKED_TREE" ) ) { token.type = TokenType::UNRANKED_TREE; token.value = "UNRANKED_TREE"; - token.raw = "UNRANKED_TREE"; + token.raw += "UNRANKED_TREE"; return token; } else if(in.clear(), in >> ext::string ( "UNRANKED_PATTERN" ) ) { token.type = TokenType::UNRANKED_PATTERN; token.value = "UNRANKED_PATTERN"; - token.raw = "UNRANKED_PATTERN"; + token.raw += "UNRANKED_PATTERN"; return token; } else if(in.clear(), in >> ext::string ( "UNRANKED_NONLINEAR_PATTERN" ) ) { token.type = TokenType::UNRANKED_NONLINEAR_PATTERN; token.value = "UNRANKED_NONLINEAR_PATTERN"; - token.raw = "UNRANKED_NONLINEAR_PATTERN"; + token.raw += "UNRANKED_NONLINEAR_PATTERN"; return token; } else { in.clear ( ); diff --git a/alib2str/test-src/automaton/AutomatonTest.cpp b/alib2str/test-src/automaton/AutomatonTest.cpp index 0510c251d8..20bc1216ed 100644 --- a/alib2str/test-src/automaton/AutomatonTest.cpp +++ b/alib2str/test-src/automaton/AutomatonTest.cpp @@ -26,7 +26,8 @@ void AutomatonTest::tearDown() { void AutomatonTest::FSMStringParserTest() { { - std::string input = "ENFA a b c d #E\n" + std::string input = "\n" + "ENFA a b c d #E\n" ">0 3|4 5 1|3|4 - 2\n" "1 2 - - - -\n" "2 3 - - - -\n" -- GitLab