diff --git a/alib2data/src/tree/TreeFeatures.h b/alib2data/src/tree/TreeFeatures.h index 8c7e94ac7b63080d5fee745db896723d7699d87f..60737ab7186a1c9f31755e9aa95b813464921081 100644 --- a/alib2data/src/tree/TreeFeatures.h +++ b/alib2data/src/tree/TreeFeatures.h @@ -13,12 +13,14 @@ namespace tree { enum class FEATURES { RANKED_TREE, RANKED_PATTERN, + RANKED_NONLINEAR_PATTERN, PREFIX_RANKED_TREE, PREFIX_RANKED_BAR_TREE, PREFIX_RANKED_PATTERN, PREFIX_RANKED_BAR_PATTERN, UNRANKED_TREE, - UNRANKED_PATTERN + UNRANKED_PATTERN, + UNRANKED_NONLINEAR_PATTERN }; class Tree; diff --git a/alib2str/src/tree/TreeFromStringLexer.cpp b/alib2str/src/tree/TreeFromStringLexer.cpp index 8a4fab3c1ffedfc35fbfd44f59ccc86e713f8d1d..56e994fdf0637020edffeb2cef28467f5b170c66 100644 --- a/alib2str/src/tree/TreeFromStringLexer.cpp +++ b/alib2str/src/tree/TreeFromStringLexer.cpp @@ -12,7 +12,7 @@ namespace tree { TreeFromStringLexer::Token TreeFromStringLexer::next ( std::istream & in ) const { TreeFromStringLexer::Token token; - token.type = TokenType::ERROR; + token.type = TokenType::ERROR; token.value = ""; token.raw = ""; char character; @@ -29,13 +29,16 @@ L0: character = in.get ( ); token.value += character; token.raw += character; goto L1; + } else if ( character == '$' ) { + token.raw += character; + goto L3; } else if ( character == '|' ) { - token.type = TokenType::BAR; + token.type = TokenType::BAR; token.value += character; token.raw += character; return token; } else if ( ( character >= '0' ) && ( character <= '9' ) ) { - token.type = TokenType::RANK; + token.type = TokenType::RANK; token.value += character; token.raw += character; goto L2; @@ -53,7 +56,7 @@ L1: character = in.get ( ); token.type = TokenType::TEOF; return token; } else if ( character == 'S' ) { - token.type = TokenType::SUBTREE_WILDCARD; + token.type = TokenType::SUBTREE_WILDCARD; token.value += character; token.raw += character; return token; @@ -76,6 +79,24 @@ L2: character = in.get ( ); in.unget ( ); return token; } 
+ +L3: character = in.get ( ); + + if ( in.eof ( ) ) { + token.type = TokenType::TEOF; + return token; + } else if ( character >= 'A' && character <= 'Z' ) { + token.type = TokenType::NONLINEAR_VARIABLE; + token.value += character; + token.raw += character; + return token; + } else { + in.putback ( character ); + putback ( in, std::move ( token ) ); + token.type = TokenType::ERROR; + return token; + } + } void TreeFromStringLexer::putback ( std::istream & in, TreeFromStringLexer::Token token ) const { diff --git a/alib2str/src/tree/TreeFromStringLexer.h b/alib2str/src/tree/TreeFromStringLexer.h index 59990027faaf91c3458d2d69113bb19ef42d6f4f..2740ba6c2acb69284205e31fa1622ffb1b3900e2 100644 --- a/alib2str/src/tree/TreeFromStringLexer.h +++ b/alib2str/src/tree/TreeFromStringLexer.h @@ -16,7 +16,7 @@ namespace tree { class TreeFromStringLexer { public: enum class TokenType { - BAR, RANK, SUBTREE_WILDCARD, TEOF, ERROR + BAR, RANK, SUBTREE_WILDCARD, NONLINEAR_VARIABLE, TEOF, ERROR }; struct Token { diff --git a/alib2str/src/tree/TreeFromStringParser.cpp b/alib2str/src/tree/TreeFromStringParser.cpp index e4948f2ac42d61d8172b9bca12a32418d2c8c8be..76908ef6d5896f2b820db0b965ccda1ed5c3b808 100644 --- a/alib2str/src/tree/TreeFromStringParser.cpp +++ b/alib2str/src/tree/TreeFromStringParser.cpp @@ -11,11 +11,12 @@ #include "../StringApi.hpp" #include "alphabet/SubtreeWildcardSymbol.h" +#include "alphabet/NonlinearVariableSymbol.h" namespace tree { Tree TreeFromStringParser::parseTree ( std::istream & input ) const { - return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::RANKED_PATTERN, FEATURES::UNRANKED_TREE, FEATURES::UNRANKED_PATTERN } ) ); + return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::RANKED_PATTERN, FEATURES::RANKED_NONLINEAR_PATTERN, FEATURES::UNRANKED_TREE, FEATURES::UNRANKED_PATTERN, FEATURES::UNRANKED_NONLINEAR_PATTERN } ) ); } Tree TreeFromStringParser::parseTree ( std::istream & input, const 
std::set < FEATURES > & features ) const { @@ -31,6 +32,18 @@ Tree TreeFromStringParser::parseTree ( std::istream & input, const std::set < FE alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); return Tree ( RankedPattern ( subtreeWildcard, RankedNode ( subtreeWildcard, { } ) ) ); } + } else if ( token.type == TreeFromStringLexer::TokenType::NONLINEAR_VARIABLE ) { + /* A lone nonlinear variable: a following bar selects the unranked form, anything else the ranked form. The lookahead goes into its own token so that token.value still holds the variable name. */ + TreeFromStringLexer::Token lookahead = m_TreeLexer.next ( input ); + if ( lookahead.type == TreeFromStringLexer::TokenType::BAR ) { + alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); + alphabet::Symbol nonlinearVariable ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( token.value ) ) ); + return Tree ( UnrankedNonlinearPattern ( subtreeWildcard, { nonlinearVariable }, UnrankedNode ( nonlinearVariable, { } ) ) ); /* content is the variable itself, as in the ranked branch below */ + } else { /* NOTE(review): the lookahead token is consumed here and not put back - confirm this is intended */ + alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); + alphabet::RankedSymbol nonlinearVariable ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( token.value ) ) ), 0 ); + return Tree ( RankedNonlinearPattern ( subtreeWildcard, { nonlinearVariable }, RankedNode ( nonlinearVariable, { } ) ) ); + } } else { m_TreeLexer.putback ( input, token ); alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); @@ -40,15 +53,22 @@ Tree TreeFromStringParser::parseTree ( std::istream & input, const std::set < FE token = m_TreeLexer.next ( input ); if ( token.type == TreeFromStringLexer::TokenType::RANK ) { + std::set<alphabet::RankedSymbol> nonlinearVariables; + rank = std::stou ( token.value ); std::vector < tree::RankedNode * > childs; bool isPattern = false; for ( unsigned i = 0; i < rank; i++ ) - childs.push_back ( parseRankedContent ( input, isPattern ) ); + childs.push_back ( parseRankedContent ( input, isPattern, nonlinearVariables ) ); - if ( isPattern ) { + if ( isPattern && 
nonlinearVariables.size ( ) ) { + alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); + RankedNonlinearPattern tree ( std::move ( subtreeWildcard ), nonlinearVariables, RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) ); + + if ( features.count ( FEATURES::RANKED_NONLINEAR_PATTERN ) ) return Tree ( tree ); + } else if ( isPattern ) { alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); RankedPattern tree ( std::move ( subtreeWildcard ), RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) ); @@ -59,20 +79,26 @@ Tree TreeFromStringParser::parseTree ( std::istream & input, const std::set < FE if ( features.count ( FEATURES::RANKED_TREE ) ) return Tree ( tree ); } } else { + std::set<alphabet::Symbol> nonlinearVariables; + std::vector < tree::UnrankedNode * > childs; bool isPattern = false; while ( token.type != TreeFromStringLexer::TokenType::BAR ) { m_TreeLexer.putback ( input, token ); - childs.push_back ( parseUnrankedContent ( input, isPattern ) ); - rank++; + childs.push_back ( parseUnrankedContent ( input, isPattern, nonlinearVariables ) ); token = m_TreeLexer.next ( input ); } if ( token.type != TreeFromStringLexer::TokenType::BAR ) throw exception::AlibException ( "Missing bar" ); - if ( isPattern ) { + if ( isPattern && nonlinearVariables.size ( ) ) { + alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); + UnrankedNonlinearPattern tree ( std::move ( subtreeWildcard ), nonlinearVariables, UnrankedNode ( symbol, childs ) ); + + if ( features.count ( FEATURES::UNRANKED_NONLINEAR_PATTERN ) ) return Tree ( tree ); + } else if ( isPattern ) { alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); UnrankedPattern tree ( std::move ( subtreeWildcard ), UnrankedNode ( symbol, childs ) ); @@ -88,13 +114,18 @@ Tree 
TreeFromStringParser::parseTree ( std::istream & input, const std::set < FE } } -tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input, bool & isPattern ) const { +tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input, bool & isPattern, std::set<alphabet::RankedSymbol> & nonlinearVariables ) const { TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) { isPattern = true; alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); return new RankedNode ( std::move ( subtreeWildcard ), { } ); + } else if ( token.type == TreeFromStringLexer::TokenType::NONLINEAR_VARIABLE ) { + isPattern = true; + alphabet::RankedSymbol nonlinearVariable ( alphabet::Symbol ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( token.value ) ) ), 0 ); + nonlinearVariables.insert(nonlinearVariable); + return new RankedNode ( std::move ( nonlinearVariable ), { } ); } else { m_TreeLexer.putback ( input, token ); alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); @@ -110,13 +141,13 @@ tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & inp throw exception::AlibException ( "Missing rank" ); for ( unsigned i = 0; i < rank; i++ ) - childs.push_back ( parseRankedContent ( input, isPattern ) ); + childs.push_back ( parseRankedContent ( input, isPattern, nonlinearVariables ) ); return new RankedNode ( alphabet::RankedSymbol ( std::move ( symbol ), rank ), std::move ( childs ) ); } } -tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input, bool & isPattern ) const { +tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input, bool & isPattern, std::set<alphabet::Symbol>& nonlinearVariables ) const { TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); if ( token.type == 
TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) { @@ -128,19 +159,27 @@ tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & isPattern = true; alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); return new UnrankedNode ( std::move ( subtreeWildcard ), { } ); + } else if ( token.type == TreeFromStringLexer::TokenType::NONLINEAR_VARIABLE ) { + /* The variable must be closed by a bar. The bar is read into its own token so that token.value still holds the variable name when the symbol is built. */ + TreeFromStringLexer::Token lookahead = m_TreeLexer.next ( input ); + if ( lookahead.type != TreeFromStringLexer::TokenType::BAR ) + throw exception::AlibException ( "Missing bar" ); + + isPattern = true; + alphabet::Symbol nonlinearVariable ( alphabet::NonlinearVariableSymbol ( alphabet::symbolFrom ( token.value ) ) ); + nonlinearVariables.insert(nonlinearVariable); + return new UnrankedNode ( std::move ( nonlinearVariable ), { } ); } else { m_TreeLexer.putback ( input, token ); alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); - unsigned rank = 0; std::vector < tree::UnrankedNode * > childs; TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); while ( token.type != TreeFromStringLexer::TokenType::BAR ) { m_TreeLexer.putback ( input, token ); - childs.push_back ( parseUnrankedContent ( input, isPattern ) ); - rank++; + childs.push_back ( parseUnrankedContent ( input, isPattern, nonlinearVariables ) ); token = m_TreeLexer.next ( input ); } diff --git a/alib2str/src/tree/TreeFromStringParser.h b/alib2str/src/tree/TreeFromStringParser.h index 3de6888782c1112631f9592ca80ef89e76c26603..76585fea079438232a902aa4f0fe379ede45f5cf 100644 --- a/alib2str/src/tree/TreeFromStringParser.h +++ b/alib2str/src/tree/TreeFromStringParser.h @@ -12,6 +12,8 @@ #include "TreeFromStringLexer.h" #include <set> +#include <alphabet/Symbol.h> +#include <alphabet/RankedSymbol.h> namespace alib { @@ -27,8 +29,8 @@ public: TreeFromStringParser ( ) { } private: - tree::RankedNode * parseRankedContent ( std::istream &, bool & ) const; - tree::UnrankedNode * parseUnrankedContent ( 
std::istream &, bool & ) const; + tree::RankedNode * parseRankedContent ( std::istream &, bool &, std::set<alphabet::RankedSymbol> & ) const; + tree::UnrankedNode * parseUnrankedContent ( std::istream &, bool &, std::set<alphabet::Symbol>& ) const; TreeFromStringLexer m_TreeLexer;