From 6ff915da4f52bd91e5d8ce9bdd9e632ca60cfb9f Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Mon, 21 Sep 2015 08:46:54 +0200 Subject: [PATCH] string parsing and composing of tree patterns --- alib2str/src/tree/TreeFromStringParser.cpp | 162 ++++++++++++++------- alib2str/src/tree/TreeFromStringParser.h | 4 +- alib2str/src/tree/TreeToStringComposer.cpp | 44 ++++++ alib2str/src/tree/TreeToStringComposer.h | 6 + alib2str/test-src/tree/TreeTest.cpp | 27 +++- 5 files changed, 186 insertions(+), 57 deletions(-) diff --git a/alib2str/src/tree/TreeFromStringParser.cpp b/alib2str/src/tree/TreeFromStringParser.cpp index 8b09091ac9..e4948f2ac4 100644 --- a/alib2str/src/tree/TreeFromStringParser.cpp +++ b/alib2str/src/tree/TreeFromStringParser.cpp @@ -10,94 +10,148 @@ #include "tree/TreeClasses.h" #include "../StringApi.hpp" +#include "alphabet/SubtreeWildcardSymbol.h" namespace tree { Tree TreeFromStringParser::parseTree ( std::istream & input ) const { - return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::UNRANKED_TREE } ) ); + return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::RANKED_PATTERN, FEATURES::UNRANKED_TREE, FEATURES::UNRANKED_PATTERN } ) ); } Tree TreeFromStringParser::parseTree ( std::istream & input, const std::set < FEATURES > & features ) const { - alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); + TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); /* look ahead: a leading SUBTREE_WILDCARD token means the whole input is a lone wildcard pattern */ - unsigned rank = 0; + if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) { + token = m_TreeLexer.next ( input ); - TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); + if ( token.type == TreeFromStringLexer::TokenType::BAR ) { + if ( ! features.count ( FEATURES::UNRANKED_PATTERN ) ) throw exception::AlibException ( "Invalid input" ); /* honour the requested feature set, as the non-wildcard branches do */ + alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); + return Tree ( UnrankedPattern ( subtreeWildcard, UnrankedNode ( subtreeWildcard, { } ) ) ); + } else { + m_TreeLexer.putback ( input, token ); /* the token after a ranked wildcard does not belong to this tree */ + if ( ! features.count ( FEATURES::RANKED_PATTERN ) ) throw exception::AlibException ( "Invalid input" ); + alphabet::RankedSymbol 
subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); + return Tree ( RankedPattern ( subtreeWildcard, RankedNode ( subtreeWildcard, { } ) ) ); + } + } else { + m_TreeLexer.putback ( input, token ); + alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); - if ( token.type == TreeFromStringLexer::TokenType::RANK ) { - rank = std::stou ( token.value ); + unsigned rank = 0; - std::vector < tree::RankedNode * > childs; + token = m_TreeLexer.next ( input ); - for ( unsigned i = 0; i < rank; i++ ) - childs.push_back ( parseRankedContent ( input ) ); + if ( token.type == TreeFromStringLexer::TokenType::RANK ) { + rank = std::stou ( token.value ); - RankedTree tree ( RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) ); + std::vector < tree::RankedNode * > childs; + bool isPattern = false; /* set to true by parseRankedContent when a wildcard leaf is read */ - if ( features.count ( FEATURES::RANKED_TREE ) ) return Tree { - tree - }; - } else { - std::vector < tree::UnrankedNode * > childs; + for ( unsigned i = 0; i < rank; i++ ) + childs.push_back ( parseRankedContent ( input, isPattern ) ); - while ( token.type != TreeFromStringLexer::TokenType::BAR ) { - m_TreeLexer.putback ( input, token ); - childs.push_back ( parseUnrankedContent ( input ) ); - rank++; - token = m_TreeLexer.next ( input ); - } + if ( isPattern ) { + alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); + RankedPattern tree ( std::move ( subtreeWildcard ), RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) ); - if ( token.type != TreeFromStringLexer::TokenType::BAR ) - throw exception::AlibException ( "Missing bar" ); + if ( features.count ( FEATURES::RANKED_PATTERN ) ) return Tree ( tree ); + } else { + RankedTree tree ( RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) ); - UnrankedTree tree ( UnrankedNode ( symbol, childs ) ); + if ( features.count ( 
FEATURES::RANKED_TREE ) ) return Tree ( tree ); + } + } else { + std::vector < tree::UnrankedNode * > childs; + bool isPattern = false; /* set to true by parseUnrankedContent when a wildcard leaf is read */ - if ( features.count ( FEATURES::UNRANKED_TREE ) ) return Tree { - tree - }; - } + while ( token.type != TreeFromStringLexer::TokenType::BAR ) { + m_TreeLexer.putback ( input, token ); + childs.push_back ( parseUnrankedContent ( input, isPattern ) ); + rank++; + token = m_TreeLexer.next ( input ); + } - throw exception::AlibException ( "Invalid input" ); -} + if ( token.type != TreeFromStringLexer::TokenType::BAR ) + throw exception::AlibException ( "Missing bar" ); -tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input ) const { - alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); + if ( isPattern ) { + alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); + UnrankedPattern tree ( std::move ( subtreeWildcard ), UnrankedNode ( symbol, childs ) ); - unsigned rank = 0; - std::vector < tree::RankedNode * > childs; + if ( features.count ( FEATURES::UNRANKED_PATTERN ) ) return Tree ( tree ); + } else { + UnrankedTree tree ( UnrankedNode ( symbol, childs ) ); + if ( features.count ( FEATURES::UNRANKED_TREE ) ) return Tree ( tree ); + } + } + + throw exception::AlibException ( "Invalid input" ); /* parsed shape is not in the requested feature set */ + } +} + +tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input, bool & isPattern ) const { /* parses one ranked subtree; sets isPattern when a subtree wildcard is read */ TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); - if ( token.type == TreeFromStringLexer::TokenType::RANK ) - rank = std::stou ( token.value ); - else - throw exception::AlibException ( "Missing rank" ); + if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) { + isPattern = true; + alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 ); + return new RankedNode ( std::move ( subtreeWildcard ), { } ); + } else { + m_TreeLexer.putback ( input, 
token ); + alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); - for ( unsigned i = 0; i < rank; i++ ) - childs.push_back ( parseRankedContent ( input ) ); + unsigned rank = 0; + std::vector < tree::RankedNode * > childs; - return new RankedNode ( alphabet::RankedSymbol ( std::move ( symbol ), rank ), std::move ( childs ) ); -} + token = m_TreeLexer.next ( input ); /* reuse the function-level token instead of shadowing it */ -tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input ) const { - alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input ); + if ( token.type == TreeFromStringLexer::TokenType::RANK ) + rank = std::stou ( token.value ); + else + throw exception::AlibException ( "Missing rank" ); - unsigned rank = 0; - std::vector < tree::UnrankedNode * > childs; + for ( unsigned i = 0; i < rank; i++ ) + childs.push_back ( parseRankedContent ( input, isPattern ) ); + return new RankedNode ( alphabet::RankedSymbol ( std::move ( symbol ), rank ), std::move ( childs ) ); + } +} + +tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input, bool & isPattern ) const { /* parses one bar-terminated unranked subtree; sets isPattern when a subtree wildcard is read */ TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); - while ( token.type != TreeFromStringLexer::TokenType::BAR ) { - m_TreeLexer.putback ( input, token ); - childs.push_back ( parseUnrankedContent ( input ) ); - rank++; + if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) { token = m_TreeLexer.next ( input ); - } - if ( token.type != TreeFromStringLexer::TokenType::BAR ) - throw exception::AlibException ( "Missing bar" ); + if ( token.type != TreeFromStringLexer::TokenType::BAR ) + throw exception::AlibException ( "Missing bar" ); + + isPattern = true; + alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ); + return new UnrankedNode ( std::move ( subtreeWildcard ), { } ); + } else { + m_TreeLexer.putback ( input, token ); + alphabet::Symbol symbol = 
alib::stringApi < alphabet::Symbol >::parse ( input ); + + unsigned rank = 0; + std::vector < tree::UnrankedNode * > childs; + + token = m_TreeLexer.next ( input ); /* reuse the function-level token instead of shadowing it */ - return new UnrankedNode ( std::move ( symbol ), std::move ( childs ) ); + while ( token.type != TreeFromStringLexer::TokenType::BAR ) { + m_TreeLexer.putback ( input, token ); + childs.push_back ( parseUnrankedContent ( input, isPattern ) ); + rank++; + token = m_TreeLexer.next ( input ); + } + + if ( token.type != TreeFromStringLexer::TokenType::BAR ) + throw exception::AlibException ( "Missing bar" ); + + return new UnrankedNode ( std::move ( symbol ), std::move ( childs ) ); + } } } /* namespace tree */ diff --git a/alib2str/src/tree/TreeFromStringParser.h b/alib2str/src/tree/TreeFromStringParser.h index 27c986a392..3de6888782 100644 --- a/alib2str/src/tree/TreeFromStringParser.h +++ b/alib2str/src/tree/TreeFromStringParser.h @@ -27,8 +27,8 @@ public: TreeFromStringParser ( ) { } private: - tree::RankedNode * parseRankedContent ( std::istream & ) const; - tree::UnrankedNode * parseUnrankedContent ( std::istream & ) const; + tree::RankedNode * parseRankedContent ( std::istream &, bool & ) const; + tree::UnrankedNode * parseUnrankedContent ( std::istream &, bool & ) const; TreeFromStringLexer m_TreeLexer; diff --git a/alib2str/src/tree/TreeToStringComposer.cpp b/alib2str/src/tree/TreeToStringComposer.cpp index 382a732e04..d76d4651f2 100644 --- a/alib2str/src/tree/TreeToStringComposer.cpp +++ b/alib2str/src/tree/TreeToStringComposer.cpp @@ -7,7 +7,9 @@ #include "TreeToStringComposer.h" #include <tree/ranked/RankedTree.h> +#include <tree/ranked/RankedPattern.h> #include <tree/unranked/UnrankedTree.h> +#include <tree/unranked/UnrankedPattern.h> #include "../StringApi.hpp" @@ -30,6 +32,27 @@ void TreeToStringComposer::compose ( std::ostream & out, const RankedNode & node } } +void TreeToStringComposer::compose ( std::ostream & out, const RankedPattern & tree ) { + compose ( 
out, tree.getSubtreeWildcard ( ), tree.getRoot ( ) ); +} + +TreeToStringComposer::RegistratorWrapper < void, RankedPattern > StringToStringComposerRankedPattern = TreeToStringComposer::RegistratorWrapper < void, RankedPattern > ( TreeToStringComposer::getInstance ( ), TreeToStringComposer::compose ); + +void TreeToStringComposer::compose ( std::ostream & out, const alphabet::RankedSymbol & subtreeWildcard, const RankedNode & node ) { /* writes symbol, rank, then children; a node equal to subtreeWildcard is written as "#S" */ + if ( node.getSymbol ( ) == subtreeWildcard ) { + out << "#S"; + } else { + alib::stringApi < alphabet::Symbol >::compose ( out, node.getSymbol ( ).getSymbol ( ) ); + + out << std::utos ( node.getSymbol ( ).getRank ( ).getData ( ) ); + + for ( const RankedNode * child : node.getChildren ( ) ) { + out << " "; + compose ( out, subtreeWildcard, * child ); + } + } +} + void TreeToStringComposer::compose ( std::ostream & out, const UnrankedTree & tree ) { compose ( out, tree.getRoot ( ) ); } @@ -47,6 +70,27 @@ void TreeToStringComposer::compose ( std::ostream & out, const UnrankedNode & no out << " |"; } +void TreeToStringComposer::compose ( std::ostream & out, const UnrankedPattern & tree ) { + compose ( out, tree.getSubtreeWildcard ( ), tree.getRoot ( ) ); +} + +TreeToStringComposer::RegistratorWrapper < void, UnrankedPattern > StringToStringComposerUnrankedPattern = TreeToStringComposer::RegistratorWrapper < void, UnrankedPattern > ( TreeToStringComposer::getInstance ( ), TreeToStringComposer::compose ); + +void TreeToStringComposer::compose ( std::ostream & out, const alphabet::Symbol & subtreeWildcard, const UnrankedNode & node ) { /* writes symbol, children, then " |"; a node equal to subtreeWildcard is written as "#S |" */ + if ( node.getSymbol ( ) == subtreeWildcard ) { + out << "#S |"; + } else { + alib::stringApi < alphabet::Symbol >::compose ( out, node.getSymbol ( ) ); + + for ( const UnrankedNode * child : node.getChildren ( ) ) { + out << " "; + compose ( out, subtreeWildcard, * child ); + } + + out << " |"; + } +} + void TreeToStringComposer::compose ( std::ostream & out, const Tree & tree ) { getInstance ( ).dispatch ( 
out, tree.getData ( ) ); } diff --git a/alib2str/src/tree/TreeToStringComposer.h b/alib2str/src/tree/TreeToStringComposer.h index 12cc21ab18..5a61cd5405 100644 --- a/alib2str/src/tree/TreeToStringComposer.h +++ b/alib2str/src/tree/TreeToStringComposer.h @@ -13,6 +13,8 @@ #include "tree/Tree.h" #include "tree/TreeFeatures.h" +#include "alphabet/RankedSymbol.h" + namespace tree { /** @@ -20,11 +22,15 @@ namespace tree { */ class TreeToStringComposer : public std::SingleDispatchFirstStaticParam < void, std::ostream &, TreeBase > { static void compose ( std::ostream &, const RankedNode & tree ); + static void compose ( std::ostream &, const alphabet::RankedSymbol & subtreeWildcard, const RankedNode & tree ); static void compose ( std::ostream &, const UnrankedNode & tree ); + static void compose ( std::ostream &, const alphabet::Symbol & subtreeWildcard, const UnrankedNode & tree ); public: static void compose ( std::ostream &, const RankedTree & tree ); + static void compose ( std::ostream &, const RankedPattern & tree ); static void compose ( std::ostream &, const UnrankedTree & tree ); + static void compose ( std::ostream &, const UnrankedPattern & tree ); /** * Prints XML representation of String to the output stream. 
diff --git a/alib2str/test-src/tree/TreeTest.cpp b/alib2str/test-src/tree/TreeTest.cpp index 837318fe07..cc0ac48464 100644 --- a/alib2str/test-src/tree/TreeTest.cpp +++ b/alib2str/test-src/tree/TreeTest.cpp @@ -60,7 +60,6 @@ void TreeTest::testEqual ( ) { CPPUNIT_ASSERT ( tree == tree2 ); } - { std::string input = "a a | a a | | |"; tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input ); @@ -71,6 +70,32 @@ void TreeTest::testEqual ( ) { tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output ); + CPPUNIT_ASSERT ( tree == tree2 ); + } + { + std::string input = "a2 #S a1 a0"; + tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input ); + + std::string output = alib::StringDataFactory::toString ( tree ); + + std::cout << output << std::endl; + CPPUNIT_ASSERT ( input == output ); + + tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output ); + + CPPUNIT_ASSERT ( tree == tree2 ); + } + { + std::string input = "a #S | a a | | |"; + tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input ); + + std::string output = alib::StringDataFactory::toString ( tree ); + + std::cout << output << std::endl; + CPPUNIT_ASSERT ( input == output ); + + tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output ); + CPPUNIT_ASSERT ( tree == tree2 ); } } -- GitLab