From 6ff915da4f52bd91e5d8ce9bdd9e632ca60cfb9f Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Mon, 21 Sep 2015 08:46:54 +0200
Subject: [PATCH] string parsing and composing of tree patterns

---
 alib2str/src/tree/TreeFromStringParser.cpp | 159 ++++++++++++++-------
 alib2str/src/tree/TreeFromStringParser.h   |   4 +-
 alib2str/src/tree/TreeToStringComposer.cpp |  44 ++++++
 alib2str/src/tree/TreeToStringComposer.h   |   6 +
 alib2str/test-src/tree/TreeTest.cpp        |  27 +++-
 5 files changed, 183 insertions(+), 57 deletions(-)

diff --git a/alib2str/src/tree/TreeFromStringParser.cpp b/alib2str/src/tree/TreeFromStringParser.cpp
index 8b09091ac9..e4948f2ac4 100644
--- a/alib2str/src/tree/TreeFromStringParser.cpp
+++ b/alib2str/src/tree/TreeFromStringParser.cpp
@@ -10,94 +10,145 @@
 #include "tree/TreeClasses.h"
 
 #include "../StringApi.hpp"
+#include "alphabet/SubtreeWildcardSymbol.h"
 
 namespace tree {
 
 Tree TreeFromStringParser::parseTree ( std::istream & input ) const {
-	return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::UNRANKED_TREE } ) );
+	return parseTree ( input, std::set < FEATURES > ( { FEATURES::RANKED_TREE, FEATURES::RANKED_PATTERN, FEATURES::UNRANKED_TREE, FEATURES::UNRANKED_PATTERN } ) ); // unrestricted parse: all four kinds the feature-taking overload can return are allowed
 }
 
 Tree TreeFromStringParser::parseTree ( std::istream & input, const std::set < FEATURES > & features ) const {
-	alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
+	TreeFromStringLexer::Token token = m_TreeLexer.next ( input ); // first token decides: bare subtree wildcard or symbol-rooted tree; throws AlibException when the parsed kind is not in features
 
-	unsigned rank = 0;
+	if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) {
+		token = m_TreeLexer.next ( input ); // lookahead: a bar right after the wildcard selects the unranked notation
 
-	TreeFromStringLexer::Token token = m_TreeLexer.next ( input );
+		const bool unranked = token.type == TreeFromStringLexer::TokenType::BAR;
+		if ( ! unranked ) m_TreeLexer.putback ( input, token ); // the lookahead is not part of a ranked wildcard, do not swallow it
+		if ( ! features.count ( unranked ? FEATURES::UNRANKED_PATTERN : FEATURES::RANKED_PATTERN ) ) throw exception::AlibException ( "Invalid input" ); // honour the caller's filter like the symbol-rooted branches do
+		alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD );
+		if ( unranked ) return Tree ( UnrankedPattern ( subtreeWildcard, UnrankedNode ( subtreeWildcard, { } ) ) );
+		alphabet::RankedSymbol rankedWildcard ( std::move ( subtreeWildcard ), 0 );
+		return Tree ( RankedPattern ( rankedWildcard, RankedNode ( rankedWildcard, { } ) ) );
+	} else {
+		m_TreeLexer.putback ( input, token ); // not a wildcard: hand the token back so the symbol parser reads it
+		alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
 
-	if ( token.type == TreeFromStringLexer::TokenType::RANK ) {
-		rank = std::stou ( token.value );
+		unsigned rank = 0;
 
-		std::vector < tree::RankedNode * > childs;
+		token = m_TreeLexer.next ( input ); // a RANK token means ranked notation, anything else is treated as unranked
 
-		for ( unsigned i = 0; i < rank; i++ )
-			childs.push_back ( parseRankedContent ( input ) );
+		if ( token.type == TreeFromStringLexer::TokenType::RANK ) {
+			rank = std::stou ( token.value );
 
-		RankedTree tree ( RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) );
+			std::vector < tree::RankedNode * > childs;
+			bool isPattern = false; // set by parseRankedContent when any descendant is a subtree wildcard
 
-		if ( features.count ( FEATURES::RANKED_TREE ) ) return Tree {
-					   tree
-			};
-	} else {
-		std::vector < tree::UnrankedNode * > childs;
+			for ( unsigned i = 0; i < rank; i++ )
+				childs.push_back ( parseRankedContent ( input, isPattern ) );
 
-		while ( token.type != TreeFromStringLexer::TokenType::BAR ) {
-			m_TreeLexer.putback ( input, token );
-			childs.push_back ( parseUnrankedContent ( input ) );
-			rank++;
-			token = m_TreeLexer.next ( input );
-		}
+			if ( isPattern ) {
+				alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 );
+				RankedPattern tree ( std::move ( subtreeWildcard ), RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) );
 
-		if ( token.type != TreeFromStringLexer::TokenType::BAR )
-			throw exception::AlibException ( "Missing bar" );
+				if ( features.count ( FEATURES::RANKED_PATTERN ) ) return Tree ( tree );
+			} else {
+				RankedTree tree ( RankedNode ( alphabet::RankedSymbol ( symbol, rank ), std::move ( childs ) ) );
 
-		UnrankedTree tree ( UnrankedNode ( symbol, childs ) );
+				if ( features.count ( FEATURES::RANKED_TREE ) ) return Tree ( tree );
+			}
+		} else {
+			std::vector < tree::UnrankedNode * > childs;
+			bool isPattern = false; // set by parseUnrankedContent when any descendant is a subtree wildcard
 
-		if ( features.count ( FEATURES::UNRANKED_TREE ) ) return Tree {
-					   tree
-			};
-	}
+			while ( token.type != TreeFromStringLexer::TokenType::BAR ) { // children run until the closing bar of the root
+				m_TreeLexer.putback ( input, token );
+				childs.push_back ( parseUnrankedContent ( input, isPattern ) );
+				rank++; // NOTE(review): rank is never read on the unranked path
+				token = m_TreeLexer.next ( input );
+			}
 
-	throw exception::AlibException ( "Invalid input" );
-}
+			if ( token.type != TreeFromStringLexer::TokenType::BAR ) // NOTE(review): unreachable, the loop above only exits on BAR
+				throw exception::AlibException ( "Missing bar" );
 
-tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input ) const {
-	alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
+			if ( isPattern ) {
+				alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD );
+				UnrankedPattern tree ( std::move ( subtreeWildcard ), UnrankedNode ( symbol, childs ) );
 
-	unsigned rank = 0;
-	std::vector < tree::RankedNode * > childs;
+				if ( features.count ( FEATURES::UNRANKED_PATTERN ) ) return Tree ( tree );
+			} else {
+				UnrankedTree tree ( UnrankedNode ( symbol, childs ) );
 
+				if ( features.count ( FEATURES::UNRANKED_TREE ) ) return Tree ( tree );
+			}
+		}
+
+		throw exception::AlibException ( "Invalid input" ); // the kind that was parsed is not among the requested features
+	}
+}
+
+tree::RankedNode * TreeFromStringParser::parseRankedContent ( std::istream & input, bool & isPattern ) const { // parses one ranked subtree into a node allocated with new; isPattern is set (never cleared) when a wildcard is met
 	TreeFromStringLexer::Token token = m_TreeLexer.next ( input );
 
-	if ( token.type == TreeFromStringLexer::TokenType::RANK )
-		rank = std::stou ( token.value );
-	else
-		throw exception::AlibException ( "Missing rank" );
+	if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) {
+		isPattern = true; // tells the caller to build a pattern instead of a plain tree
+		alphabet::RankedSymbol subtreeWildcard ( alphabet::Symbol ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD ), 0 );
+		return new RankedNode ( std::move ( subtreeWildcard ), { } ); // the wildcard is a leaf with rank 0
+	} else {
+		m_TreeLexer.putback ( input, token ); // not a wildcard: hand the token back so the symbol parser reads it
+		alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
 
-	for ( unsigned i = 0; i < rank; i++ )
-		childs.push_back ( parseRankedContent ( input ) );
+		unsigned rank = 0;
+		std::vector < tree::RankedNode * > childs;
 
-	return new RankedNode ( alphabet::RankedSymbol ( std::move ( symbol ), rank ), std::move ( childs ) );
-}
+		token = m_TreeLexer.next ( input ); // reuse the outer token instead of shadowing it with a second declaration
 
-tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input ) const {
-	alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
+		if ( token.type == TreeFromStringLexer::TokenType::RANK )
+			rank = std::stou ( token.value );
+		else
+			throw exception::AlibException ( "Missing rank" ); // every ranked symbol must be followed by its rank
 
-	unsigned rank = 0;
-	std::vector < tree::UnrankedNode * > childs;
+		for ( unsigned i = 0; i < rank; i++ )
+			childs.push_back ( parseRankedContent ( input, isPattern ) ); // NOTE(review): nodes already in childs are not freed if a later sibling throws
 
+		return new RankedNode ( alphabet::RankedSymbol ( std::move ( symbol ), rank ), std::move ( childs ) );
+	}
+}
+
+tree::UnrankedNode * TreeFromStringParser::parseUnrankedContent ( std::istream & input, bool & isPattern ) const { // parses one bar-terminated unranked subtree into a node allocated with new; isPattern is set (never cleared) when a wildcard is met
 	TreeFromStringLexer::Token token = m_TreeLexer.next ( input );
 
-	while ( token.type != TreeFromStringLexer::TokenType::BAR ) {
-		m_TreeLexer.putback ( input, token );
-		childs.push_back ( parseUnrankedContent ( input ) );
-		rank++;
+	if ( token.type == TreeFromStringLexer::TokenType::SUBTREE_WILDCARD ) {
 		token = m_TreeLexer.next ( input );
-	}
 
-	if ( token.type != TreeFromStringLexer::TokenType::BAR )
-		throw exception::AlibException ( "Missing bar" );
+		if ( token.type != TreeFromStringLexer::TokenType::BAR ) // a wildcard has no children, so its bar must follow at once
+			throw exception::AlibException ( "Missing bar" );
+
+		isPattern = true; // tells the caller to build a pattern instead of a plain tree
+		alphabet::Symbol subtreeWildcard ( alphabet::SubtreeWildcardSymbol::SUBTREE_WILDCARD );
+		return new UnrankedNode ( std::move ( subtreeWildcard ), { } );
+	} else {
+		m_TreeLexer.putback ( input, token ); // not a wildcard: hand the token back so the symbol parser reads it
+		alphabet::Symbol symbol = alib::stringApi < alphabet::Symbol >::parse ( input );
+
+		unsigned rank = 0; // NOTE(review): only incremented below, never read
+		std::vector < tree::UnrankedNode * > childs;
+
+		token = m_TreeLexer.next ( input ); // reuse the outer token instead of shadowing it with a second declaration
 
-	return new UnrankedNode ( std::move ( symbol ), std::move ( childs ) );
+		while ( token.type != TreeFromStringLexer::TokenType::BAR ) { // children run until this node's closing bar
+			m_TreeLexer.putback ( input, token );
+			childs.push_back ( parseUnrankedContent ( input, isPattern ) ); // NOTE(review): nodes already in childs are not freed if a later sibling throws
+			rank++;
+			token = m_TreeLexer.next ( input );
+		}
+
+		if ( token.type != TreeFromStringLexer::TokenType::BAR ) // NOTE(review): unreachable, the loop above only exits on BAR
+			throw exception::AlibException ( "Missing bar" );
+
+		return new UnrankedNode ( std::move ( symbol ), std::move ( childs ) );
+	}
 }
 
 } /* namespace tree */
diff --git a/alib2str/src/tree/TreeFromStringParser.h b/alib2str/src/tree/TreeFromStringParser.h
index 27c986a392..3de6888782 100644
--- a/alib2str/src/tree/TreeFromStringParser.h
+++ b/alib2str/src/tree/TreeFromStringParser.h
@@ -27,8 +27,8 @@ public:
 	TreeFromStringParser ( ) { }
 
 private:
-	tree::RankedNode * parseRankedContent ( std::istream & ) const;
-	tree::UnrankedNode * parseUnrankedContent ( std::istream & ) const;
+	tree::RankedNode * parseRankedContent ( std::istream &, bool & ) const; // bool & is an out-flag: set to true when a subtree wildcard is parsed, never reset; result is allocated with new
+	tree::UnrankedNode * parseUnrankedContent ( std::istream &, bool & ) const; // bool & is an out-flag: set to true when a subtree wildcard is parsed, never reset; result is allocated with new
 
 	TreeFromStringLexer m_TreeLexer;
 
diff --git a/alib2str/src/tree/TreeToStringComposer.cpp b/alib2str/src/tree/TreeToStringComposer.cpp
index 382a732e04..d76d4651f2 100644
--- a/alib2str/src/tree/TreeToStringComposer.cpp
+++ b/alib2str/src/tree/TreeToStringComposer.cpp
@@ -7,7 +7,9 @@
 
 #include "TreeToStringComposer.h"
 #include <tree/ranked/RankedTree.h>
+#include <tree/ranked/RankedPattern.h>
 #include <tree/unranked/UnrankedTree.h>
+#include <tree/unranked/UnrankedPattern.h>
 
 #include "../StringApi.hpp"
 
@@ -30,6 +32,27 @@ void TreeToStringComposer::compose ( std::ostream & out, const RankedNode & node
 	}
 }
 
+void TreeToStringComposer::compose ( std::ostream & out, const RankedPattern & tree ) { // writes a ranked pattern to out
+	compose ( out, tree.getSubtreeWildcard ( ), tree.getRoot ( ) ); // the pattern's own wildcard symbol decides which nodes print as #S
+}
+
+TreeToStringComposer::RegistratorWrapper < void, RankedPattern > StringToStringComposerRankedPattern = TreeToStringComposer::RegistratorWrapper < void, RankedPattern > ( TreeToStringComposer::getInstance ( ), TreeToStringComposer::compose ); // namespace-scope registration object; presumably makes dispatch ( ) route RankedPattern to the overload above - confirm against RegistratorWrapper
+
+void TreeToStringComposer::compose ( std::ostream & out, const alphabet::RankedSymbol & subtreeWildcard, const RankedNode & node ) { // writes node and its subtree in prefix ranked notation
+	if ( node.getSymbol ( ) == subtreeWildcard ) {
+		out << "#S"; // wildcard nodes are written without a rank
+	} else {
+		alib::stringApi < alphabet::Symbol >::compose ( out, node.getSymbol ( ).getSymbol ( ) );
+
+		out << std::utos ( node.getSymbol ( ).getRank ( ).getData ( ) ); // rank follows the symbol directly, no separator
+
+		for ( const RankedNode * child : node.getChildren ( ) ) { // distinct name: the loop variable no longer shadows the node parameter
+			out << " ";
+			compose ( out, subtreeWildcard, * child );
+		}
+	}
+}
+
 void TreeToStringComposer::compose ( std::ostream & out, const UnrankedTree & tree ) {
 	compose ( out, tree.getRoot ( ) );
 }
@@ -47,6 +70,27 @@ void TreeToStringComposer::compose ( std::ostream & out, const UnrankedNode & no
 	out << " |";
 }
 
+void TreeToStringComposer::compose ( std::ostream & out, const UnrankedPattern & tree ) { // writes an unranked pattern to out
+	compose ( out, tree.getSubtreeWildcard ( ), tree.getRoot ( ) ); // the pattern's own wildcard symbol decides which nodes print as #S |
+}
+
+TreeToStringComposer::RegistratorWrapper < void, UnrankedPattern > StringToStringComposerUnrankedPattern = TreeToStringComposer::RegistratorWrapper < void, UnrankedPattern > ( TreeToStringComposer::getInstance ( ), TreeToStringComposer::compose ); // namespace-scope registration object; presumably makes dispatch ( ) route UnrankedPattern to the overload above - confirm against RegistratorWrapper
+
+void TreeToStringComposer::compose ( std::ostream & out, const alphabet::Symbol & subtreeWildcard, const UnrankedNode & node ) { // writes node and its subtree in bar-terminated unranked notation
+	if ( node.getSymbol ( ) == subtreeWildcard ) {
+		out << "#S |"; // wildcard nodes are written with their closing bar and no children
+	} else {
+		alib::stringApi < alphabet::Symbol >::compose ( out, node.getSymbol ( ) );
+
+		for ( const UnrankedNode * child : node.getChildren ( ) ) { // distinct name: the loop variable no longer shadows the node parameter
+			out << " ";
+			compose ( out, subtreeWildcard, * child );
+		}
+
+		out << " |"; // closing bar ends this node's child list
+	}
+}
+
 void TreeToStringComposer::compose ( std::ostream & out, const Tree & tree ) {
 	getInstance ( ).dispatch ( out, tree.getData ( ) );
 }
diff --git a/alib2str/src/tree/TreeToStringComposer.h b/alib2str/src/tree/TreeToStringComposer.h
index 12cc21ab18..5a61cd5405 100644
--- a/alib2str/src/tree/TreeToStringComposer.h
+++ b/alib2str/src/tree/TreeToStringComposer.h
@@ -13,6 +13,8 @@
 #include "tree/Tree.h"
 #include "tree/TreeFeatures.h"
 
+#include "alphabet/RankedSymbol.h"
+
 namespace tree {
 
 /**
@@ -20,11 +22,15 @@ namespace tree {
  */
 class TreeToStringComposer : public std::SingleDispatchFirstStaticParam < void, std::ostream &, TreeBase > {
 	static void compose ( std::ostream &, const RankedNode & tree );
+	static void compose ( std::ostream &, const alphabet::RankedSymbol & subtreeWildcard, const RankedNode & tree ); // pattern helper: nodes whose symbol equals subtreeWildcard are written as #S
 	static void compose ( std::ostream &, const UnrankedNode & tree );
+	static void compose ( std::ostream &, const alphabet::Symbol & subtreeWildcard, const UnrankedNode & tree ); // pattern helper: nodes whose symbol equals subtreeWildcard are written as #S |
 
 public:
 	static void compose ( std::ostream &, const RankedTree & tree );
+	static void compose ( std::ostream &, const RankedPattern & tree ); // writes a ranked pattern, subtree wildcards appear as #S
 	static void compose ( std::ostream &, const UnrankedTree & tree );
+	static void compose ( std::ostream &, const UnrankedPattern & tree ); // writes an unranked pattern, subtree wildcards appear as #S |
 
 	/**
 	 * Prints XML representation of String to the output stream.
diff --git a/alib2str/test-src/tree/TreeTest.cpp b/alib2str/test-src/tree/TreeTest.cpp
index 837318fe07..cc0ac48464 100644
--- a/alib2str/test-src/tree/TreeTest.cpp
+++ b/alib2str/test-src/tree/TreeTest.cpp
@@ -60,7 +60,6 @@ void TreeTest::testEqual ( ) {
 
 		CPPUNIT_ASSERT ( tree == tree2 );
 	}
-
 	{
 		std::string input = "a a | a a | | |";
 		tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input );
@@ -71,6 +70,32 @@ void TreeTest::testEqual ( ) {
 
 		tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output );
 
+		CPPUNIT_ASSERT ( tree == tree2 );
+	}
+	{
+		std::string input = "a2 #S a1 a0";
+		tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input );
+
+		std::string output = alib::StringDataFactory::toString ( tree );
+
+		std::cout << output << std::endl;
+		CPPUNIT_ASSERT ( input == output );
+
+		tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output );
+
+		CPPUNIT_ASSERT ( tree == tree2 );
+	}
+	{
+		std::string input = "a #S | a a | | |";
+		tree::Tree tree = alib::StringDataFactory::fromString < tree::Tree > ( input );
+
+		std::string output = alib::StringDataFactory::toString ( tree );
+
+		std::cout << output << std::endl;
+		CPPUNIT_ASSERT ( input == output );
+
+		tree::Tree tree2 = alib::StringDataFactory::fromString < tree::Tree > ( output );
+
 		CPPUNIT_ASSERT ( tree == tree2 );
 	}
 }
-- 
GitLab