Skip to content
Snippets Groups Projects
Commit b3695643 authored by Jan Trávníček's avatar Jan Trávníček
Browse files

basic position heap data struct and construct algo

parent 7aa3e592
No related branches found
No related tags found
No related merge requests found
/*
* PositionHeapNaive.cpp
*
* Created on: 6. 2. 2017
* Author: Jan Travnicek
*/
#include "PositionHeapNaive.h"
#include <string/LinearString.h>
namespace stringology {
namespace indexing {
/**
 * Type-erased entry point: unwraps the string object and forwards the
 * underlying data to the dispatcher inherited from std::SingleDispatch.
 *
 * @param string wrapped string to build the position heap for
 * @return whatever the dispatched construct overload returns
 */
indexes::PositionHeap < DefaultSymbolType > PositionHeapNaive::construct ( const string::String & string ) {
	const auto & data = string.getData ( );

	return dispatch ( data );
}
// Static registration object for string::LinearString < > with result type
// indexes::PositionHeap < DefaultSymbolType >.
// NOTE(review): presumably this is what lets dispatch ( ) in construct ( const string::String & )
// reach the LinearString overload - confirm against core/multipleDispatch.hpp.
auto PositionHeapNaiveLinearString = PositionHeapNaive::RegistratorWrapper < indexes::PositionHeap < DefaultSymbolType >, string::LinearString < > > ( PositionHeapNaive::construct );
} /* namespace indexing */
} /* namespace stringology */
/*
* PositionHeapNaive.h
*
* Created on: 6. 2. 2017
* Author: Jan Travnicek
*/
#ifndef POSITION_HEAP_NAIVE_H_
#define POSITION_HEAP_NAIVE_H_
#include <indexes/PositionHeap.h>
#include <string/String.h>
#include <string/LinearString.h>
#include <core/multipleDispatch.hpp>
#include <exception/CommonException.h>
namespace stringology {
namespace indexing {
/**
* Constructs a position heap for given string.
*
* The class offers a type-erased entry point taking string::String and a
* templated overload taking string::LinearString < SymbolType >.
*/
class PositionHeapNaive : public std::SingleDispatch < PositionHeapNaive, indexes::PositionHeap < DefaultSymbolType >, const string::StringBase & > {
public:
/**
* Creates a position heap for a wrapped string by forwarding its data to the dispatcher.
* @param string string to construct the position heap for
* @return position heap of the string
*/
static indexes::PositionHeap < DefaultSymbolType > construct ( const string::String & string );
/**
* Creates a position heap for a linear string.
* @param string linear string to construct the position heap for
* @return position heap of the string
* @throws exception::CommonException when the string is empty
*/
template < class SymbolType >
static indexes::PositionHeap < SymbolType > construct ( const string::LinearString < SymbolType > & string );
};
/**
 * Builds the position heap of a linear string by inserting its suffixes from
 * the shortest to the longest.
 *
 * The root carries label 1. Every further suffix is matched from the root
 * along existing edges; at the first symbol that has no edge a new child is
 * attached. The label of that child equals the length of the inserted suffix.
 *
 * @param w linear string to index
 * @return position heap over the alphabet of w
 * @throws exception::CommonException when w is empty
 */
template < class SymbolType >
indexes::PositionHeap < SymbolType > PositionHeapNaive::construct ( const string::LinearString < SymbolType > & w ) {
	const auto & content = w.getContent ( );
	const auto length = content.size ( );

	if ( length == 0 )
		throw exception::CommonException ( "Position heap can't index empty string" );

	std::trie < SymbolType, unsigned > heap ( 1 );

	for ( unsigned remaining = length - 1; remaining > 0; --remaining ) {
		// The suffix handled in this round starts one position before `remaining`.
		unsigned pos = remaining - 1;
		std::trie < SymbolType, unsigned > * current = & heap;

		// Follow existing edges as long as the suffix allows.
		while ( pos < length ) {
			auto next = current->getChildren ( ).find ( content[pos] );

			if ( next == current->getChildren ( ).end ( ) )
				break;

			current = & next->second;
			++pos;
		}

		const unsigned label = length - remaining + 1;
		current->getChildren ( ).insert ( std::make_pair ( content[pos], std::trie < SymbolType, unsigned > ( label ) ) );
	}

	return indexes::PositionHeap < SymbolType > ( w.getAlphabet ( ), heap );
}
} /* namespace indexing */
} /* namespace stringology */
#endif /* POSITION_HEAP_NAIVE_H_ */
/*
* PositionHeap.cpp
*
* Created on: Nov 23, 2013
* Author: Jan Travnicek
*/
#include "PositionHeap.h"
namespace alib {
// Static registration object for indexes::PositionHeap < > (default symbol type) as an alib::Object.
// NOTE(review): presumably this makes the type parseable through xmlApi - confirm against core/xmlApi.hpp.
auto positionHeapParserRegister = xmlApi < alib::Object >::ParserRegister < indexes::PositionHeap < > > ( );
} /* namespace alib */
/*
* PositionHeap.h
*
* Created on: Nov 23, 2013
* Author: Jan Travnicek
*/
#ifndef POSITION_HEAP_H_
#define POSITION_HEAP_H_
#include <string>
#include <set>
#include <trie>
#include <iostream>
#include <algorithm>
#include <sstream>
#include <common/DefaultSymbolType.h>
#include <core/components.hpp>
#include <exception/CommonException.h>
#include <object/Object.h>
#include <object/UniqueObject.h>
#include <object/ObjectBase.h>
#include <sax/FromXMLParserHelper.h>
#include <core/xmlApi.hpp>
#include <container/ObjectsSet.h>
#include <container/ObjectsTrie.h>
#include <primitive/Unsigned.h>
namespace indexes {
class GeneralAlphabet;
/**
* Position heap index. The heap is stored as a trie whose edges are labelled
* with symbols of type SymbolType and whose nodes carry unsigned data. The
* set of symbols allowed on edges is kept as the GeneralAlphabet component.
*/
template < class SymbolType = DefaultSymbolType >
class PositionHeap : public alib::ObjectBase, public std::Components < PositionHeap < SymbolType >, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > {
protected:
// Root of the trie representing the heap.
std::trie < SymbolType, unsigned > m_trie;
public:
/**
* @return newly allocated copy of this position heap
*/
virtual ObjectBase * clone ( ) const;
/**
* @return newly allocated position heap move-constructed from this one
*/
virtual ObjectBase * plunder ( ) &&;
/**
* Creates the heap from an explicit alphabet and a trie.
* @param edgeAlphabet symbols allowed on the edges of the trie
* @param trie root of the trie
* @throws exception::CommonException when an edge symbol of the trie is not in edgeAlphabet
*/
explicit PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie );
/**
* Creates the heap from a trie; the alphabet is obtained from computeMinimalEdgeAlphabet.
* @param trie root of the trie
*/
explicit PositionHeap ( std::trie < SymbolType, unsigned > trie );
/**
* Recursively verifies that every edge symbol of the trie is in the alphabet.
* @param trie root of the (sub)trie to verify
* @throws exception::CommonException when a symbol outside of the alphabet is found
*/
void checkTrie ( const std::trie < SymbolType, unsigned > & trie );
/**
* @return Root node of the trie
*/
const std::trie < SymbolType, unsigned > & getRoot ( ) const;
/**
* @return content of the GeneralAlphabet component
*/
const std::set < SymbolType > & getAlphabet ( ) const {
return this->template accessComponent < GeneralAlphabet > ( ).get ( );
}
/**
* Replaces the trie of the position heap after verifying it with checkTrie.
* @param tree root node to set
*/
void setTree ( std::trie < SymbolType, unsigned > tree );
/**
* Removes a symbol from the GeneralAlphabet component.
* @param symbol removed symbol from the alphabet
* @return result of the component's remove call
*/
bool removeSymbolFromEdgeAlphabet ( const SymbolType & symbol ) {
return this->template accessComponent < GeneralAlphabet > ( ).remove ( symbol );
}
/**
* Prints a textual representation of the form "(PositionHeap <trie>)" to the output stream.
* @param out output stream to print to
*/
virtual void operator >>( std::ostream & out ) const;
/**
* Compares with an arbitrary object. Objects of the same dynamic type are
* compared by the typed overload, otherwise the difference of the type indexes is returned.
*/
virtual int compare ( const ObjectBase & other ) const {
if ( std::type_index ( typeid ( * this ) ) == std::type_index ( typeid ( other ) ) ) return this->compare ( ( decltype ( * this ) )other );
return std::type_index ( typeid ( * this ) ) - std::type_index ( typeid ( other ) );
}
/**
* Compares the pair ( root, alphabet ) of this heap with that of the other heap.
*/
virtual int compare ( const PositionHeap & other ) const;
/**
* @return string produced by streaming this object into a string stream
*/
virtual explicit operator std::string ( ) const;
/**
* @return name of the xml element enclosing a serialised position heap
*/
static const std::string & getXmlTagName() {
static std::string xmlTagName = "PositionHeap";
return xmlTagName;
}
/**
* Parses a position heap (alphabet followed by the trie) from xml tokens.
* @param input iterator to the first token of the position heap; advanced while parsing
*/
static PositionHeap parse ( std::deque < sax::Token >::iterator & input );
/**
* Appends xml tokens (alphabet followed by the trie) describing this position heap.
* @param out token sequence to append to
*/
void compose ( std::deque < sax::Token > & out ) const;
/**
* @return newly allocated alib::UniqueObject holding this position heap moved into an alib::Object
*/
virtual alib::ObjectBase * inc ( ) &&;
};
} /* namespace indexes */
namespace indexes {
/**
 * Stores the given alphabet as the GeneralAlphabet component, takes over the
 * trie and verifies that the trie uses symbols of the alphabet only.
 *
 * @throws exception::CommonException when the verification fails
 */
template < class SymbolType >
PositionHeap < SymbolType >::PositionHeap ( std::set < SymbolType > edgeAlphabet, std::trie < SymbolType, unsigned > trie )
	: std::Components < PositionHeap, SymbolType, std::tuple < GeneralAlphabet >, std::tuple < > > ( std::make_tuple ( std::move ( edgeAlphabet ) ), std::tuple < > ( ) ),
	  m_trie ( std::move ( trie ) ) {
	checkTrie ( m_trie );
}
// Delegates to the ( alphabet, trie ) constructor with the alphabet computed from the trie.
// NOTE(review): computeMinimalEdgeAlphabet is not declared in the visible part of this header - confirm where it comes from.
// The trie is deliberately passed as a copy; the evaluation order of the two arguments is unspecified.
template < class SymbolType >
PositionHeap < SymbolType >::PositionHeap ( std::trie < SymbolType, unsigned > trie ) : PositionHeap ( computeMinimalEdgeAlphabet ( trie ), trie ) {
}
/**
 * @return newly allocated copy of this position heap
 */
template < class SymbolType >
alib::ObjectBase * PositionHeap < SymbolType >::clone ( ) const {
	return new PositionHeap < SymbolType > ( * this );
}
/**
 * @return newly allocated position heap move-constructed from this one
 */
template < class SymbolType >
alib::ObjectBase * PositionHeap < SymbolType >::plunder ( ) && {
	return new PositionHeap < SymbolType > ( std::move ( * this ) );
}
/**
 * Recursively verifies that every edge symbol in the given (sub)trie belongs
 * to the alphabet of this position heap.
 *
 * @param trie root of the (sub)trie to verify
 * @throws exception::CommonException naming the first edge symbol that is not in the alphabet
 */
template < class SymbolType >
void PositionHeap < SymbolType >::checkTrie ( const std::trie < SymbolType, unsigned > & trie ) {
	for ( const std::pair < const SymbolType, std::trie < SymbolType, unsigned > > & child : trie.getChildren ( ) ) {
		// The space before "not" keeps the symbol and the rest of the message apart.
		if ( ! getAlphabet ( ).count ( child.first ) )
			throw exception::CommonException ( "Symbol " + std::to_string ( child.first ) + " not in the alphabet." );

		checkTrie ( child.second );
	}
}
/**
 * @return root node of the trie representing the heap
 */
template < class SymbolType >
const std::trie < SymbolType, unsigned > & PositionHeap < SymbolType >::getRoot ( ) const {
	return this->m_trie;
}
/**
 * Replaces the trie of the position heap. The new trie is verified first, so
 * the stored trie is left untouched when the verification throws.
 *
 * @param trie new root of the trie
 * @throws exception::CommonException when the trie uses a symbol outside of the alphabet
 */
template < class SymbolType >
void PositionHeap < SymbolType >::setTree ( std::trie < SymbolType, unsigned > trie ) {
	checkTrie ( trie );

	m_trie = std::move ( trie ).plunder ( );
}
/**
 * Writes "(PositionHeap ", the trie and ")" to the given stream.
 *
 * @param out stream to print to
 */
template < class SymbolType >
void PositionHeap < SymbolType >::operator >>( std::ostream & out ) const {
	out << "(PositionHeap ";
	out << m_trie;
	out << ")";
}
/**
 * Compares two position heaps as tuples ( root, alphabet ).
 *
 * @param other position heap to compare with
 * @return result of std::compare applied to the two tuples
 */
template < class SymbolType >
int PositionHeap < SymbolType >::compare ( const PositionHeap & other ) const {
	auto lhs = std::tie ( getRoot ( ), getAlphabet ( ) );
	auto rhs = std::tie ( other.getRoot ( ), other.getAlphabet ( ) );

	static std::compare < decltype ( lhs ) > comparator;

	return comparator ( lhs, rhs );
}
/**
 * @return text obtained by streaming this object into a string stream
 */
template < class SymbolType >
PositionHeap < SymbolType >::operator std::string ( ) const {
	std::stringstream buffer;

	buffer << * this;

	return buffer.str ( );
}
/**
 * Reads a position heap from xml tokens: the opening element, the alphabet,
 * the trie and the closing element, in this order.
 *
 * @param input iterator to the first token; advanced while parsing
 * @return parsed position heap
 */
template < class SymbolType >
PositionHeap < SymbolType > PositionHeap < SymbolType >::parse ( std::deque < sax::Token >::iterator & input ) {
	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::START_ELEMENT, PositionHeap::getXmlTagName() );

	std::set < SymbolType > alphabet = alib::xmlApi < std::set < SymbolType > >::parse ( input );
	std::trie < SymbolType, unsigned > rootNode = alib::xmlApi < std::trie < SymbolType, unsigned > >::parse ( input );

	PositionHeap < SymbolType > result ( std::move ( alphabet ), std::move ( rootNode ) );

	sax::FromXMLParserHelper::popToken ( input, sax::Token::TokenType::END_ELEMENT, PositionHeap::getXmlTagName() );

	return result;
}
/**
 * Appends xml tokens describing the position heap: the opening element, the
 * alphabet, the trie and the closing element, in this order.
 *
 * @param out token sequence to append to
 */
template < class SymbolType >
void PositionHeap < SymbolType >::compose ( std::deque < sax::Token > & out ) const {
	const std::string & tag = PositionHeap::getXmlTagName();

	out.emplace_back ( tag, sax::Token::TokenType::START_ELEMENT );

	alib::xmlApi < std::set < SymbolType > >::compose ( out, getAlphabet ( ) );
	alib::xmlApi < std::trie < SymbolType, unsigned > >::compose ( out, getRoot ( ) );

	out.emplace_back ( tag, sax::Token::TokenType::END_ELEMENT );
}
/**
 * Moves this position heap into an alib::Object and wraps it, together with
 * primitive::Integer ( 0 ), into a newly allocated alib::UniqueObject.
 *
 * @return the newly allocated unique object
 */
template < class SymbolType >
alib::ObjectBase * PositionHeap < SymbolType >::inc ( ) && {
	return new alib::UniqueObject (
		alib::Object ( std::move ( * this ) ),
		primitive::Integer ( 0 )
	);
}
} /* namespace indexes */
namespace std {
/**
 * Constraints of the GeneralAlphabet component of a position heap.
 */
template < class SymbolType >
class ComponentConstraint < indexes::PositionHeap < SymbolType >, SymbolType, indexes::GeneralAlphabet > {
	/**
	 * Tells whether the symbol labels any edge of the given (sub)trie.
	 *
	 * @param trie root of the (sub)trie to search
	 * @param symbol symbol to look for
	 * @return true when some edge in the (sub)trie is labelled with the symbol
	 */
	static bool used ( const std::trie < SymbolType, unsigned > & trie, const SymbolType & symbol ) {
		for ( const std::pair < const SymbolType, std::trie < SymbolType, unsigned > > & child : trie.getChildren ( ) ) {
			// Recurse into the child; this class has no checkTrie member to call.
			if ( symbol == child.first || used ( child.second, symbol ) )
				return true;
		}

		return false;
	}

public:
	/**
	 * @return true when the symbol labels an edge of the trie of the position heap
	 */
	static bool used ( const indexes::PositionHeap < SymbolType > & index, const SymbolType & symbol ) {
		return used ( index.getRoot ( ), symbol );
	}

	/**
	 * Every symbol may be added to the alphabet.
	 */
	static bool available ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) {
		return true;
	}

	/**
	 * No additional validity requirement is placed on alphabet symbols.
	 */
	static void valid ( const indexes::PositionHeap < SymbolType > &, const SymbolType & ) {
	}
};
} /* namespace std */
#endif /* POSITION_HEAP_H_ */
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <stringology/exact/SuffixAutomaton.h> #include <stringology/exact/SuffixAutomaton.h>
#include <string/properties/BorderArray.h> #include <string/properties/BorderArray.h>
#include <stringology/indexing/SuffixTrieNaive.h> #include <stringology/indexing/SuffixTrieNaive.h>
#include <stringology/indexing/PositionHeapNaive.h>
#include <stringology/indexing/SuffixArrayNaive.h> #include <stringology/indexing/SuffixArrayNaive.h>
   
int main ( int argc, char * argv[] ) { int main ( int argc, char * argv[] ) {
...@@ -61,6 +62,7 @@ int main ( int argc, char * argv[] ) { ...@@ -61,6 +62,7 @@ int main ( int argc, char * argv[] ) {
allowed.push_back ( "backwardDAWGMatching" ); allowed.push_back ( "backwardDAWGMatching" );
allowed.push_back ( "borderArray" ); allowed.push_back ( "borderArray" );
allowed.push_back ( "suffixTrie" ); allowed.push_back ( "suffixTrie" );
allowed.push_back ( "positionHeap" );
allowed.push_back ( "suffixArray" ); allowed.push_back ( "suffixArray" );
TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed );
   
...@@ -291,6 +293,18 @@ int main ( int argc, char * argv[] ) { ...@@ -291,6 +293,18 @@ int main ( int argc, char * argv[] ) {
measurements::start ( "Output write", measurements::Type::AUXILIARY ); measurements::start ( "Output write", measurements::Type::AUXILIARY );
   
alib::XmlDataFactory::toStdout ( suffixTrie ); alib::XmlDataFactory::toStdout ( suffixTrie );
} else if ( algorithm.getValue ( ) == "positionHeap" ) {
string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
measurements::end ( );
measurements::start ( "Algorithm", measurements::Type::MAIN );
indexes::PositionHeap < DefaultSymbolType > positionHeap = stringology::indexing::PositionHeapNaive::construct ( subject );
measurements::end ( );
measurements::start ( "Output write", measurements::Type::AUXILIARY );
alib::XmlDataFactory::toStdout ( positionHeap );
} else if ( algorithm.getValue ( ) == "suffixArray" ) { } else if ( algorithm.getValue ( ) == "suffixArray" ) {
string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) ); string::String subject = alib::XmlDataFactory::fromTokens < string::String > ( std::move ( sax::FromXMLParserHelper::parseInput(true, subjectInput).front ( ) ) );
   
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment