diff --git a/alib2algo/src/string/simplify/NormalizeAlphabet.cpp b/alib2algo/src/string/simplify/NormalizeAlphabet.cpp
new file mode 100644
--- /dev/null
+++ b/alib2algo/src/string/simplify/NormalizeAlphabet.cpp
@@ -0,0 +1,89 @@
+/*
+ * NormalizeAlphabet.cpp
+ *
+ * Created on: Dec 9, 2013
+ * Author: Jan Travnicek
+ */
+
+#include "NormalizeAlphabet.h"
+
+#include "std/map.hpp"
+#include <deque>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <sstream>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include "exception/AlibException.h"
+
+#include "alphabet/Symbol.h"
+
+namespace string {
+
+namespace simplify {
+
+string::String NormalizeAlphabet::normalize(const string::String& string) {
+	string::String* out = nullptr;
+	string.getData().Accept(static_cast<void*>(&out), NormalizeAlphabet::NORMALIZE);
+
+	// Own the visitor's allocation right away so it is freed even if the move below throws.
+	std::unique_ptr<string::String> owner(out);
+	string::String res = std::move(*owner);
+	return res;
+}
+
+string::LinearString NormalizeAlphabet::normalize(const string::LinearString& string) {
+	// The index of each symbol is stored in a char and added to 'a', so at most this many
+	// distinct symbols can be renamed before two of them would end up with the same name.
+	const int maxSymbols = std::numeric_limits<unsigned char>::max() + 1;
+
+	// First pass: number every distinct symbol in the order it is first met in the content.
+	int counter = 0;
+	std::map<alphabet::Symbol, char > normalizationData;
+
+	for(const alphabet::Symbol& symbol : string.getContent()) {
+		if(normalizationData.find(symbol) == normalizationData.end()) {
+			if(counter >= maxSymbols)
+				throw exception::AlibException("Too many distinct symbols to normalize");
+
+			normalizationData.insert(std::make_pair(symbol, counter++));
+		}
+	}
+
+	// The new alphabet holds one renamed symbol ('a', 'b', ...) per distinct input symbol.
+	std::set<alphabet::Symbol> normalizedAlphabet;
+	for(const auto& symbol : normalizationData) {
+		normalizedAlphabet.insert(alphabet::symbolFrom(static_cast<char>(symbol.second + 'a')));
+	}
+
+	// Second pass: rewrite the content symbol by symbol using the numbering above.
+	std::vector<alphabet::Symbol> data;
+	for(const alphabet::Symbol& symbol : string.getContent()) {
+		data.push_back(alphabet::symbolFrom(static_cast<char>(normalizationData.find(symbol)->second + 'a')));
+	}
+
+	string::LinearString result(normalizedAlphabet, data);
+	return result;
+}
+
+void NormalizeAlphabet::Visit(void*, const string::Epsilon&) const {
+	throw exception::AlibException("Unsupported string type Epsilon");
+}
+
+void NormalizeAlphabet::Visit(void* data, const string::LinearString& str) const {
+	// data is the address of the string::String* that normalize(const string::String&) reads back.
+	string::String* & out = *static_cast<string::String**>(data);
+	out = new string::String(this->normalize(str));
+}
+
+void NormalizeAlphabet::Visit(void*, const string::CyclicString&) const {
+	throw exception::AlibException("Unsupported string type CyclicString");
+}
+
+const NormalizeAlphabet NormalizeAlphabet::NORMALIZE;
+
+} /* namespace simplify */
+
+} /* namespace string */
diff --git a/alib2algo/src/string/simplify/NormalizeAlphabet.h b/alib2algo/src/string/simplify/NormalizeAlphabet.h
new file mode 100644
--- /dev/null
+++ b/alib2algo/src/string/simplify/NormalizeAlphabet.h
@@ -0,0 +1,49 @@
+/*
+ * NormalizeAlphabet.h
+ *
+ * Created on: Dec 9, 2013
+ * Author: Jan Travnicek
+ */
+
+#ifndef NORMALIZE_ALPHABET_H_
+#define NORMALIZE_ALPHABET_H_
+
+#include <string/LinearString.h>
+#include <string/String.h>
+
+namespace string {
+
+namespace simplify {
+
+/**
+ * Renames the symbols of a string to 'a', 'b', 'c', ... in the order they are first met.
+ */
+class NormalizeAlphabet : public string::VisitableStringBase::const_visitor_type {
+public:
+	/**
+	 * @param str string to normalize
+	 * @return string with renamed symbols
+	 * @throws exception::AlibException if str holds an Epsilon or a CyclicString
+	 */
+	static string::String normalize(const string::String& str);
+
+	/**
+	 * @param str linear string to normalize
+	 * @return linear string whose alphabet holds only the renamed symbols occurring in the content of str
+	 * @throws exception::AlibException if str has more distinct symbols than can be renamed uniquely
+	 */
+	static string::LinearString normalize(const string::LinearString& str);
+
+protected:
+	void Visit(void*, const string::Epsilon& string) const;
+	void Visit(void*, const string::LinearString& string) const;
+	void Visit(void*, const string::CyclicString& string) const;
+
+	static const NormalizeAlphabet NORMALIZE;
+};
+
+} /* namespace simplify */
+
+} /* namespace string */
+
+#endif /* NORMALIZE_ALPHABET_H_ */
diff --git a/anormalize2/src/anormalize.cpp b/anormalize2/src/anormalize.cpp
index 67a1eca73d50a91b979133bfff6da2d4dc4078d0..0bfaae7d016cf876abf09fccb5d1ddca62242d9c 100644
--- a/anormalize2/src/anormalize.cpp
+++ b/anormalize2/src/anormalize.cpp
@@ -9,6 +9,7 @@
 
 #include "exception/AlibException.h"
 #include "factory/XmlDataFactory.hpp"
+#include "string/simplify/NormalizeAlphabet.h"
 #include "automaton/simplify/Normalize.h"
 #include "automaton/simplify/SingleInitialState.h"
 #include "grammar/convert/ToGrammarLeftRG.h"
@@ -28,12 +29,18 @@ int main(int argc, char** argv) {
 		TCLAP::ValueArg<std::string> form( "f", "form", "Convert to different form", false, "", &formVals);
 		cmd.add( form );
 
-		std::vector<std::string> inputTypes {"automaton", "grammar" };
+		std::vector<std::string> inputTypes {"automaton", "grammar"};
 		TCLAP::ValuesConstraint<std::string> inputTypeVals( inputTypes );
 
 		TCLAP::ValueArg<std::string> labels( "l", "labels", "Normalize labels", false, "", &inputTypeVals);
 		cmd.add( labels );
 
+		std::vector<std::string> inputTypes2 {"string"};
+		TCLAP::ValuesConstraint<std::string> inputTypeVals2( inputTypes2 );
+
+		TCLAP::ValueArg<std::string> alphabet( "a", "alphabet", "Normalize alphabet", false, "", &inputTypeVals2);
+		cmd.add( alphabet );
+
 		cmd.parse(argc, argv);
 
 		std::list<sax::Token> tokens;
@@ -53,6 +60,9 @@ int main(int argc, char** argv) {
 		} else if(labels.getValue() == "grammar") {
 //			alib::XmlDataFactory::toStdout(automaton::simplify::Normalize::normalize(alib::XmlDataFactory::fromTokens<automaton::Automaton>(tokens)));
 			return 0;
+		} else if(alphabet.getValue() == "string") {
+			alib::XmlDataFactory::toStdout(string::simplify::NormalizeAlphabet::normalize(alib::XmlDataFactory::fromTokens<string::String>(tokens)));
+			return 0;
 		} else if(form.getValue() == "leftRG") {
 			alib::XmlDataFactory::toStdout(grammar::convert::ToGrammarLeftRG::convert(alib::XmlDataFactory::fromTokens<grammar::Grammar>( tokens )));
 			return 0;