Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Brzozowski.cpp 3.61 KiB
/*
 * Brzozowski.cpp
 *
 *  Created on: 11. 1. 2014
 *      Author: tomas
 */

#include "Brzozowski.h"

using namespace alib;
using namespace automaton;
using namespace regexp;

namespace conversions
{

Brzozowski::Brzozowski( const RegExp & re ) : AbstractREtoFAConverter( re )
{

}

Brzozowski::~Brzozowski( void )
{

}

FSM Brzozowski::convert( void )
{
    RegExpOptimize opt;

    RegExp V = opt.optimize( m_re );
    set<RegExpSymbol> alphabet = RegExpAlphabet::getSymbols( m_re );
    set<RegExp> Q = { V }, Qprev = { V }, Qcurr;
    // In practice, only two Q_i sets are needed, the previous one and current. No need for having Q_1, Q_2, ... Q_n

    while( true )
    {
        for( const auto & regexp : Qprev )
        {
            RegExpDerivation deriv( regexp );

            for( const auto & symbol : alphabet )
            {
                RegExpElement* dSymbol = new RegExpSymbol( symbol.getSymbol( ). getSymbol( ) );
                RegExp derived = deriv.derivation( vector<RegExpElement*>( 1, dSymbol ) );
                derived = opt.optimize( derived );

                if( ! isInSet( derived, Q ) ) // if this state has already been found, do not add
                    Qcurr.insert( derived );

                m_transitions.insert( Transition( regexp, symbol, derived ) );

                delete dSymbol;
            }
        }

        if( Qcurr.empty( ) )
            break;

        // i += 1
        Q.insert( Qcurr.begin( ), Qcurr.end( ) );
        Qprev = Qcurr;
        Qcurr.clear( );
    }

    // ------------------------------------------------------------------------

    StateBuilder builder( Q );

    for( const auto & r : Q )
        m_fsm.addState( builder.getState( r ) );

    for( const auto & symbol : alphabet )
        m_fsm.addInputSymbol( symbol.getSymbol( ) );

    for( const auto & t : m_transitions )
        m_fsm.addTransition( TransitionFSM( builder.getState( t.m_from ), Symbol( t.m_regexpSymbol.getSymbol( ) ), builder.getState( t.m_to ) ) );

    m_fsm.addInitialState( builder.getState( m_re ) );

    for( const auto & r : Q )
        if( r.containsEmptyString( ) )
            m_fsm.addFinalState( builder.getState( r ) );

    return m_fsm;
}

// ----------------------------------------------------------------------------

Brzozowski::Transition::Transition( const RegExp & from, const RegExpSymbol & symb, const RegExp & to )
    : m_from( from ), m_to( to ), m_regexpSymbol( symb )
{

}

bool Brzozowski::Transition::operator<( const Transition & x ) const
{
    if( m_from != x.m_from )
        return m_from < x.m_from;
    else if( m_regexpSymbol != x.m_regexpSymbol )
        return m_regexpSymbol < x.m_regexpSymbol;
    else
        return m_to < x.m_to;
}

// ----------------------------------------------------------------------------

Brzozowski::StateBuilder::StateBuilder( const set<RegExp> & Q )
{
    m_stateId = 0;

    for( const auto & regexp : Q )
        m_states.insert( pair<RegExp, State>( regexp, State( createNewName( ) ) ) );
}

const State & Brzozowski::StateBuilder::getState( const RegExp & re ) const
{
    auto it = m_states.find( re );

    if( it == m_states.end( ) )
        throw AlibException( "Brzozowski::StateBuilder - Regular expression not found!" );

    return it->second;
}

string Brzozowski::StateBuilder::createNewName( void )
{
    // http://en.wikipedia.org/wiki/Hexavigesimal

    unsigned int n = m_stateId ++;
    string name;
    do
    {
        unsigned int remainder = n % 26;
        name += ( char )( remainder + 'A' );
        n = (n - remainder) / 26;
    } while (n > 0);

    return string( name.rbegin( ), name.rend( ) );
}

} /* namespace conversions */