// Summary of this change set (unified diff over alib2cli lexer/parser sources). Everything after
// this comment preamble is the unmodified patch text.
//
// lexer/CharSequenceBase.h
//   - Adds a std::string member putbackBuffer.
//   - getCharacter ( ) returns putbackBuffer.back ( ) while the buffer is non-empty, otherwise the
//     character at linePtr, or EOF when that character is '\0'.
//   - advance ( ) pops one character from putbackBuffer when it is non-empty; only when it is empty
//     does it move linePtr and call fetch ( ).
//   - putback ( std::string ) appends its argument in reverse order, so the first character of the
//     argument is the next one getCharacter ( ) returns.
//
// lexer/Lexer.cpp
//   - nextToken ( ) starts with a switch on m_hint that jumps to q0, qFile or qType.
//   - Three existing character-class conditions no longer accept '/', '.', '~', '_' (and '-' where it
//     was listed); two of them still accept ':'.
//   - qFile skips whitespace (kept in m_raw only), then accepts digits, letters and / . - ~ _ : and
//     continues in qFile2; any other first character falls back to q0.
//   - qFile2 keeps consuming the same character class; a backslash is stored in m_raw only and
//     leads to qFileEscape via advance ( true ); anything else ends the token as TokenType::FILE.
//   - qFileEscape yields TokenType::ERROR at end of sequence, otherwise copies the next character
//     into both m_raw and m_value and returns to qFile2.
//   - qType skips whitespace, falls back to q0 on an immediate ')', and otherwise consumes characters
//     until end of sequence or a ')' that has no matching '(' (nesting is counted in lparens); the
//     value is right-trimmed with ext::rtrim and returned as TokenType::TYPE.
//
// lexer/Lexer.h
//   - Adds enum class Hint { NONE, TYPE, FILE } and the member m_hint, initialised to Hint::NONE.
//   - Adds TokenType::FILE and TokenType::TYPE with the names "file" and "type".
//   - Adds putback ( Token && ), which forwards token.m_raw to the source, and setHint ( Hint ).
//
// parser/Parser.cpp and parser/Parser.h
//   - Parser::setHint sets the lexer hint, puts m_current back, re-reads m_current with
//     nextToken ( false ) and resets the hint to Hint::NONE.
//   - matchType ( ) / matchFile ( ) throw exception::CommonException unless the current token is
//     TYPE / FILE, otherwise return its m_value and read the next token.
//   - New rules Parser::file ( ) and Parser::type ( ) call setHint first; in_redirect_file and
//     out_redirect_file now use file ( ), and the cast branches of param ( ) and statement ( ) now
//     use type ( ) with the local renamed to result_type.
//
// NOTE(review): isEndOfSequence ( ) appears here only as a context line and returns endOfSequence
//   without looking at putbackBuffer. If the underlying sequence is already exhausted when
//   Parser::setHint puts a token back, qFile / qType would presumably report EOS before re-reading
//   the buffered text — confirm against fetch ( ) in the concrete sequences.
// NOTE(review): isspace ( ) is called on getCharacter ( ), whose non-EOF values come from char
//   objects; if char is signed and the input holds bytes >= 0x80 the argument is outside the range
//   isspace accepts — confirm expected input encoding.
// NOTE(review): qFile2 calls advance ( true ) after a backslash regardless of readNextLine —
//   presumably to allow a line continuation; confirm this is intended.
diff --git a/alib2cli/src/lexer/CharSequenceBase.h b/alib2cli/src/lexer/CharSequenceBase.h index 7b838399127e3e50eb29c2ca832c4dc8acc80709..76337644612cdb1c184b962690cd8e224778b132 100644 --- a/alib2cli/src/lexer/CharSequenceBase.h +++ b/alib2cli/src/lexer/CharSequenceBase.h @@ -12,22 +12,29 @@ protected: bool endOfSequence = false; virtual void fetch ( bool readNextLine ) = 0; + std::string putbackBuffer; public: virtual ~CharSequenceBase ( ) noexcept = default; int getCharacter ( ) const { - if ( * linePtr ) + if ( ! putbackBuffer.empty ( ) ) + return putbackBuffer.back ( ); + else if ( * linePtr ) return * linePtr; else return EOF; } void advance ( bool readNextLine ) { - if ( * linePtr ) - ++ linePtr; - if ( * linePtr == '\0' ) - fetch ( readNextLine ); + if ( ! putbackBuffer.empty ( ) ) { + putbackBuffer.pop_back ( ); + } else { + if ( * linePtr ) + ++ linePtr; + if ( * linePtr == '\0' ) + fetch ( readNextLine ); + } } virtual std::string getLine ( ) const = 0; @@ -39,6 +46,10 @@ public: bool isEndOfSequence ( ) const { return endOfSequence; } + + void putback ( std::string string ) { + putbackBuffer.insert ( putbackBuffer.end ( ), string.rbegin ( ), string.rend ( ) ); + } }; } /* namespace cli */ diff --git a/alib2cli/src/lexer/Lexer.cpp b/alib2cli/src/lexer/Lexer.cpp index 7f84595a23a71a8729726a1badaa9f3ed4c02dc2..48e23090c6da64e5bc1e05db175b3b1b42f94500 100644 --- a/alib2cli/src/lexer/Lexer.cpp +++ b/alib2cli/src/lexer/Lexer.cpp @@ -7,6 +7,15 @@ namespace cli { Lexer::Token Lexer::nextToken ( bool readNextLine ) { Token res { "", "", TokenType::ERROR }; + switch ( m_hint ) { + case Hint::NONE: + goto q0; + case Hint::FILE: + goto qFile; + case Hint::TYPE: + goto qType; + } + q0: if ( m_source->isEndOfTransmition ( ) ) { res.m_type = TokenType::EOT; return res; @@ -138,9 +147,7 @@ q0: if ( m_source->isEndOfTransmition ( ) ) { } if ( ( m_source->getCharacter ( ) >= 'a' && m_source->getCharacter ( ) <= 'z' ) - || ( m_source->getCharacter ( ) >= 'A' && 
m_source->getCharacter ( ) <= 'Z' ) - || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' - || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' ) { + || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) ) { res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); @@ -184,8 +191,7 @@ q2: if ( m_source->isEndOfSequence ( ) ) { if ( ( m_source->getCharacter ( ) >= 'a' && m_source->getCharacter ( ) <= 'z' ) || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) - || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-' - || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + || ( m_source->getCharacter ( ) == ':' ) ) { res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); @@ -207,8 +213,7 @@ q3: if ( m_source->isEndOfSequence ( ) ) { if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) || ( m_source->getCharacter ( ) >= 'a' && m_source->getCharacter ( ) <= 'z' ) || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) - || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' 
|| m_source->getCharacter ( ) == '-' - || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + || ( m_source->getCharacter ( ) == ':' ) ) { res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); @@ -290,6 +295,105 @@ q6: if ( m_source->isEndOfSequence ( ) ) { } else { return res; } + +qFile: + if ( m_source->isEndOfTransmition ( ) ) { + res.m_type = TokenType::EOT; + return res; + } + if ( m_source->isEndOfSequence ( ) ) { + res.m_type = TokenType::EOS; + return res; + } + if ( isspace ( m_source->getCharacter ( ) ) ) { + res.m_raw += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + goto qFile; + } + if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) + || ( m_source->getCharacter ( ) >= 'a' && m_source->getCharacter ( ) <= 'z' ) + || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) + || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-' + || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + res.m_raw += m_source->getCharacter ( ); + res.m_value += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + goto qFile2; + } else { + goto q0; + } + +qFile2: + if ( m_source->isEndOfSequence ( ) ) { + res.m_type = TokenType::FILE; + return res; + } + if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) + || ( m_source->getCharacter ( ) >= 'a' && m_source->getCharacter ( ) <= 'z' ) + || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) + || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' 
|| m_source->getCharacter ( ) == '-' + || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + res.m_raw += m_source->getCharacter ( ); + res.m_value += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + goto qFile2; + } else if ( m_source->getCharacter ( ) == '\\' ) { + res.m_raw += m_source->getCharacter ( ); + m_source->advance ( true ); + goto qFileEscape; + } else { + res.m_type = TokenType::FILE; + return res; + } + +qFileEscape: + if ( m_source->isEndOfSequence ( ) ) { + res.m_type = TokenType::ERROR; + return res; + } + + res.m_raw += m_source->getCharacter ( ); + res.m_value += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + goto qFile2; + +qType: + if ( m_source->isEndOfTransmition ( ) ) { + res.m_type = TokenType::EOT; + return res; + } + if ( m_source->isEndOfSequence ( ) ) { + res.m_type = TokenType::EOS; + return res; + } + if ( isspace ( m_source->getCharacter ( ) ) ) { + res.m_raw += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + goto qType; + } + if ( m_source->getCharacter ( ) == ')' ) { + goto q0; + } + + { + unsigned lparens = 0; + while ( ! 
m_source->isEndOfSequence ( ) ) { + if ( m_source->getCharacter ( ) == '(' ) + ++ lparens; + else if ( m_source->getCharacter ( ) == ')' && lparens > 0 ) + -- lparens; + else if ( m_source->getCharacter ( ) == ')' ) { + break; + } + res.m_raw += m_source->getCharacter ( ); + res.m_value += m_source->getCharacter ( ); + m_source->advance ( readNextLine ); + } + ext::rtrim ( res.m_value ); + + res.m_type = TokenType::TYPE; + return res; + } } } /* namespace cli */ diff --git a/alib2cli/src/lexer/Lexer.h b/alib2cli/src/lexer/Lexer.h index 8fbe0f7dc1a301dc5cda5187c5bc4cdae21ac38a..80dce138eec86cb87156cfa31312c1e51bd933e9 100644 --- a/alib2cli/src/lexer/Lexer.h +++ b/alib2cli/src/lexer/Lexer.h @@ -14,7 +14,16 @@ namespace cli { class Lexer { +public: + enum class Hint { + NONE, + TYPE, + FILE + }; + +private: std::unique_ptr < CharSequenceBase > m_source; + Hint m_hint; public: enum class TokenType { @@ -38,6 +47,8 @@ public: DASH_SIGN, EQUAL_SIGN, HASH_SIGN, + FILE, + TYPE, ERROR, EOT, EOS @@ -85,6 +96,10 @@ public: return "equal_sign"; case TokenType::HASH_SIGN : return "hash_sign"; + case TokenType::FILE : + return "file"; + case TokenType::TYPE : + return "type"; case TokenType::ERROR : return "error"; case TokenType::EOT : @@ -129,7 +144,7 @@ public: Lexer ( T && source ) : Lexer ( std::unique_ptr < CharSequenceBase > ( new T ( std::forward < T && > ( source ) ) ) ) { } - Lexer ( std::unique_ptr < CharSequenceBase > source ) : m_source ( std::move ( source ) ) { + Lexer ( std::unique_ptr < CharSequenceBase > source ) : m_source ( std::move ( source ) ), m_hint ( Hint::NONE ) { } Lexer ( std::string source ) : Lexer ( std::unique_ptr < CharSequenceBase > ( new StringCharSequence ( std::move ( source ) ) ) ) { @@ -140,6 +155,14 @@ public: Token nextToken ( bool readNextLine = false ); + void putback ( Token && token ) { + m_source->putback ( std::move ( token.m_raw ) ); + } + + void setHint ( Hint hint ) { + m_hint = hint; + } + std::string getLine ( ) const { return 
m_source->getLine ( ); } diff --git a/alib2cli/src/parser/Parser.cpp b/alib2cli/src/parser/Parser.cpp index b1ef6a61d8934755eee13fa36f62a7c26f06398d..ca72b598bd71c62d3af8e6baf4117e918edb45d5 100644 --- a/alib2cli/src/parser/Parser.cpp +++ b/alib2cli/src/parser/Parser.cpp @@ -60,6 +60,29 @@ std::unique_ptr < TypeOption > Parser::optional_type_option ( ) { } } +std::unique_ptr < Arg > Parser::file ( ) { + setHint ( Lexer::Hint::FILE ); + if ( check ( cli::Lexer::TokenType::HASH_SIGN ) ) { + match ( cli::Lexer::TokenType::HASH_SIGN ); + std::string value = getTokenValue ( ); + match ( cli::Lexer::TokenType::INTEGER, cli::Lexer::TokenType::IDENTIFIER ); + return std::make_unique < BindedArg > ( std::move ( value ) ); + } else if ( check ( cli::Lexer::TokenType::STRING ) ) { + return std::make_unique < ImmediateArg > ( matchString ( ) ); + } else { + return std::make_unique < ImmediateArg > ( matchFile ( ) ); + } +} + +std::unique_ptr < Arg > Parser::type ( ) { + setHint ( Lexer::Hint::TYPE ); + if ( check ( cli::Lexer::TokenType::STRING ) ) + return std::make_unique < ImmediateArg > ( matchString ( ) ); + else { + return std::make_unique < ImmediateArg > ( matchType ( ) ); + } +} + std::unique_ptr < Arg > Parser::arg ( ) { if ( check ( cli::Lexer::TokenType::HASH_SIGN ) ) { match ( cli::Lexer::TokenType::HASH_SIGN ); @@ -131,13 +154,7 @@ std::shared_ptr < Statement > Parser::in_redirect_file ( ) { templateArgs.emplace_back ( template_arg ( ) ); } - std::unique_ptr < Arg > file; - if ( check ( cli::Lexer::TokenType::STRING ) ) - file = std::make_unique < ImmediateArg > ( matchString ( ) ); - else { - file = arg ( ); - } - return std::make_shared < FileStatement > ( std::move ( file ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) ); + return std::make_shared < FileStatement > ( file ( ), std::move ( fileType ), std::move ( type ), std::move ( templateArgs ) ); } std::shared_ptr < Statement > Parser::in_redirect ( ) { @@ -200,11 +217,11 @@ 
std::shared_ptr < Statement > Parser::param ( ) { return std::make_shared < ImmediateStatement < std::string > > ( value ); } else if ( check ( cli::Lexer::TokenType::LEFT_PAREN ) ) { match ( cli::Lexer::TokenType::LEFT_PAREN ); - std::unique_ptr < Arg > type = arg ( ); + std::unique_ptr < Arg > result_type = type ( ); match ( cli::Lexer::TokenType::RIGHT_PAREN ); bool move = move_arg ( ); std::shared_ptr < Statement > castedParam = param ( ); - return std::make_shared < CastStatement > ( std::move ( type ), castedParam, move ); + return std::make_shared < CastStatement > ( std::move ( result_type ), castedParam, move ); } else { throw exception::CommonException ( "Mismatched set while expanding param rule. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." ); } @@ -231,11 +248,11 @@ std::shared_ptr < Statement > Parser::statement ( ) { return std::make_shared < SingleStatement > ( std::move ( name ), std::move ( templateArgs ), std::move ( params ), std::move ( category ), std::move ( moves ) ); } else if ( check ( cli::Lexer::TokenType::LEFT_PAREN ) ) { match ( cli::Lexer::TokenType::LEFT_PAREN ); - std::unique_ptr < Arg > type = arg ( ); + std::unique_ptr < Arg > result_type = type ( ); match ( cli::Lexer::TokenType::RIGHT_PAREN ); bool move = move_arg ( ); std::shared_ptr < Statement > castedStatement = statement ( ); - return std::make_shared < CastStatement > ( std::move ( type ), castedStatement, move ); + return std::make_shared < CastStatement > ( std::move ( result_type ), castedStatement, move ); } else { // TODO builtin statement type to get string type throw exception::CommonException ( "Mismatched set while expanding param statement. Token is " + Lexer::tokenTypeToString ( m_current.m_type ) + "." 
); @@ -261,13 +278,7 @@ void Parser::out_redirect_file ( std::shared_ptr < StatementList > & list ) { match ( cli::Lexer::TokenType::RIGHT_BRACKET ); } - std::unique_ptr < Arg > file; - if ( check ( cli::Lexer::TokenType::STRING ) ) - file = std::make_unique < ImmediateArg > ( matchString ( ) ); - else { - file = arg ( ); - } - list->append ( std::make_unique < ResultFileStatement > ( std::move ( file ), std::move ( fileType ) ) ); + list->append ( std::make_unique < ResultFileStatement > ( file ( ), std::move ( fileType ) ) ); } void Parser::out_redirect ( std::shared_ptr < StatementList > & list ) { diff --git a/alib2cli/src/parser/Parser.h b/alib2cli/src/parser/Parser.h index aae76ffdbfa1b908e2aed4ad51d25d7d38e234ca..3e5695c4b1bda39ad68ac510f7593ec9a7e7675d 100644 --- a/alib2cli/src/parser/Parser.h +++ b/alib2cli/src/parser/Parser.h @@ -32,6 +32,13 @@ public: return m_lexer; } + void setHint ( Lexer::Hint hint ) { + m_lexer.setHint ( hint ); + m_lexer.putback ( std::move ( m_current ) ); + m_current = m_lexer.nextToken ( false ); + m_lexer.setHint ( Lexer::Hint::NONE ); + } + template < class ... Tokens > bool check ( Tokens ... tokens ) const { return ( ... || ( m_current.m_type == tokens ) ); @@ -73,6 +80,22 @@ public: return res; } + std::string matchType ( ) { + if ( ! check ( Lexer::TokenType::TYPE ) ) + throw exception::CommonException ( "Mismatched token while matching a type." ); + std::string res = m_current.m_value; + m_current = m_lexer.nextToken ( false ); + return res; + } + + std::string matchFile ( ) { + if ( ! check ( Lexer::TokenType::FILE ) ) + throw exception::CommonException ( "Mismatched token while matching a file." ); + std::string res = m_current.m_value; + m_current = m_lexer.nextToken ( false ); + return res; + } + int matchInteger ( ) { if ( ! check ( Lexer::TokenType::INTEGER ) ) throw exception::CommonException ( "Mismatched token while matching an integer." 
); @@ -91,6 +114,10 @@ public: std::unique_ptr < TypeOption > optional_type_option ( ); + std::unique_ptr < Arg > file ( ); + + std::unique_ptr < Arg > type ( ); + std::unique_ptr < Arg > arg ( ); std::unique_ptr < Arg > optional_arg ( );