From 80f892da2d076e020aac4258b2b7761db8a57a3b Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Wed, 6 Mar 2019 07:55:55 +0100 Subject: [PATCH] remember raw representation of lexems in cli --- alib2cli/src/lexer/Lexer.cpp | 41 ++++++++++++++++++++++++++++++++++-- alib2cli/src/lexer/Lexer.h | 1 + 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/alib2cli/src/lexer/Lexer.cpp b/alib2cli/src/lexer/Lexer.cpp index 12731fa3f0..7f84595a23 100644 --- a/alib2cli/src/lexer/Lexer.cpp +++ b/alib2cli/src/lexer/Lexer.cpp @@ -5,7 +5,7 @@ namespace cli { Lexer::Token Lexer::nextToken ( bool readNextLine ) { - Token res { "", TokenType::ERROR }; + Token res { "", "", TokenType::ERROR }; q0: if ( m_source->isEndOfTransmition ( ) ) { res.m_type = TokenType::EOT; @@ -16,102 +16,122 @@ q0: if ( m_source->isEndOfTransmition ( ) ) { return res; } if ( isspace ( m_source->getCharacter ( ) ) ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q0; } if ( m_source->getCharacter ( ) == '<' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::IN_REDIRECT; return res; } if ( m_source->getCharacter ( ) == '>' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::OUT_REDIRECT; return res; } if ( m_source->getCharacter ( ) == '(' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::LEFT_PAREN; return res; } if ( m_source->getCharacter ( ) == ')' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::RIGHT_PAREN; return res; } if ( m_source->getCharacter ( ) == '{' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::LEFT_BRACE; return res; } if ( m_source->getCharacter ( ) == '}' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::RIGHT_BRACE; return res; } if ( m_source->getCharacter ( ) == '[' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::LEFT_BRACKET; return res; } if ( m_source->getCharacter ( ) == ']' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::RIGHT_BRACKET; return res; } if ( m_source->getCharacter ( ) == '@' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::AT_SIGN; return res; } if ( m_source->getCharacter ( ) == '$' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::DOLAR_SIGN; return res; } if ( m_source->getCharacter ( ) == '&' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::AMPERSAND_SIGN; return res; } if ( m_source->getCharacter ( ) == '|' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::PIPE_SIGN; return res; } if ( m_source->getCharacter ( ) == '^' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::CARET_SIGN; return res; } if ( m_source->getCharacter ( ) == ':' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::COLON_SIGN; return res; } if ( m_source->getCharacter ( ) == '=' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::EQUAL_SIGN; return res; } if ( m_source->getCharacter ( ) == '#' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); res.m_type = TokenType::HASH_SIGN; return res; } if ( m_source->getCharacter ( ) == '-' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q2; } if ( m_source->getCharacter ( ) == '"' ) { + res.m_raw += m_source->getCharacter ( ); res.m_type = TokenType::STRING; m_source->advance ( true ); goto q4; } if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q1; @@ -121,12 +141,14 @@ q0: if ( m_source->isEndOfTransmition ( ) ) { || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q3; } if ( m_source->getCharacter ( ) == '\\' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( true ); goto q3Escape; } @@ -139,6 +161,7 @@ q1: if ( m_source->isEndOfSequence ( ) ) { return res; } if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q1; @@ -153,6 +176,7 @@ q2: if ( m_source->isEndOfSequence ( ) ) { return res; } if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q1; @@ -162,10 +186,12 @@ q2: if ( m_source->isEndOfSequence ( ) ) { || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-' || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q3; } else if ( m_source->getCharacter ( ) == '\\' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( true ); goto q3Escape; } @@ -183,10 +209,12 @@ q3: if ( m_source->isEndOfSequence ( ) ) { || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' ) || m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-' || m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q3; } else if ( m_source->getCharacter ( ) == '\\' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( true ); goto q3Escape; } else { @@ -200,8 +228,8 @@ q3Escape: return res; } + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); - m_source->advance ( readNextLine ); goto q3; @@ -210,13 +238,16 @@ q4: if ( m_source->isEndOfSequence ( ) ) { return res; } if ( m_source->getCharacter ( ) == '"' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q6; } if ( m_source->getCharacter ( ) == '\\' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( true ); goto q5; } else { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); m_source->advance ( true ); goto q4; @@ -228,12 +259,16 @@ q5: if ( m_source->isEndOfSequence ( ) ) { } if ( m_source->getCharacter ( ) == 'n' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += '\n'; } else if ( m_source->getCharacter ( ) == 't' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += '\t'; } else if ( m_source->getCharacter ( ) == '"' ) { + res.m_raw += m_source->getCharacter ( ); res.m_value += '"'; } else { + res.m_raw += m_source->getCharacter ( ); res.m_value += m_source->getCharacter ( ); } @@ -244,10 +279,12 @@ q6: if ( m_source->isEndOfSequence ( ) ) { return res; } if ( isspace ( m_source->getCharacter ( ) ) ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( readNextLine ); goto q6; } if ( m_source->getCharacter ( ) == '"' ) { + res.m_raw += m_source->getCharacter ( ); m_source->advance ( true ); goto q4; } else { diff --git a/alib2cli/src/lexer/Lexer.h b/alib2cli/src/lexer/Lexer.h index 3f45944874..8fbe0f7dc1 100644 --- a/alib2cli/src/lexer/Lexer.h +++ b/alib2cli/src/lexer/Lexer.h @@ -102,6 +102,7 @@ public: struct Token { std::string m_value; + std::string m_raw; TokenType m_type; operator std::string ( ) const { -- GitLab