From 80f892da2d076e020aac4258b2b7761db8a57a3b Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Wed, 6 Mar 2019 07:55:55 +0100
Subject: [PATCH] remember raw representation of lexemes in cli

---
 alib2cli/src/lexer/Lexer.cpp | 41 ++++++++++++++++++++++++++++++++++--
 alib2cli/src/lexer/Lexer.h   |  1 +
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/alib2cli/src/lexer/Lexer.cpp b/alib2cli/src/lexer/Lexer.cpp
index 12731fa3f0..7f84595a23 100644
--- a/alib2cli/src/lexer/Lexer.cpp
+++ b/alib2cli/src/lexer/Lexer.cpp
@@ -5,7 +5,7 @@
 namespace cli {
 
 Lexer::Token Lexer::nextToken ( bool readNextLine ) {
-	Token res { "", TokenType::ERROR };
+	Token res { "", "", TokenType::ERROR };
 
 q0:	if ( m_source->isEndOfTransmition ( ) ) {
 		res.m_type = TokenType::EOT;
@@ -16,102 +16,122 @@ q0:	if ( m_source->isEndOfTransmition ( ) ) {
 		return res;
 	}
 	if ( isspace ( m_source->getCharacter ( ) ) ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q0;
 	}
 	if ( m_source->getCharacter ( ) == '<' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::IN_REDIRECT;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '>' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::OUT_REDIRECT;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '(' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::LEFT_PAREN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == ')' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::RIGHT_PAREN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '{' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::LEFT_BRACE;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '}' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::RIGHT_BRACE;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '[' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::LEFT_BRACKET;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == ']' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::RIGHT_BRACKET;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '@' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::AT_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '$' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::DOLAR_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '&' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::AMPERSAND_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '|' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::PIPE_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '^' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::CARET_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == ':' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::COLON_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '=' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::EQUAL_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '#' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		res.m_type = TokenType::HASH_SIGN;
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '-' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q2;
 	}
 
 	if ( m_source->getCharacter ( ) == '"' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_type = TokenType::STRING;
 		m_source->advance ( true );
 		goto q4;
 	}
 
 	if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q1;
@@ -121,12 +141,14 @@ q0:	if ( m_source->isEndOfTransmition ( ) ) {
 	  || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' )
 	  ||   m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.'
 	  ||   m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q3;
 	}
 
 	if ( m_source->getCharacter ( ) == '\\' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q3Escape;
 	}
@@ -139,6 +161,7 @@ q1:	if ( m_source->isEndOfSequence ( ) ) {
 		return res;
 	}
 	if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q1;
@@ -153,6 +176,7 @@ q2:	if ( m_source->isEndOfSequence ( ) ) {
 		return res;
 	}
 	if ( ( m_source->getCharacter ( ) >= '0' && m_source->getCharacter ( ) <= '9' ) ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q1;
@@ -162,10 +186,12 @@ q2:	if ( m_source->isEndOfSequence ( ) ) {
 	  || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' )
 	  ||   m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-'
 	  ||   m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q3;
 	} else if ( m_source->getCharacter ( ) == '\\' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q3Escape;
 	}
@@ -183,10 +209,12 @@ q3:	if ( m_source->isEndOfSequence ( ) ) {
 	  || ( m_source->getCharacter ( ) >= 'A' && m_source->getCharacter ( ) <= 'Z' )
 	  ||   m_source->getCharacter ( ) == '/' || m_source->getCharacter ( ) == '.' || m_source->getCharacter ( ) == '-'
 	  ||   m_source->getCharacter ( ) == '~' || m_source->getCharacter ( ) == '_' || m_source->getCharacter ( ) == ':' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q3;
 	} else if ( m_source->getCharacter ( ) == '\\' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q3Escape;
 	} else {
@@ -200,8 +228,8 @@ q3Escape:
 		return res;
 	}
 
+	res.m_raw += m_source->getCharacter ( );
 	res.m_value += m_source->getCharacter ( );
-
 	m_source->advance ( readNextLine );
 	goto q3;
 
@@ -210,13 +238,16 @@ q4:	if ( m_source->isEndOfSequence ( ) ) {
 		return res;
 	}
 	if ( m_source->getCharacter ( ) == '"' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q6;
 	}
 	if ( m_source->getCharacter ( ) == '\\' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q5;
 	} else {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q4;
@@ -228,12 +259,16 @@ q5:	if ( m_source->isEndOfSequence ( ) ) {
 	}
 
 	if ( m_source->getCharacter ( ) == 'n' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += '\n';
 	} else if ( m_source->getCharacter ( ) == 't' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += '\t';
 	} else if ( m_source->getCharacter ( ) == '"' ) {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += '"';
 	} else {
+		res.m_raw += m_source->getCharacter ( );
 		res.m_value += m_source->getCharacter ( );
 	}
 
@@ -244,10 +279,12 @@ q6:	if ( m_source->isEndOfSequence ( ) ) {
 		return res;
 	}
 	if ( isspace ( m_source->getCharacter ( ) ) ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( readNextLine );
 		goto q6;
 	}
 	if ( m_source->getCharacter ( ) == '"' ) {
+		res.m_raw += m_source->getCharacter ( );
 		m_source->advance ( true );
 		goto q4;
 	} else {
diff --git a/alib2cli/src/lexer/Lexer.h b/alib2cli/src/lexer/Lexer.h
index 3f45944874..8fbe0f7dc1 100644
--- a/alib2cli/src/lexer/Lexer.h
+++ b/alib2cli/src/lexer/Lexer.h
@@ -102,6 +102,7 @@ public:
 
 	struct Token {
 		std::string m_value;
+		std::string m_raw;
 		TokenType m_type;
 
 		operator std::string ( ) const {
-- 
GitLab