// RegExpFromStringLexer.cpp
#include "RegExpFromStringLexer.h"

namespace regexp {

/**
 * Constructs a lexer over the given text.
 *
 * The input stream member is initialised from @p in and the first token is
 * scanned right away, so the current token is already populated when the
 * constructor returns.
 *
 * @param in text to be tokenised
 */
RegExpFromStringLexer::RegExpFromStringLexer(const std::string& in) : m_In(in) {
	// Prime the lexer: load the first token into m_Current.
	next();
}

/**
 * Scans the next token from the input stream into m_Current.
 *
 * The scanner is a hand-written state machine; every label is one state:
 *   L0 - start: skips blanks and dispatches on the first character
 *   L1 - after a backslash outside quotes: "\e" yields EPS, "\0" yields EMPTY
 *   L2 - inside an unquoted symbol made of ASCII letters and digits
 *   L3 - directly after an opening double quote ("" yields EPS)
 *   L4 - inside a quoted symbol, until the closing double quote
 *   L5 - after a backslash inside a quoted symbol (only \" and \\ are accepted)
 *
 * The token value is reset on every call and is only filled for SYMBOL tokens.
 * End of input in state L0 yields TEOF; end of input in the middle of an
 * escape or a quoted symbol yields ERROR. In states L0 and L1 an unexpected
 * character is pushed back onto the stream before ERROR is reported.
 *
 * @return reference to this lexer, so that calls can be chained
 */
RegExpFromStringLexer& RegExpFromStringLexer::next() {
	char character;
	// Characters allowed in an unquoted symbol: ASCII letters and digits only.
	const auto isSymbolChar = [](char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
	};

	m_Current.value = "";

L0:
	// Start state: skip whitespace, then classify the first significant character.
	character = m_In.get();
	if(m_In.eof()) {
		m_Current.type = TokenType::TEOF;
		return *this;
	} else if(character == ' ' || character == '\n' || character == '\t') {
		goto L0;
	} else if(character == '"') {
		goto L3;
	} else if(isSymbolChar(character)) {
		m_Current.type = TokenType::SYMBOL;
		m_Current.value += character;
		goto L2;
	} else if(character == '(') {
		m_Current.type = TokenType::LPAR;
		return *this;
	} else if(character == ')') {
		m_Current.type = TokenType::RPAR;
		return *this;
	} else if(character == '+') {
		m_Current.type = TokenType::PLUS;
		return *this;
	} else if(character == '*') {
		m_Current.type = TokenType::STAR;
		return *this;
	} else if(character == '\\') {
		goto L1;
	} else {
		// Unknown character: leave it in the stream and report an error.
		m_In.unget();
		m_Current.type = TokenType::ERROR;
		return *this;
	}
L1:
	// Backslash seen outside quotes: expect 'e' (epsilon) or '0' (empty).
	character = m_In.get();
	if(m_In.eof()) {
		m_Current.type = TokenType::ERROR;
		return *this;
	} else if(character == 'e') {
		m_Current.type = TokenType::EPS;
		return *this;
	} else if(character == '0') {
		m_Current.type = TokenType::EMPTY;
		return *this;
	} else {
		m_In.unget();
		m_Current.type = TokenType::ERROR;
		return *this;
	}
L2:
	// Unquoted symbol: keep consuming letters and digits.
	character = m_In.get();
	if(m_In.eof()) {
		return *this;
	} else if(isSymbolChar(character)) {
		m_Current.value += character;
		goto L2;
	} else {
		// First character that does not belong to the symbol starts the next token.
		m_In.unget();
		return *this;
	}
L3:
	// Directly after the opening quote.
	character = m_In.get();
	if(m_In.eof()) {
		m_Current.type = TokenType::ERROR;
		return *this;
	} else if(character == '"') {
		// An empty pair of quotes denotes epsilon.
		m_Current.type = TokenType::EPS;
		return *this;
	} else if(character == '\\') {
		// Escape sequence as the first character of a quoted symbol; the
		// escaped character itself is appended in state L5.
		m_Current.type = TokenType::SYMBOL;
		goto L5;
	} else {
		// Ordinary first character of a quoted symbol.
		m_Current.type = TokenType::SYMBOL;
		m_Current.value += character;
		goto L4;
	}
L4:
	// Inside a quoted symbol: read until the closing quote.
	character = m_In.get();
	if(m_In.eof()) {
		// Unterminated quoted symbol.
		m_Current.type = TokenType::ERROR;
		return *this;
	} else if(character == '"') {
		return *this;
	} else if(character == '\\') {
		goto L5;
	} else {
		m_Current.value += character;
		goto L4;
	}
L5:
	// After a backslash inside quotes: only \" and \\ are valid escapes.
	character = m_In.get();
	if(m_In.eof()) {
		m_Current.type = TokenType::ERROR;
		return *this;
	} else if(character == '"' || character == '\\') {
		m_Current.value += character;
		goto L4;
	} else {
		m_Current.type = TokenType::ERROR;
		return *this;
	}
}

RegExpFromStringLexer::Token RegExpFromStringLexer::token() {
	return m_Current;