From 15e0f4f1460a37a668d41a35beae54f016069d9f Mon Sep 17 00:00:00 2001
From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz>
Date: Sat, 6 Jun 2015 14:43:50 +0200
Subject: [PATCH] new xmlparser

---
 alib2data/src/sax/SaxParseInterface.cpp | 97 ++++++++++++-------------
 alib2data/src/sax/SaxParseInterface.h   | 42 +----------
 alib2data/test-src/sax/SaxTest.cpp      |  2 +
 3 files changed, 52 insertions(+), 89 deletions(-)

diff --git a/alib2data/src/sax/SaxParseInterface.cpp b/alib2data/src/sax/SaxParseInterface.cpp
index 3060f72a5e..6c0a157cdb 100644
--- a/alib2data/src/sax/SaxParseInterface.cpp
+++ b/alib2data/src/sax/SaxParseInterface.cpp
@@ -17,35 +17,27 @@
 
 namespace sax {
 
-void SaxParseInterface::initSAXHandler(xmlSAXHandler& handler) {
-	memset(&handler, 0, sizeof(handler));
-	handler.initialized = XML_SAX2_MAGIC;
-
-	handler.startDocument = &sax::SaxParseInterface::startDocument;
-	handler.startElement = &sax::SaxParseInterface::startElement;
-	handler.endElement = &sax::SaxParseInterface::endElement;
-	handler.endDocument = &sax::SaxParseInterface::endDocument;
-	handler.characters = &sax::SaxParseInterface::characters;
-}
-
 void SaxParseInterface::parseMemory(const std::string& xmlIn, std::deque<Token>& out) {
-	xmlSAXHandler handler;
-	initSAXHandler(handler);
+	// Tokenizes the XML text held in xmlIn into out; throws AlibException when xmlSAXUserParse reports a non-zero result.
+	xmlParserInputBufferPtr buf = xmlParserInputBufferCreateMem (xmlIn.c_str(), xmlIn.length(), XML_CHAR_ENCODING_NONE);
+	xmlTextReaderPtr reader = xmlNewTextReader(buf, "");
 
-	int result = xmlSAXUserParseMemory(&handler, (void*) &out, xmlIn.c_str(), xmlIn.length());
-	xmlCleanupParser();
+	int result = SaxParseInterface::xmlSAXUserParse(reader, out);
+	xmlFreeTextReader(reader);
+	xmlFreeParserInputBuffer(buf); // released explicitly, after the reader that was reading from it
+	xmlCleanupCharEncodingHandlers();
 
 	if (result != 0) {
-		throw exception::AlibException("Cannot parse the XML string." + xmlIn);
+		throw exception::AlibException("Cannot parse the XML string " + xmlIn); // xmlIn is the document text, not a file name
 	}
 }
 
 void SaxParseInterface::parseFile(const std::string& filename, std::deque<Token>& out) {
-	xmlSAXHandler handler;
-	initSAXHandler(handler);
+	xmlTextReaderPtr reader = xmlNewTextReaderFilename(filename.c_str());
+
+	int result = SaxParseInterface::xmlSAXUserParse(reader, out);
 
-	int result = xmlSAXUserParseFile(&handler, (void*) &out, filename.c_str());
-	xmlCleanupParser();
+	xmlFreeTextReader(reader);
 
 	if (result != 0) {
 		throw exception::AlibException("Cannot parse the XML file " + filename);
@@ -61,38 +53,45 @@ void SaxParseInterface::parseStream(std::istream& in, std::deque<Token>& out) {
 	SaxParseInterface::parseMemory(input, out);
 }
 
-void SaxParseInterface::characters(void * userData, const xmlChar * ch, int len) {
-	std::deque<Token> &out = *((std::deque<Token>*) userData);
-	std::string tmp((const char*) ch, len);
-
-	if(! std::all_of(tmp.begin(), tmp.end(), isspace)) out.emplace_back(std::move(tmp), Token::TokenType::CHARACTER);
-}
-
-void SaxParseInterface::startDocument(void *) {
+// Reads every node from reader and appends the matching tokens to out. Returns the first xmlTextReaderRead() result other than 1; parseMemory and parseFile treat non-zero as a parse failure.
+int SaxParseInterface::xmlSAXUserParse(xmlTextReaderPtr reader, std::deque<Token>& out) {
+	int ret = xmlTextReaderRead(reader);
 	std::chrono::measurements::start("Sax Parser", std::chrono::measurements::Type::INIT);
-}
-
-void SaxParseInterface::startElement(void* userData, const xmlChar* name, const xmlChar** attrs) {
-	std::deque<Token> &out = *((std::deque<Token>*) userData);
-	out.emplace_back(Token((const char*) name, Token::TokenType::START_ELEMENT));
-
-	while(attrs && *attrs && *(attrs+1)) {
-		out.emplace_back((const char*) *attrs, Token::TokenType::START_ATTRIBUTE);
-
-		out.emplace_back((const char*) *(attrs + 1), Token::TokenType::CHARACTER);
-
-		out.emplace_back((const char*) *attrs, Token::TokenType::END_ATTRIBUTE);
-		attrs+=2;
+	while (ret == 1) {
+		xmlChar* name = xmlTextReaderName(reader);
+		xmlChar* value;
+		// Must be queried while still positioned on the element itself: once the reader has stepped onto an attribute it no longer reports the element as empty.
+		const bool emptyElement = xmlTextReaderIsEmptyElement(reader) == 1;
+		switch(xmlTextReaderNodeType(reader)) {
+			case XML_READER_TYPE_ELEMENT:
+				out.emplace_back((const char*) name, Token::TokenType::START_ELEMENT);
+				while(xmlTextReaderMoveToNextAttribute(reader) == 1) { // 0 = no further attribute, -1 = error; only 1 means an attribute was reached
+					xmlChar* attrName = xmlTextReaderName(reader);
+					xmlChar* attrValue = xmlTextReaderValue(reader);
+					out.emplace_back((const char*) attrName, Token::TokenType::START_ATTRIBUTE);
+					out.emplace_back(attrValue != NULL ? (const char*) attrValue : "", Token::TokenType::CHARACTER); // an empty attribute value may come back as NULL
+					out.emplace_back((const char*) attrName, Token::TokenType::END_ATTRIBUTE);
+					xmlFree(attrName);
+					xmlFree(attrValue);
+				}
+				if(emptyElement) out.emplace_back((const char*) name, Token::TokenType::END_ELEMENT); // <tag/> yields no separate end-element node, so close it here
+				break;
+			case XML_READER_TYPE_TEXT:
+				value = xmlTextReaderValue(reader);
+				if(value != NULL && ! std::all_of(value, value + strlen((const char*) value), isspace)) out.emplace_back((const char*) value, Token::TokenType::CHARACTER); // whitespace-only text is dropped
+				xmlFree(value);
+				break;
+			case XML_READER_TYPE_END_ELEMENT:
+				out.emplace_back((const char*) name, Token::TokenType::END_ELEMENT);
+				break;
+		}
+
+		xmlFree(name);
+
+		ret = xmlTextReaderRead(reader);
 	}
-}
-
-void SaxParseInterface::endElement(void * userData, const xmlChar * name) {
-	std::deque<Token> &out = *((std::deque<Token>*) userData);
-	out.emplace_back((const  char*) name, Token::TokenType::END_ELEMENT);
-}
-
-void SaxParseInterface::endDocument(void *) {
 	std::chrono::measurements::end();
+	return ret;
 }
 
 } /* namespace sax */
diff --git a/alib2data/src/sax/SaxParseInterface.h b/alib2data/src/sax/SaxParseInterface.h
index 4f08bff4a6..d5ec09f5ab 100644
--- a/alib2data/src/sax/SaxParseInterface.h
+++ b/alib2data/src/sax/SaxParseInterface.h
@@ -8,7 +8,7 @@
 #ifndef SAX_PARSE_INTERFACE_H_
 #define SAX_PARSE_INTERFACE_H_
 
-#include <libxml/parser.h>
+#include <libxml/xmlreader.h>
 #include <deque>
 #include "Token.h"
 
@@ -19,45 +19,7 @@ namespace sax {
  * methods for libxml SAX parser.
  */
 class SaxParseInterface {
-protected:
-	/**
-	 * Initializes the SAX parser.
-	 */
-	static void initSAXHandler(xmlSAXHandler&);
-
-	/**
-	 * Callback method called when charactes (between tags) are read.
-	 * @param userData contains list of parsed tokens
-	 * @param ch array of parsed characters
-	 * @param len length of the array
-	 */
-	static void characters(void * userData, const xmlChar * ch, int len);
-
-	/**
-	 * Callback method called when start of the document is read.
-	 */
-	static void startDocument(void * userData);
-
-	/**
-	 * Callback method called when start of the tag is read.
-	 * @param userData contains list of parsed tokens
-	 * @param name array of characters containing name of the tag
-	 * @param attrs array containing attributes (arrays of characters) of the tag
-	 */
-	static void startElement(void *userData, const xmlChar *name, const xmlChar **attrs);
-
-	/**
-	 * Callback method called when end of the tag is read.
-	 * @param userData contains list of parsed tokens
-	 * @param name array of characters containing name of the tag
-	 */
-	static void endElement(void * userData, const xmlChar * name);
-
-	/**
-	 * Callback method called when end of the document is read.
-	 */
-	static void endDocument(void * userData);
-
+	static int xmlSAXUserParse(xmlTextReaderPtr reader, std::deque<Token>& out); // appends the tokens read from reader to out; a non-zero result is treated as a parse failure by parseMemory/parseFile
 public:
 	/**
 	 * Parses the string containing XML.
diff --git a/alib2data/test-src/sax/SaxTest.cpp b/alib2data/test-src/sax/SaxTest.cpp
index 5f33b9cf59..a904c0176f 100644
--- a/alib2data/test-src/sax/SaxTest.cpp
+++ b/alib2data/test-src/sax/SaxTest.cpp
@@ -22,6 +22,8 @@ void SaxTest::testSax() {
 	std::deque<sax::Token> tokens;
 	sax::SaxParseInterface::parseMemory(tmp, tokens);
 
+	std::cout << tokens << std::endl;
+
 	std::string tmp2;
 	sax::SaxComposeInterface::printMemory(tmp2, tokens);
 
-- 
GitLab