From 15e0f4f1460a37a668d41a35beae54f016069d9f Mon Sep 17 00:00:00 2001 From: Jan Travnicek <Jan.Travnicek@fit.cvut.cz> Date: Sat, 6 Jun 2015 14:43:50 +0200 Subject: [PATCH] new xmlparser --- alib2data/src/sax/SaxParseInterface.cpp | 97 ++++++++++++------------- alib2data/src/sax/SaxParseInterface.h | 42 +---------- alib2data/test-src/sax/SaxTest.cpp | 2 + 3 files changed, 52 insertions(+), 89 deletions(-) diff --git a/alib2data/src/sax/SaxParseInterface.cpp b/alib2data/src/sax/SaxParseInterface.cpp index 3060f72a5e..6c0a157cdb 100644 --- a/alib2data/src/sax/SaxParseInterface.cpp +++ b/alib2data/src/sax/SaxParseInterface.cpp @@ -17,35 +17,27 @@ namespace sax { -void SaxParseInterface::initSAXHandler(xmlSAXHandler& handler) { - memset(&handler, 0, sizeof(handler)); - handler.initialized = XML_SAX2_MAGIC; - - handler.startDocument = &sax::SaxParseInterface::startDocument; - handler.startElement = &sax::SaxParseInterface::startElement; - handler.endElement = &sax::SaxParseInterface::endElement; - handler.endDocument = &sax::SaxParseInterface::endDocument; - handler.characters = &sax::SaxParseInterface::characters; -} - void SaxParseInterface::parseMemory(const std::string& xmlIn, std::deque<Token>& out) { - xmlSAXHandler handler; - initSAXHandler(handler); + xmlParserInputBufferPtr buf = xmlParserInputBufferCreateMem (xmlIn.c_str(), xmlIn.length(), XML_CHAR_ENCODING_NONE); + xmlTextReaderPtr reader = xmlNewTextReader(buf, ""); - int result = xmlSAXUserParseMemory(&handler, (void*) &out, xmlIn.c_str(), xmlIn.length()); - xmlCleanupParser(); + int result = SaxParseInterface::xmlSAXUserParse(reader, out); + + xmlFreeTextReader(reader); + xmlFreeParserInputBuffer(buf); + xmlCleanupCharEncodingHandlers(); if (result != 0) { - throw exception::AlibException("Cannot parse the XML string." 
+ xmlIn); + throw exception::AlibException("Cannot parse the XML file " + xmlIn); } } void SaxParseInterface::parseFile(const std::string& filename, std::deque<Token>& out) { - xmlSAXHandler handler; - initSAXHandler(handler); + xmlTextReaderPtr reader = xmlNewTextReaderFilename(filename.c_str()); + + int result = SaxParseInterface::xmlSAXUserParse(reader, out); - int result = xmlSAXUserParseFile(&handler, (void*) &out, filename.c_str()); - xmlCleanupParser(); + xmlFreeTextReader(reader); if (result != 0) { throw exception::AlibException("Cannot parse the XML file " + filename); @@ -61,38 +53,45 @@ void SaxParseInterface::parseStream(std::istream& in, std::deque<Token>& out) { SaxParseInterface::parseMemory(input, out); } -void SaxParseInterface::characters(void * userData, const xmlChar * ch, int len) { - std::deque<Token> &out = *((std::deque<Token>*) userData); - std::string tmp((const char*) ch, len); - - if(! std::all_of(tmp.begin(), tmp.end(), isspace)) out.emplace_back(std::move(tmp), Token::TokenType::CHARACTER); -} - -void SaxParseInterface::startDocument(void *) { +int SaxParseInterface::xmlSAXUserParse(xmlTextReaderPtr reader, std::deque<Token>& out) { + int ret = xmlTextReaderRead(reader); std::chrono::measurements::start("Sax Parser", std::chrono::measurements::Type::INIT); -} - -void SaxParseInterface::startElement(void* userData, const xmlChar* name, const xmlChar** attrs) { - std::deque<Token> &out = *((std::deque<Token>*) userData); - out.emplace_back(Token((const char*) name, Token::TokenType::START_ELEMENT)); - - while(attrs && *attrs && *(attrs+1)) { - out.emplace_back((const char*) *attrs, Token::TokenType::START_ATTRIBUTE); - - out.emplace_back((const char*) *(attrs + 1), Token::TokenType::CHARACTER); - - out.emplace_back((const char*) *attrs, Token::TokenType::END_ATTRIBUTE); - attrs+=2; + while (ret == 1) { + xmlChar* name = xmlTextReaderName(reader); + xmlChar* value; + + switch(xmlTextReaderNodeType(reader)) { + case 1: // START_ELEMENT + 
+ out.emplace_back((const char*) name, Token::TokenType::START_ELEMENT); + while(xmlTextReaderMoveToNextAttribute(reader)) { + xmlChar* attrName = xmlTextReaderName(reader); + xmlChar* attrValue = xmlTextReaderValue(reader); + + out.emplace_back((const char*) attrName, Token::TokenType::START_ATTRIBUTE); + out.emplace_back((const char*) attrValue, Token::TokenType::CHARACTER); + out.emplace_back((const char*) attrName, Token::TokenType::END_ATTRIBUTE); + + xmlFree(attrName); + xmlFree(attrValue); + } + if(xmlTextReaderIsEmptyElement(reader)) out.emplace_back((const char*) name, Token::TokenType::END_ELEMENT); + break; + case 3: //CHARACTER + value = xmlTextReaderValue(reader); + if(! std::all_of(value, value + strlen((const char*) value), isspace)) out.emplace_back((const char*) value, Token::TokenType::CHARACTER); + xmlFree(value); + break; + case 15: //END_ELEMENT + out.emplace_back((const char*) name, Token::TokenType::END_ELEMENT); + break; + } + + xmlFree(name); + + ret = xmlTextReaderRead(reader); } -} - -void SaxParseInterface::endElement(void * userData, const xmlChar * name) { - std::deque<Token> &out = *((std::deque<Token>*) userData); - out.emplace_back((const char*) name, Token::TokenType::END_ELEMENT); -} - -void SaxParseInterface::endDocument(void *) { std::chrono::measurements::end(); + return ret; } } /* namespace sax */ diff --git a/alib2data/src/sax/SaxParseInterface.h b/alib2data/src/sax/SaxParseInterface.h index 4f08bff4a6..d5ec09f5ab 100644 --- a/alib2data/src/sax/SaxParseInterface.h +++ b/alib2data/src/sax/SaxParseInterface.h @@ -8,7 +8,7 @@ #ifndef SAX_PARSE_INTERFACE_H_ #define SAX_PARSE_INTERFACE_H_ -#include <libxml/parser.h> +#include <libxml/xmlreader.h> #include <deque> #include "Token.h" @@ -19,45 +19,7 @@ namespace sax { * methods for libxml SAX parser. */ class SaxParseInterface { -protected: - /** - * Initializes the SAX parser.
- */ - static void initSAXHandler(xmlSAXHandler&); - - /** - * Callback method called when charactes (between tags) are read. - * @param userData contains list of parsed tokens - * @param ch array of parsed characters - * @param len length of the array - */ - static void characters(void * userData, const xmlChar * ch, int len); - - /** - * Callback method called when start of the document is read. - */ - static void startDocument(void * userData); - - /** - * Callback method called when start of the tag is read. - * @param userData contains list of parsed tokens - * @param name array of characters containing name of the tag - * @param attrs array containing attributes (arrays of characters) of the tag - */ - static void startElement(void *userData, const xmlChar *name, const xmlChar **attrs); - - /** - * Callback method called when end of the tag is read. - * @param userData contains list of parsed tokens - * @param name array of characters containing name of the tag - */ - static void endElement(void * userData, const xmlChar * name); - - /** - * Callback method called when end of the document is read. - */ - static void endDocument(void * userData); - + static int xmlSAXUserParse(xmlTextReaderPtr writer, std::deque<Token>& out); public: /** * Parses the string containing XML. diff --git a/alib2data/test-src/sax/SaxTest.cpp b/alib2data/test-src/sax/SaxTest.cpp index 5f33b9cf59..a904c0176f 100644 --- a/alib2data/test-src/sax/SaxTest.cpp +++ b/alib2data/test-src/sax/SaxTest.cpp @@ -22,6 +22,8 @@ void SaxTest::testSax() { std::deque<sax::Token> tokens; sax::SaxParseInterface::parseMemory(tmp, tokens); + std::cout << tokens << std::endl; + std::string tmp2; sax::SaxComposeInterface::printMemory(tmp2, tokens); -- GitLab