/* Ousía Copyright (C) 2015 Benjamin Paaßen, Andreas Stöckel This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include "TestXmlParser.hpp" namespace ousia { namespace test { /* Class XmlNode */ std::string XmlNode::path() { std::string p; if (!parent.expired()) { std::shared_ptr parentPtr = parent.lock(); if (parentPtr.get() != nullptr) { p = parentPtr->path() + "/"; } } return p + name; } bool XmlNode::compareTo(Logger &logger, std::shared_ptr other, std::set &errExpected, std::set &errActual) { bool ok = true; // Compare name and text if (name != other->name) { logger.fail(path() + ": names differ, expected \"" + name + "\", but got \"" + other->name + "\""); ok = false; } if (text != other->text) { logger.fail(path() + ": texts differ, expected \"" + text + "\", but got \"" + other->text + "\""); ok = false; } // Compare the attributes if (attributes.size() != other->attributes.size()) { logger.fail( path() + ": attribute count differs, expected " + std::to_string(attributes.size()) + " attributes, but got " + std::to_string(other->attributes.size()) + " attributes"); ok = false; } for (const auto &attribute : attributes) { auto it = other->attributes.find(attribute.first); if (it == other->attributes.end()) { logger.fail(path() + ": attribute \"" + attribute.first + "\" is missing in actual output"); ok = false; } else if (it->second != attribute.second) { logger.fail(path() + ": expected \"" + attribute.second + "\" for attribute \"" + attribute.first + "\" but got \"" + it->second + "\""); ok = false; } } // Compare the children if (children.size() != other->children.size()) { logger.fail(path() + ": children count differs, expected " + std::to_string(children.size()) + " children, but got " + std::to_string(other->children.size()) + " children"); ok = false; } // Store the actual position if (!ok) { logger.fail("Location in expected output is " + std::to_string(line) + ":" + std::to_string(column) + ", location in actual output is " + std::to_string(other->line) + ":" + std::to_string(other->column)); errExpected.insert(line); errActual.insert(other->line); } // Compare the children const size_t count = std::min(children.size(), other->children.size()); for (size_t i = 0; i < count; i++) { ok = children[i]->compareTo(logger, other->children[i], errExpected, errActual) & ok; } return ok; } static const std::vector IGNORE_TAGS{"import"}; static const std::vector IGNORE_ATTRS{"xmlns"}; static bool checkIgnore(const std::vector &ignoreList, const std::string &name) { for (const auto &s : ignoreList) { if (Utils::startsWith(s, name)) { return true; } } return false; } /** * Callback called by eXpat whenever a start handler is reached. */ static void xmlStartElementHandler(void *ref, const XML_Char *name, const XML_Char **attrs) { XML_Parser parser = static_cast(ref); std::shared_ptr &node = *(static_cast *>(XML_GetUserData(parser))); // Store the child node in the parent node, check for ignoring nodes once // an element ends std::shared_ptr childNode = std::make_shared(node, name); childNode->line = XML_GetCurrentLineNumber(parser); childNode->column = XML_GetCurrentColumnNumber(parser); node->children.push_back(childNode); node = childNode; // Assemble the node attributes const XML_Char **attr = attrs; while (*attr) { // Convert the C string to a std::string const std::string key{*(attr++)}; const std::string value{*(attr++)}; // Ignore certain attributes if (!checkIgnore(IGNORE_ATTRS, key)) { childNode->attributes.emplace(key, value); } } } static void xmlEndElementHandler(void *ref, const XML_Char *name) { XML_Parser parser = static_cast(ref); std::shared_ptr &node = *(static_cast *>(XML_GetUserData(parser))); // Set the current node to the parent node node = node->parent.lock(); // If the child node should have been ignored, remove it now if (checkIgnore(IGNORE_TAGS, name)) { node->children.pop_back(); } } static void xmlCharacterDataHandler(void *ref, const XML_Char *s, int len) { // Fetch a reference at the currently active node XML_Parser parser = static_cast(ref); std::shared_ptr &node = *(static_cast *>(XML_GetUserData(parser))); // Store a new text node in the current node std::string text = std::string(s, len); if (Utils::hasNonWhitepaceChar(text)) { std::shared_ptr textNode = std::make_shared(node, "$text"); textNode->text = text; textNode->line = XML_GetCurrentLineNumber(parser); textNode->column = XML_GetCurrentColumnNumber(parser); node->children.push_back(textNode); } } std::pair> parseXml( Logger &logger, std::istream &is, std::set &errLines) { std::shared_ptr root = std::make_shared(); std::shared_ptr currentNode = root; XML_Parser parser = XML_ParserCreate("UTF-8"); // Pass the reference to this parser instance to the XML handler XML_UseParserAsHandlerArg(parser); XML_SetUserData(parser, ¤tNode); // Set the callback functions XML_SetStartElementHandler(parser, xmlStartElementHandler); XML_SetEndElementHandler(parser, xmlEndElementHandler); XML_SetCharacterDataHandler(parser, xmlCharacterDataHandler); // Feed data into expat while there is data to process constexpr size_t BUFFER_SIZE = 64 * 1024; bool ok = true; while (true) { // Fetch a buffer from expat for the input data char *buf = static_cast(XML_GetBuffer(parser, BUFFER_SIZE)); if (!buf) { logger.fail("Cannot parse XML, out of memory"); ok = false; break; } // Read into the buffer size_t bytesRead = is.read(buf, BUFFER_SIZE).gcount(); // Parse the data and handle any XML error as exception if (!XML_ParseBuffer(parser, bytesRead, bytesRead == 0)) { int line = XML_GetCurrentLineNumber(parser); int column = XML_GetCurrentColumnNumber(parser); logger.fail("Cannot parse XML, " + std::string(XML_ErrorString(XML_GetErrorCode(parser))) + ", at line " + std::to_string(line) + ", column " + std::to_string(column)); errLines.insert(line); ok = false; break; } // Abort once there are no more bytes in the stream if (bytesRead == 0) { break; } } XML_ParserFree(parser); return std::pair>(ok, root); } } }