From 2659b4595d809cbd69a77e5ff7e2fc08d225f065 Mon Sep 17 00:00:00 2001 From: Andreas Stöckel Date: Sun, 15 Feb 2015 00:02:54 +0100 Subject: Tidied OsxmlEventParser up, implemented correct whitespace handling, started to write unit tests for the osxml parser --- src/core/common/Utils.hpp | 21 ++++++++++-- src/core/common/WhitespaceHandler.hpp | 60 +++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) (limited to 'src/core') diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 16a9136..8361973 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -119,9 +119,26 @@ public: */ template static std::pair trim(const T &s, Filter f) + { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template + static std::pair trim(const T &s, size_t len, Filter f) { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +146,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast(start); i--) { + for (ssize_t i = len - 1; i >= static_cast(start); i--) { if (!f(s[i])) { end = i + 1; break; diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 79e0518..ed52ea3 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -97,6 +97,25 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd) { if (textBuf.empty()) { textStart = start; @@ -129,6 +148,27 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, std::vector &whitespaceBuf) { // Handle whitespace characters if (Utils::isWhitespace(c)) { @@ -174,6 +214,26 @@ public: * @param end is the end byte offset of the given character. */ void append(char c, size_t start, size_t end) + { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. + */ + static void append(char c, size_t start, size_t end, + std::vector &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) { // Handle whitespace characters if (Utils::isWhitespace(c)) { -- cgit v1.2.3