diff options
author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 00:02:54 +0100 |
---|---|---|
committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-02-15 00:02:54 +0100 |
commit | 2659b4595d809cbd69a77e5ff7e2fc08d225f065 (patch) | |
tree | be6a39fcf7d7070494076832a2e652ea1aa4c91e /src/core/common | |
parent | 974afd3fdc54380a43445a180263fb162e1ff2c0 (diff) |
Tidied OsxmlEventParser up, implemented correct whitespace handling, started to write unit tests for the osxml parser
Diffstat (limited to 'src/core/common')
-rw-r--r-- | src/core/common/Utils.hpp | 21 | ||||
-rw-r--r-- | src/core/common/WhitespaceHandler.hpp | 60 |
2 files changed, 79 insertions, 2 deletions
diff --git a/src/core/common/Utils.hpp b/src/core/common/Utils.hpp index 16a9136..8361973 100644 --- a/src/core/common/Utils.hpp +++ b/src/core/common/Utils.hpp @@ -120,8 +120,25 @@ public: template <class T, class Filter> static std::pair<size_t, size_t> trim(const T &s, Filter f) { + return trim(s, s.size(), f); + } + + /** + * Trims the given string or vector of chars by returning the start and end + * index. + * + * @param s is the container that should be trimmed. + * @param len is the number of elements in the container. + * @param f is a function that returns true for values that should be + * removed. + * @return start and end index. Note that "end" points at the character + * beyond the end, thus "end" minus "start" + */ + template <class T, class Filter> + static std::pair<size_t, size_t> trim(const T &s, size_t len, Filter f) + { size_t start = 0; - for (size_t i = 0; i < s.size(); i++) { + for (size_t i = 0; i < len; i++) { if (!f(s[i])) { start = i; break; @@ -129,7 +146,7 @@ public: } size_t end = 0; - for (ssize_t i = s.size() - 1; i >= static_cast<ssize_t>(start); i--) { + for (ssize_t i = len - 1; i >= static_cast<ssize_t>(start); i--) { if (!f(s[i])) { end = i + 1; break; diff --git a/src/core/common/WhitespaceHandler.hpp b/src/core/common/WhitespaceHandler.hpp index 79e0518..ed52ea3 100644 --- a/src/core/common/WhitespaceHandler.hpp +++ b/src/core/common/WhitespaceHandler.hpp @@ -98,6 +98,25 @@ public: */ void append(char c, size_t start, size_t end) { + append(c, start, end, textBuf, textStart, textEnd); + } + + /** + * Static version of PreservingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd) + { if (textBuf.empty()) { textStart = start; } @@ -130,6 +149,27 @@ public: */ void append(char c, size_t start, size_t end) { + append(c, start, end, textBuf, textStart, textEnd, whitespaceBuf); + } + + /** + * Static version of TrimmingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param whitespaceBuf is a reference at the buffer for storing whitespace + * characters. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd, std::vector<char> &whitespaceBuf) + { // Handle whitespace characters if (Utils::isWhitespace(c)) { if (!textBuf.empty()) { @@ -175,6 +215,26 @@ public: */ void append(char c, size_t start, size_t end) { + append(c, start, end, textBuf, textStart, textEnd, hasWhitespace); + } + + /** + * Static version of CollapsingWhitespaceHandler append + * + * @param c is the character that should be appended to the internal buffer. + * @param start is the start byte offset of the given character. + * @param end is the end byte offset of the given character. + * @param textBuf is a reference at the text buffer that is to be used. + * @param textStart is a reference at the text start variable that is to be + * used. + * @param textEnd is a reference at the text end variable that is to be + * used. + * @param hasWhitespace is a reference at the "hasWhitespace" flag. + */ + static void append(char c, size_t start, size_t end, + std::vector<char> &textBuf, size_t &textStart, + size_t &textEnd, bool &hasWhitespace) + { // Handle whitespace characters if (Utils::isWhitespace(c)) { if (!textBuf.empty()) { |