From 667d9c4a082552fb64c5ffe7b0bd6212c8a8b1b3 Mon Sep 17 00:00:00 2001
From: Andreas Stöckel
Date: Sun, 12 Apr 2015 18:47:29 +0200
Subject: Implement endAtWhitespace flag which tells TokenizedDataReader to
 stop reading data after the first whitespace character

---
 src/core/parser/utils/TokenizedData.cpp | 30 ++++++++++++++++++++++++------
 src/core/parser/utils/TokenizedData.hpp | 12 ++++++++++--
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp
index 276cd54..7c7d4a7 100644
--- a/src/core/parser/utils/TokenizedData.cpp
+++ b/src/core/parser/utils/TokenizedData.cpp
@@ -367,11 +367,13 @@ public:
 	 * @param cursor is the position in the character buffer from which on the
 	 * next token should be read. The cursor will be updated to the position
 	 * beyond the returned token.
+	 * @param endAtWhitespace if true, only delivers data up to the next
+	 * whitespace.
 	 * @return true if a token was returned, false if no more tokens are
 	 * available.
 	 */
 	bool next(Token &token, WhitespaceMode mode, const TokenSet &tokens,
-	          TokenizedDataCursor &cursor) const
+	          TokenizedDataCursor &cursor, bool endAtWhitespace) const
 	{
 		// Some variables for convenient access
 		size_t &bufPos = cursor.bufPos;
@@ -394,12 +396,28 @@ public:
 
 		// Calculate the buffer start and end character, based on the returned
 		// TokenMark instance
-		const size_t end = (it != marks.end()) ? it->bufStart : buf.size();
+		size_t end = (it != marks.end()) ? it->bufStart : buf.size();
 
 		// Depending on the whitespace mode, fetch all the data between the
 		// cursor position and the calculated end position and return a token
 		// containing that data.
 		if (bufPos < end && bufPos < buf.size()) {
+			// If endAtWhitespace is set to true, limit copying to the first
+			// whitespace character after non-whitespace
+			if (endAtWhitespace) {
+				bool hasNonWhitespace = false;
+				for (size_t i = bufPos; i < end; i++) {
+					const bool isWhitespace = Utils::isWhitespace(buf[i]);
+					if (isWhitespace) {
+						if (hasNonWhitespace) {
+							end = i;
+							break;
+						}
+					} else {
+						hasNonWhitespace = true;
+					}
+				}
+			}
 			switch (mode) {
 				case WhitespaceMode::PRESERVE: {
 					token = Token(
@@ -685,15 +703,15 @@ bool TokenizedDataReader::atEnd() const
 }
 
 bool TokenizedDataReader::read(Token &token, const TokenSet &tokens,
-                               WhitespaceMode mode)
+                               WhitespaceMode mode, bool endAtWhitespace)
 {
 	peekCursor = readCursor;
-	return impl->next(token, mode, tokens, readCursor);
+	return impl->next(token, mode, tokens, readCursor, endAtWhitespace);
 }
 
 bool TokenizedDataReader::peek(Token &token, const TokenSet &tokens,
-                               WhitespaceMode mode)
+                               WhitespaceMode mode, bool endAtWhitespace)
 {
-	return impl->next(token, mode, tokens, peekCursor);
+	return impl->next(token, mode, tokens, peekCursor, endAtWhitespace);
 }
 }

diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index 95af95e..83821d7 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -307,11 +307,15 @@ public:
 	 * enabled tokens.
 	 * @param mode is the whitespace mode that should be used when a text token
 	 * is returned.
+	 * @param endAtWhitespace if true, only delivers data until the first
+	 * whitespace character after a sequence of non-whitespace characters. Does
+	 * not affect the delivery of non-data tokens.
 	 * @return true if the operation was successful and there is a next token,
 	 * false if there are no more tokens.
 	 */
 	bool read(Token &token, const TokenSet &tokens = TokenSet{},
-	          WhitespaceMode mode = WhitespaceMode::TRIM);
+	          WhitespaceMode mode = WhitespaceMode::TRIM,
+	          bool endAtWhitespace = false);
 
 	/**
 	 * Stores the next token in the given token reference, returns true if the
@@ -323,11 +327,15 @@ public:
 	 * enabled tokens.
 	 * @param mode is the whitespace mode that should be used when a text token
 	 * is returned.
+	 * @param endAtWhitespace if true, only delivers data until the first
+	 * whitespace character after a sequence of non-whitespace characters. Does
+	 * not affect the delivery of non-data tokens.
 	 * @return true if the operation was successful and there is a next token,
 	 * false if there are no more tokens.
 	 */
 	bool peek(Token &token, const TokenSet &tokens = TokenSet{},
-	          WhitespaceMode mode = WhitespaceMode::TRIM);
+	          WhitespaceMode mode = WhitespaceMode::TRIM,
+	          bool endAtWhitespace = false);
 
 	/**
 	 * Consumes the peeked tokens, the read cursor will now be at the position
-- 
cgit v1.2.3
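
Below is a minimal usage sketch (not part of the patch) showing how the new flag might be called from client code. The include path, the "ousia" namespace, and the way a TokenizedDataReader instance is obtained are assumptions about the surrounding codebase; only Token, TokenSet, WhitespaceMode and the extended read() signature appear in the diff above.

// Usage sketch, not part of the patch. The include path and namespace below
// are assumptions; how the reader instance is obtained is left to the caller.
#include <core/parser/utils/TokenizedData.hpp>

using namespace ousia;

// Reads whitespace-delimited chunks of character data. With
// endAtWhitespace = true, each successful read() stops at the first
// whitespace character that follows a run of non-whitespace, so text data
// is delivered roughly one "word" at a time. Non-data tokens listed in the
// token set are returned as before and are not affected by the flag.
void readWords(TokenizedDataReader &reader)
{
	Token token;
	while (reader.read(token, TokenSet{}, WhitespaceMode::TRIM,
	                   /* endAtWhitespace = */ true)) {
		// Process "token" here; data accessors are not shown in this patch.
	}
}

Since endAtWhitespace defaults to false in both read() and peek(), existing callers keep the previous behaviour and compile unchanged.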