summaryrefslogtreecommitdiff
path: root/src/core/parser
diff options
context:
space:
mode:
author Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2015-04-12 18:47:29 +0200
committer Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> 2016-04-25 22:24:16 +0200
commit 667d9c4a082552fb64c5ffe7b0bd6212c8a8b1b3 (patch)
tree 100e8e3fbd86970dec9ef97c773419ac2bba291b /src/core/parser
parent 0884afe16263a110597671f60dcb4ff7df66f456 (diff)
Implement endAtWhitespace flag which tells TokenizedDataReader to stop reading data after the first whitespace character
Diffstat (limited to 'src/core/parser')
-rw-r--r-- src/core/parser/utils/TokenizedData.cpp 30
-rw-r--r-- src/core/parser/utils/TokenizedData.hpp 12
2 files changed, 34 insertions, 8 deletions
diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp
index 276cd54..7c7d4a7 100644
--- a/src/core/parser/utils/TokenizedData.cpp
+++ b/src/core/parser/utils/TokenizedData.cpp
@@ -367,11 +367,13 @@ public:
* @param cursor is the position in the character buffer from which on the
* next token should be read. The cursor will be updated to the position
* beyond the returned token.
+ * @param endAtWhitespace if true, only delivers data up to the first
+ * whitespace character that follows a non-whitespace character.
* @return true if a token was returned, false if no more tokens are
* available.
*/
bool next(Token &token, WhitespaceMode mode, const TokenSet &tokens,
- TokenizedDataCursor &cursor) const
+ TokenizedDataCursor &cursor, bool endAtWhitespace) const
{
// Some variables for convenient access
size_t &bufPos = cursor.bufPos;
@@ -394,12 +396,28 @@ public:
// Calculate the buffer start and end character, based on the returned
// TokenMark instance
- const size_t end = (it != marks.end()) ? it->bufStart : buf.size();
+ size_t end = (it != marks.end()) ? it->bufStart : buf.size();
// Depending on the whitespace mode, fetch all the data between the
// cursor position and the calculated end position and return a token
// containing that data.
if (bufPos < end && bufPos < buf.size()) {
+ // If endAtWhitespace is set to true, limit copying to the first
+ // whitespace character after non-whitespace
+ if (endAtWhitespace) {
+ bool hasNonWhitespace = false;
+ for (size_t i = bufPos; i < end; i++) {
+ const bool isWhitespace = Utils::isWhitespace(buf[i]);
+ if (isWhitespace) {
+ if (hasNonWhitespace) {
+ end = i;
+ break;
+ }
+ } else {
+ hasNonWhitespace = true;
+ }
+ }
+ }
switch (mode) {
case WhitespaceMode::PRESERVE: {
token = Token(
@@ -685,15 +703,15 @@ bool TokenizedDataReader::atEnd() const
}
bool TokenizedDataReader::read(Token &token, const TokenSet &tokens,
- WhitespaceMode mode)
+ WhitespaceMode mode, bool endAtWhitespace)
{
peekCursor = readCursor;
- return impl->next(token, mode, tokens, readCursor);
+ return impl->next(token, mode, tokens, readCursor, endAtWhitespace);
}
bool TokenizedDataReader::peek(Token &token, const TokenSet &tokens,
- WhitespaceMode mode)
+ WhitespaceMode mode, bool endAtWhitespace)
{
- return impl->next(token, mode, tokens, peekCursor);
+ return impl->next(token, mode, tokens, peekCursor, endAtWhitespace);
}
}
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index 95af95e..83821d7 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -307,11 +307,15 @@ public:
* enabled tokens.
* @param mode is the whitespace mode that should be used when a text token
* is returned.
+ * @param endAtWhitespace if true, only delivers data until the first
+ * whitespace character after a sequence of non-whitespace characters. Does
+ * not affect the delivery of non-data tokens.
* @return true if the operation was successful and there is a next token,
* false if there are no more tokens.
*/
bool read(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::TRIM);
+ WhitespaceMode mode = WhitespaceMode::TRIM,
+ bool endAtWhitespace = false);
/**
* Stores the next token in the given token reference, returns true if the
@@ -323,11 +327,15 @@ public:
* enabled tokens.
* @param mode is the whitespace mode that should be used when a text token
* is returned.
+ * @param endAtWhitespace if true, only delivers data until the first
+ * whitespace character after a sequence of non-whitespace characters. Does
+ * not affect the delivery of non-data tokens.
* @return true if the operation was successful and there is a next token,
* false if there are no more tokens.
*/
bool peek(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::TRIM);
+ WhitespaceMode mode = WhitespaceMode::TRIM,
+ bool endAtWhitespace = false);
/**
* Consumes the peeked tokens, the read cursor will now be at the position