diff options
| author | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2015-04-12 18:47:29 +0200 | 
|---|---|---|
| committer | Andreas Stöckel <astoecke@techfak.uni-bielefeld.de> | 2016-04-25 22:24:16 +0200 | 
| commit | 667d9c4a082552fb64c5ffe7b0bd6212c8a8b1b3 (patch) | |
| tree | 100e8e3fbd86970dec9ef97c773419ac2bba291b /src | |
| parent | 0884afe16263a110597671f60dcb4ff7df66f456 (diff) | |
Implement the endAtWhitespace flag, which tells TokenizedDataReader to stop reading data at the first whitespace character that follows a sequence of non-whitespace characters
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/parser/utils/TokenizedData.cpp | 30 | ||||
| -rw-r--r-- | src/core/parser/utils/TokenizedData.hpp | 12 | 
2 files changed, 34 insertions, 8 deletions
| diff --git a/src/core/parser/utils/TokenizedData.cpp b/src/core/parser/utils/TokenizedData.cpp index 276cd54..7c7d4a7 100644 --- a/src/core/parser/utils/TokenizedData.cpp +++ b/src/core/parser/utils/TokenizedData.cpp @@ -367,11 +367,13 @@ public:  	 * @param cursor is the position in the character buffer from which on the  	 * next token should be read. The cursor will be updated to the position  	 * beyond the returned token. +	 * @param endAtWhitespace if true, only delivers data up to the next +	 * whitespace.  	 * @return true if a token was returned, false if no more tokens are  	 * available.  	 */  	bool next(Token &token, WhitespaceMode mode, const TokenSet &tokens, -	          TokenizedDataCursor &cursor) const +	          TokenizedDataCursor &cursor, bool endAtWhitespace) const  	{  		// Some variables for convenient access  		size_t &bufPos = cursor.bufPos; @@ -394,12 +396,28 @@ public:  		// Calculate the buffer start and end character, based on the returned  		// TokenMark instance -		const size_t end = (it != marks.end()) ? it->bufStart : buf.size(); +		size_t end = (it != marks.end()) ? it->bufStart : buf.size();  		// Depending on the whitespace mode, fetch all the data between the  		// cursor position and the calculated end position and return a token  		// containing that data.  		
if (bufPos < end && bufPos < buf.size()) { +			// If endAtWhitespace is set to true, limit copying to the the first +			// whitespace character after non-whitespace +			if (endAtWhitespace) { +				bool hasNonWhitespace = false; +				for (size_t i = bufPos; i < end; i++) { +					const bool isWhitespace = Utils::isWhitespace(buf[i]); +					if (isWhitespace) { +						if (hasNonWhitespace) { +							end = i; +							break; +						} +					} else { +						hasNonWhitespace = true; +					} +				} +			}  			switch (mode) {  				case WhitespaceMode::PRESERVE: {  					token = Token( @@ -685,15 +703,15 @@ bool TokenizedDataReader::atEnd() const  }  bool TokenizedDataReader::read(Token &token, const TokenSet &tokens, -                               WhitespaceMode mode) +                               WhitespaceMode mode, bool endAtWhitespace)  {  	peekCursor = readCursor; -	return impl->next(token, mode, tokens, readCursor); +	return impl->next(token, mode, tokens, readCursor, endAtWhitespace);  }  bool TokenizedDataReader::peek(Token &token, const TokenSet &tokens, -                               WhitespaceMode mode) +                               WhitespaceMode mode, bool endAtWhitespace)  { -	return impl->next(token, mode, tokens, peekCursor); +	return impl->next(token, mode, tokens, peekCursor, endAtWhitespace);  }  } diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp index 95af95e..83821d7 100644 --- a/src/core/parser/utils/TokenizedData.hpp +++ b/src/core/parser/utils/TokenizedData.hpp @@ -307,11 +307,15 @@ public:  	 * enabled tokens.  	 * @param mode is the whitespace mode that should be used when a text token  	 * is returned. +	 * @param endAtWhitespace if true, only delivers data until the first +	 * whitespace character after a sequence of non-whitespace characters. Does +	 * not affect the delivery of non-data tokens.  	 
* @return true if the operation was successful and there is a next token,  	 * false if there are no more tokens.  	 */  	bool read(Token &token, const TokenSet &tokens = TokenSet{}, -	          WhitespaceMode mode = WhitespaceMode::TRIM); +	          WhitespaceMode mode = WhitespaceMode::TRIM, +	          bool endAtWhitespace = false);  	/**  	 * Stores the next token in the given token reference, returns true if the @@ -323,11 +327,15 @@ public:  	 * enabled tokens.  	 * @param mode is the whitespace mode that should be used when a text token  	 * is returned. +	 * @param endAtWhitespace if true, only delivers data until the first +	 * whitespace character after a sequence of non-whitespace characters. Does +	 * not affect the delivery of non-data tokens.  	 * @return true if the operation was successful and there is a next token,  	 * false if there are no more tokens.  	 */  	bool peek(Token &token, const TokenSet &tokens = TokenSet{}, -	          WhitespaceMode mode = WhitespaceMode::TRIM); +	          WhitespaceMode mode = WhitespaceMode::TRIM, +	          bool endAtWhitespace = false);  	/**  	 * Consumes the peeked tokens, the read cursor will now be at the position | 
