Diffstat (limited to 'src/core/parser/utils/TokenizedData.hpp')
-rw-r--r--  src/core/parser/utils/TokenizedData.hpp  70
1 file changed, 43 insertions, 27 deletions
diff --git a/src/core/parser/utils/TokenizedData.hpp b/src/core/parser/utils/TokenizedData.hpp
index 85b80ae..b72ca02 100644
--- a/src/core/parser/utils/TokenizedData.hpp
+++ b/src/core/parser/utils/TokenizedData.hpp
@@ -36,7 +36,6 @@
#include <unordered_set>
#include <core/common/Location.hpp>
-#include <core/common/Variant.hpp>
#include <core/common/Whitespace.hpp>
#include <core/common/Token.hpp>
@@ -48,6 +47,28 @@ class TokenizedDataReader;
class TokenizedDataReaderFork;
/**
+ * Internally used structure representing a cursor within the TokenizedData
+ * stream.
+ */
+struct TokenizedDataCursor {
+ /**
+ * Position within the byte buffer.
+ */
+ size_t bufPos;
+
+ /**
+ * Position within the token mark buffer.
+ */
+ size_t markPos;
+
+ /**
+ * Default constructor. The resulting cursor points at the beginning of the
+ * stream.
+ */
+ TokenizedDataCursor() : bufPos(0), markPos(0) {}
+};
+
+/**
* The TokenizedData class stores data extracted from a user defined document.
* The data stored in TokenizedData
*/
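The new cursor bundles the position in the byte buffer with the position in the token mark buffer, so a reader can save and restore both with a single assignment (which is exactly what resetPeek() does further below). A minimal sketch, not part of the diff, with made-up offsets and the ousia namespace assumed from the _OUSIA_ include guard:

    ousia::TokenizedDataCursor read;        // default-constructed: start of the stream
    ousia::TokenizedDataCursor peek = read; // peeking begins at the read position
    peek.bufPos += 4;                       // hypothetical advance in the byte buffer
    peek.markPos += 1;                      // hypothetical advance in the mark buffer
    peek = read;                            // what resetPeek() amounts to: both fields restored at once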
@@ -88,10 +109,13 @@ public:
*
* @param data is the string that should be appended to the buffer.
* @param offsStart is the start offset in bytes in the input file.
+ * @param protect if set to true, the appended characters will not be
+ * affected by whitespace handling but will be returned as is.
* @return the current size of the internal byte buffer. The returned value
* is intended to be used for the "mark" function.
*/
- size_t append(const std::string &data, SourceOffset offsStart = 0);
+ size_t append(const std::string &data, SourceOffset offsStart = 0,
+ bool protect = false);
/**
* Appends a single character to the internal character buffer.
@@ -99,10 +123,13 @@ public:
* @param c is the character that should be appended to the buffer.
* @param offsStart is the start offset in bytes in the input file.
* @param offsEnd is the end offset in bytes in the input file.
+ * @param protect if set to true, the appended character will not be
+ * affected by whitespace handling but will be returned as is.
* @return the current size of the internal byte buffer. The returned value
* is intended to be used for the "mark" function.
*/
- size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd);
+ size_t append(char c, SourceOffset offsStart, SourceOffset offsEnd,
+ bool protect = false);
/**
* Stores a token ending at the last character of the current buffer.
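A minimal usage sketch for the extended append() overloads. The include path, the ousia namespace and the concrete offsets are assumptions made for illustration; only the append() signatures come from this header:

    #include <core/parser/utils/TokenizedData.hpp>  // path assumed from the repository layout

    void fillBuffer(ousia::TokenizedData &data)
    {
        // Regular text, subject to the reader's whitespace handling.
        size_t size = data.append("keyword", 0);
        // A single character with explicit start/end byte offsets.
        size = data.append(' ', 7, 8);
        // protect = true: the appended characters are exempt from whitespace
        // handling and will be returned as is.
        size = data.append("  raw  ", 8, true);
        (void)size;  // the returned buffer size is meant to be passed to the "mark" function
    }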
@@ -187,15 +214,16 @@ private:
/**
* Position from which the last element was read from the internal buffer.
*/
- size_t readCursor;
+ TokenizedDataCursor readCursor;
/**
* Position from which the last element was peeked from the internal buffer.
*/
- size_t peekCursor;
+ TokenizedDataCursor peekCursor;
+protected:
/**
- * Private constructor of TokenizedDataReader, taking a reference to the
+ * Protected constructor of TokenizedDataReader, taking a reference to the
* internal TokenizedDataImpl structure storing the data that is accessed by
* the reader.
*
@@ -205,8 +233,9 @@ private:
* @param peekCursor is the cursor position from which tokens and text are
* peeked.
*/
- TokenizedDataReader(std::shared_ptr<TokenizedDataImpl> impl,
- size_t readCursor, size_t peekCursor);
+ TokenizedDataReader(std::shared_ptr<const TokenizedDataImpl> impl,
+ const TokenizedDataCursor &readCursor,
+ const TokenizedDataCursor &peekCursor);
public:
/**
@@ -237,7 +266,7 @@ public:
* false if there are no more tokens.
*/
bool read(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::COLLAPSE);
+ WhitespaceMode mode = WhitespaceMode::TRIM);
/**
* Stores the next token in the given token reference, returns true if the
@@ -253,7 +282,7 @@ public:
* false if there are no more tokens.
*/
bool peek(Token &token, const TokenSet &tokens = TokenSet{},
- WhitespaceMode mode = WhitespaceMode::COLLAPSE);
+ WhitespaceMode mode = WhitespaceMode::TRIM);
/**
* Consumes the peeked tokens, the read cursor will now be at the position
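With WhitespaceMode::TRIM now being the default for both read() and peek(), callers only pass a mode when they want different behaviour. A sketch of the call pattern, assuming a reader reference is obtained elsewhere (how TokenizedDataReader instances are created is not part of this diff):

    void consumeAll(ousia::TokenizedDataReader &reader, const ousia::TokenSet &tokens)
    {
        ousia::Token token;
        while (reader.read(token, tokens)) {  // WhitespaceMode::TRIM is the new default
            // ... handle the token ...
        }
        // Look ahead without consuming; collapsing is requested explicitly here.
        if (reader.peek(token, tokens, ousia::WhitespaceMode::COLLAPSE)) {
            reader.resetPeek();  // rewind the peek cursor to the read cursor
        }
    }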
@@ -265,20 +294,6 @@ public:
* Resets the peek cursor to the position of the read cursor.
*/
void resetPeek() { peekCursor = readCursor; }
-
- /**
- * Stores the next text token in the given token reference, returns true if
- * the operation was successful (there was indeed a text token), false if
- * the next token is not a text token or there were no more tokens.
- *
- * @param token is an output parameter into which the read token will be
- * stored. The TokenId is set to Tokens::Empty if there are no more tokens.
- * @param mode is the whitespace mode that should be used when a text token
- * is returned.
- * @return a string variant with the data if there is any data or a nullptr
- * variant if there is no text.
- */
- Variant text(WhitespaceMode mode = WhitespaceMode::COLLAPSE);
};
/**
@@ -309,8 +324,9 @@ private:
* peeked.
*/
TokenizedDataReaderFork(TokenizedDataReader &parent,
- std::shared_ptr<TokenizedDataImpl> impl,
- size_t readCursor, size_t peekCursor)
+ std::shared_ptr<const TokenizedDataImpl> impl,
+ const TokenizedDataCursor &readCursor,
+ const TokenizedDataCursor &peekCursor)
: TokenizedDataReader(impl, readCursor, peekCursor), parent(parent)
{
}
@@ -320,7 +336,7 @@ public:
* Commits the read/peek progress to the underlying parent.
*/
void commit() { parent = *this; }
-}
+};
}
#endif /* _OUSIA_TOKENIZED_DATA_HPP_ */
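Finally, a hedged sketch of the fork/commit pattern that the cursor-based constructors enable. The fork() factory used below does not appear in this diff and is purely an assumption; commit() and its copy-back semantics are taken from the class above:

    void tryRead(ousia::TokenizedDataReader &reader)
    {
        // Fork the reader (assumed factory method, not shown in this diff),
        // read tentatively, and only commit the cursors on success.
        ousia::TokenizedDataReaderFork fork = reader.fork();
        ousia::Token token;
        if (fork.read(token)) {
            fork.commit();  // assigns the fork's state back to the parent reader
        }
        // Without commit(), the parent reader's cursors remain untouched.
    }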